Module: Crawlscope::Url

Defined in:
lib/crawlscope/url.rb

Class Method Summary collapse

Class Method Details

.normalize(url, base_url:) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/crawlscope/url.rb', line 9

# Reduces +url+ to a canonical "scheme://host[:port]/path" string.
#
# References lacking a scheme or a host are resolved against +base_url+
# first, so both path-relative ("/a") and protocol-relative ("//host/a")
# links become absolute. The query string and fragment are dropped, a single
# trailing slash is removed, an empty path becomes "/", and the port is
# omitted when it is the default for the scheme.
#
# @param url [#to_s] the URL or relative reference to normalize
# @param base_url [#to_s] the URL that relative references resolve against
# @return [String] the normalized URL, or +url.to_s+ unchanged when it cannot
#   be parsed or resolved (for example when +base_url+ is itself relative)
def normalize(url, base_url:)
  raw = url.to_s
  uri = URI.parse(raw)
  # A protocol-relative reference has a host but no scheme, so checking the
  # host alone would leave the scheme blank in the result.
  uri = URI.join(base_url.to_s, raw) if uri.relative? || uri.host.nil?

  path = uri.path.to_s.chomp("/")
  path = "/" if path.empty?

  authority = uri.host.to_s
  authority = "#{authority}:#{uri.port}" if uri.port && uri.port != uri.default_port

  "#{uri.scheme}://#{authority}#{path}"
rescue URI::Error
  # URI.join raises URI::BadURIError (not InvalidURIError) when both sides
  # are relative; rescuing the common parent keeps this a best-effort call.
  url.to_s
end

.normalize_for_base(url, base_url:) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/crawlscope/url.rb', line 26

# Normalizes +url+ and re-homes it onto the origin of +base_url+.
#
# After a first normalization pass, the scheme, host and port are replaced
# with those of +base_url+ (when the base names a host), and the result is
# normalized once more.
#
# @param url [#to_s] the URL or relative reference to normalize
# @param base_url [#to_s] the URL whose scheme, host and port are applied
# @return [String] the normalized URL on the base origin, or +url.to_s+
#   unchanged when any step fails to parse or resolve
def normalize_for_base(url, base_url:)
  uri = URI.parse(normalize(url, base_url: base_url))
  base_uri = URI.parse(base_url.to_s)
  # Leave the origin untouched when the base has no host to borrow.
  unless base_uri.host.to_s.empty?
    uri.scheme = base_uri.scheme
    uri.host = base_uri.host
    uri.port = base_uri.port
  end

  # Second pass: the swapped-in port may now be the default for the new
  # scheme, and normalize omits default ports.
  normalize(uri.to_s, base_url: base_url)
rescue URI::Error
  # Also covers URI::BadURIError, which URI.join raises for a relative base.
  url.to_s
end

.path(url) ⇒ Object



40
41
42
43
44
45
46
47
48
# File 'lib/crawlscope/url.rb', line 40

# Extracts the path component of +url+ with one trailing slash removed.
#
# @param url [#to_s] the URL to inspect
# @return [String, nil] the path ("/" when it would otherwise be empty), or
#   nil when +url+ cannot be parsed
def path(url)
  trimmed = URI.parse(url.to_s).path.to_s.chomp("/")
  trimmed.empty? ? "/" : trimmed
rescue URI::InvalidURIError
  nil
end

.remote?(value) ⇒ Boolean

Returns:

  • (Boolean)


50
51
52
53
54
55
# File 'lib/crawlscope/url.rb', line 50

# Tells whether +value+ parses as a URI carrying both a scheme and a host.
#
# @param value [#to_s] the candidate URL
# @return [Boolean] true when a scheme and a host are both present; false
#   otherwise, including when +value+ cannot be parsed
def remote?(value)
  parsed = URI.parse(value.to_s)
  return false if parsed.scheme.nil?

  !parsed.host.nil?
rescue URI::InvalidURIError
  false
end