Module: Crawlscope::Url

Defined in:
lib/crawlscope/url.rb

Class Method Summary collapse

Class Method Details

.normalize(url, base_url:) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/crawlscope/url.rb', line 9

# Builds a canonical absolute URL string of the form
# "scheme://host[:port]/path" for +url+.
#
# References that lack a scheme or a host (path-only links such as "/about"
# and scheme-relative links such as "//cdn.example.com/x") are resolved
# against +base_url+ first. Only scheme, host, port and path are kept in the
# result, so userinfo, query string and fragment are dropped. A single
# trailing slash is removed from the path, an empty path becomes "/", and the
# port is included only when it differs from the scheme's default port.
#
# @param url [#to_s] the link to normalize
# @param base_url [#to_s] absolute URL used to resolve relative references
# @return [String] the normalized URL, or +url.to_s+ unchanged when the input
#   cannot be parsed or cannot be resolved against +base_url+
def normalize(url, base_url:)
  uri = URI.parse(url.to_s)
  # A scheme-relative reference has a host but no scheme, so checking the host
  # alone would skip resolution and produce a string starting with "://".
  uri = URI.join(base_url.to_s, url.to_s) if uri.scheme.nil? || uri.host.nil?

  normalized_path = uri.path.to_s.chomp("/")
  normalized_path = "/" if normalized_path.empty?

  host = uri.host.to_s
  host = "#{host}:#{uri.port}" if uri.port && uri.port != uri.default_port

  "#{uri.scheme}://#{host}#{normalized_path}"
rescue URI::InvalidURIError, URI::BadURIError
  # URI.join raises BadURIError when base_url is itself relative or empty;
  # treat that like an unparseable URL and hand the input back untouched.
  url.to_s
end

.path(url) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/crawlscope/url.rb', line 26

# Extracts the path component of +url+ with a single trailing slash removed.
#
# @param url [#to_s] the URL or path to inspect
# @return [String, nil] the trimmed path, "/" when the path is empty or is
#   just "/", or nil when +url+ cannot be parsed as a URI
def path(url)
  trimmed = URI.parse(url.to_s).path.to_s.chomp("/")
  return "/" if trimmed.empty?

  trimmed
rescue URI::InvalidURIError
  nil
end

.remote?(value) ⇒ Boolean

Returns:

  • (Boolean)


36
37
38
39
40
41
# File 'lib/crawlscope/url.rb', line 36

# Reports whether +value+ parses as a URI that carries both a scheme and a
# host.
#
# @param value [#to_s] the candidate URL
# @return [Boolean] true when scheme and host are both present; false when
#   either is missing or +value+ cannot be parsed as a URI
def remote?(value)
  parsed = URI.parse(value.to_s)
  [parsed.scheme, parsed.host].none?(&:nil?)
rescue URI::InvalidURIError
  false
end