Module: Crawlscope::Url
- Defined in:
- lib/crawlscope/url.rb
Class Method Summary
- .normalize(url, base_url:) ⇒ Object
- .normalize_for_base(url, base_url:) ⇒ Object
- .path(url) ⇒ Object
- .remote?(value) ⇒ Boolean
Class Method Details
.normalize(url, base_url:) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/crawlscope/url.rb', line 9

# Builds a canonical absolute form of +url+, resolving it against +base_url+
# when +url+ itself carries no host.
#
# The result keeps only the scheme, host, non-default port and path; any
# query string or fragment is dropped. One trailing slash is removed from the
# path, and an empty path becomes "/".
#
# @param url [#to_s] absolute or relative URL to normalize
# @param base_url [#to_s] base used to resolve host-less URLs
# @return [String] the normalized URL, or +url.to_s+ unchanged when it
#   cannot be parsed or resolved
def normalize(url, base_url:)
  uri = URI.parse(url.to_s)
  uri = URI.join(base_url.to_s, url.to_s) if uri.host.nil?

  path = uri.path.to_s.chomp("/")
  path = "/" if path.empty?

  authority = uri.host.to_s
  # Only spell out the port when it differs from the scheme's default.
  authority = "#{authority}:#{uri.port}" if uri.port && uri.port != uri.default_port

  "#{uri.scheme}://#{authority}#{path}"
rescue URI::Error
  # URI::Error is the parent of InvalidURIError (unparseable input) and
  # BadURIError (raised by URI.join when the base is relative too), so every
  # URI failure takes the same pass-through fallback.
  url.to_s
end
.normalize_for_base(url, base_url:) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/crawlscope/url.rb', line 26

# Normalizes +url+ and then rewrites its scheme, host and port to those of
# +base_url+, so the result always points at the base's origin.
#
# When +base_url+ has no host the origin is left untouched. The rewritten URL
# is passed through +normalize+ once more so the returned string has the same
# canonical form as any other normalized URL.
#
# @param url [#to_s] absolute or relative URL
# @param base_url [#to_s] URL whose origin should replace the one in +url+
# @return [String] the normalized URL on the base's origin, or +url.to_s+
#   unchanged when a URI error occurs
def normalize_for_base(url, base_url:)
  target = URI.parse(normalize(url, base_url: base_url))
  base = URI.parse(base_url.to_s)

  unless base.host.to_s.empty?
    target.scheme = base.scheme
    target.host = base.host
    target.port = base.port
  end

  normalize(target.to_s, base_url: base_url)
rescue URI::Error
  # Rescue the whole URI::Error family: parsing raises InvalidURIError, but
  # joining and the component setters can raise BadURIError or
  # InvalidComponentError, which should take the same fallback.
  url.to_s
end
.path(url) ⇒ Object
40 41 42 43 44 45 46 47 48 |
# File 'lib/crawlscope/url.rb', line 40

# Extracts the path component of +url+ with one trailing slash removed.
#
# @param url [#to_s] URL or path to inspect
# @return [String, nil] the path ("/" when it is empty or just "/"), or +nil+
#   when +url+ cannot be parsed
def path(url)
  trimmed = URI.parse(url.to_s).path.to_s.chomp("/")
  # An empty path and a bare "/" both end up empty after the chomp, so a
  # single fallback covers them.
  trimmed.empty? ? "/" : trimmed
rescue URI::InvalidURIError
  nil
end
.remote?(value) ⇒ Boolean
50 51 52 53 54 55 |
# File 'lib/crawlscope/url.rb', line 50

# Reports whether +value+ is an absolute URL that names a host.
#
# @param value [#to_s] candidate URL
# @return [Boolean] +true+ when +value+ parses with both a scheme and a
#   non-empty host; +false+ otherwise, including when it cannot be parsed
def remote?(value)
  uri = URI.parse(value.to_s)
  # The host can come back as an empty string (e.g. "file:///tmp/x"), so
  # check for emptiness rather than nil.
  !uri.scheme.nil? && !uri.host.to_s.empty?
rescue URI::InvalidURIError
  false
end