Module: URLRewrite
- Included in:
- WaybackMachineDownloader
- Defined in:
- lib/wayback_machine_downloader/url_rewrite.rb
Constant Summary
- SERVER_SIDE_EXTS =
Server-side file extensions that should work locally:
%w[.php .asp .aspx .jsp .cgi .pl .py].freeze
Instance Method Summary
- #rewrite_css_urls(content) ⇒ Object
- #rewrite_html_attr_urls(content) ⇒ Object
- #rewrite_js_urls(content) ⇒ Object
Instance Method Details
#rewrite_css_urls(content) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/wayback_machine_downloader/url_rewrite.rb', line 25
#
# Rewrites absolute http(s) URLs inside CSS url(...) references so that only
# the path, passed through normalize_path_for_local, remains. Every rewritten
# reference is emitted as url("...") with double quotes, whatever quoting the
# input used.
#
# Two passes run in order: Wayback Machine snapshot URLs
# (web.archive.org/web/<digits>[id_]/<original URL>) first, then any
# remaining absolute URL.
# NOTE(review): the second pass does not compare hosts, so URLs pointing at
# other sites are reduced to a bare path as well - confirm this is intended.
#
# The host part stops at the first slash, quote or closing parenthesis, so a
# URL with no path (e.g. url(http://example.com)) cannot swallow the text
# that follows it up to the next slash.
#
# @param content [String] CSS text; modified in place
# @return [String] the same content object
def rewrite_css_urls(content)
  # Pass 1: Wayback Machine snapshot URLs.
  content.gsub!(%r{
    url\(\s*["']?                                 # opening of the url reference
    https?://web\.archive\.org/web/\d+(?:id_)?/   # snapshot prefix
    https?://[^/"')]+                             # archived scheme and host
    ([^"')]*?)                                    # path, possibly empty
    ["']?\s*\)                                    # closing of the url reference
  }ix) do
    "url(\"#{normalize_path_for_local(Regexp.last_match(1))}\")"
  end

  # Pass 2: any absolute URL that is still left.
  content.gsub!(%r{
    url\(\s*["']?        # opening of the url reference
    https?://[^/"')]+    # scheme and host
    ([^"')]*?)           # path, possibly empty
    ["']?\s*\)           # closing of the url reference
  }ix) do
    "url(\"#{normalize_path_for_local(Regexp.last_match(1))}\")"
  end

  # gsub! returns nil when nothing matched, so return the string explicitly.
  content
end
#rewrite_html_attr_urls(content) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/wayback_machine_downloader/url_rewrite.rb', line 7
#
# Rewrites absolute http(s) URLs found in the href, src, action, data-src and
# data-url attributes of HTML text so that only the path, passed through
# normalize_path_for_local, remains. The attribute name and its quotes are
# kept as they were.
#
# Two passes run in order: Wayback Machine snapshot URLs
# (web.archive.org/web/<digits>[id_]/<original URL>) first, then any
# remaining absolute URL.
# NOTE(review): the second pass does not compare hosts, so links to other
# sites are reduced to a bare path as well - confirm this is intended.
#
# The host part stops at the first slash or quote, so an attribute holding a
# URL with no path (e.g. href="http://example.com") cannot swallow the
# attributes that follow it up to the next slash.
#
# @param content [String] HTML text; modified in place
# @return [String] the same content object
def rewrite_html_attr_urls(content)
  # Pass 1: Wayback Machine snapshot URLs.
  content.gsub!(%r{
    (\s(?:href|src|action|data-src|data-url)=["'])   # attribute and opening quote
    https?://web\.archive\.org/web/\d+(?:id_)?/      # snapshot prefix
    https?://[^/"']+                                 # archived scheme and host
    ([^"']*)                                         # path, possibly empty
    (["'])                                           # closing quote
  }ix) do
    prefix, path, suffix = Regexp.last_match.captures
    "#{prefix}#{normalize_path_for_local(path)}#{suffix}"
  end

  # Pass 2: any absolute URL that is still left.
  content.gsub!(%r{
    (\s(?:href|src|action|data-src|data-url)=["'])   # attribute and opening quote
    https?://[^/"']+                                 # scheme and host
    ([^"']*)                                         # path, possibly empty
    (["'])                                           # closing quote
  }ix) do
    prefix, path, suffix = Regexp.last_match.captures
    "#{prefix}#{normalize_path_for_local(path)}#{suffix}"
  end

  # gsub! returns nil when nothing matched, so return the string explicitly.
  content
end
#rewrite_js_urls(content) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/wayback_machine_downloader/url_rewrite.rb', line 41
#
# Rewrites quoted absolute http(s) URLs in JavaScript text so that only the
# path, passed through normalize_path_for_local, remains inside the original
# quotes.
#
# Two passes run in order: Wayback Machine snapshot URLs
# (web.archive.org/web/<digits>[id_]/<original URL>) first, then any
# remaining quoted absolute URL.
# NOTE(review): the second pass does not compare hosts, so every quoted
# http(s) string is reduced to a bare path - confirm this is intended.
#
# The host part stops at the first slash or quote, so a string holding a URL
# with no path (e.g. "http://example.com") cannot swallow the code between it
# and a later string literal that contains a slash. As a consequence the
# captured path is always empty or starts with a slash.
#
# @param content [String] JavaScript text; modified in place
# @return [String] the same content object
def rewrite_js_urls(content)
  # Pass 1: Wayback Machine snapshot URLs inside string literals.
  content.gsub!(%r{
    (["'])                                        # opening quote
    https?://web\.archive\.org/web/\d+(?:id_)?/   # snapshot prefix
    https?://[^/"']+                              # archived scheme and host
    ([^"']*)                                      # path, possibly empty
    (["'])                                        # closing quote
  }ix) do
    quote_start, path, quote_end = Regexp.last_match.captures
    "#{quote_start}#{normalize_path_for_local(path)}#{quote_end}"
  end

  # Pass 2: any quoted absolute URL that is still left.
  content.gsub!(%r{
    (["'])              # opening quote
    https?://[^/"']+    # scheme and host
    ([^"']*)            # path, possibly empty
    (["'])              # closing quote
  }ix) do
    quote_start, path, quote_end = Regexp.last_match.captures
    "#{quote_start}#{normalize_path_for_local(path)}#{quote_end}"
  end

  # gsub! returns nil when nothing matched, so return the string explicitly.
  content
end