Class: SourceMonitor::Images::ContentRewriter

Inherits:
Object
  • Object
show all
Defined in:
lib/source_monitor/images/content_rewriter.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, base_url: nil) ⇒ ContentRewriter

Returns a new instance of ContentRewriter.



11
12
13
14
# File 'lib/source_monitor/images/content_rewriter.rb', line 11

# Stores the markup to work on together with an optional base URL.
#
# @param html [#to_s] markup to process; coerced with +to_s+, so +nil+ is
#   stored as an empty string
# @param base_url [Object, nil] stored unchanged and exposed via +base_url+
#   (presumably used to absolutize relative image URLs — confirm in the
#   URL-resolution helper, which is not shown here)
def initialize(html, base_url: nil)
  @html = html.to_s
  @base_url = base_url
end

Instance Attribute Details

#base_url ⇒ Object (readonly)

Returns the value of attribute base_url.



9
10
11
# File 'lib/source_monitor/images/content_rewriter.rb', line 9

# Reader for the base URL supplied to the constructor.
#
# @return [Object, nil] the value held in +@base_url+, exactly as it was given
def base_url
  @base_url
end

#html ⇒ Object (readonly)

Returns the value of attribute html.



9
10
11
# File 'lib/source_monitor/images/content_rewriter.rb', line 9

# Reader for the markup supplied to the constructor.
#
# @return [String] the value held in +@html+ (the constructor argument after
#   +to_s+ coercion)
def html
  @html
end

Instance Method Details

#image_urls ⇒ Object

Returns an array of absolute image URLs found in <img> tags. Skips data: URIs, blank src, and invalid URLs.



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/source_monitor/images/content_rewriter.rb', line 18

# Collects the image URLs referenced by <img src="..."> elements in +html+.
#
# Each +src+ value is passed through +resolve_url+; a result is kept only when
# it is truthy and +downloadable_url?+ accepts it. Duplicates are removed,
# keeping the first occurrence in document order.
#
# @return [Array] the unique accepted URLs, or an empty array when +html+ is blank
def image_urls
  return [] if html.blank?

  collected = parse_fragment.css("img[src]").each_with_object([]) do |node, accepted|
    candidate = resolve_url(node["src"])
    accepted << candidate if candidate && downloadable_url?(candidate)
  end

  collected.uniq
end

#rewrite ⇒ Object

Rewrites <img src="…"> attributes by yielding each image's resolved URL to the block and replacing the src with the block's return value. Only images whose resolved URL is downloadable are yielded. Returns the rewritten HTML string. If the block returns nil or another blank value, the original src is preserved (graceful fallback).



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/source_monitor/images/content_rewriter.rb', line 36

# Replaces the +src+ of qualifying <img> elements with values supplied by the
# caller's block and returns the resulting markup.
#
# For every <img> carrying a +src+ attribute, the value is passed through
# +resolve_url+; when the result is truthy and +downloadable_url?+ accepts it,
# that resolved URL is yielded. A present (non-blank) block result becomes the
# new +src+; a blank result leaves the attribute untouched.
#
# A block is required as soon as one image qualifies, because the method
# yields unconditionally for such images.
#
# @yieldparam original_url the resolved URL of a qualifying image
# @yieldreturn the replacement +src+, or a blank value to keep the current one
# @return the unmodified +html+ when it is blank, otherwise the parsed
#   fragment serialized with +to_html+
def rewrite
  return html if html.blank?

  fragment = parse_fragment

  fragment.css("img[src]").each do |node|
    resolved = resolve_url(node["src"])

    if resolved && downloadable_url?(resolved)
      replacement = yield(resolved)
      node["src"] = replacement if replacement.present?
    end
  end

  fragment.to_html
end