Class: Archaeo::ContentTracker

Inherits:
Object
Defined in:
lib/archaeo/content_tracker.rb

Overview

Tracks content changes for a URL across archived snapshots.

Groups snapshots by original URL, then analyzes how content (identified by CDX digest) changed over the given time range.

Instance Method Summary

Constructor Details

#initialize(cdx_api: CdxApi.new, fetcher: Fetcher.new) ⇒ ContentTracker

Returns a new instance of ContentTracker.



42
43
44
45
# File 'lib/archaeo/content_tracker.rb', line 42

# Builds a tracker around its two injected collaborators.
#
# @param cdx_api [Object] stored as @cdx; #track calls +snapshots+ on it
#   (defaults to a freshly built CdxApi)
# @param fetcher [Object] stored as @fetcher (defaults to a freshly built Fetcher)
def initialize(cdx_api: CdxApi.new, fetcher: Fetcher.new)
  @cdx, @fetcher = cdx_api, fetcher
end

Instance Method Details

#track(url, from: nil, to: nil) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/archaeo/content_tracker.rb', line 47

# Runs the content-change analysis for +url+ over an optional time window.
#
# The URL is normalized, each supplied bound is coerced with Timestamp.coerce,
# and the CDX client is queried for snapshots. Only snapshots answering true to
# +success?+ are kept; they are grouped via +group_by_url+ and handed to
# +analyze+ together with the normalized URL and the coerced bounds.
#
# @param url [Object] URL to track; passed through UrlNormalizer.normalize
# @param from [Object, nil] lower bound of the window; omitted from the query when falsy
# @param to [Object, nil] upper bound of the window; omitted from the query when falsy
# @return [Object] whatever +analyze+ returns
def track(url, from: nil, to: nil)
  normalized = UrlNormalizer.normalize(url)
  range_start = (Timestamp.coerce(from) if from)
  range_end = (Timestamp.coerce(to) if to)

  # Forward only the bounds that are present, stringified for the CDX query.
  query = { from: range_start, to: range_end }
          .select { |_bound, stamp| stamp }
          .transform_values(&:to_s)

  successful = @cdx.snapshots(normalized, **query).select(&:success?).to_a

  analyze(normalized, range_start, range_end, successful, group_by_url(successful))
end