Class: Archaeo::SnapshotDiff

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/snapshot_diff.rb

Overview

Compares two archived snapshots of the same URL.

Produces text diffs, structural change analysis, and link and asset change tracking between snapshots taken at different timestamps.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url:, page_a:, page_b:, timestamp_a:, timestamp_b:) ⇒ SnapshotDiff

Returns a new instance of SnapshotDiff.



13
14
15
16
17
18
19
# File 'lib/archaeo/snapshot_diff.rb', line 13

# Builds a diff between two captures of one URL.
#
# NOTE(review): this constructor does not assign @snapshot_a or @snapshot_b,
# although readers with those names are listed for the class — confirm whether
# they are meant to be populated here.
#
# @param url the URL both pages belong to; stored as-is
# @param page_a the first page; other methods call #content and #html? on it
# @param page_b the second page; used the same way as page_a
# @param timestamp_a passed through Timestamp.coerce before being stored
# @param timestamp_b passed through Timestamp.coerce before being stored
def initialize(url:, page_a:, page_b:, timestamp_a:, timestamp_b:)
  @url = url
  @page_a, @page_b = page_a, page_b
  # Coerce in a/b order so both timestamps are stored in Timestamp's own form.
  @timestamp_a, @timestamp_b = [timestamp_a, timestamp_b].map { |raw| Timestamp.coerce(raw) }
end

Instance Attribute Details

#snapshot_a ⇒ Object (readonly)

Returns the value of attribute snapshot_a.



11
12
13
# File 'lib/archaeo/snapshot_diff.rb', line 11

# Returns the value of the @snapshot_a instance variable.
#
# NOTE(review): the constructor listed for this class assigns @url, @page_a,
# @page_b, @timestamp_a and @timestamp_b, but not @snapshot_a, so this reader
# presumably returns nil unless the variable is set elsewhere — confirm.
def snapshot_a
  @snapshot_a
end

#snapshot_b ⇒ Object (readonly)

Returns the value of attribute snapshot_b.



11
12
13
# File 'lib/archaeo/snapshot_diff.rb', line 11

# Returns the value of the @snapshot_b instance variable.
#
# NOTE(review): the constructor listed for this class assigns @url, @page_a,
# @page_b, @timestamp_a and @timestamp_b, but not @snapshot_b, so this reader
# presumably returns nil unless the variable is set elsewhere — confirm.
def snapshot_b
  @snapshot_b
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



11
12
13
# File 'lib/archaeo/snapshot_diff.rb', line 11

# Returns the URL that was passed to the constructor (held in @url).
def url
  @url
end

Instance Method Details

#as_json ⇒ Object



65
66
67
# File 'lib/archaeo/snapshot_diff.rb', line 65

# JSON-ready representation of the diff.
#
# Accepts and ignores any arguments, and simply delegates to #to_h.
#
# @return the value produced by #to_h
def as_json(*_ignored)
  to_h
end

#asset_changes ⇒ Object



37
38
39
40
41
# File 'lib/archaeo/snapshot_diff.rb', line 37

# Compares the assets extracted from the two pages.
#
# Runs extract_assets on page A and then page B, and hands both results to
# compute_set_diff in that order.
#
# @return the result of compute_set_diff; #to_h reads :added and :removed from it
def asset_changes
  before, after = [@page_a, @page_b].map { |page| extract_assets(page) }
  compute_set_diff(before, after)
end

#content_changed? ⇒ Boolean

Returns:

  • (Boolean)


21
22
23
# File 'lib/archaeo/snapshot_diff.rb', line 21

# Tells whether the two pages differ in content.
#
# Each page's #content is passed through content_digest and the two digests
# are compared for inequality.
#
# @return [Boolean] true when the digests differ
def content_changed?
  digest_a = content_digest(@page_a.content)
  digest_b = content_digest(@page_b.content)
  digest_a != digest_b
end


31
32
33
34
35
# File 'lib/archaeo/snapshot_diff.rb', line 31

# Compares the links extracted from the two pages.
#
# Runs extract_links on page A and then page B, and hands both results to
# compute_set_diff in that order.
#
# @return the result of compute_set_diff; #to_h reads :added and :removed from it
def link_changes
  compute_set_diff(extract_links(@page_a), extract_links(@page_b))
end

#structural_changes ⇒ Object



43
44
45
46
47
48
49
# File 'lib/archaeo/snapshot_diff.rb', line 43

# Compares the element counts of the two pages.
#
# Returns an empty Hash unless both pages answer true to #html?. Page B is
# only asked when page A is HTML. Otherwise the count_elements results for
# page A and page B are passed, in that order, to build_element_diff.
#
# @return an empty Hash, or the result of build_element_diff
def structural_changes
  both_html = [@page_a, @page_b].all?(&:html?)
  return {} unless both_html

  build_element_diff(count_elements(@page_a), count_elements(@page_b))
end

#text_diff ⇒ Object



25
26
27
28
29
# File 'lib/archaeo/snapshot_diff.rb', line 25

# Builds a line-based diff of the two pages' content.
#
# Each page's #content is converted with #to_s (so nil becomes an empty
# string) and split into lines; the two line arrays are passed to
# build_unified_diff with page A first.
#
# @return the result of build_unified_diff
def text_diff
  old_lines, new_lines = [@page_a, @page_b].map { |page| page.content.to_s.lines }
  build_unified_diff(old_lines, new_lines)
end

#to_h ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/archaeo/snapshot_diff.rb', line 51

# Summarizes the diff as a Hash with symbol keys.
#
# The link and asset diffs are each computed once and reused for their
# :added and :removed entries, so the extraction and set diff are not
# repeated for every key.
#
# @return [Hash] with keys :url, :timestamp_a, :timestamp_b (both via #to_s),
#   :content_changed, :links_added, :links_removed, :assets_added,
#   :assets_removed and :structural_changes
def to_h
  links = link_changes
  assets = asset_changes

  {
    url: @url,
    timestamp_a: @timestamp_a.to_s,
    timestamp_b: @timestamp_b.to_s,
    content_changed: content_changed?,
    links_added: links[:added],
    links_removed: links[:removed],
    assets_added: assets[:added],
    assets_removed: assets[:removed],
    structural_changes: structural_changes,
  }
end