Class: Archaeo::Snapshot

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/snapshot.rb

Overview

A single CDX Server API record representing an archived document.

Maps the seven standard CDX fields and provides the computed archive URL via the ArchiveUrl model.

Constant Summary collapse

# Names of the seven CDX fields, in the same order #to_a lists their values.
FIELDS = %i[
  urlkey timestamp original_url
  mimetype status_code digest length
].freeze

# Status code value that #blocked? checks for.
BLOCKED_STATUS = -1

Instance Method Summary collapse

Constructor Details

#initialize(urlkey:, timestamp:, original_url:, mimetype: nil, status_code: nil, digest: nil, length: nil) ⇒ Snapshot

Returns a new instance of Snapshot.



16
17
18
19
20
21
22
23
24
25
26
# File 'lib/archaeo/snapshot.rb', line 16

# Builds a snapshot from the seven CDX field values.
#
# Text fields are stored through +to_s+ and numeric fields through +to_i+,
# so an omitted optional field is stored as "" or 0 rather than nil.
#
# @param urlkey [#to_s]
# @param timestamp [Object] passed through Timestamp.coerce
# @param original_url [#to_s]
# @param mimetype [#to_s, nil]
# @param status_code [#to_i, nil]
# @param digest [#to_s, nil]
# @param length [#to_i, nil]
def initialize(urlkey:, timestamp:, original_url:,
               mimetype: nil, status_code: nil,
               digest: nil, length: nil)
  # Required fields.
  @urlkey, @timestamp, @original_url =
    urlkey.to_s, Timestamp.coerce(timestamp), original_url.to_s

  # Optional text fields.
  @mimetype, @digest = mimetype.to_s, digest.to_s

  # Optional numeric fields.
  @status_code, @length = status_code.to_i, length.to_i
end

Instance Method Details

#==(other) ⇒ Object Also known as: eql?



126
127
128
# File 'lib/archaeo/snapshot.rb', line 126

def ==(other)
  other.is_a?(self.class) && to_a == other.to_a
end

#age ⇒ Object



61
62
63
# File 'lib/archaeo/snapshot.rb', line 61

# Difference between the current time and the snapshot's timestamp
# (converted with +to_time+).
#
# @return [Object] the result of subtracting the two times
def age
  now = Time.now
  now - @timestamp.to_time
end

#archive_url ⇒ Object



28
29
30
# File 'lib/archaeo/snapshot.rb', line 28

# String form of the ArchiveUrl built from this snapshot's original URL
# and timestamp.
#
# @return [String]
def archive_url
  url = ArchiveUrl.new(original_url, timestamp: @timestamp)
  url.to_s
end

#as_json(*) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
# File 'lib/archaeo/snapshot.rb', line 114

# Hash representation with the timestamp rendered as a string.
# Any arguments are accepted and ignored.
#
# @return [Hash{Symbol => Object}]
def as_json(*)
  stamp = @timestamp.to_s

  { urlkey: @urlkey, timestamp: stamp, original_url: @original_url,
    mimetype: @mimetype, status_code: @status_code,
    digest: @digest, length: @length }
end

#blocked? ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
# File 'lib/archaeo/snapshot.rb', line 37

# Whether the stored status code equals BLOCKED_STATUS.
#
# @return [Boolean]
def blocked?
  recorded = @status_code
  recorded == BLOCKED_STATUS
end

#client_error? ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
# File 'lib/archaeo/snapshot.rb', line 49

# Whether the stored status code lies in 400..499 (inclusive).
#
# @return [Boolean]
def client_error?
  (400..499).cover?(@status_code)
end

#duplicate_of?(other) ⇒ Boolean

Returns:

  • (Boolean)


81
82
83
# File 'lib/archaeo/snapshot.rb', line 81

# Whether +other+ has the same content (per #same_content_as?) but a
# different timestamp.
#
# @param other [Object]
# @return [Boolean]
def duplicate_of?(other)
  return false unless same_content_as?(other)

  timestamp != other.timestamp
end

#error? ⇒ Boolean

Returns:

  • (Boolean)


57
58
59
# File 'lib/archaeo/snapshot.rb', line 57

# Whether the snapshot is a client error or a server error.
#
# @return [Boolean]
def error?
  return true if client_error?

  server_error?
end

#fetch(client: HttpClient.new, identity: false) ⇒ Object



85
86
87
88
89
# File 'lib/archaeo/snapshot.rb', line 85

# Delegates to Fetcher#fetch using this snapshot's original URL and
# timestamp.
#
# @param client [Object] handed to Fetcher.new; defaults to HttpClient.new
# @param identity [Boolean] forwarded unchanged to Fetcher#fetch
# @return [Object] whatever Fetcher#fetch returns
def fetch(client: HttpClient.new, identity: false)
  fetcher = Fetcher.new(client: client)
  fetcher.fetch(original_url, timestamp: @timestamp, identity: identity)
end

#fetch_with_assets(client: HttpClient.new) ⇒ Object



91
92
93
94
95
# File 'lib/archaeo/snapshot.rb', line 91

# Delegates to Fetcher#fetch_page_with_assets using this snapshot's
# original URL and timestamp.
#
# @param client [Object] handed to Fetcher.new; defaults to HttpClient.new
# @return [Object] whatever Fetcher#fetch_page_with_assets returns
def fetch_with_assets(client: HttpClient.new)
  fetcher = Fetcher.new(client: client)
  fetcher.fetch_page_with_assets(original_url, timestamp: @timestamp)
end

#hash ⇒ Object



131
132
133
# File 'lib/archaeo/snapshot.rb', line 131

# Hash code derived from the #to_a field values, so that snapshots with
# equal field values produce equal hash codes.
#
# @return [Integer]
def hash
  field_values = to_a
  field_values.hash
end

#identity_url ⇒ Object



32
33
34
35
# File 'lib/archaeo/snapshot.rb', line 32

# String form of the ArchiveUrl built from this snapshot's original URL
# and timestamp with +identity: true+.
#
# @return [String]
def identity_url
  url = ArchiveUrl.new(original_url, timestamp: @timestamp, identity: true)
  url.to_s
end

#inspect ⇒ Object



135
136
137
138
# File 'lib/archaeo/snapshot.rb', line 135

# Short debugging representation: class name, timestamp, original URL
# and status code.
#
# @return [String]
def inspect
  class_name = self.class.name
  "#<#{class_name} #{timestamp} #{original_url} status=#{status_code}>"
end

#newer_than?(seconds) ⇒ Boolean

Returns:

  • (Boolean)


69
70
71
# File 'lib/archaeo/snapshot.rb', line 69

# Whether the snapshot's #age is at most +seconds+.
#
# @param seconds [Numeric]
# @return [Boolean]
def newer_than?(seconds)
  elapsed = age
  elapsed <= seconds
end

#older_than?(seconds) ⇒ Boolean

Returns:

  • (Boolean)


65
66
67
# File 'lib/archaeo/snapshot.rb', line 65

# Whether the snapshot's #age is strictly greater than +seconds+.
#
# @param seconds [Numeric]
# @return [Boolean]
def older_than?(seconds)
  elapsed = age
  elapsed > seconds
end

#redirect? ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/archaeo/snapshot.rb', line 45

# Whether the stored status code lies in 300..399 (inclusive).
#
# @return [Boolean]
def redirect?
  (300..399).cover?(@status_code)
end

#same_content_as?(other) ⇒ Boolean

Returns:

  • (Boolean)


73
74
75
76
77
78
79
# File 'lib/archaeo/snapshot.rb', line 73

# Whether +other+ is a snapshot of this class carrying the same digest.
# A nil or empty digest on either side never counts as a match.
#
# @param other [Object]
# @return [Boolean]
def same_content_as?(other)
  return false unless other.is_a?(self.class)

  own = digest
  theirs = other.digest
  return false if [own, theirs].any? { |value| value.nil? || value.empty? }

  own == theirs
end

#server_error? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/archaeo/snapshot.rb', line 53

# Whether the stored status code lies in 500..599 (inclusive).
#
# @return [Boolean]
def server_error?
  (500..599).cover?(@status_code)
end

#success? ⇒ Boolean

Returns:

  • (Boolean)


41
42
43
# File 'lib/archaeo/snapshot.rb', line 41

# Whether the stored status code is exactly 200.
#
# @return [Boolean]
def success?
  ok_status = 200
  @status_code == ok_status
end

#to_a ⇒ Object



97
98
99
100
# File 'lib/archaeo/snapshot.rb', line 97

# The seven stored field values as an array, in FIELDS order.
#
# @return [Array]
def to_a
  [
    @urlkey,
    @timestamp,
    @original_url,
    @mimetype,
    @status_code,
    @digest,
    @length
  ]
end

#to_h ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
# File 'lib/archaeo/snapshot.rb', line 102

# Hash representation keyed by the FIELDS names, with the stored values
# (the timestamp is kept as-is, not stringified).
#
# @return [Hash{Symbol => Object}]
def to_h
  # FIELDS and #to_a list the seven fields in the same order.
  FIELDS.zip(to_a).to_h
end