Class: Archaeo::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/page.rb

Overview

Model representing a fetched archived page from the Wayback Machine.

Contains the page content, metadata, and provenance information for a single archived resource. Content is automatically transcoded to UTF-8 from the detected source encoding.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content:, content_type:, status_code:, archive_url:, original_url:, timestamp:) ⇒ Page

Returns a new instance of Page.



15
16
17
18
19
20
21
22
23
# File 'lib/archaeo/page.rb', line 15

# Builds a page from the pieces of a fetched capture.
#
# All arguments are required keywords. Every value is stored as given,
# except +timestamp+, which is first passed through Timestamp.coerce.
#
# @param content the page body; kept as the raw content
# @param content_type the content type value for the capture
# @param status_code the status code value for the capture
# @param archive_url the URL of the archived copy
# @param original_url the URL of the original resource
# @param timestamp the capture time, in any form Timestamp.coerce accepts
def initialize(content:, content_type:, status_code:,
               archive_url:, original_url:, timestamp:)
  # Where the capture came from.
  @original_url = original_url
  @archive_url = archive_url

  # What the capture returned.
  @status_code = status_code
  @content_type = content_type
  @raw_content = content

  # Normalised last, so the coerced value is what the reader exposes.
  @timestamp = Timestamp.coerce(timestamp)
end

Instance Attribute Details

#archive_url ⇒ Object (readonly)

Returns the value of attribute archive_url.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# Reader for @archive_url, which the constructor sets from its
# +archive_url:+ argument.
def archive_url
  @archive_url
end

#content_type ⇒ Object (readonly)

Returns the value of attribute content_type.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# Reader for @content_type, which the constructor sets from its
# +content_type:+ argument. The type predicates (html?, json?, ...) test
# this value.
def content_type
  @content_type
end

#original_url ⇒ Object (readonly)

Returns the value of attribute original_url.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# Reader for @original_url, which the constructor sets from its
# +original_url:+ argument.
def original_url
  @original_url
end

#status_code ⇒ Object (readonly)

Returns the value of attribute status_code.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# Reader for @status_code, which the constructor sets from its
# +status_code:+ argument.
def status_code
  @status_code
end

#timestamp ⇒ Object (readonly)

Returns the value of attribute timestamp.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# Reader for @timestamp. The constructor stores the result of
# Timestamp.coerce here, not the raw +timestamp:+ argument.
def timestamp
  @timestamp
end

Instance Method Details

#as_json ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
# File 'lib/archaeo/page.rb', line 82

# Builds a JSON-friendly Hash describing this page.
#
# The result is exactly #to_h, except that the +:timestamp+ entry holds
# the timestamp's String form (+@timestamp.to_s+). Deriving it from #to_h
# keeps the two field lists from drifting apart.
#
# Any arguments are accepted and ignored.
#
# @return [Hash] same keys, in the same order, as #to_h
def as_json(*)
  # Hash#merge returns a new Hash and keeps :timestamp in its original slot.
  to_h.merge(timestamp: @timestamp.to_s)
end

#binary? ⇒ Boolean

Returns:

  • (Boolean)


57
58
59
# File 'lib/archaeo/page.rb', line 57

# Reports whether the page is treated as binary, meaning none of text?,
# json? or html? holds for it. A page whose content type is nil therefore
# counts as binary.
#
# @return [Boolean] always true or false, never nil
def binary?
  !text? && !json? && !html?
end

#content ⇒ Object



25
26
27
# File 'lib/archaeo/page.rb', line 25

# Returns the page content, computing it on first use.
#
# The value is whatever +transcode+ returns for the raw content handed to
# the constructor; a truthy result is cached in @content and reused.
# NOTE(review): +transcode+ is defined elsewhere; the class overview says
# content is converted to UTF-8 -- confirm against that helper.
def content
  return @content if @content

  @content = transcode(@raw_content)
end

#css? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/archaeo/page.rb', line 53

# Reports whether the content type mentions "text/css" anywhere in it.
#
# @return [Boolean, nil] nil when the page has no content type
def css?
  return if @content_type.nil?

  @content_type.include?("text/css")
end

#encoding ⇒ Object



33
34
35
# File 'lib/archaeo/page.rb', line 33

# Returns the page's encoding, computing it on first use.
#
# The value comes from +detect_encoding+ (defined elsewhere); a truthy
# result is cached in @encoding and reused on later calls.
def encoding
  return @encoding if @encoding

  @encoding = detect_encoding
end

#html? ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
# File 'lib/archaeo/page.rb', line 37

# Reports whether the content type mentions "text/html" anywhere in it.
#
# @return [Boolean, nil] nil when the page has no content type
def html?
  return if @content_type.nil?

  @content_type.include?("text/html")
end

#image? ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/archaeo/page.rb', line 45

# Reports whether the content type begins with "image/".
#
# @return [Boolean, nil] nil when the page has no content type
def image?
  return if @content_type.nil?

  @content_type.start_with?("image/")
end

#inspect ⇒ Object



94
95
96
# File 'lib/archaeo/page.rb', line 94

def inspect
  "#<#{self.class.name} #{@content_type} #{size} bytes>"
end

#json? ⇒ Boolean

Returns:

  • (Boolean)


41
42
43
# File 'lib/archaeo/page.rb', line 41

# Reports whether the content type mentions "application/json" anywhere
# in it.
#
# @return [Boolean, nil] nil when the page has no content type
def json?
  return if @content_type.nil?

  @content_type.include?("application/json")
end

#size ⇒ Object



29
30
31
# File 'lib/archaeo/page.rb', line 29

# Returns the length of #content.
#
# NOTE(review): if #content is a String, +length+ counts characters, not
# bytes, while #inspect prints this value followed by "bytes" -- confirm
# which unit is intended.
def size
  transcoded = content
  transcoded.length
end

#text? ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
# File 'lib/archaeo/page.rb', line 49

# Reports whether the content type begins with "text/".
#
# @return [Boolean, nil] nil when the page has no content type
def text?
  return if @content_type.nil?

  @content_type.start_with?("text/")
end

#title ⇒ Object



61
62
63
64
65
66
67
68
# File 'lib/archaeo/page.rb', line 61

# Extracts the text of the document's <title> element.
#
# @raw_content is handed to Nokogiri::HTML and the first "title" node's
# text is returned with surrounding whitespace stripped. Any StandardError
# raised along the way is swallowed and reported as "no title".
#
# The outcome is cached after the first call, including a nil outcome, so
# a page without a title (or one that fails to parse) is parsed only once.
#
# @return [String, nil] the stripped title, or nil when there is no
#   <title> element or parsing fails
def title
  # `@title ||= ...` would re-run the parse on every call whenever the
  # result is nil, so check whether the ivar has been assigned at all.
  return @title if defined?(@title)

  @title = begin
    Nokogiri::HTML(@raw_content).at_css("title")&.text&.strip
  rescue StandardError
    nil
  end
end

#to_h ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/archaeo/page.rb', line 70

# Summarises the page as a Hash of metadata.
#
# The stored fields are copied as they are (the timestamp is left
# unconverted); +:size+ comes from #size and +:encoding+ is the String
# form of #encoding. The page content itself is not included.
#
# @return [Hash] keys: :content_type, :status_code, :archive_url,
#   :original_url, :timestamp, :size, :encoding
def to_h
  summary = {
    content_type: @content_type,
    status_code: @status_code,
    archive_url: @archive_url,
    original_url: @original_url,
    timestamp: @timestamp,
  }
  # Derived values go last, keeping the original key order.
  summary[:size] = size
  summary[:encoding] = encoding.to_s
  summary
end