Class: Archaeo::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/page.rb

Overview

Model representing a fetched archived page from the Wayback Machine.

Contains the page content, metadata, and provenance information for a single archived resource. Content is automatically transcoded to UTF-8 from the detected source encoding.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content:, content_type:, status_code:, archive_url:, original_url:, timestamp:) ⇒ Page

Returns a new instance of Page.



15
16
17
18
19
20
21
22
23
# File 'lib/archaeo/page.rb', line 15

# Builds a page from the pieces of an already-fetched archived response.
#
# The body is kept untouched in @raw_content; the #content reader is what
# passes it through transcoding on first use.
#
# @param content the response body, stored as-is
# @param content_type the media type string consulted by the type predicates
# @param status_code the status code reported for the fetch
# @param archive_url the URL the page was fetched from
# @param original_url the URL of the resource that was archived
# @param timestamp anything Timestamp.coerce accepts; the coerced value is
#   what #timestamp returns
def initialize(
  content:,
  content_type:,
  status_code:,
  archive_url:,
  original_url:,
  timestamp:
)
  # Payload and response metadata.
  @raw_content = content
  @content_type = content_type
  @status_code = status_code

  # Provenance.
  @archive_url = archive_url
  @original_url = original_url
  @timestamp = Timestamp.coerce(timestamp)
end

Instance Attribute Details

#archive_url ⇒ Object (readonly)

Returns the value of attribute archive_url.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# @return [Object] the archive_url value handed to the constructor, unchanged
def archive_url = @archive_url

#content_type ⇒ Object (readonly)

Returns the value of attribute content_type.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# @return [Object] the content_type value handed to the constructor, unchanged
def content_type = @content_type

#original_url ⇒ Object (readonly)

Returns the value of attribute original_url.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# @return [Object] the original_url value handed to the constructor, unchanged
def original_url = @original_url

#status_code ⇒ Object (readonly)

Returns the value of attribute status_code.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# @return [Object] the status_code value handed to the constructor, unchanged
def status_code = @status_code

#timestamp ⇒ Object (readonly)

Returns the value of attribute timestamp.



12
13
14
# File 'lib/archaeo/page.rb', line 12

# @return [Object] the value Timestamp.coerce produced from the constructor's
#   timestamp argument
def timestamp = @timestamp

Instance Method Details

#binary? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/archaeo/page.rb', line 53

# Reports whether the page is none of text, JSON or HTML.
#
# A page whose content type matches none of those predicates (including a
# page with no content type at all) counts as binary.
#
# @return [Boolean] true unless text?, json? or html? is truthy
def binary?
  return false if text?
  return false if json?
  return false if html?

  true
end

#content ⇒ Object



25
26
27
# File 'lib/archaeo/page.rb', line 25

# Returns the page body after it has been passed through #transcode.
#
# The transcoded value is computed on first access and reused afterwards
# (as long as it is truthy).
#
# @return [Object] the result of transcode(@raw_content)
def content
  return @content if @content

  @content = transcode(@raw_content)
end

#encoding ⇒ Object



33
34
35
# File 'lib/archaeo/page.rb', line 33

# Returns the encoding reported by #detect_encoding.
#
# Detection runs on first access; a truthy result is remembered and reused.
#
# @return [Object] whatever detect_encoding returned
def encoding
  @encoding = detect_encoding unless @encoding
  @encoding
end

#html? ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
# File 'lib/archaeo/page.rb', line 37

# Reports whether the content type names HTML.
#
# The comparison ignores case because media type tokens are
# case-insensitive, and a missing content type yields false rather than nil
# so the method always returns a real Boolean.
#
# @return [Boolean] true when the content type contains "text/html"
def html?
  @content_type.to_s.downcase.include?("text/html")
end

#image? ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/archaeo/page.rb', line 45

# Reports whether the content type is in the image/* family.
#
# The comparison ignores case because media type tokens are
# case-insensitive, and a missing content type yields false rather than nil
# so the method always returns a real Boolean.
#
# @return [Boolean] true when the content type starts with "image/"
def image?
  @content_type.to_s.downcase.start_with?("image/")
end

#json? ⇒ Boolean

Returns:

  • (Boolean)


41
42
43
# File 'lib/archaeo/page.rb', line 41

# Reports whether the content type names JSON.
#
# The comparison ignores case because media type tokens are
# case-insensitive, and a missing content type yields false rather than nil
# so the method always returns a real Boolean.
#
# @return [Boolean] true when the content type contains "application/json"
def json?
  @content_type.to_s.downcase.include?("application/json")
end

#size ⇒ Object



29
30
31
# File 'lib/archaeo/page.rb', line 29

# Returns the length of the transcoded content.
#
# NOTE(review): presumably #content is a String, in which case this counts
# characters rather than bytes — confirm against #transcode.
#
# @return [Integer] the length reported by the value #content returns
def size
  content.size
end

#text? ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
# File 'lib/archaeo/page.rb', line 49

# Reports whether the content type is in the text/* family.
#
# The comparison ignores case because media type tokens are
# case-insensitive, and a missing content type yields false rather than nil
# so the method always returns a real Boolean.
#
# @return [Boolean] true when the content type starts with "text/"
def text?
  @content_type.to_s.downcase.start_with?("text/")
end

#title ⇒ Object



57
58
59
60
61
62
63
64
# File 'lib/archaeo/page.rb', line 57

# Extracts the text of the document's <title> element.
#
# The raw (untranscoded) body is parsed with Nokogiri. The outcome is cached
# even when it is nil, so a page without a title, or one that fails to
# parse, is only parsed once instead of on every call.
#
# @return [String, nil] the stripped title text, or nil when there is no
#   <title> element or parsing raised a StandardError
def title
  # `defined?` rather than `||=` so that a nil result is remembered too.
  return @title if defined?(@title)

  @title = begin
    Nokogiri::HTML(@raw_content).at_css("title")&.text&.strip
  rescue StandardError
    # Best effort: an unparseable body simply has no title.
    nil
  end
end