Class: Archaeo::Page
- Inherits:
-
Object
- Object
- Archaeo::Page
- Defined in:
- lib/archaeo/page.rb
Overview
Model representing a fetched archived page from the Wayback Machine.
Contains the page content, metadata, and provenance information for a single archived resource. Content is automatically transcoded to UTF-8 from the detected source encoding.
Instance Attribute Summary
-
#archive_url ⇒ Object
readonly
Returns the value of attribute archive_url.
-
#content_type ⇒ Object
readonly
Returns the value of attribute content_type.
-
#original_url ⇒ Object
readonly
Returns the value of attribute original_url.
-
#status_code ⇒ Object
readonly
Returns the value of attribute status_code.
-
#timestamp ⇒ Object
readonly
Returns the value of attribute timestamp.
Instance Method Summary
- #as_json ⇒ Object
- #binary? ⇒ Boolean
- #content ⇒ Object
- #css? ⇒ Boolean
- #encoding ⇒ Object
- #html? ⇒ Boolean
- #image? ⇒ Boolean
-
#initialize(content:, content_type:, status_code:, archive_url:, original_url:, timestamp:) ⇒ Page
constructor
A new instance of Page.
- #inspect ⇒ Object
- #json? ⇒ Boolean
- #size ⇒ Object
- #text? ⇒ Boolean
- #title ⇒ Object
- #to_h ⇒ Object
Constructor Details
#initialize(content:, content_type:, status_code:, archive_url:, original_url:, timestamp:) ⇒ Page
Returns a new instance of Page.
15 16 17 18 19 20 21 22 23 |
# File 'lib/archaeo/page.rb', line 15 def initialize(content:, content_type:, status_code:, archive_url:, original_url:, timestamp:) @raw_content = content @content_type = content_type @status_code = status_code @archive_url = archive_url @original_url = original_url @timestamp = Timestamp.coerce(timestamp) end |
Instance Attribute Details
#archive_url ⇒ Object (readonly)
Returns the value of attribute archive_url.
12 13 14 |
# File 'lib/archaeo/page.rb', line 12 def archive_url @archive_url end |
#content_type ⇒ Object (readonly)
Returns the value of attribute content_type.
12 13 14 |
# File 'lib/archaeo/page.rb', line 12 def content_type @content_type end |
#original_url ⇒ Object (readonly)
Returns the value of attribute original_url.
12 13 14 |
# File 'lib/archaeo/page.rb', line 12 def original_url @original_url end |
#status_code ⇒ Object (readonly)
Returns the value of attribute status_code.
12 13 14 |
# File 'lib/archaeo/page.rb', line 12 def status_code @status_code end |
#timestamp ⇒ Object (readonly)
Returns the value of attribute timestamp.
12 13 14 |
# File 'lib/archaeo/page.rb', line 12 def timestamp @timestamp end |
Instance Method Details
#as_json ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/archaeo/page.rb', line 82 def as_json(*) { content_type: @content_type, status_code: @status_code, archive_url: @archive_url, original_url: @original_url, timestamp: @timestamp.to_s, size: size, encoding: encoding.to_s, } end |
#binary? ⇒ Boolean
57 58 59 |
# File 'lib/archaeo/page.rb', line 57 def binary? !(text? || json? || html?) end |
#content ⇒ Object
25 26 27 |
# File 'lib/archaeo/page.rb', line 25 def content @content ||= transcode(@raw_content) end |
#css? ⇒ Boolean
53 54 55 |
# File 'lib/archaeo/page.rb', line 53 def css? @content_type&.include?("text/css") end |
#encoding ⇒ Object
33 34 35 |
# File 'lib/archaeo/page.rb', line 33 def encoding @encoding ||= detect_encoding end |
#html? ⇒ Boolean
37 38 39 |
# File 'lib/archaeo/page.rb', line 37 def html? @content_type&.include?("text/html") end |
#image? ⇒ Boolean
45 46 47 |
# File 'lib/archaeo/page.rb', line 45 def image? @content_type&.start_with?("image/") end |
#inspect ⇒ Object
94 95 96 |
# File 'lib/archaeo/page.rb', line 94 def inspect "#<#{self.class.name} #{@content_type} #{size} bytes>" end |
#json? ⇒ Boolean
41 42 43 |
# File 'lib/archaeo/page.rb', line 41 def json? @content_type&.include?("application/json") end |
#size ⇒ Object
29 30 31 |
# File 'lib/archaeo/page.rb', line 29 def size content.length end |
#text? ⇒ Boolean
49 50 51 |
# File 'lib/archaeo/page.rb', line 49 def text? @content_type&.start_with?("text/") end |
#title ⇒ Object
61 62 63 64 65 66 67 68 |
# File 'lib/archaeo/page.rb', line 61 def title @title ||= begin doc = Nokogiri::HTML(@raw_content) doc.at_css("title")&.text&.strip rescue StandardError nil end end |
#to_h ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/archaeo/page.rb', line 70 def to_h { content_type: @content_type, status_code: @status_code, archive_url: @archive_url, original_url: @original_url, timestamp: @timestamp, size: size, encoding: encoding.to_s, } end |