Class: Archaeo::PageBundle

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/archaeo/page_bundle.rb

Overview

A fetched page together with all its extracted asset URLs.

Bundles a Page with the AssetList discovered from its HTML, providing a single object for complete page archival.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(page:, assets:) ⇒ PageBundle

Returns a new instance of PageBundle.



15
16
17
18
# File 'lib/archaeo/page_bundle.rb', line 15

# Wraps a page and the asset collection that belongs to it.
#
# @param page [Object] the page to bundle; stored as-is
# @param assets [Object] the asset collection to bundle; stored as-is
def initialize(page:, assets:)
  @page, @assets = page, assets
end

Instance Attribute Details

#assets ⇒ Object (readonly)

Returns the value of attribute assets.



13
14
15
# File 'lib/archaeo/page_bundle.rb', line 13

# Read-only accessor for the bundled asset collection.
#
# @return [Object] the current value of +@assets+
def assets = @assets

#page ⇒ Object (readonly)

Returns the value of attribute page.



13
14
15
# File 'lib/archaeo/page_bundle.rb', line 13

# Read-only accessor for the bundled page.
#
# @return [Object] the current value of +@page+
def page = @page

Instance Method Details

#as_json(*) ⇒ Object



36
37
38
# File 'lib/archaeo/page_bundle.rb', line 36

# Builds the Hash representation that #to_json serializes.
#
# Any positional arguments are accepted and ignored.
#
# @return [Hash] +:page+ maps to the page's +as_json+ result and
#   +:assets+ maps to the asset collection's +to_h+ result
def as_json(*)
  page_payload = @page.as_json
  asset_payload = @assets.to_h
  { page: page_payload, assets: asset_payload }
end

#asset_count ⇒ Object



28
29
30
# File 'lib/archaeo/page_bundle.rb', line 28

# Number of assets in the bundle, as reported by the asset collection.
#
# @return [Object] whatever +assets.size+ returns
def asset_count = assets.size

#download_assets(output_dir:, client: HttpClient.new) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/archaeo/page_bundle.rb', line 44

# Downloads every asset URL into +output_dir+ on a best-effort basis.
#
# Each asset is written to "<name>.tmp" first and then renamed into place,
# so a failed download does not leave a partial file under the final name.
# The file name is the basename of the URL path, so two assets that share a
# basename end up at the same destination (the later one replaces the earlier).
#
# Any StandardError raised while handling one URL (unparsable URL, failed
# request, write/rename error) is swallowed and that asset is skipped; the
# remaining assets are still processed.
#
# @param output_dir [String] directory to write into; created if missing
# @param client [#get] object whose +get(url)+ returns a response with +body+
# @return [Object] whatever +@assets.all.each+ returns
def download_assets(output_dir:, client: HttpClient.new)
  FileUtils.mkdir_p(output_dir)
  @assets.all.each do |url|
    # Start from nil so the rescue can tell whether a temp path was ever built.
    tmp_path = nil
    filename = File.join(output_dir,
                         File.basename(URI.parse(url).path))
    tmp_path = "#{filename}.tmp"
    response = client.get(url)
    File.binwrite(tmp_path, response.body)
    File.rename(tmp_path, filename)
  rescue StandardError
    # `defined?(tmp_path)` is always truthy for a local declared in this block,
    # even when the assignment never ran. Check the value instead: passing nil
    # to FileUtils.rm_f raises TypeError, which would escape this rescue and
    # abort the remaining downloads.
    FileUtils.rm_f(tmp_path) if tmp_path
  end
end

#each(&block) ⇒ Object



20
21
22
# File 'lib/archaeo/page_bundle.rb', line 20

# Yields each bundled asset by delegating to the asset collection's +each+.
#
# @param block [Proc] block forwarded unchanged to +assets.each+
# @return [Object] whatever +assets.each+ returns
def each(&block) = assets.each(&block)

#size ⇒ Object



24
25
26
# File 'lib/archaeo/page_bundle.rb', line 24

# Size of the bundle: the asset collection's size plus one.
#
# NOTE(review): the extra one presumably accounts for the page itself —
# confirm. This differs from #asset_count, which does not add one.
#
# @return [Integer] +assets.size + 1+
def size = assets.size + 1

#to_h ⇒ Object



32
33
34
# File 'lib/archaeo/page_bundle.rb', line 32

# Converts the bundle to a Hash by calling +to_h+ on both parts.
#
# @return [Hash] +:page+ maps to the page's +to_h+ result and
#   +:assets+ maps to the asset collection's +to_h+ result
def to_h
  page_hash = @page.to_h
  assets_hash = @assets.to_h
  { page: page_hash, assets: assets_hash }
end

#to_json(*args) ⇒ Object



40
41
42
# File 'lib/archaeo/page_bundle.rb', line 40

# Serializes the bundle to a JSON string.
#
# @param args [Array] extra arguments forwarded unchanged to +JSON.generate+
# @return [String] JSON text generated from #as_json
def to_json(*args)
  payload = as_json
  JSON.generate(payload, *args)
end