Class: Archaeo::SaveApi

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/save_api.rb

Overview

Client for the Wayback Machine SavePageNow (SPN) API.

Requests that the Wayback Machine archive a URL, then returns the resulting archive URL and timestamp as a SaveResult.

Constant Summary collapse

ENDPOINT =
"https://web.archive.org/save"
DEFAULT_MAX_TRIES =
8
TIMESTAMP_RE =
%r{web\.archive\.org/web/(\d{14})}

Instance Method Summary collapse

Constructor Details

#initialize(client: HttpClient.new, max_tries: DEFAULT_MAX_TRIES) ⇒ SaveApi

Returns a new instance of SaveApi.



13
14
15
16
17
# File 'lib/archaeo/save_api.rb', line 13

# Builds a SaveApi, storing its collaborators for later use.
#
# @param client [Object] stored as @client; defaults to a fresh HttpClient
# @param max_tries [Integer] stored as @max_tries; defaults to DEFAULT_MAX_TRIES
def initialize(client: HttpClient.new, max_tries: DEFAULT_MAX_TRIES)
  @client, @max_tries = client, max_tries
end

Instance Method Details

#batch_save(urls, delay: 2, stop_on_error: false) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/archaeo/save_api.rb', line 26

# Saves each URL in order via #save and collects one result per URL.
#
# @param urls [Enumerable] URLs to save, processed in iteration order
# @param delay [Numeric] value passed to sleep between consecutive saves
#   (no pause before the first URL)
# @param stop_on_error [Boolean] when true, a RateLimitError or SaveFailed
#   is re-raised instead of being recorded
# @return [Array] whatever #save returned for each URL, or a SaveResult with
#   nil archive_url/timestamp for URLs whose save raised
# @raise [RateLimitError, SaveFailed] only when stop_on_error is true
def batch_save(urls, delay: 2, stop_on_error: false)
  urls.each_with_index.map do |url, index|
    # Throttle between requests; the very first one goes out immediately.
    sleep(delay) unless index.zero?

    begin
      save(url)
    rescue RateLimitError, SaveFailed => e
      raise e if stop_on_error

      # Placeholder keeps the output aligned one-to-one with the input URLs.
      SaveResult.new(url: url, archive_url: nil, timestamp: nil, cached: false)
    end
  end
end

#save(url) ⇒ Object



19
20
21
22
23
24
# File 'lib/archaeo/save_api.rb', line 19

# Normalizes the URL, builds the save request URL under ENDPOINT, and hands
# off to attempt_save together with the current UTC time.
#
# @param url [String] URL to archive; passed through UrlNormalizer.normalize
# @return [Object] whatever attempt_save returns
def save(url)
  normalized = UrlNormalizer.normalize(url)
  request_url = "#{ENDPOINT}/#{normalized}"
  attempt_save(request_url, Time.now.utc, normalized)
end