Class: Archaeo::CdxCache

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/cdx_cache.rb

Overview

Persists CDX API query results to disk for resume support.

Caches snapshot lists keyed by query parameters so that interrupted downloads can resume without re-querying CDX.

Constant Summary collapse

CACHE_DIR =
".cache"

Instance Method Summary collapse

Constructor Details

#initialize(base_dir) ⇒ CdxCache

Returns a new instance of CdxCache.



14
15
16
17
# File 'lib/archaeo/cdx_cache.rb', line 14

# Builds a cache rooted under the given directory.
#
# @param base_dir [String] directory that contains the cache directory;
#   cache files live in the CACHE_DIR subdirectory beneath it
def initialize(base_dir)
  @base_dir = base_dir
  @cache_dir = File.join(@base_dir, CACHE_DIR)
end

Instance Method Details

#cache_key(url, options = {}) ⇒ Object



36
37
38
39
40
41
42
43
44
45
# File 'lib/archaeo/cdx_cache.rb', line 36

# Derives a short, deterministic key identifying a CDX query.
#
# Each option that is present contributes a "name=value" field, so two
# queries that carry the same value under different options (for example
# `from: "2020"` versus `to: "2020"`) no longer hash to the same key.
# Options that are nil/false, and empty filter/collapse lists, contribute
# nothing, so they yield the same key as omitting the option entirely.
#
# @param url [#to_s] the queried URL
# @param options [Hash] query options; the keys read are :from, :to,
#   :match_type, :filters, :collapse and :sort
# @return [String] first 16 hex characters of the SHA-256 digest of the
#   "|"-joined fields
def cache_key(url, options = {})
  # The URL always occupies the first position, so it needs no label; this
  # also keeps keys for option-less queries identical to the unlabeled form.
  fields = [url.to_s]

  # Fixed iteration order keeps the key independent of the hash's own order.
  %i[from to match_type filters collapse sort].each do |name|
    value = options[name]
    next unless value

    # Only filters and collapse are treated as lists; the rest are scalars.
    values = %i[filters collapse].include?(name) ? Array(value) : [value]
    next if values.empty?

    fields << "#{name}=#{values.map(&:to_s).join(',')}"
  end

  Digest::SHA256.hexdigest(fields.join("|"))[0, 16]
end

#cached?(url, **options) ⇒ Boolean

Returns:

  • (Boolean)


32
33
34
# File 'lib/archaeo/cdx_cache.rb', line 32

# Reports whether a cache file exists for the given query.
#
# @param url [Object] query URL, forwarded to cache_key
# @param options [Hash] query options, forwarded to cache_key
# @return [Boolean] true when the path derived from the query's key exists
def cached?(url, **options)
  key = cache_key(url, options)
  File.exist?(cache_path(key))
end

#clear(url = nil, **options) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/archaeo/cdx_cache.rb', line 47

# Removes cached data from disk.
#
# Without a url the whole cache directory is removed recursively; with a
# url only the cache file for that query is removed. Both removals use the
# forcing FileUtils variants, so a missing target is not an error.
#
# @param url [Object, nil] query URL, or nil to wipe the entire cache
# @param options [Hash] query options, forwarded to cache_key
# @return [Object] the value returned by the FileUtils call that ran
def clear(url = nil, **options)
  return FileUtils.rm_rf(@cache_dir) unless url

  entry = cache_path(cache_key(url, options))
  FileUtils.rm_f(entry)
end

#fetch(url, **options) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/archaeo/cdx_cache.rb', line 19

# Returns the cached result for a query, computing and storing it on a miss.
#
# When the cache file for the query already exists, its contents are loaded
# via load_cache and the block is not called. Otherwise the block is called,
# its value is handed to save_cache together with the url and options, and
# that same value is returned.
#
# @param url [Object] query URL, forwarded to cache_key
# @param options [Hash] query options, forwarded to cache_key
# @yieldreturn [Object] the snapshots to cache when no cache file exists
# @return [Object] the loaded cache contents, or the block's value on a miss
def fetch(url, **options)
  path = cache_path(cache_key(url, options))
  return load_cache(path) if File.exist?(path)

  # Save before returning so the next call for this query hits the cache.
  yield.tap { |snapshots| save_cache(path, url, options, snapshots) }
end