Class: Archaeo::Cli

Inherits:
Thor
  • Object
show all
Defined in:
lib/archaeo/cli.rb

Overview

Command-line interface powered by Thor.

Constant Summary collapse

# Frozen symbol-to-symbol lookup table. Every key maps to the identical
# symbol except :filter, which maps to the plural :filters.
# NOTE(review): presumably translates CLI option names into CDX query
# keyword names; the code that reads this table is not shown here — confirm.
CDX_OPTION_MAP =
{
  from: :from,
  to: :to,
  match_type: :match_type,
  filter: :filters,
  collapse: :collapse,
  sort: :sort,
  limit: :limit,
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.exit_on_failure? ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
# File 'lib/archaeo/cli.rb', line 18

# Class-level predicate that always answers true.
# NOTE(review): Thor (the superclass) is expected to consult this hook to
# decide whether a failed command ends the process with a non-zero exit
# status — confirm against the Thor version in use.
#
# @return [Boolean] always true
def self.exit_on_failure?
  true
end

Instance Method Details

#after(url, timestamp) ⇒ Object



97
98
99
100
101
102
# File 'lib/archaeo/cli.rb', line 97

# Asks CdxApi#after for a snapshot of +url+, using +timestamp+ as the
# reference point, and hands the result to output_snapshot. The whole
# lookup runs inside handle_errors.
#
# @param url [Object] passed through to CdxApi#after unchanged
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def after(url, timestamp)
  handle_errors { output_snapshot(CdxApi.new.after(url, timestamp: timestamp)) }
end

#asset_audit(url, timestamp) ⇒ Object



242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/archaeo/cli.rb', line 242

# Fetches the page and its assets for +url+ at +timestamp+, turns the
# bundle into a report with build_audit_report, and emits it: as a JSON
# document on stdout when options[:format] is "json", otherwise through
# print_audit_report. Runs inside handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch_page_with_assets
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def asset_audit(url, timestamp)
  handle_errors do
    report = build_audit_report(
      Fetcher.new.fetch_page_with_assets(url, timestamp: timestamp),
    )
    if options[:format] == "json"
      puts JSON.generate(report)
    else
      print_audit_report(report)
    end
  end
end

#available(url) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/archaeo/cli.rb', line 119

# Queries AvailabilityApi#near for +url+ (with options[:timestamp]) and
# reports the outcome on stdout. Prints the archive URL when the result
# says it is available; otherwise prints "Not available" and calls
# +exit 1+. Runs inside handle_errors.
#
# @param url [Object] forwarded to AvailabilityApi#near
def available(url)
  handle_errors do
    api = AvailabilityApi.new
    lookup = api.near(url, timestamp: options[:timestamp])
    if lookup.available?
      puts "Available: #{lookup.archive_url}"
      next
    end

    puts "Not available"
    exit 1
  end
end

#before(url, timestamp) ⇒ Object



87
88
89
90
91
92
# File 'lib/archaeo/cli.rb', line 87

# Asks CdxApi#before for a snapshot of +url+, using +timestamp+ as the
# reference point, and hands the result to output_snapshot. The whole
# lookup runs inside handle_errors.
#
# @param url [Object] passed through to CdxApi#before unchanged
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def before(url, timestamp)
  handle_errors { output_snapshot(CdxApi.new.before(url, timestamp: timestamp)) }
end

#between(url, from, to) ⇒ Object



108
109
110
111
112
113
114
115
# File 'lib/archaeo/cli.rb', line 108

# Lists the snapshots CdxApi#between yields for +url+ within +from+..+to+
# and renders them with output_formatted. The output format is resolved by
# validate_output_format before handle_errors is entered, so that call is
# not covered by the error wrapper.
#
# @param url [Object] forwarded to CdxApi#between
# @param from [Object] forwarded as the +from:+ keyword
# @param to [Object] forwarded as the +to:+ keyword
def between(url, from, to)
  chosen_format = validate_output_format
  handle_errors do
    in_range = CdxApi.new.between(url, from: from, to: to)
    output_formatted(in_range.to_a, chosen_format)
  end
end

#count(url) ⇒ Object



347
348
349
350
351
# File 'lib/archaeo/cli.rb', line 347

# Prints whatever CdxApi#count returns for +url+ to stdout, inside
# handle_errors.
#
# @param url [Object] forwarded to CdxApi#count
def count(url)
  handle_errors { puts CdxApi.new.count(url) }
end

#coverage(url) ⇒ Object



358
359
360
361
362
363
364
# File 'lib/archaeo/cli.rb', line 358

# Runs CoverageAnalyzer#analyze for +url+, bounded by options[:from] and
# options[:to], and passes the resulting report to output_coverage. Runs
# inside handle_errors.
#
# @param url [Object] forwarded to CoverageAnalyzer#analyze
def coverage(url)
  handle_errors do
    output_coverage(
      CoverageAnalyzer.new.analyze(url, from: options[:from], to: options[:to]),
    )
  end
end

#diff(url, timestamp_a, timestamp_b) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/archaeo/cli.rb', line 226

# Fetches +url+ with its assets at two timestamps (a fresh Fetcher per
# fetch, first +timestamp_a+ then +timestamp_b+) and passes both asset
# collections plus the two timestamps to output_diff. Runs inside
# handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch_page_with_assets
# @param timestamp_a [Object] timestamp of the first bundle
# @param timestamp_b [Object] timestamp of the second bundle
def diff(url, timestamp_a, timestamp_b)
  handle_errors do
    bundles = [timestamp_a, timestamp_b].map do |stamp|
      Fetcher.new.fetch_page_with_assets(url, timestamp: stamp)
    end
    output_diff(*bundles.map(&:assets), timestamp_a, timestamp_b)
  end
end

#download(url) ⇒ Object



287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/archaeo/cli.rb', line 287

# Builds a RateLimiter (options[:rate_limit] converted with #to_f), a
# filter from build_filter and a BulkDownloader configured from
# options[:output] and options[:concurrency], then delegates the actual
# work to download_with_progress. Runs inside handle_errors.
#
# @param url [Object] handed to download_with_progress
def download(url)
  handle_errors do
    throttle = RateLimiter.new(min_interval: options[:rate_limit].to_f)
    url_filter = build_filter
    settings = {
      output_dir: options[:output],
      concurrency: options[:concurrency],
      rate_limiter: throttle,
    }
    download_with_progress(BulkDownloader.new(**settings), url, url_filter)
  end
end

#fetch(url, timestamp) ⇒ Object



157
158
159
160
161
162
163
164
165
# File 'lib/archaeo/cli.rb', line 157

# Retrieves +url+ at +timestamp+ through Fetcher#fetch, forwarding
# options[:identity] as the +identity:+ keyword, and passes the page to
# output_page. Runs inside handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def fetch(url, timestamp)
  handle_errors do
    fetcher = Fetcher.new
    request = { timestamp: timestamp, identity: options[:identity] }
    output_page(fetcher.fetch(url, **request))
  end
end

#fetch_assets(url, timestamp) ⇒ Object



170
171
172
173
174
175
176
177
# File 'lib/archaeo/cli.rb', line 170

# Fetches +url+ together with its assets at +timestamp+ and hands the
# resulting bundle to output_assets. Runs inside handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch_page_with_assets
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def fetch_assets(url, timestamp)
  handle_errors do
    output_assets(Fetcher.new.fetch_page_with_assets(url, timestamp: timestamp))
  end
end

#health(url) ⇒ Object



307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/archaeo/cli.rb', line 307

# Runs ArchiveHealthCheck#check for +url+ with the +from+, +to+ and
# +sample+ values read from options, then passes the report to
# output_health. Runs inside handle_errors.
#
# @param url [Object] forwarded to ArchiveHealthCheck#check
def health(url)
  handle_errors do
    checker = ArchiveHealthCheck.new
    scope = %i[from to sample].to_h { |key| [key, options[key]] }
    output_health(checker.check(url, **scope))
  end
end

#known_urls(domain) ⇒ Object



325
326
327
328
329
330
331
332
333
334
335
# File 'lib/archaeo/cli.rb', line 325

# Retrieves the URLs CdxApi#known_urls reports for +domain+. The match
# type is "domain" when options[:subdomain] is truthy and "prefix"
# otherwise. The URLs go to save_urls_to_file when options[:file] is set;
# if not, each one is printed on its own line. Runs inside handle_errors.
#
# @param domain [Object] forwarded to CdxApi#known_urls
def known_urls(domain)
  handle_errors do
    scope = options[:subdomain] ? "domain" : "prefix"
    urls = CdxApi.new.known_urls(domain, match_type: scope)
    target = options[:file]
    next save_urls_to_file(urls, target) if target

    urls.each { |entry| puts entry }
  end
end

#near(url, timestamp) ⇒ Object



59
60
61
62
63
64
# File 'lib/archaeo/cli.rb', line 59

# Asks CdxApi#near for a snapshot of +url+, using +timestamp+ as the
# reference point, and hands the result to output_snapshot. The whole
# lookup runs inside handle_errors.
#
# @param url [Object] passed through to CdxApi#near unchanged
# @param timestamp [Object] forwarded as the +timestamp:+ keyword
def near(url, timestamp)
  handle_errors { output_snapshot(CdxApi.new.near(url, timestamp: timestamp)) }
end

#newest(url) ⇒ Object



77
78
79
80
81
82
# File 'lib/archaeo/cli.rb', line 77

# Hands the result of CdxApi#newest for +url+ to output_snapshot, inside
# handle_errors.
#
# @param url [Object] forwarded to CdxApi#newest
def newest(url)
  handle_errors { output_snapshot(CdxApi.new.newest(url)) }
end

#num_pages(url) ⇒ Object



339
340
341
342
343
# File 'lib/archaeo/cli.rb', line 339

# Prints whatever CdxApi#num_pages returns for +url+ to stdout, inside
# handle_errors.
#
# @param url [Object] forwarded to CdxApi#num_pages
def num_pages(url)
  handle_errors { puts CdxApi.new.num_pages(url) }
end

#oldest(url) ⇒ Object



68
69
70
71
72
73
# File 'lib/archaeo/cli.rb', line 68

# Hands the result of CdxApi#oldest for +url+ to output_snapshot, inside
# handle_errors.
#
# @param url [Object] forwarded to CdxApi#oldest
def oldest(url)
  handle_errors { output_snapshot(CdxApi.new.oldest(url)) }
end

#rewrite(url, timestamp) ⇒ Object



187
188
189
190
191
192
193
194
195
# File 'lib/archaeo/cli.rb', line 187

# Fetches +url+ at +timestamp+ and emits a rewritten copy of its HTML.
# The timestamp is first normalised with Timestamp.coerce; that coerced
# value is used both for the fetch and for build_rewriter. The page's
# +content+ is run through the rewriter's rewrite_html and the result is
# passed to output_rewritten. Runs inside handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch and build_rewriter
# @param timestamp [Object] input for Timestamp.coerce
def rewrite(url, timestamp)
  handle_errors do
    stamp = Timestamp.coerce(timestamp)
    archived = Fetcher.new.fetch(url, timestamp: stamp)
    html = build_rewriter(url, stamp).rewrite_html(archived.content)
    output_rewritten(html)
  end
end

#rewrite_local(input_dir) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/archaeo/cli.rb', line 206

# Rewrites the files under +input_dir+ with a LocalRewriter configured
# from options[:prefix], options[:rewrite_js] and
# options[:rewrite_absolute]. Output goes to options[:output], falling
# back to +input_dir+ itself when that option is not set. Afterwards a
# success line (rewritten/total counts and elapsed seconds rounded to one
# decimal) is written with +warn+. Runs inside handle_errors.
#
# @param input_dir [Object] source directory handed to
#   LocalRewriter#rewrite_directory
def rewrite_local(input_dir)
  handle_errors do
    destination = options[:output] || input_dir
    settings = %i[prefix rewrite_js rewrite_absolute].to_h do |key|
      [key, options[key]]
    end
    stats = LocalRewriter.new(**settings)
                         .rewrite_directory(input_dir, destination)
    painter = build_color
    done, total, secs = stats.rewritten, stats.total, stats.elapsed.round(1)
    warn painter.success("Rewrote #{done}/#{total} files in #{secs}s")
  end
end

#save(url) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/archaeo/cli.rb', line 136

# Submits +url+ through SaveApi#save and prints the archive URL, labelled
# "Cached" when the result reports cached? and "Saved" otherwise. When
# options[:headers] is set and the result carries response headers, the
# status code, the response URL (if any) and every header pair are
# printed as well. Runs inside handle_errors.
#
# @param url [Object] forwarded to SaveApi#save
def save(url)
  handle_errors do
    outcome = SaveApi.new.save(url)
    verb = if outcome.cached?
             "Cached"
           else
             "Saved"
           end
    puts "#{verb}: #{outcome.archive_url}"
    # Header details are opt-in and only meaningful when headers exist.
    next unless options[:headers] && outcome.response_headers

    puts "Status: #{outcome.status_code}"
    puts "Response URL: #{outcome.response_url}" if outcome.response_url
    puts "Headers:"
    outcome.response_headers.each { |name, value| puts "  #{name}: #{value}" }
  end
end

#search(url, query) ⇒ Object



389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/archaeo/cli.rb', line 389

# Runs ArchiveSearch#search for +query+ against +url+, forwarding the
# +from+, +to+, +max_results+ and +case_sensitive+ values read from
# options, and passes the results to output_search_results. Runs inside
# handle_errors.
#
# @param url [Object] forwarded to ArchiveSearch#search
# @param query [Object] forwarded as the +query:+ keyword
def search(url, query)
  handle_errors do
    criteria = { query: query }
    %i[from to max_results case_sensitive].each do |key|
      criteria[key] = options[key]
    end
    output_search_results(ArchiveSearch.new.search(url, **criteria))
  end
end

#snapshot_diff(url, timestamp_a, timestamp_b) ⇒ Object



369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/archaeo/cli.rb', line 369

# Fetches +url+ at two timestamps with a single Fetcher (first
# +timestamp_a+, then +timestamp_b+), wraps both pages in a SnapshotDiff
# and passes it to output_snapshot_diff. Runs inside handle_errors.
#
# @param url [Object] forwarded to Fetcher#fetch and SnapshotDiff.new
# @param timestamp_a [Object] timestamp of the first page
# @param timestamp_b [Object] timestamp of the second page
def snapshot_diff(url, timestamp_a, timestamp_b)
  handle_errors do
    fetcher = Fetcher.new
    page_a, page_b = [timestamp_a, timestamp_b].map do |stamp|
      fetcher.fetch(url, timestamp: stamp)
    end
    comparison = SnapshotDiff.new(
      url: url,
      page_a: page_a,
      page_b: page_b,
      timestamp_a: timestamp_a,
      timestamp_b: timestamp_b,
    )
    output_snapshot_diff(comparison)
  end
end

#snapshots(url) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/archaeo/cli.rb', line 48

# Renders the snapshots returned by fetch_snapshots for +url+ with
# output_formatted. The output format is resolved by
# validate_output_format before handle_errors is entered, so that call is
# not covered by the error wrapper.
#
# @param url [Object] forwarded to fetch_snapshots
def snapshots(url)
  chosen_format = validate_output_format
  handle_errors { output_formatted(fetch_snapshots(url), chosen_format) }
end

#track_changes(url) ⇒ Object



407
408
409
410
411
412
413
# File 'lib/archaeo/cli.rb', line 407

# Runs ContentTracker#track for +url+, bounded by options[:from] and
# options[:to], and passes the report to output_content_changes. Runs
# inside handle_errors.
#
# @param url [Object] forwarded to ContentTracker#track
def track_changes(url)
  handle_errors do
    output_content_changes(
      ContentTracker.new.track(url, from: options[:from], to: options[:to]),
    )
  end
end

#version ⇒ Object



23
24
25
# File 'lib/archaeo/cli.rb', line 23

# Prints the program name followed by the VERSION constant to stdout.
# Unlike the other commands shown for this class, this one is not wrapped
# in handle_errors.
def version
  puts "archaeo #{VERSION}"
end

#warc_export(url) ⇒ Object



421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# File 'lib/archaeo/cli.rb', line 421

# Exports archived snapshots of +url+ into a WARC file.
#
# Snapshots are listed through CdxApi#snapshots (restricted by
# options[:from] / options[:to] when those are set), reduced to the ones
# reporting success?, and each is fetched with a shared Fetcher. The
# fetched pages are written by WarcWriter#write to options[:output],
# compressed according to options[:gzip]. Runs inside handle_errors.
#
# The export is best-effort: a snapshot whose fetch raises Error is left
# out instead of aborting the run. Each such failure is reported with
# +warn+, and when fewer pages were written than snapshots were selected,
# a closing line states the shortfall so a partial export is never
# mistaken for a complete one.
#
# @param url [Object] forwarded to CdxApi#snapshots
def warc_export(url)
  handle_errors do
    fetcher = Fetcher.new
    cdx = CdxApi.new
    # Only pass the range bounds that were actually supplied.
    range = { from: options[:from], to: options[:to] }.select { |_, bound| bound }
    snapshots = cdx.snapshots(url, **range).select(&:success?).to_a

    pages = snapshots.filter_map do |snap|
      fetcher.fetch(snap.original_url, timestamp: snap.timestamp)
    rescue Error => e
      # Keep going, but say which snapshot was lost and why.
      warn "Skipped #{snap.original_url} @ #{snap.timestamp}: #{e.message}"
      nil
    end

    WarcWriter.new.write(options[:output], pages,
                         compress: options[:gzip])
    color = build_color
    warn color.success("Exported #{pages.size} snapshots to #{options[:output]}")

    missing = snapshots.size - pages.size
    if missing.positive?
      warn "#{missing} of #{snapshots.size} snapshots could not be fetched"
    end
  end
end