Class: Archaeo::Cli
- Inherits:
- Thor
- Object
- Thor
- Archaeo::Cli
- Defined in:
- lib/archaeo/cli.rb
Overview
Command-line interface powered by Thor.
Constant Summary collapse
- CDX_OPTION_MAP =
{ from: :from, to: :to, match_type: :match_type, filter: :filters, collapse: :collapse, sort: :sort, limit: :limit, }.freeze
Class Method Summary collapse
Instance Method Summary collapse
- #after(url, timestamp) ⇒ Object
- #asset_audit(url, timestamp) ⇒ Object
- #available(url) ⇒ Object
- #before(url, timestamp) ⇒ Object
- #between(url, from, to) ⇒ Object
- #count(url) ⇒ Object
- #coverage(url) ⇒ Object
- #diff(url, timestamp_a, timestamp_b) ⇒ Object
- #download(url) ⇒ Object
- #fetch(url, timestamp) ⇒ Object
- #fetch_assets(url, timestamp) ⇒ Object
- #health(url) ⇒ Object
- #known_urls(domain) ⇒ Object
- #near(url, timestamp) ⇒ Object
- #newest(url) ⇒ Object
- #num_pages(url) ⇒ Object
- #oldest(url) ⇒ Object
- #rewrite(url, timestamp) ⇒ Object
- #save(url) ⇒ Object
- #search(url, query) ⇒ Object
- #snapshot_diff(url, timestamp_a, timestamp_b) ⇒ Object
- #snapshots(url) ⇒ Object
- #track_changes(url) ⇒ Object
- #version ⇒ Object
- #warc_export(url) ⇒ Object
Class Method Details
.exit_on_failure? ⇒ Boolean
18 19 20 |
# File 'lib/archaeo/cli.rb', line 18 def self.exit_on_failure? true end |
Instance Method Details
#after(url, timestamp) ⇒ Object
97 98 99 100 101 102 |
# File 'lib/archaeo/cli.rb', line 97 def after(url, timestamp) handle_errors do snap = CdxApi.new.after(url, timestamp: timestamp) output_snapshot(snap) end end |
#asset_audit(url, timestamp) ⇒ Object
216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/archaeo/cli.rb', line 216 def asset_audit(url, timestamp) handle_errors do bundle = Fetcher.new.fetch_page_with_assets( url, timestamp: timestamp ) report = build_audit_report(bundle) case options[:format] when "json" puts JSON.generate(report) else print_audit_report(report) end end end |
#available(url) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/archaeo/cli.rb', line 119 def available(url) handle_errors do result = AvailabilityApi.new.near( url, timestamp: options[:timestamp] ) if result.available? puts "Available: #{result.archive_url}" else puts "Not available" exit 1 end end end |
#before(url, timestamp) ⇒ Object
87 88 89 90 91 92 |
# File 'lib/archaeo/cli.rb', line 87 def before(url, timestamp) handle_errors do snap = CdxApi.new.before(url, timestamp: timestamp) output_snapshot(snap) end end |
#between(url, from, to) ⇒ Object
108 109 110 111 112 113 114 115 |
# File 'lib/archaeo/cli.rb', line 108 def between(url, from, to) fmt = validate_output_format handle_errors do cdx = CdxApi.new snaps = cdx.between(url, from: from, to: to).to_a output_formatted(snaps, fmt) end end |
#count(url) ⇒ Object
321 322 323 324 325 |
# File 'lib/archaeo/cli.rb', line 321 def count(url) handle_errors do puts CdxApi.new.count(url) end end |
#coverage(url) ⇒ Object
332 333 334 335 336 337 338 |
# File 'lib/archaeo/cli.rb', line 332 def coverage(url) handle_errors do analyzer = CoverageAnalyzer.new report = analyzer.analyze(url, from: options[:from], to: options[:to]) output_coverage(report) end end |
#diff(url, timestamp_a, timestamp_b) ⇒ Object
200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/archaeo/cli.rb', line 200 def diff(url, timestamp_a, timestamp_b) handle_errors do bundle_a = Fetcher.new.fetch_page_with_assets( url, timestamp: timestamp_a ) bundle_b = Fetcher.new.fetch_page_with_assets( url, timestamp: timestamp_b ) output_diff(bundle_a.assets, bundle_b.assets, timestamp_a, timestamp_b) end end |
#download(url) ⇒ Object
261 262 263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/archaeo/cli.rb', line 261 def download(url) handle_errors do rate_limiter = RateLimiter.new( min_interval: options[:rate_limit].to_f, ) filter = build_filter downloader = BulkDownloader.new( output_dir: options[:output], concurrency: options[:concurrency], rate_limiter: rate_limiter, ) download_with_progress(downloader, url, filter) end end |
#fetch(url, timestamp) ⇒ Object
157 158 159 160 161 162 163 164 165 |
# File 'lib/archaeo/cli.rb', line 157 def fetch(url, timestamp) handle_errors do page = Fetcher.new.fetch( url, timestamp: timestamp, identity: options[:identity] ) output_page(page) end end |
#fetch_assets(url, timestamp) ⇒ Object
170 171 172 173 174 175 176 177 |
# File 'lib/archaeo/cli.rb', line 170 def fetch_assets(url, timestamp) handle_errors do bundle = Fetcher.new.fetch_page_with_assets( url, timestamp: timestamp ) output_assets(bundle) end end |
#health(url) ⇒ Object
281 282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/archaeo/cli.rb', line 281 def health(url) handle_errors do checker = ArchiveHealthCheck.new report = checker.check( url, from: options[:from], to: options[:to], sample: options[:sample], ) output_health(report) end end |
#known_urls(domain) ⇒ Object
299 300 301 302 303 304 305 306 307 308 309 |
# File 'lib/archaeo/cli.rb', line 299 def known_urls(domain) handle_errors do match_type = options[:subdomain] ? "domain" : "prefix" urls = CdxApi.new.known_urls(domain, match_type: match_type) if options[:file] save_urls_to_file(urls, options[:file]) else urls.each { |u| puts u } end end end |
#near(url, timestamp) ⇒ Object
59 60 61 62 63 64 |
# File 'lib/archaeo/cli.rb', line 59 def near(url, timestamp) handle_errors do snap = CdxApi.new.near(url, timestamp: timestamp) output_snapshot(snap) end end |
#newest(url) ⇒ Object
77 78 79 80 81 82 |
# File 'lib/archaeo/cli.rb', line 77 def newest(url) handle_errors do snap = CdxApi.new.newest(url) output_snapshot(snap) end end |
#num_pages(url) ⇒ Object
313 314 315 316 317 |
# File 'lib/archaeo/cli.rb', line 313 def num_pages(url) handle_errors do puts CdxApi.new.num_pages(url) end end |
#oldest(url) ⇒ Object
68 69 70 71 72 73 |
# File 'lib/archaeo/cli.rb', line 68 def oldest(url) handle_errors do snap = CdxApi.new.oldest(url) output_snapshot(snap) end end |
#rewrite(url, timestamp) ⇒ Object
187 188 189 190 191 192 193 194 195 |
# File 'lib/archaeo/cli.rb', line 187 def rewrite(url, timestamp) handle_errors do coerced = Timestamp.coerce(timestamp) page = Fetcher.new.fetch(url, timestamp: coerced) rewriter = build_rewriter(url, coerced) rewritten = rewriter.rewrite_html(page.content) output_rewritten(rewritten) end end |
#save(url) ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/archaeo/cli.rb', line 136 def save(url) handle_errors do result = SaveApi.new.save(url) label = result.cached? ? "Cached" : "Saved" puts "#{label}: #{result.archive_url}" if options[:headers] && result.response_headers puts "Status: #{result.status_code}" puts "Response URL: #{result.response_url}" if result.response_url puts "Headers:" result.response_headers.each do |k, v| puts "  #{k}: #{v}" end end end end |
#search(url, query) ⇒ Object
363 364 365 366 367 368 369 370 371 372 373 374 |
# File 'lib/archaeo/cli.rb', line 363 def search(url, query) handle_errors do searcher = ArchiveSearch.new results = searcher.search( url, query: query, from: options[:from], to: options[:to], max_results: options[:max_results], case_sensitive: options[:case_sensitive] ) output_search_results(results) end end |
#snapshot_diff(url, timestamp_a, timestamp_b) ⇒ Object
343 344 345 346 347 348 349 350 351 352 353 354 |
# File 'lib/archaeo/cli.rb', line 343 def snapshot_diff(url, timestamp_a, timestamp_b) handle_errors do fetcher = Fetcher.new page_a = fetcher.fetch(url, timestamp: timestamp_a) page_b = fetcher.fetch(url, timestamp: timestamp_b) diff = SnapshotDiff.new( url: url, page_a: page_a, page_b: page_b, timestamp_a: timestamp_a, timestamp_b: timestamp_b ) output_snapshot_diff(diff) end end |
#snapshots(url) ⇒ Object
48 49 50 51 52 53 54 |
# File 'lib/archaeo/cli.rb', line 48 def snapshots(url) fmt = validate_output_format handle_errors do snaps = fetch_snapshots(url) output_formatted(snaps, fmt) end end |
#track_changes(url) ⇒ Object
381 382 383 384 385 386 387 |
# File 'lib/archaeo/cli.rb', line 381 def track_changes(url) handle_errors do tracker = ContentTracker.new report = tracker.track(url, from: options[:from], to: options[:to]) output_content_changes(report) end end |
#version ⇒ Object
23 24 25 |
# File 'lib/archaeo/cli.rb', line 23 def version puts "archaeo #{VERSION}" end |
#warc_export(url) ⇒ Object
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 |
# File 'lib/archaeo/cli.rb', line 395 def warc_export(url) handle_errors do fetcher = Fetcher.new cdx = CdxApi.new opts = {} opts[:from] = options[:from] if options[:from] opts[:to] = options[:to] if options[:to] snapshots = cdx.snapshots(url, **opts) .select(&:success?).to_a pages = snapshots.filter_map do |snap| fetcher.fetch(snap.original_url, timestamp: snap.timestamp) rescue Error nil end WarcWriter.new.write(options[:output], pages, compress: options[:gzip]) color = build_color warn color.success("Exported #{pages.size} snapshots to #{options[:output]}") end end |