Class: Archaeo::DownloadState

Inherits:
Object
  • Object
show all
Defined in:
lib/archaeo/download_state.rb

Overview

Tracks download progress for resume support.

Persists completed snapshot metadata to a JSONL state file within the output directory, allowing interrupted downloads to resume without re-fetching already downloaded snapshots.

Constant Summary collapse

STATE_FILE =
".archaeo-state"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output_dir) ⇒ DownloadState

Returns a new instance of DownloadState.



17
18
19
20
21
# File 'lib/archaeo/download_state.rb', line 17

# Sets up state tracking for one output directory.
#
# Nothing is read from disk here; only the state-file path is computed and
# the lock that guards the in-memory entries is created.
#
# @param output_dir [String] directory that holds the downloads and the
#   state file (STATE_FILE is joined onto it)
def initialize(output_dir)
  @mutex = Mutex.new
  @output_dir = output_dir
  @path = File.join(@output_dir, STATE_FILE)
end

Instance Attribute Details

#output_dir ⇒ Object (readonly)

Returns the value of attribute output_dir.



15
16
17
# File 'lib/archaeo/download_state.rb', line 15

# Reader for the output directory this state object was created with.
#
# @return [Object] the value stored in @output_dir
def output_dir
  @output_dir
end

Instance Method Details

#cleanup_stale(base_dir: @output_dir) ⇒ Object



82
83
84
85
86
87
88
89
90
# File 'lib/archaeo/download_state.rb', line 82

# Drops every recorded entry whose file can no longer be found under
# +base_dir+, then calls +save+ so the pruned list is written out.
#
# The staleness check is done inline instead of delegating to
# #stale_entries: that method acquires @mutex itself, and Ruby's Mutex is
# not reentrant, so calling it from inside this synchronize block raises
# ThreadError (recursive locking).
#
# @param base_dir [String] directory passed to +find_file+ when locating
#   each entry's file (defaults to the output directory)
# @return [Integer] number of entries that were removed
def cleanup_stale(base_dir: @output_dir)
  @mutex.synchronize do
    # An entry is kept only when find_file yields a path that exists.
    kept, stale = entries.partition do |entry|
      path = find_file(base_dir, entry["ts"])
      path && File.exist?(path)
    end

    @entries = kept
    @entries_key = nil # invalidate the cached timestamp lookup
    save
    stale.size
  end
end

#clear ⇒ Object



57
58
59
60
61
62
63
# File 'lib/archaeo/download_state.rb', line 57

# Forgets all recorded progress: empties the in-memory entry list, drops
# the cached timestamp lookup and deletes the state file at @path
# (rm_f, so a missing file is not an error).
#
# @return [Object] whatever FileUtils.rm_f returns
def clear
  @mutex.synchronize do
    @entries, @entries_key = [], nil
    FileUtils.rm_f(@path)
  end
end

#completed?(timestamp) ⇒ Boolean

Returns:

  • (Boolean)


23
24
25
# File 'lib/archaeo/download_state.rb', line 23

# Tells whether a snapshot timestamp has already been recorded.
#
# @param timestamp [#to_s] snapshot timestamp; compared in string form
# @return [Boolean] result of looking the string up in +entries_key+
def completed?(timestamp)
  @mutex.synchronize do
    key = timestamp.to_s
    entries_key.include?(key)
  end
end

#entry_for(timestamp) ⇒ Object



41
42
43
# File 'lib/archaeo/download_state.rb', line 41

# Looks up the recorded entry for a snapshot timestamp.
#
# @param timestamp [#to_s] snapshot timestamp; compared in string form
#   against each entry's "ts" value
# @return [Hash, nil] the first matching entry, or nil when none matches
def entry_for(timestamp)
  @mutex.synchronize do
    wanted = timestamp.to_s
    entries.find { |entry| entry["ts"] == wanted }
  end
end

#file_exists?(timestamp, base_dir: @output_dir) ⇒ Boolean

Returns:

  • (Boolean)


65
66
67
68
69
70
71
# File 'lib/archaeo/download_state.rb', line 65

# Tells whether a recorded snapshot still has its file on disk.
#
# Returns false when the timestamp has no recorded entry, when +find_file+
# yields no path, or when the path it yields does not exist. The nil guard
# matches the check in #stale_entries; without it a nil path would make
# File.exist? raise TypeError instead of answering false.
#
# @param timestamp [#to_s] snapshot timestamp
# @param base_dir [String] directory passed to +find_file+ (defaults to
#   the output directory)
# @return [Boolean]
def file_exists?(timestamp, base_dir: @output_dir)
  return false unless entry_for(timestamp)

  file_path = find_file(base_dir, timestamp.to_s)
  file_path ? File.exist?(file_path) : false
end

#mark_completed(timestamp, url: nil, bytes: nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/archaeo/download_state.rb', line 27

# Records a snapshot as downloaded and calls +save+.
#
# Does nothing when the timestamp is already recorded. Otherwise appends an
# entry with the timestamp ("ts"), the current UTC time in ISO 8601 form
# ("at"), and the optional "url" / "bytes" values when they are truthy.
#
# @param timestamp [#to_s] snapshot timestamp; stored in string form
# @param url [String, nil] source URL to store with the entry
# @param bytes [Integer, nil] downloaded size to store with the entry
# @return [Object, nil] nil when already recorded, else the result of +save+
def mark_completed(timestamp, url: nil, bytes: nil)
  @mutex.synchronize do
    key = timestamp.to_s
    next if entries_key.include?(key)

    record = { "ts" => key, "at" => Time.now.utc.iso8601 }
    { "url" => url, "bytes" => bytes }.each do |field, value|
      record[field] = value if value
    end

    entries.push(record)
    @entries_key = nil # cached lookup is stale once an entry is added
    save
  end
end

#size ⇒ Object



49
50
51
# File 'lib/archaeo/download_state.rb', line 49

# Number of recorded entries.
#
# @return [Integer]
def size
  @mutex.synchronize do
    entries.length
  end
end

#stale_entries(base_dir: @output_dir) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/archaeo/download_state.rb', line 73

# Lists recorded entries whose file can no longer be found.
#
# An entry counts as stale when +find_file+ yields no path for its "ts"
# value, or yields a path that does not exist on disk.
#
# @param base_dir [String] directory passed to +find_file+ (defaults to
#   the output directory)
# @return [Array<Hash>] the stale entries, in recorded order
def stale_entries(base_dir: @output_dir)
  @mutex.synchronize do
    entries.select do |entry|
      location = find_file(base_dir, entry["ts"])
      !location || !File.exist?(location)
    end
  end
end

#timestamps ⇒ Object



53
54
55
# File 'lib/archaeo/download_state.rb', line 53

# Timestamps of all recorded entries, in recorded order.
#
# @return [Array] the "ts" value of every entry
def timestamps
  @mutex.synchronize do
    entries.map { |entry| entry["ts"] }
  end
end

#total_bytes ⇒ Object



45
46
47
# File 'lib/archaeo/download_state.rb', line 45

# Sum of the "bytes" values across all recorded entries.
#
# Each value goes through #to_i, so entries without "bytes" count as 0.
#
# @return [Integer]
def total_bytes
  @mutex.synchronize do
    entries.map { |entry| entry["bytes"].to_i }.sum
  end
end