Class: Rwm::TaskCache

Inherits:
Object
  • Object
show all
Defined in:
lib/rwm/task_cache.rb

Constant Summary collapse

CACHE_HASH_VERSION =

Salt: bump when the hashing scheme changes (file ordering, what gets included, normalisation rules) so existing caches become misses rather than wrong hits.

"v1"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(workspace, graph) ⇒ TaskCache

Returns a new instance of TaskCache.



27
28
29
30
31
32
33
34
35
# File 'lib/rwm/task_cache.rb', line 27

# Build a task cache bound to one workspace and its dependency graph.
#
# @param workspace [#root] workspace whose root contains the `.rwm/cache` directory
# @param graph [Object] dependency graph, stored for later hash computation
def initialize(workspace, graph)
  @workspace, @graph = workspace, graph
  @cache_dir = File.join(@workspace.root, ".rwm", "cache")

  # Each in-memory memo is paired with the mutex created alongside it.
  @content_hash_mutex = Mutex.new
  @content_hashes = {}

  @declarations_mutex = Mutex.new
  @cache_declarations = {}
end

Class Method Details

.clean(workspace, package_name: nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
# File 'lib/rwm/task_cache.rb', line 16

# Delete stored cache entries from the workspace's `.rwm/cache` directory.
#
# @param workspace [#root] workspace whose cache directory is cleaned
# @param package_name [String, nil] when given, only entries whose name starts
#   with "<package_name>-" are removed; otherwise every entry is removed.
#   NOTE(review): the prefix match also catches packages whose name merely
#   begins with "<package_name>-" — confirm against the cache file naming.
# @return [Array<String>, nil] the paths matched by the glob, or nil when the
#   cache directory does not exist
def self.clean(workspace, package_name: nil)
  cache_dir = File.join(workspace.root, ".rwm", "cache")
  return unless Dir.exist?(cache_dir)

  pattern = package_name ? "#{package_name}-*" : "*"
  Dir.glob(File.join(cache_dir, pattern)).each do |entry|
    # File.delete raises on a directory, which would abort the clean part-way
    # and leave later entries behind. lstat (not stat) so that symlinks are
    # still unlinked rather than followed.
    next if File.lstat(entry).directory?

    File.delete(entry)
  end
end

Instance Method Details

#cache_declarations(package) ⇒ Object

Discover cacheable task declarations by running `bundle exec rake rwm:cache_config`.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/rwm/task_cache.rb', line 128

# Return the cacheable-task declarations for +package+, discovering them on
# first use by running `bundle exec rake rwm:cache_config` in the package
# directory and parsing its stdout as JSON. Results are memoised per package
# name.
#
# An empty hash is memoised when the command fails, prints nothing, prints
# invalid JSON, or prints JSON that is not an object.
#
# @param package [#name, #path] package to inspect
# @return [Hash] task name => declaration
def cache_declarations(package)
  @declarations_mutex.synchronize do
    return @cache_declarations[package.name] if @cache_declarations.key?(package.name)
  end

  Rwm.debug("cache declarations: discovering for #{package.name}")
  # The subprocess runs outside the lock so other packages are not blocked
  # while this one is being discovered.
  output, _, status = Open3.capture3(Rwm.bundle_env(package.path), "bundle", "exec", "rake", "rwm:cache_config", chdir: package.path)
  payload = output.strip
  parsed = if status.success? && !payload.empty?
             JSON.parse(payload)
           else
             {}
           end
  # Valid JSON such as `null` or `[]` is not a declarations table; callers
  # call `key?` / `[]` with a task name on the result, so normalise to a Hash.
  result = parsed.is_a?(Hash) ? parsed : {}

  @declarations_mutex.synchronize { @cache_declarations[package.name] = result }
rescue JSON::ParserError
  @declarations_mutex.synchronize { @cache_declarations[package.name] = {} }
end

#cacheable?(package, task) ⇒ Boolean

Returns true if the task is declared cacheable in the package’s Rakefile

Returns:

  • (Boolean)


38
39
40
41
# File 'lib/rwm/task_cache.rb', line 38

# Whether +task+ appears among the cache declarations discovered for +package+.
#
# @param package [Object] package passed through to #cache_declarations
# @param task [String] task name looked up as a key
# @return [Boolean]
def cacheable?(package, task)
  cache_declarations(package).key?(task)
end

#cached?(package, task) ⇒ Boolean

Returns true if the (package, task) pair is cached and inputs haven’t changed. Also verifies declared outputs exist (if any).

Returns:

  • (Boolean)


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rwm/task_cache.rb', line 45

# Decide whether a (package, task) pair can be served from the cache.
#
# Checks, in order: a stored hash exists, it equals the package's current
# content hash, and — when the task's declaration has an "output" entry —
# that #outputs_exist? reports a match. Each outcome is logged via Rwm.debug.
#
# @param package [#name] package being checked
# @param task [String] task name
# @return [Boolean]
def cached?(package, task)
  recorded = read_stored_hash(package, task)
  unless recorded
    Rwm.debug("cache miss: #{package.name}:#{task} (no stored hash)")
    return false
  end

  if recorded != content_hash(package)
    Rwm.debug("cache miss: #{package.name}:#{task} (hash changed)")
    return false
  end

  # A declaration without an "output" entry imposes no output requirement.
  declaration = cache_declarations(package)[task]
  pattern = declaration && declaration["output"]
  if pattern && !outputs_exist?(package, pattern)
    Rwm.debug("cache miss: #{package.name}:#{task} (outputs missing)")
    return false
  end

  Rwm.debug("cache hit: #{package.name}:#{task}")
  true
end

#content_hash(package) ⇒ Object

Compute a content hash for a package: SHA256 of all source files + dependency hashes. Walks `package` and its transitive deps in topological order (deps before dependents) so each dep's hash is memoised before any package that depends on it is hashed — this avoids the unbounded recursion of the natural recursive formulation on deep chains.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rwm/task_cache.rb', line 89

# Return the memoised content hash for +package+, computing it (and the hash
# of every transitive dependency not yet memoised) on first use.
#
# Packages are visited in the graph's topological order, restricted to
# +package+ and its transitive dependencies, and each result is stored in
# @content_hashes under the content-hash mutex.
#
# @param package [#name] package to hash
# @return [Object] whatever #read_memoised_hash yields for the package after
#   the walk
def content_hash(package)
  cached = read_memoised_hash(package.name)
  return cached if cached

  # Copy into a fresh Set: Set#to_set returns the receiver itself, so adding
  # the package name to it would mutate the collection handed out by the
  # graph whenever that collection is already a Set.
  needed = Set.new(@graph.transitive_dependencies(package.name))
  needed << package.name

  @graph.topological_order.each do |name|
    next unless needed.include?(name)
    next if read_memoised_hash(name)

    # Reuse the caller's object for the target package; look the others up.
    pkg = name == package.name ? package : @workspace.find_package(name)
    computed = compute_single_package_hash(pkg)
    # ||= keeps the first stored value if another thread got there first.
    @content_hash_mutex.synchronize { @content_hashes[pkg.name] ||= computed }
  end

  read_memoised_hash(package.name)
end

#outputs_exist?(package, output_pattern) ⇒ Boolean

Check if declared output files/globs exist in the package directory

Returns:

  • (Boolean)


80
81
82
83
# File 'lib/rwm/task_cache.rb', line 80

# Whether at least one filesystem entry matches +output_pattern+ when it is
# globbed relative to the package directory.
#
# @param package [#path] package whose directory anchors the pattern
# @param output_pattern [String] file name or glob, joined onto package.path
# @return [Boolean]
def outputs_exist?(package, output_pattern)
  absolute_pattern = File.join(package.path, output_pattern)
  Dir.glob(absolute_pattern).any?
end

#preload_declarations(packages) ⇒ Object

Preload cache declarations for multiple packages in parallel. Warms the memoization hash so subsequent cacheable?/cached? calls are instant.



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/rwm/task_cache.rb', line 110

# Warm the declarations memo for several packages at once.
#
# Packages already present in @cache_declarations are skipped. The rest are
# split into at most Etc.nprocessors batches, each handled by its own thread
# calling #cache_declarations; all threads are joined before returning.
#
# @param packages [Array<#name>] packages to preload
# @return [Array<Thread>, nil] the joined threads, or nil when nothing was pending
def preload_declarations(packages)
  uncached = packages.reject { |pkg| @cache_declarations.key?(pkg.name) }
  return if uncached.empty?

  Rwm.debug("cache declarations: preloading #{uncached.size} package(s) in parallel")
  worker_count = [Etc.nprocessors, uncached.size].min
  batch_size = (uncached.size.to_f / worker_count).ceil

  # map is eager, so every worker is started before the first join.
  workers = uncached.each_slice(batch_size).map do |batch|
    Thread.new do
      batch.each { |pkg| cache_declarations(pkg) }
    end
  end

  workers.each(&:join)
end

#store(package, task) ⇒ Object

Store the current content hash after a successful task run



72
73
74
75
76
77
# File 'lib/rwm/task_cache.rb', line 72

# Persist the package's current content hash for +task+.
#
# Creates the cache directory if needed, then writes the hash to the path
# given by #cache_file.
#
# @param package [#name] package whose hash is stored
# @param task [String] task name
# @return [Integer] number of bytes written (the File.write result)
def store(package, task)
  Rwm.debug("cache store: #{package.name}:#{task}")
  FileUtils.mkdir_p(@cache_dir)

  destination = cache_file(package, task)
  digest = content_hash(package)
  File.write(destination, digest)
end