Class: CI::Queue::Redis::Worker

Inherits:
Base
  • Object
show all
Defined in:
lib/ci/queue/redis/worker.rb

Direct Known Subclasses

Grind, TestTimeRecord

Constant Summary collapse

DEFAULT_SLEEP_SECONDS =
0.5

Constants inherited from Base

Base::CONNECTION_ERRORS, Base::DEFAULT_TIMEOUT, Base::TEN_MINUTES

Constants included from Common

Common::CONNECTION_ERRORS

Instance Attribute Summary collapse

Attributes included from Common

#config

Instance Method Summary collapse

Methods inherited from Base

#boot_heartbeat_process!, #created_at=, #custom_config, #custom_middlewares, #ensure_heartbeat_thread_alive!, #exhausted?, #expired?, #increment_test_failed, #max_test_failed?, #progress, #queue_initialized?, #queue_initializing?, #reconnect_attempts, #remaining, #running, #size, #stop_heartbeat!, #streaming?, #test_failed, #test_ids, #to_a, #wait_for_master, #with_heartbeat, #workers_count

Methods included from Common

#flaky?, #report_failure!, #report_success!, #rescue_connection_errors

Constructor Details

#initialize(redis, config) ⇒ Worker

Returns a new instance of Worker.



25
26
27
28
29
30
31
# File 'lib/ci/queue/redis/worker.rb', line 25

# Sets up this worker's in-memory reservation bookkeeping before
# delegating connection/config setup to Base.
#
# @param redis [Object] redis connection handed through to the superclass
# @param config [Object] queue configuration handed through to the superclass
def initialize(redis, config)
  # Thread-safe containers — presumably because reservations can be
  # touched from the heartbeat thread as well as the polling loop
  # (TODO confirm against Base#with_heartbeat).
  @reserved_tests = Concurrent::Set.new
  @reserved_leases = Concurrent::Map.new
  @shutdown_required = false
  @first_reserve_at = nil
  super(redis, config)
end

Instance Attribute Details

#entry_resolverObject

Returns the value of attribute entry_resolver.



22
23
24
# File 'lib/ci/queue/redis/worker.rb', line 22

# Pluggable resolver attribute; presumably used to map raw queue
# entries back to runnable tests (see the #resolve_entry call in
# #poll) — TODO confirm with callers.
def entry_resolver
  @entry_resolver
end

#first_reserve_atObject (readonly)

Returns the value of attribute first_reserve_at.



23
24
25
# File 'lib/ci/queue/redis/worker.rb', line 23

# Read-only timestamp of the first reservation made by this worker,
# or nil when nothing has been reserved yet (initialized to nil in
# #initialize).
def first_reserve_at
  @first_reserve_at
end

Instance Method Details

#acknowledge(entry, error: nil, pipeline: redis) ⇒ Object



200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/ci/queue/redis/worker.rb', line 200

# Marks +entry+ as processed via the :acknowledge Lua script and clears
# the local reservation state for its test id.
#
# @param entry [String] a queue entry previously reserved by this worker
# @param error [Object, nil] failure details, stringified into the script args
# @param pipeline [Object] redis connection or pipeline to run the script on
# @return [Boolean] whether the script reported the entry as acknowledged (== 1)
#   NOTE(review): when an actual pipeline is passed, the comparison runs
#   against the pipelined (deferred) result — confirm callers ignore the
#   return value in that case.
def acknowledge(entry, error: nil, pipeline: redis)
  test_id = CI::Queue::QueueEntry.test_id(entry)
  # Acknowledging an entry we never reserved is a programming error.
  assert_reserved!(test_id)
  # Prefer the entry recorded at reservation time, if any.
  entry = reserved_entries.fetch(test_id, entry)
  # Drop the lease locally first; the script receives it so Redis can
  # validate ownership.
  lease = @reserved_leases.delete(test_id)
  unreserve_entry(test_id)
  eval_script(
    :acknowledge,
    keys: [key('running'), key('processed'), key('owners'), key('error-reports'), key('requeued-by'), key('leases')],
    argv: [entry, error.to_s, config.redis_ttl, lease.to_s],
    pipeline: pipeline,
  ) == 1
end

#buildObject



177
178
179
# File 'lib/ci/queue/redis/worker.rb', line 177

# Lazily builds and caches the BuildRecord tracking this build's state.
def build
  @build = CI::Queue::Redis::BuildRecord.new(self, redis, config) unless @build
  @build
end

#distributed?Boolean

Returns:

  • (Boolean)


33
34
35
# File 'lib/ci/queue/redis/worker.rb', line 33

# A Redis-backed queue is always shared across workers.
#
# @return [Boolean] always true
def distributed?
  true
end

#file_loaderObject



181
182
183
# File 'lib/ci/queue/redis/worker.rb', line 181

# Lazily builds and caches the FileLoader instance for this worker.
def file_loader
  @file_loader = CI::Queue::FileLoader.new if @file_loader.nil?
  @file_loader
end

#lease_for(entry) ⇒ Object



191
192
193
194
# File 'lib/ci/queue/redis/worker.rb', line 191

# Returns the lease token currently held for +entry+'s test, or nil
# when this worker holds no lease for it.
def lease_for(entry)
  @reserved_leases[CI::Queue::QueueEntry.test_id(entry)]
end

#master?Boolean

Returns:

  • (Boolean)


106
107
108
# File 'lib/ci/queue/redis/worker.rb', line 106

# Whether this worker won the leader election performed in
# #stream_populate (which assigns @master).
#
# @return [Boolean, nil] nil if no election has taken place yet
def master?
  @master
end

#pollObject



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/ci/queue/redis/worker.rb', line 112

# Main work loop: reserves queue entries one at a time and yields each
# resolved test to the caller's block.
#
# The loop exits when shutdown is requested, any circuit breaker opens,
# the queue is exhausted, or the failure budget is exceeded.
#
# An empty reserve while the leader is still streaming means "more tests
# are coming": wait briefly and retry, raising LostMaster if streaming
# stalls past config.lazy_load_streaming_timeout. Otherwise an empty
# reserve backs off exponentially (capped at Redis.max_sleep_time).
#
# On exit, TTLs on the per-worker log and the processed set are
# refreshed. Connection errors end the loop silently (best effort).
def poll
  wait_for_master(timeout: config.queue_init_timeout, allow_streaming: true)
  attempt = 0
  until shutdown_required? || config.circuit_breakers.any?(&:open?) || exhausted? || max_test_failed?
    if entry = reserve
      # A successful reserve resets the backoff.
      attempt = 0
      yield resolve_entry(entry)
    else
      if still_streaming?
        raise LostMaster, "Streaming stalled for more than #{config.lazy_load_streaming_timeout}s" if streaming_stale?
        sleep 0.1
        next
      end
      # Adding exponential backoff to avoid hammering Redis
      # we just stay online here in case a test gets retried or times out so we can afford to wait
      sleep_time = [DEFAULT_SLEEP_SECONDS * (2 ** attempt), Redis.max_sleep_time].min
      attempt += 1
      sleep sleep_time
    end
  end
  redis.pipelined do |pipeline|
    pipeline.expire(key('worker', worker_id, 'queue'), config.redis_ttl)
    pipeline.expire(key('processed'), config.redis_ttl)
  end
rescue *CONNECTION_ERRORS
  # Deliberately swallowed: losing the connection simply ends polling.
end

#populate(tests, random: Random.new) ⇒ Object



37
38
39
40
41
42
# File 'lib/ci/queue/redis/worker.rb', line 37

# Builds the in-memory id-to-test index and pushes every test onto the
# shared queue in shuffled order.
#
# @param tests [Array] runnable tests responding to #id
# @param random [Random] randomness source for the shuffle
# @return [self]
def populate(tests, random: Random.new)
  @index = tests.each_with_object({}) { |test, index| index[test.id] = test }
  push(Queue.shuffle(tests, random).map { |test| queue_entry_for(test) })
  self
end

#populated?Boolean

Returns:

  • (Boolean)


86
87
88
# File 'lib/ci/queue/redis/worker.rb', line 86

# True once the in-memory test index (@index) has been assigned by
# #populate — even if it was assigned a falsy value.
def populated?
  defined?(@index) ? true : false
end

#release!Object



247
248
249
250
251
252
253
254
# File 'lib/ci/queue/redis/worker.rb', line 247

# Runs the :release Lua script to drop everything this worker still
# owns in Redis: its running-set entries, per-worker queue log,
# ownership markers and leases. Presumably invoked when the worker
# gives up its reservations (e.g. shutdown) — confirm with callers.
#
# @return [nil] always
def release!
  eval_script(
    :release,
    keys: [key('running'), key('worker', worker_id, 'queue'), key('owners'), key('leases')],
    argv: [],
  )
  nil
end

#report_worker_error(error) ⇒ Object



196
197
198
# File 'lib/ci/queue/redis/worker.rb', line 196

# Delegates worker-level (non-test) error reporting to the build record.
#
# @param error [Object] the error to record against the build
def report_worker_error(error)
  build.report_worker_error(error)
end

#requeue(entry, offset: Redis.requeue_offset) ⇒ Object



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/ci/queue/redis/worker.rb', line 214

# Attempts to put +entry+ back on the queue (e.g. so another worker can
# retry it).
#
# The local reservation and lease are dropped optimistically; if the
# requeue is declined — requeues disabled locally, or the per-test /
# global budget exhausted inside the :requeue script — they are
# restored so the caller can still #acknowledge the entry.
#
# @param entry [String] a queue entry previously reserved by this worker
# @param offset [Integer] how far from the queue head to reinsert it
# @return [Boolean] true when the entry was actually requeued
def requeue(entry, offset: Redis.requeue_offset)
  test_id = CI::Queue::QueueEntry.test_id(entry)
  assert_reserved!(test_id)
  # Prefer the entry recorded at reservation time, if any.
  entry = reserved_entries.fetch(test_id, entry)
  lease = @reserved_leases.delete(test_id)
  unreserve_entry(test_id)
  global_max_requeues = config.global_max_requeues(total)

  # Short-circuits when requeues are disabled; otherwise the script
  # enforces both budgets atomically and returns 1 on success.
  requeued = config.max_requeues > 0 && global_max_requeues > 0 && eval_script(
    :requeue,
    keys: [
      key('processed'),
      key('requeues-count'),
      key('queue'),
      key('running'),
      key('worker', worker_id, 'queue'),
      key('owners'),
      key('error-reports'),
      key('requeued-by'),
      key('leases'),
    ],
    argv: [config.max_requeues, global_max_requeues, entry, offset, config.redis_ttl, lease.to_s],
  ) == 1

  unless requeued
    # Roll back the optimistic unreserve so the entry can still be
    # acknowledged by this worker.
    reserved_tests << test_id
    reserved_entries[test_id] = entry
    reserved_entry_ids[entry] = test_id
    @reserved_leases[test_id] = lease if lease
  end
  requeued
end

#retry_queueObject



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/ci/queue/redis/worker.rb', line 153

# Builds a Retry queue covering this worker's failed tests.
#
# Entries are kept whole (test id plus file path) so lazy loading can
# still resolve them; filtering and deduplication operate on the test
# id only. When the per-worker log contains none of the failures (for
# instance a rebuild with brand-new worker ids, or a different parallel
# slot), fall back to every unresolved failure recorded in
# error-reports so any worker can retry them.
def retry_queue
  failed_ids = build.failed_tests.to_set
  entries = redis.lrange(key('worker', worker_id, 'queue'), 0, -1)

  entries = entries
    .select { |entry| failed_ids.include?(CI::Queue::QueueEntry.test_id(entry)) }
    .uniq { |entry| CI::Queue::QueueEntry.test_id(entry) }
    .reverse

  entries = redis.hkeys(key('error-reports')) if entries.empty?

  Retry.new(entries, config, redis: redis)
end

#retrying?Boolean

Returns:

  • (Boolean)


140
141
142
143
144
# File 'lib/ci/queue/redis/worker.rb', line 140

# True when this worker already has a per-worker queue log in Redis,
# i.e. the current run is a retry of a previous one. A connection
# error is treated as "not retrying" rather than raised.
#
# @return [Boolean]
def retrying?
  redis.exists?(key('worker', worker_id, 'queue'))
rescue *CONNECTION_ERRORS
  false
end

#shutdown!Object



98
99
100
# File 'lib/ci/queue/redis/worker.rb', line 98

# Requests that the polling loop stop; #poll checks
# #shutdown_required? at the top of each iteration, so the current
# test finishes first.
def shutdown!
  @shutdown_required = true
end

#shutdown_required?Boolean

Returns:

  • (Boolean)


102
103
104
# File 'lib/ci/queue/redis/worker.rb', line 102

# @return [Boolean] whether #shutdown! has been called on this worker
def shutdown_required?
  @shutdown_required
end

#stream_populate(tests, random: Random.new, batch_size: 10_000) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ci/queue/redis/worker.rb', line 44

# Populates the queue by streaming tests in batches, with a leader
# election so only one worker pushes.
#
# Election: SET NX on master-status with this worker's setup key, then
# GET it back, both in one pipeline — if the value round-trips, this
# worker won and becomes the leader (@master).
#
# The leader streams tests in batches of +batch_size+ (coerced to at
# least 1), then finalizes the stream. Every worker registers itself
# afterwards. Connection errors are fatal only for the leader; others
# can wait for the master instead.
#
# @param tests [Enumerable] tests to stream onto the queue
# @param random [Random] randomness source passed to each batch push
# @param batch_size [Integer] number of tests pushed per batch
# @return [self, nil] nil when a non-leader hits a connection error
def stream_populate(tests, random: Random.new, batch_size: 10_000)
  batch_size = batch_size.to_i
  batch_size = 1 if batch_size < 1

  value = key('setup', worker_id)
  _, status = redis.pipelined do |pipeline|
    pipeline.set(key('master-status'), value, nx: true)
    pipeline.get(key('master-status'))
  end

  # Intentional assignment: the election result is memoized in @master.
  if @master = (value == status)
    @total = 0
    puts "Worker elected as leader, streaming tests to the queue."

    duration = measure do
      start_streaming!
      buffer = []

      tests.each do |test|
        buffer << test

        if buffer.size >= batch_size
          push_batch(buffer, random)
          buffer.clear
        end
      end

      # Flush the final partial batch before closing the stream.
      push_batch(buffer, random) unless buffer.empty?
      finalize_streaming
    end

    puts "Streamed #{@total} tests in #{duration.round(2)}s."
    $stdout.flush
  end

  register
  redis.expire(key('workers'), config.redis_ttl)
  self
rescue *CONNECTION_ERRORS
  # Followers can survive a flaky connection; a leader cannot, since
  # nobody else will stream the tests.
  raise if @master
end

#supervisorObject



173
174
175
# File 'lib/ci/queue/redis/worker.rb', line 173

# Builds a Supervisor for the same queue (a fresh instance on every
# call — not memoized), reusing this worker's redis_url and config.
def supervisor
  Supervisor.new(redis_url, config)
end

#totalObject



90
91
92
93
94
95
96
# File 'lib/ci/queue/redis/worker.rb', line 90

# Total number of tests in this run. Prefers the locally tracked count
# (@total, set by the streaming leader) and otherwise asks Redis;
# a connection error falls back to the local count, or 0.
def total
  if defined?(@total) && @total
    @total
  else
    redis.get(key('total')).to_i
  end
rescue *CONNECTION_ERRORS
  @total || 0
end

#worker_queue_lengthObject



185
186
187
188
189
# File 'lib/ci/queue/redis/worker.rb', line 185

# Length of this worker's per-worker queue log in Redis, or nil when
# Redis is unreachable (callers must handle the nil).
#
# @return [Integer, nil]
def worker_queue_length
  redis.llen(key('worker', worker_id, 'queue'))
rescue *CONNECTION_ERRORS
  nil
end