Module: Legion::LLM::Fleet::Dispatcher

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/fleet/dispatcher.rb

Constant Summary

# Option names treated as envelope-level fields: request_params removes these
# keys from the request options before the remainder is sent as :params.
ENVELOPE_KEYS =
%i[
  app_id caller correlation_id expires_at idempotency_key identity message_context operation
  model priority protocol_version provider provider_instance reply_to request_id routing_key
  signed_token timeout timeout_seconds trace_context ttl
].freeze
# Option names that reject_legacy_fields! refuses with an ArgumentError
# ("... is not supported by fleet protocol v2").
LEGACY_FIELDS =
%i[schema_version request_type fleet_correlation_id].freeze

Class Method Summary

Class Method Details

.build_envelope(operation:, request_opts:, message_context:, routing_key: nil, reply_to: nil) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/legion/llm/fleet/dispatcher.rb', line 58

# Assembles the fleet request envelope for a single dispatch.
#
# Legacy option names are rejected, provider / provider instance / timeout
# fall back to defaults, and fresh request and correlation ids are generated.
# The token is issued over the envelope as it exists before :signed_token is
# added; when dispatch auth is not required the literal 'unsigned' is stored.
#
# @param operation [Symbol] operation being dispatched
# @param request_opts [Hash] caller options (string or symbol keys)
# @param message_context [Hash, nil] context copied into the envelope; nil becomes {}
# @param routing_key [String, nil] explicit routing key; derived from the options when nil
# @param reply_to [String, nil] reply queue; defaults to ReplyDispatcher.agent_queue_name
# @return [Hash] the envelope, with :signed_token as its last key
# @raise [ArgumentError] when request_opts contains a legacy field
def build_envelope(operation:, request_opts:, message_context:, routing_key: nil, reply_to: nil)
  option = ->(name) { fetch_option(request_opts, name) }

  provider = option.call(:provider) || 'ollama'
  reject_legacy_fields!(request_opts)
  instance = option.call(:provider_instance) || option.call(:instance) || 'default'
  model_name = option.call(:model)
  timeout_secs = resolve_timeout(operation: operation, override: option.call(:timeout))
  request_id = next_request_id
  correlation_id = next_request_id
  reply_queue = reply_to || ReplyDispatcher.agent_queue_name
  route = routing_key || build_routing_key(
    provider:                provider,
    operation:               operation,
    model:                   model_name,
    provider_instance:       instance,
    context_window:          context_window_from(request_opts),
    boundary:                option.call(:network_boundary),
    eligibility_fingerprint: option.call(:eligibility_fingerprint),
    routing_style:           option.call(:routing_style)
  )

  # Key order is kept stable: the hash is handed to the token issuer and then
  # splatted into the publisher.
  payload = {
    protocol_version:  ::Legion::Extensions::Llm::Fleet::Protocol::VERSION,
    request_id:        request_id,
    correlation_id:    correlation_id,
    idempotency_key:   option.call(:idempotency_key) || "idem_#{SecureRandom.uuid}",
    operation:         operation,
    provider:          provider,
    provider_instance: instance,
    model:             model_name,
    params:            request_params(request_opts),
    routing_key:       route,
    reply_to:          reply_queue,
    message_context:   message_context || {},
    caller:            option.call(:caller) || default_caller,
    identity:          Legion::LLM::PublisherIdentity.current,
    trace_context:     option.call(:trace_context) || {},
    timeout_seconds:   timeout_secs,
    expires_at:        (Time.now.utc + timeout_secs).iso8601,
    ttl:               effective_ttl(request_opts, timeout_secs)
  }
  payload[:signed_token] =
    if dispatch_auth_required?
      TokenIssuer.issue(payload)
    else
      'unsigned'
    end
  payload
end

.build_routing_key(provider:, operation:, model:, provider_instance: nil, context_window: nil, boundary: nil, eligibility_fingerprint: nil, routing_style: nil) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/legion/llm/fleet/dispatcher.rb', line 111

# Picks the routing key for a request according to the routing style.
#
# 'offering_lane' and 'shared_lane' delegate to Lane; any other style yields
# the provider-scoped key "llm.request.<provider>.<operation>.<model>".
#
# @param provider [String] provider name
# @param operation [Symbol, String] operation name
# @param model [String, nil] model identifier
# @param provider_instance [String, nil] instance id for offering lanes; provider is used when nil
# @param context_window [Object, nil] forwarded to Lane.routing_key
# @param boundary [Object, nil] forwarded to Lane.routing_key
# @param eligibility_fingerprint [Object, nil] forwarded to Lane.routing_key
# @param routing_style [String, Symbol, nil] style override; default_routing_style when nil
# @return [Object] the routing key (whatever Lane returns for the lane styles, a String otherwise)
def build_routing_key(provider:, operation:, model:, provider_instance: nil, context_window: nil, boundary: nil,
                      eligibility_fingerprint: nil, routing_style: nil)
  case (routing_style || default_routing_style).to_s
  when 'offering_lane'
    Lane.offering_key(instance_id: provider_instance || provider, model: model, operation: operation)
  when 'shared_lane'
    Lane.routing_key(operation: operation, model: model, context_window: context_window,
                     boundary: boundary, eligibility_fingerprint: eligibility_fingerprint)
  else
    "llm.request.#{provider}.#{operation}.#{sanitize_model(model)}"
  end
end

.context_window_from(options) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/legion/llm/fleet/dispatcher.rb', line 128

# Finds the context window size in the request options.
#
# Checks :context_window, :max_context_size and :max_input_tokens on the
# options in that order, then :context_window inside the nested :limits hash.
#
# @param options [Hash] request options (string or symbol keys)
# @return [Object, nil] the first truthy value found, otherwise the :limits lookup result
def context_window_from(options)
  nested_limits = fetch_option(options, :limits) || {}

  %i[context_window max_context_size max_input_tokens].each do |name|
    found = fetch_option(options, name)
    return found if found
  end

  fetch_option(nested_limits, :context_window)
end

.default_caller ⇒ Object



289
290
291
292
293
294
295
# File 'lib/legion/llm/fleet/dispatcher.rb', line 289

# Caller metadata used when the request options do not supply :caller.
#
# @return [Hash] :source, :component and :requested_by (read from
#   Legion::LLM::PublisherIdentity.requested_by)
def default_caller
  requester = Legion::LLM::PublisherIdentity.requested_by
  { source: 'legion-llm', component: 'fleet_dispatcher', requested_by: requester }
end

.default_routing_style ⇒ Object



124
125
126
# File 'lib/legion/llm/fleet/dispatcher.rb', line 124

# Routing style used when a request does not name one.
#
# @return [Object] the llm.fleet.dispatch.routing_style setting, or :shared_lane when unset
def default_routing_style
  configured = Legion::Settings.dig(:llm, :fleet, :dispatch, :routing_style)
  configured || :shared_lane
end

.dispatch(operation: nil, request: nil, message_context: {}, routing_key: nil, reply_to: nil, **opts) ⇒ Object

Raises:

  • (ArgumentError)


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/legion/llm/fleet/dispatcher.rb', line 27

# Sends one request to the fleet and waits for its reply.
#
# The operation is taken from the keyword, then the request, then the extra
# options. When the fleet is unavailable a 'fleet_unavailable' error hash is
# returned without publishing. Otherwise the reply future is registered before
# publishing, and a rejected publish is converted into an error hash.
#
# @param operation [Symbol, String, nil] operation name
# @param request [#to_h, nil] request whose fields become request options
# @param message_context [Hash] context echoed back in results
# @param routing_key [String, nil] explicit routing key
# @param reply_to [String, nil] explicit reply queue
# @param opts [Hash] extra options merged over the request's fields
# @return [Object] the fleet reply, or an error hash with success: false
# @raise [ArgumentError] when no operation can be determined
def dispatch(operation: nil, request: nil, message_context: {}, routing_key: nil, reply_to: nil, **opts)
  operation = normalize_operation(operation || fetch_option(request, :operation) || opts[:operation])
  raise ArgumentError, 'operation is required for fleet dispatch' unless operation

  merged_opts = normalize_request(request).merge(opts)
  log.debug "[llm][fleet][dispatcher] action=dispatch.enter operation=#{operation} " \
            "model=#{fetch_option(merged_opts, :model)} routing_key=#{routing_key} fleet_available=#{fleet_available?}"
  return error_result('fleet_unavailable', message_context: message_context) unless fleet_available?

  envelope = build_envelope(
    operation:       operation,
    request_opts:    merged_opts,
    message_context: message_context,
    routing_key:     routing_key,
    reply_to:        reply_to
  )
  correlation_id = envelope[:correlation_id]
  # Register before publishing so the reply has somewhere to land.
  pending_reply = register_response(correlation_id, expected_delivery(envelope))
  outcome = publish_request(**envelope)

  if publish_accepted?(outcome)
    wait_for_response(
      correlation_id,
      timeout:         envelope[:timeout_seconds],
      message_context: message_context,
      future:          pending_reply
    )
  else
    publish_error_result(outcome, correlation_id, message_context: message_context)
  end
end

.dispatch_auth_required? ⇒ Boolean

Returns:

  • (Boolean)


282
283
284
285
286
287
# File 'lib/legion/llm/fleet/dispatcher.rb', line 282

# Whether outgoing envelopes must carry a signed token.
#
# llm.fleet.dispatch.require_auth wins when it is set; otherwise
# llm.fleet.auth.require_signed_token decides. In both cases only an explicit
# false turns signing off.
#
# @return [Boolean]
def dispatch_auth_required?
  dispatch_flag = Legion::Settings.dig(:llm, :fleet, :dispatch, :require_auth)

  if dispatch_flag.nil?
    Legion::Settings.dig(:llm, :fleet, :auth, :require_signed_token) != false
  else
    dispatch_flag != false
  end
end

.effective_ttl(options, timeout) ⇒ Object



136
137
138
139
140
141
# File 'lib/legion/llm/fleet/dispatcher.rb', line 136

# Chooses the envelope TTL.
#
# @param options [Hash] request options (string or symbol keys)
# @param timeout [Object] fallback when neither :ttl nor :expiration_seconds is set
# @return [Object] :ttl, else :expiration_seconds, else timeout
def effective_ttl(options, timeout)
  fetch_option(options, :ttl) || fetch_option(options, :expiration_seconds) || timeout
end

.error_result(reason, message_context: {}) ⇒ Object



272
273
274
# File 'lib/legion/llm/fleet/dispatcher.rb', line 272

# Builds the failure hash returned when dispatch cannot proceed.
#
# @param reason [String] error identifier stored under :error
# @param message_context [Hash] context echoed back to the caller
# @return [Hash] success: false plus the reason and context
def error_result(reason, message_context: {})
  {
    success:         false,
    error:           reason,
    message_context: message_context
  }
end

.expected_delivery(envelope) ⇒ Object



103
104
105
106
107
108
109
# File 'lib/legion/llm/fleet/dispatcher.rb', line 103

# Extracts the envelope fields handed to register_response as the
# expectation for the reply.
#
# @param envelope [Hash] envelope built by build_envelope
# @return [Hash] :protocol_version, :operation and :correlation_id copied from the envelope
def expected_delivery(envelope)
  %i[protocol_version operation correlation_id].to_h do |field|
    [field, envelope[field]]
  end
end

.fetch_option(hash, key) ⇒ Object



167
168
169
170
171
172
173
174
# File 'lib/legion/llm/fleet/dispatcher.rb', line 167

# Reads an option that may be stored under a string or a symbol key.
#
# The string form of the key is checked first, so it wins when both are
# present. Stored nil/false values are returned as-is.
#
# @param hash [#key?, Object] option container; anything without #key? yields nil
# @param key [Symbol, String] option name
# @return [Object, nil] the stored value, or nil when the key is absent
def fetch_option(hash, key)
  return unless hash.respond_to?(:key?)

  [key.to_s, key].each do |candidate|
    return hash[candidate] if hash.key?(candidate)
  end

  nil
end

.fleet_available? ⇒ Boolean

Returns:

  • (Boolean)


180
181
182
# File 'lib/legion/llm/fleet/dispatcher.rb', line 180

# Whether a request can be dispatched right now.
#
# fleet_enabled? is consulted only once the transport reports ready.
#
# @return [Boolean] true when the transport is ready and fleet dispatch is enabled
def fleet_available?
  return false unless transport_ready?

  fleet_enabled?
end

.fleet_enabled? ⇒ Boolean

Returns:

  • (Boolean)


188
189
190
# File 'lib/legion/llm/fleet/dispatcher.rb', line 188

# Whether fleet dispatch is enabled in settings.
#
# Dispatch is on unless llm.fleet.dispatch.enabled is explicitly false. The
# lookup uses dig so that a missing :llm, :fleet or :dispatch section counts
# as "not disabled" instead of raising NoMethodError on nil.
#
# @return [Boolean]
def fleet_enabled?
  Legion::Settings.dig(:llm, :fleet, :dispatch, :enabled) != false
end

.legacy_field_present?(hash, key) ⇒ Boolean

Returns:

  • (Boolean)


149
150
151
152
153
# File 'lib/legion/llm/fleet/dispatcher.rb', line 149

# Tells whether a key exists in the container under its given or string form.
#
# Only key presence matters; the stored value may be nil.
#
# @param hash [#key?, Object] container to inspect; anything without #key? yields false
# @param key [Symbol] field name
# @return [Boolean]
def legacy_field_present?(hash, key)
  return false unless hash.respond_to?(:key?)

  [key, key.to_s].any? { |candidate| hash.key?(candidate) }
end

.next_request_id ⇒ Object



201
202
203
# File 'lib/legion/llm/fleet/dispatcher.rb', line 201

# Generates a fresh identifier: a random UUID prefixed with "req_".
#
# @return [String]
def next_request_id
  ['req', SecureRandom.uuid].join('_')
end

.normalize_operation(operation) ⇒ Object



276
277
278
279
280
# File 'lib/legion/llm/fleet/dispatcher.rb', line 276

# Converts an operation name to a symbol.
#
# @param operation [#to_s, #to_sym, nil] operation name
# @return [Symbol, nil] nil when the name is nil or an empty string
def normalize_operation(operation)
  operation.to_s.empty? ? nil : operation.to_sym
end

.normalize_request(request) ⇒ Object



159
160
161
162
163
164
165
# File 'lib/legion/llm/fleet/dispatcher.rb', line 159

# Converts a request into a hash whose keys are symbols where possible.
#
# @param request [#to_h, Object] request object or hash
# @return [Hash] a new hash; empty when the request does not respond to #to_h.
#   Keys that cannot be symbolized are kept unchanged.
def normalize_request(request)
  return {} unless request.respond_to?(:to_h)

  request.to_h.each_with_object({}) do |(key, value), normalized|
    symbol_key = key.respond_to?(:to_sym) ? key.to_sym : key
    normalized[symbol_key] = value
  end
end

.publish_accepted?(publish_result) ⇒ Boolean

Returns:

  • (Boolean)


218
219
220
# File 'lib/legion/llm/fleet/dispatcher.rb', line 218

# Whether the publisher reported the request as accepted.
#
# @param publish_result [Hash, Object] value returned by publish_request
# @return [Boolean] true only for a Hash whose :accepted entry is exactly true
def publish_accepted?(publish_result)
  return false unless publish_result.is_a?(Hash)

  publish_result[:accepted] == true
end

.publish_error_result(publish_result, correlation_id, message_context: {}) ⇒ Object



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/legion/llm/fleet/dispatcher.rb', line 233

# Converts a rejected publish into the error hash returned to the caller and
# drops the pending reply registration for the correlation id.
#
# A result that carries no usable :status (not a Hash, or a Hash without the
# key) is reported as :failed, so :publish_status is never nil.
#
# @param publish_result [Hash, Object] value returned by publish_request
# @param correlation_id [String] correlation id whose registration is removed
# @param message_context [Hash] context echoed back to the caller
# @return [Hash] success: false with :error, :publish_status, :correlation_id
#   and :message_context
def publish_error_result(publish_result, correlation_id, message_context: {})
  ReplyDispatcher.deregister(correlation_id)

  reported = publish_result[:status] if publish_result.is_a?(Hash)
  status = reported ? reported.to_sym : :failed
  error = case status
          when :unroutable      then 'no_fleet_queue'
          when :nacked          then 'fleet_backpressure'
          when :confirm_timeout then 'fleet_publish_timeout'
          else 'fleet_publish_failed'
          end

  {
    success:         false,
    error:           error,
    publish_status:  status,
    correlation_id:  correlation_id,
    message_context: message_context
  }
end

.publish_request(**opts) ⇒ Object



209
210
211
212
213
214
215
216
# File 'lib/legion/llm/fleet/dispatcher.rb', line 209

# Publishes the envelope as a FleetRequest message.
#
# The message class is required lazily, inside the method. Any StandardError
# raised while logging, loading or publishing is reported through
# handle_exception and turned into a failed publish result instead of
# propagating.
#
# @param opts [Hash] envelope fields passed to FleetRequest.new
# @return [Object] whatever FleetRequest#publish returns, or
#   { accepted: false, status: :failed, error: <message> } on error
def publish_request(**opts)
  correlation_id, routing_key = opts.values_at(:correlation_id, :routing_key)
  log.debug("[llm][fleet][dispatcher] action=publish_request correlation_id=#{correlation_id} routing_key=#{routing_key}")
  require 'legion/extensions/llm/transport/messages/fleet_request'

  message = ::Legion::Extensions::Llm::Transport::Messages::FleetRequest.new(**opts)
  message.publish(request_publish_options)
rescue StandardError => error
  handle_exception(error, level: :warn, operation: 'llm.fleet.dispatcher.publish_request')
  { accepted: false, status: :failed, error: error.message }
end

.register_response(correlation_id, expected = {}) ⇒ Object



205
206
207
# File 'lib/legion/llm/fleet/dispatcher.rb', line 205

# Registers interest in the reply for a correlation id by delegating to
# ReplyDispatcher.register.
#
# @param correlation_id [String] correlation id of the outgoing request
# @param expected [Hash] forwarded as the expected: keyword (dispatch passes
#   the result of expected_delivery)
# @return [Object] whatever ReplyDispatcher.register returns; dispatch hands it
#   to wait_for_response as the future
def register_response(correlation_id, expected = {})
  ReplyDispatcher.register(correlation_id, expected: expected)
end

.reject_legacy_fields!(request_opts) ⇒ Object



143
144
145
146
147
# File 'lib/legion/llm/fleet/dispatcher.rb', line 143

# Refuses request options that still use a legacy field name.
#
# Fields are checked in LEGACY_FIELDS order and the first one present is
# named in the error.
#
# @param request_opts [Hash] request options to validate
# @return [Array<Symbol>] LEGACY_FIELDS when no legacy field is present
# @raise [ArgumentError] for the first legacy field found
def reject_legacy_fields!(request_opts)
  offending = LEGACY_FIELDS.find { |field| legacy_field_present?(request_opts, field) }
  raise ArgumentError, "#{offending} is not supported by fleet protocol v2" if offending

  LEGACY_FIELDS
end

.request_params(request_opts) ⇒ Object



155
156
157
# File 'lib/legion/llm/fleet/dispatcher.rb', line 155

# Extracts the request options that travel under :params in the envelope.
#
# @param request_opts [Hash] request options (string or symbol keys)
# @return [Hash] symbol-keyed options with every key listed in ENVELOPE_KEYS removed
def request_params(request_opts)
  normalize_request(request_opts).reject do |name, _value|
    ENVELOPE_KEYS.include?(name)
  end
end

.request_publish_options ⇒ Object



222
223
224
225
226
227
228
229
230
231
# File 'lib/legion/llm/fleet/dispatcher.rb', line 222

# Collects the publish options passed to FleetRequest#publish.
#
# Values come from the llm.fleet.dispatch settings section. A missing section
# is treated as empty (as resolve_timeout does) rather than raising on nil,
# so the confirm timeout default of 500 still applies.
#
# @return [Hash] :mandatory, :publisher_confirm, :publish_confirm_timeout_ms,
#   :spool and return_result: true
def request_publish_options
  dispatch = Legion::Settings.dig(:llm, :fleet, :dispatch) || {}
  {
    mandatory:                  dispatch[:mandatory],
    publisher_confirm:          dispatch[:publisher_confirm],
    publish_confirm_timeout_ms: dispatch[:publish_confirm_timeout_ms] || 500,
    spool:                      dispatch[:spool],
    return_result:              true
  }
end

.resolve_timeout(operation: :default, request_type: nil, override: nil) ⇒ Object



192
193
194
195
196
197
198
199
# File 'lib/legion/llm/fleet/dispatcher.rb', line 192

# Determines the timeout for a request.
#
# An explicit override wins. Otherwise the per-operation entry in
# llm.fleet.dispatch.timeouts is used, then llm.fleet.dispatch.timeout_seconds,
# then 30.
#
# @param operation [Symbol, String, nil] operation name used as the timeouts key
# @param request_type [Symbol, String, nil] used as the key when operation is nil
# @param override [Object, nil] caller-supplied timeout, returned unchanged when truthy
# @return [Object] the resolved timeout
def resolve_timeout(operation: :default, request_type: nil, override: nil)
  return override if override

  timeout_key = (operation || request_type || :default).to_sym
  dispatch_settings = Legion::Settings.dig(:llm, :fleet, :dispatch) || {}
  per_operation = dispatch_settings[:timeouts] || {}

  fetch_option(per_operation, timeout_key) || dispatch_settings[:timeout_seconds] || 30
end

.sanitize_model(model) ⇒ Object



176
177
178
# File 'lib/legion/llm/fleet/dispatcher.rb', line 176

# Makes a model name usable as a routing-key segment by replacing every ':'
# with '.'.
#
# @param model [#to_s, nil] model identifier
# @return [String] nil becomes an empty string
def sanitize_model(model)
  model.to_s.tr(':', '.')
end

.timeout_result(correlation_id, timeout, message_context: {}) ⇒ Object



267
268
269
270
# File 'lib/legion/llm/fleet/dispatcher.rb', line 267

# Builds the failure hash returned when no reply arrives.
#
# @param correlation_id [String] correlation id of the request
# @param timeout [Object] timeout that was applied
# @param message_context [Hash] context echoed back to the caller
# @return [Hash] success: false with error 'fleet_timeout'
def timeout_result(correlation_id, timeout, message_context: {})
  {
    success:         false,
    error:           'fleet_timeout',
    correlation_id:  correlation_id,
    timeout:         timeout,
    message_context: message_context
  }
end

.transport_ready? ⇒ Boolean

Returns:

  • (Boolean)


184
185
186
# File 'lib/legion/llm/fleet/dispatcher.rb', line 184

# Whether the transport setting reports an established connection.
#
# @return [Boolean] true only when transport.connected equals true
def transport_ready?
  connected = Legion::Settings.dig(:transport, :connected)
  connected == true
end

.wait_for_response(correlation_id, timeout:, message_context: {}, future: nil) ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
# File 'lib/legion/llm/fleet/dispatcher.rb', line 255

# Blocks until the reply for a correlation id arrives or the timeout passes.
#
# The registration is always removed afterwards, whichever way the wait ends.
# A cancelled wait is reported through handle_exception and returned as a
# timeout result; other errors raised by the future propagate.
# NOTE(review): the future is presumably a concurrent-ruby future whose
# value!(timeout) yields nil on timeout; confirm against ReplyDispatcher.
#
# @param correlation_id [String] correlation id being awaited
# @param timeout [Object] passed to future.value!
# @param message_context [Hash] context echoed in a timeout result
# @param future [#value!, nil] pending reply; registered here when nil
# @return [Object] the reply, or the timeout_result hash when the value is nil/false
def wait_for_response(correlation_id, timeout:, message_context: {}, future: nil)
  log.debug "[llm][fleet][dispatcher] action=wait_for_response correlation_id=#{correlation_id} timeout=#{timeout}"
  pending_reply = future || ReplyDispatcher.register(correlation_id)
  pending_reply.value!(timeout) || timeout_result(correlation_id, timeout, message_context: message_context)
rescue Concurrent::CancelledOperationError => error
  handle_exception(error, level: :debug, handled: true, operation: 'llm.fleet.dispatcher.wait_cancelled')
  timeout_result(correlation_id, timeout, message_context: message_context)
ensure
  ReplyDispatcher.deregister(correlation_id)
end