Module: Legion::LLM::Inference::Executor::Routing

Included in:
Legion::LLM::Inference::Executor
Defined in:
lib/legion/llm/inference/executor/routing.rb

Overview

Routing-area methods extracted from Executor verbatim (P4b §1.5, refactor-under-green). Operates on Executor instance state; see P4b-decomposition-embed.md §1.1 for the ivar contract this mixin reads/writes.

Instance Method Summary collapse

Instance Method Details

#apply_proactive_tier_assignment(state) ⇒ Object



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/legion/llm/inference/executor/routing.rb', line 292

# Folds the proactive tier assignment (when one was recorded) into the routing state.
#
# A forced assignment always wins: it overwrites the tier, marks it explicit and
# merges its intent on top of the caller's intent. An advisory assignment is only
# applied when the state has no tier, intent, instance, provider or model.
#
# NOTE(review): routing_request_state always fills :intent with a Hash (and any
# Hash is truthy), so the advisory branch looks unreachable for states built
# there — confirm whether that is intended.
#
# @param state [Hash] routing state; mutated in place
# @return [Hash] the same state object
def apply_proactive_tier_assignment(state)
  assignment = @proactive_tier_assignment
  return state if assignment.nil?

  if assignment.dig(:forced)
    state[:tier] = assignment[:tier]
    state[:tier_explicit] = true
    state[:intent] = merge_routing_intent(state[:intent], assignment[:intent])
    log.info "[llm][routing] action=forced_tier source=#{assignment[:source]} tier=#{state[:tier]}"
  elsif %i[tier intent instance provider model].none? { |field| state[field] }
    state[:tier] = assignment[:tier]
    state[:tier_explicit] = true
    state[:intent] = assignment[:intent]
  end
  state
end

#apply_routing_resolution(state, resolution) ⇒ Object



393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
# File 'lib/legion/llm/inference/executor/routing.rb', line 393

# Copies a router resolution onto the routing state and records the selection
# under the 'routing:provider_selection' audit key.
#
# @param state [Hash] routing state; mutated in place
# @param resolution [Object] router result; read for provider, instance, model,
#   tier, offering_id and rule
# @return [Hash] the same state object
def apply_routing_resolution(state, resolution)
  # Computed before state[:provider] is overwritten so a stale instance can be dropped below.
  provider_changed = resolution.provider && resolution.provider != state[:provider]
  state[:provider] = resolution.provider
  # Instance precedence: the resolution's instance, then nil when the provider
  # changed, otherwise whatever the state already held.
  state[:instance] = if resolution.instance
                       resolution.instance
                     elsif provider_changed
                       nil
                     else
                       state[:instance]
                     end
  state[:model] = resolution.model
  state[:tier] = resolution.tier
  # Keep the incoming offering id when the resolution does not carry one.
  state[:offering_id] = resolution.offering_id || state[:offering_id]
  # NOTE(review): the identifier after `resolution.` (twice on this line) appears to
  # have been dropped when this listing was extracted — presumably a metadata reader
  # on the resolution; restore it from lib/legion/llm/inference/executor/routing.rb.
  state[:offering_metadata] = resolution. unless resolution..empty?
  @audit[:'routing:provider_selection'] = {
    outcome: :success,
    detail: "selected #{state[:provider]}:#{state[:model]} via #{resolution.rule}",
    # compact drops nil entries so the audit data only lists what was resolved.
    data: { strategy: resolution.rule, tier: resolution.tier, instance: state[:instance],
            offering_id: state[:offering_id], offering_metadata: state[:offering_metadata] }.compact,
    duration_ms: 0, timestamp: Time.now
  }
  state
end

#chain_required_capabilities ⇒ Object



196
197
198
199
200
201
202
203
204
# File 'lib/legion/llm/inference/executor/routing.rb', line 196

# Lists the capabilities the current request needs from a routing chain.
#
# @return [Array<Symbol>] :streaming when the request streams, :tools when native
#   tools are requested; an empty array when anything raises (the error is
#   reported through handle_exception)
def chain_required_capabilities
  needs = {
    streaming: @request.stream == true,
    tools:     native_tools_requested_for_routing?
  }
  needs.select { |_capability, needed| needed }.keys
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.chain_required_capabilities')
  []
end

#estimate_request_tokens ⇒ Object



185
186
187
188
189
190
191
192
193
194
# File 'lib/legion/llm/inference/executor/routing.rb', line 185

# Estimates the token count of the conversation history enrichment plus the
# request's own messages (history first). The result is passed to the router as
# :estimated_tokens by routing_request_state.
#
# @return [Integer, Object] 0 when there are no messages, otherwise whatever
#   estimate_message_tokens returns for the combined list
def estimate_request_tokens
  history = @enrichments['context:conversation_history'] || []
  current = @request.messages || []
  combined = [].concat(history, current)

  combined.empty? ? 0 : estimate_message_tokens(combined)
end

#fallback_model_for_resolved_provider(auto_route) ⇒ Object

When routing resolved a provider but no model, source the model from that provider’s own catalog (Inventory SSOT) — never the global default_model, which may belong to a different provider. This prevents pairing e.g. anthropic with a vllm-family global default. The global default applies only when no provider resolved, or the resolved provider IS the configured default_provider (so the global default legitimately belongs to it).



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/legion/llm/inference/executor/routing.rb', line 119

# Picks a model when routing resolved a provider but no model.
#
# Preference order: the resolved provider's own inventory default (when Router
# exposes inventory_default_model), then the global default_model — but only if
# no provider was resolved or the resolved provider is the configured
# default_provider. Never falls back while auto routing.
#
# @param auto_route [Boolean] true when the request is being auto routed
# @return [Object, nil] the fallback model, or nil when none applies
def fallback_model_for_resolved_provider(auto_route)
  return nil if auto_route

  provider = @resolved_provider
  if provider && Router.respond_to?(:inventory_default_model)
    catalog_model = Router.inventory_default_model(provider, @resolved_instance)
    return catalog_model if catalog_model
  end

  global_default = Legion::Settings[:llm][:default_model]
  return nil if global_default.nil? || global_default.to_s.empty?

  configured_provider = Legion::Settings[:llm][:default_provider]&.to_sym
  owns_global_default = provider.nil? || provider.to_sym == configured_provider
  owns_global_default ? global_default : nil
end

#inferred_provider_tier(provider) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/legion/llm/inference/executor/routing.rb', line 25

# Infers the tier for a provider when the routing state did not supply one.
#
# Lookup order: the :tier entry of the registry lookup result, then
# Router.provider_tier, then the Router::PROVIDER_TIER table. Any StandardError
# is reported through handle_exception and yields nil.
#
# @param provider [Object, nil] provider identifier
# @return [Object, nil] the inferred tier, or nil when unknown or no provider given
def inferred_provider_tier(provider)
  return nil unless provider

  # NOTE(review): the Call::Registry method name appears to have been dropped when
  # this listing was extracted (as written this parses as `.call`) — presumably a
  # metadata lookup keyed by provider and instance; restore it from the real source.
  meta = Call::Registry.(provider, @resolved_instance || :default)
  return meta[:tier].to_sym if meta.is_a?(Hash) && meta[:tier]
  return Router.provider_tier(provider) if defined?(Router) && Router.respond_to?(:provider_tier)

  # Last resort: static table lookup; a missing provider key yields nil.
  Router::PROVIDER_TIER.fetch(provider.to_sym, nil) if defined?(Router::PROVIDER_TIER)
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.inferred_provider_tier',
                      provider: provider)
  nil
end

#local_provider? ⇒ Boolean

Returns:

  • (Boolean)


21
22
23
# File 'lib/legion/llm/inference/executor/routing.rb', line 21

# Tells whether the resolved provider is one of the local providers (ollama or vllm).
#
# @return [Boolean] false when no provider has been resolved
def local_provider?
  provider = @resolved_provider&.to_sym
  provider == :ollama || provider == :vllm
end

#merge_response_offering_metadata(metadata) ⇒ Object



458
459
460
461
462
463
464
465
466
# File 'lib/legion/llm/inference/executor/routing.rb', line 458

# Merges offering metadata reported with a response into the resolved offering state.
#
# Accepts either a wrapper Hash holding the offering under :offering / 'offering'
# or the offering Hash itself. Keys are normalized through
# normalize_offering_metadata before merging into @resolved_offering_metadata.
# @resolved_offering_id is only filled in when it is still nil.
#
# The method name, parameter and normalizer call are reconstructed from the
# documented signature `merge_response_offering_metadata(metadata)`; the listing
# this was taken from had those identifiers stripped.
#
# @param metadata [Hash, Object] response metadata; anything but a Hash is ignored
# @return [Object, nil] not meaningful; the method is called for its side effects
def merge_response_offering_metadata(metadata)
  return unless metadata.is_a?(Hash)

  offering = normalize_offering_metadata(metadata[:offering] || metadata['offering'] || metadata)
  return if offering.empty?

  # Tolerate a nil baseline so a response can still contribute metadata.
  @resolved_offering_metadata = (@resolved_offering_metadata || {}).merge(offering)
  @resolved_offering_id = @resolved_offering_metadata[:offering_id] if @resolved_offering_id.nil?
end

#merge_routing_intent(existing, assignment) ⇒ Object



424
425
426
427
428
# File 'lib/legion/llm/inference/executor/routing.rb', line 424

# Overlays an assignment's intent on top of an existing intent.
#
# Anything that is not a Hash is treated as an empty intent; on key clashes the
# assignment's value wins.
#
# @param existing [Hash, Object] the current intent
# @param assignment [Hash, Object] the intent supplied by the tier assignment
# @return [Hash] a new merged Hash
def merge_routing_intent(existing, assignment)
  base, overlay = [existing, assignment].map { |candidate| candidate.is_a?(Hash) ? candidate : {} }
  base.merge(overlay)
end

#native_tools_requested_for_routing? ⇒ Boolean

Returns:

  • (Boolean)


261
262
263
264
265
266
267
268
269
# File 'lib/legion/llm/inference/executor/routing.rb', line 261

# Tells whether routing must pick a provider that supports native tools.
#
# Checked in order, stopping at the first hit: tools on the request, requested
# deferred tool names, triggered tools, and pinned special tool definitions.
#
# @return [Boolean] false when anything raises (the error is reported through
#   handle_exception)
def native_tools_requested_for_routing?
  return true if Array(@request.tools).any?
  return true if requested_deferred_tool_names.any?
  return true if @triggered_tools.any?

  Tools::Special.pinned_definitions.any?
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.routing_tools_required')
  false
end

#normalize_offering_metadata(value) ⇒ Object



13
14
15
16
17
18
19
# File 'lib/legion/llm/inference/executor/routing.rb', line 13

# Returns a copy of an offering metadata Hash with keys symbolized where possible.
#
# The method name and the block variable are reconstructed from the documented
# signature `normalize_offering_metadata(value)`; the listing this was taken
# from had those identifiers stripped.
#
# @param value [Hash, Object] raw offering metadata
# @return [Hash] a new Hash; empty when value is not a Hash. Keys that do not
#   respond to to_sym are kept as they are, and values are copied untouched.
def normalize_offering_metadata(value)
  return {} unless value.is_a?(Hash)

  value.each_with_object({}) do |(key, entry), normalized|
    normalized[key.respond_to?(:to_sym) ? key.to_sym : key] = entry
  end
end

#normalize_required_capabilities(capabilities) ⇒ Object



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/legion/llm/inference/executor/routing.rb', line 271

# Canonicalizes a list of required capability names.
#
# Each entry is stringified, downcased, stripped and symbolized; blank entries
# and nils are dropped. Known aliases additionally contribute their canonical
# name right after the alias itself (e.g. :function_calling adds :tools).
#
# @param capabilities [Array, Object, nil] one capability or a list of them
# @return [Array<Symbol>] de-duplicated capabilities in first-seen order
def normalize_required_capabilities(capabilities)
  canonical = {
    function_calling: :tools,
    functions:        :tools,
    tool:             :tools,
    tool_use:         :tools,
    stream:           :streaming,
    stream_chat:      :streaming
  }
  expanded = Array(capabilities).compact.flat_map do |raw|
    next [] unless raw.respond_to?(:to_s)

    name = raw.to_s.downcase.strip
    next [] if name.empty?

    key = name.to_sym
    [key, canonical[key]].compact
  end
  expanded.uniq
end

#provider_scoped_instance(instance, provider, preserve_unknown:) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/legion/llm/inference/executor/routing.rb', line 142

# Validates an instance name against the instances registered for a provider.
#
# @param instance [#to_sym, nil] candidate instance
# @param provider [#to_sym, nil] provider the instance should belong to
# @param preserve_unknown [Boolean] whether to keep an instance the registry
#   cannot confirm
# @return [Symbol, Object, nil] the symbolized instance when it is registered
#   for the provider; nil when either argument is blank, or when the provider
#   has other instances registered but not this one; otherwise the symbolized
#   instance or nil depending on preserve_unknown. On error the exception is
#   reported and the raw instance (or nil) is returned per preserve_unknown.
def provider_scoped_instance(instance, provider, preserve_unknown:)
  return nil if [instance, provider].any? { |part| part.nil? || part.to_s.empty? }

  provider_key = provider.to_sym
  instance_key = instance.to_sym
  return instance_key if Call::Registry.registered?(provider_key, instance: instance_key)

  # A registered provider that lists at least one instance rules out unknown names.
  provider_lists_instances =
    Call::Registry.registered?(provider_key) &&
    (known = Call::Registry.instances_for(provider_key)).is_a?(Array) &&
    known.any?
  return nil if provider_lists_instances

  preserve_unknown ? instance_key : nil
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.pipeline.provider_scoped_instance')
  preserve_unknown ? instance : nil
end

#record_forced_tier_selection ⇒ Object



430
431
432
433
434
435
436
437
438
439
440
441
# File 'lib/legion/llm/inference/executor/routing.rb', line 430

# Writes a 'routing:provider_selection' audit entry describing a forced tier.
# Does nothing unless the proactive tier assignment is marked :forced.
#
# @return [Hash, nil] the audit entry that was stored, or nil when not forced
def record_forced_tier_selection
  assignment = @proactive_tier_assignment
  return unless assignment&.dig(:forced)

  source = assignment[:source]
  selection = { tier: @resolved_tier, strategy: source,
                provider: @resolved_provider, model: @resolved_model }
  @audit[:'routing:provider_selection'] = {
    outcome:     :success,
    detail:      "forced tier #{@resolved_tier} by #{source}",
    data:        selection.compact,
    duration_ms: 0,
    timestamp:   Time.now
  }
end

#request_has_vision_content? ⇒ Boolean

Returns:

  • (Boolean)


244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/legion/llm/inference/executor/routing.rb', line 244

# Tells whether the request carries image content, so routing can require :vision.
#
# True when the request modality is :vision, or when any message has an Array
# content containing a Hash block whose type is 'image' / 'image_url' or whose
# source is a Hash of type 'base64'. Symbol and String keys are both accepted
# for :content, :type and :source.
#
# A nil message list is treated as empty (estimate_request_tokens already
# allows for it), and a non-Hash :source no longer raises from Hash#dig.
#
# @return [Boolean]
def request_has_vision_content?
  return true if @request.modality == :vision

  (@request.messages || []).any? do |msg|
    content = msg[:content] || msg['content']
    next false unless content.is_a?(Array)

    content.any? do |block|
      next false unless block.is_a?(Hash)

      type = (block[:type] || block['type']).to_s
      next true if %w[image image_url].include?(type)

      source = block[:source] || block['source']
      source.is_a?(Hash) && (source[:type] || source['type']).to_s == 'base64'
    end
  end
end

#request_requires_thinking? ⇒ Boolean

Returns:

  • (Boolean)


232
233
234
235
236
237
238
239
240
241
242
# File 'lib/legion/llm/inference/executor/routing.rb', line 232

# Tells whether the request asks for thinking/reasoning, so routing can require :thinking.
#
# True when the request's thinking setting converts to a non-empty Hash, or when
# the request extras (Symbol or String keys) carry a truthy :thinking,
# :reasoning or :max_thinking_tokens entry.
#
# @return [Boolean]
def request_requires_thinking?
  requested = @request.thinking
  # A Hash answers to_h with itself, so this one check covers Hashes and Hash-likes.
  return true if requested.respond_to?(:to_h) && requested.to_h.any?

  extras = @request.extra || {}
  return false unless extras.is_a?(Hash)

  symbolized = extras.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  %i[thinking reasoning max_thinking_tokens].any? { |flag| symbolized[flag] }
end

#resolve_model_to_local_provider(state) ⇒ Object

If the caller named a model but gave no explicit provider/tier/instance, search discovered providers for that model with a healthy circuit. On a hit: pin provider + instance so normal routing runs against the local copy. On a miss: clear the model name and set auto_route so the pipeline picks the best available provider rather than blindly forwarding a frontier model name.

Deliberate Discovery read (NOT Inventory.offerings): this pin must match only models that are actually running/pulled locally. Inventory.offerings also includes static provider catalogs (e.g. the full Anthropic model list), so routing through it here would pin frontier model names to providers that merely advertise them — the opposite of “local copy.”



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/legion/llm/inference/executor/routing.rb', line 320

# Pins a bare model name to a locally discovered provider when possible.
#
# Only acts when the state names a model but carries no provider, tier or
# instance (explicit or otherwise) and both Discovery and Router are loaded.
# Discovered entries match when their model equals the requested name or starts
# with "<name>:". The first match that is registered in Call::Registry and whose
# circuit is not :open gets pinned (provider + instance). If matches exist but
# none qualifies, the model is cleared and :auto_route is switched on. With no
# matches at all the state is returned untouched.
#
# @param state [Hash] routing state; mutated in place
# @return [Hash] the same state object
def resolve_model_to_local_provider(state)
  routing_hints = %i[provider_explicit tier_explicit instance_explicit provider tier instance]
  return state if routing_hints.any? { |hint| state[hint] }
  return state unless state[:model] && defined?(Discovery) && defined?(Router)

  model = state[:model].to_s
  tagged_prefix = "#{model}:"
  matches = Array(Discovery.cached_discovered_models).select do |entry|
    discovered_name = entry[:model].to_s
    discovered_name == model || discovered_name.start_with?(tagged_prefix)
  end
  return state if matches.empty?

  # Must be both locally registered and circuit-closed; a discovered model on a
  # provider that is not registered here must not pin.
  pin = matches.find do |entry|
    Call::Registry.registered?(entry[:provider], instance: entry[:instance]) &&
      Router.health_tracker.circuit_state(entry[:provider], instance: entry[:instance]) != :open
  end

  unless pin
    log.info "[llm][executor] action=model_discovery_miss model=#{model} falling_back=auto_route"
    state[:model] = nil
    state[:auto_route] = true
    return state
  end

  log.info "[llm][executor] action=model_discovery_pin model=#{model} provider=#{pin[:provider]} instance=#{pin[:instance]}"
  state[:provider] = pin[:provider]
  state[:instance] = pin[:instance]
  state
end

#resolve_provider_instance(requested_instance, provider) ⇒ Object



136
137
138
139
140
# File 'lib/legion/llm/inference/executor/routing.rb', line 136

# Resolves which provider instance to use.
#
# A requested instance is scoped to the provider and kept even when the
# registry does not know it; without one, the configured default_instance is
# tried and dropped when it cannot be confirmed.
#
# @param requested_instance [Object, nil] instance taken from the routing state
# @param provider [Object, nil] resolved provider
# @return [Object, nil] whatever provider_scoped_instance returns
def resolve_provider_instance(requested_instance, provider)
  caller_supplied = requested_instance ? true : false
  candidate = caller_supplied ? requested_instance : Legion::Settings[:llm][:default_instance]
  provider_scoped_instance(candidate, provider, preserve_unknown: caller_supplied)
end

#resolve_routing_state(state) ⇒ Object



359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/legion/llm/inference/executor/routing.rb', line 359

# Runs the router against the state when there is a reason to route.
#
# Routing is attempted when any provider/instance/tier field is explicit, when
# :auto_route is exactly true, or when an explicit intent is present and
# Router.routing_enabled? says so. Without Router loaded, without a trigger, or
# without a resolution, the state is returned untouched.
#
# @param state [Hash] routing state
# @return [Hash] the state, updated by apply_routing_resolution when a
#   resolution was found
def resolve_routing_state(state)
  return state unless defined?(Router)

  triggers = [
    state[:provider_explicit] || state[:instance_explicit] || state[:tier_explicit],
    state[:auto_route] == true,
    state[:intent_explicit] && state[:intent] && Router.routing_enabled?
  ]
  return state if triggers.none?

  chosen = routing_resolution_for(state)
  chosen ? apply_routing_resolution(state, chosen) : state
end

#routing_field_explicit?(flags, key, value) ⇒ Boolean

Returns:

  • (Boolean)


417
418
419
420
421
422
# File 'lib/legion/llm/inference/executor/routing.rb', line 417

# Decides whether a routing field counts as explicitly set by the caller.
#
# A blank value is never explicit. With no flags Hash every non-blank value is
# explicit. Otherwise the flag stored under the Symbol key (or, failing that,
# its String form) must be exactly true; a missing flag defaults to true.
#
# @param flags [Hash, Object] per-field explicitness flags
# @param key [Symbol] field name
# @param value [Object] the field's value
# @return [Boolean]
def routing_field_explicit?(flags, key, value)
  return false if value.nil? || value.to_s.empty?
  return true unless flags.is_a?(Hash)

  flag = if flags.key?(key)
           flags[key]
         elsif flags.key?(key.to_s)
           flags[key.to_s]
         else
           true
         end
  flag == true
end

#routing_intent_for_request(intent) ⇒ Object



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/legion/llm/inference/executor/routing.rb', line 210

# Builds the routing intent for the current request from a caller-supplied intent.
#
# Keys are symbolized on a copy (the caller's Hash is not modified). Declared
# capabilities are taken from :required_capabilities or, failing that,
# :requires, then extended with what the request itself needs: :streaming,
# :tools, :vision and :thinking. :operation is forced to :stream for streaming
# requests and otherwise defaults to :chat; :effort defaults to :moderate.
#
# @param intent [Hash, Object] caller intent; anything but a Hash counts as empty
# @return [Hash] the normalized intent; :required_capabilities is only present
#   when at least one capability is required
def routing_intent_for_request(intent)
  routed = intent.is_a?(Hash) ? intent.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key } : {}
  declared = routed.delete(:required_capabilities) || routed.delete(:requires)
  capabilities = normalize_required_capabilities(declared)

  routed[:operation] = :stream if @request.stream == true
  routed[:operation] ||= :chat
  routed[:effort] ||= :moderate

  capabilities << :streaming if @request.stream == true
  {
    tools:    :native_tools_requested_for_routing?,
    vision:   :request_has_vision_content?,
    thinking: :request_requires_thinking?
  }.each { |capability, probe| capabilities << capability if send(probe) }

  routed[:required_capabilities] = capabilities.uniq if capabilities.any?
  routed
end

#routing_intent_present?(intent) ⇒ Boolean

Returns:

  • (Boolean)


206
207
208
# File 'lib/legion/llm/inference/executor/routing.rb', line 206

# Tells whether the caller supplied a usable routing intent.
#
# @param intent [Object] candidate intent
# @return [Boolean] true only for a non-empty Hash
def routing_intent_present?(intent)
  return false unless intent.is_a?(Hash)

  !intent.empty?
end

#routing_request_state ⇒ Object



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/legion/llm/inference/executor/routing.rb', line 162

# Builds the initial routing state Hash from the request.
#
# Provider, instance, model and offering fields come from the request's routing
# Hash (instance also accepts :instance_id / :provider_instance, offering id
# also accepts :id, offering metadata also accepts :offering). Tier, intent,
# auto_route and the explicitness flags come from the request extras.
#
# Offering metadata is passed through normalize_offering_metadata; that call is
# reconstructed here because the listing this was taken from had the identifier
# stripped. A nil extras value is treated as empty, matching
# request_requires_thinking?.
#
# @return [Hash] state with :provider, :instance, :model, :offering_id,
#   :offering_metadata, :intent, :intent_explicit, :tier, :auto_route,
#   :provider_explicit, :instance_explicit, :tier_explicit and :estimated_tokens
def routing_request_state
  routing = @request.routing
  extra = @request.extra || {}
  routing_explicit = extra[:routing_explicit]
  request_intent = extra[:intent]
  instance = routing[:instance] || routing[:instance_id] || routing[:provider_instance]
  tier = extra[:tier]
  {
    provider:          routing[:provider],
    instance:          instance,
    model:             routing[:model],
    offering_id:       routing[:offering_id] || routing[:id],
    offering_metadata: normalize_offering_metadata(routing[:offering_metadata] || routing[:offering]),
    intent:            routing_intent_for_request(request_intent),
    intent_explicit:   routing_intent_present?(request_intent),
    tier:              tier,
    auto_route:        extra[:auto_route],
    provider_explicit: routing_field_explicit?(routing_explicit, :provider, routing[:provider]),
    instance_explicit: routing_field_explicit?(routing_explicit, :instance, instance),
    tier_explicit:     routing_field_explicit?(routing_explicit, :tier, tier),
    estimated_tokens:  estimate_request_tokens
  }
end

#routing_resolution_for(state) ⇒ Object



373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/legion/llm/inference/executor/routing.rb', line 373

# Asks the router for a resolution matching the state.
#
# When auto routing, or when an explicit intent is present and pipeline
# escalation is enabled, a full escalation chain is resolved, stored in
# @escalation_chain, and its primary entry returned. Default fallback is
# disallowed for the chain while auto routing. Otherwise a single resolution is
# requested.
#
# @param state [Hash] routing state
# @return [Object, nil] the router's resolution
def routing_resolution_for(state)
  selectors = {
    intent:           state[:intent],
    tier:             state[:tier],
    model:            state[:model],
    provider:         state[:provider],
    instance:         state[:instance],
    estimated_tokens: state[:estimated_tokens]
  }
  auto_routing = state[:auto_route] == true
  use_chain = auto_routing || (state[:intent_explicit] && state[:intent] && pipeline_escalation_enabled?)
  return Router.resolve(**selectors) unless use_chain

  @escalation_chain = Router.resolve_chain(
    **selectors,
    max_escalations:        pipeline_escalation_max_attempts,
    allow_default_fallback: !auto_routing
  )
  @escalation_chain.primary
end

#step_request_normalization ⇒ Object



443
444
445
446
# File 'lib/legion/llm/inference/executor/routing.rb', line 443

# Captures the tracing exchange id in @exchange_id and mirrors it into the
# :legion_log_exchange_id thread-local.
#
# @return [Object] the exchange id
def step_request_normalization
  Thread.current[:legion_log_exchange_id] = @exchange_id = Tracing.exchange_id
end

#step_routing ⇒ Object

Raises:

  • (ProviderError)

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/legion/llm/inference/executor/routing.rb', line 71

# Pipeline step that resolves provider, instance, model, tier and offering for
# the request and stores them in the @resolved_* instance variables, then logs
# the outcome and records a timeline entry.
#
# @raise [ProviderError] when auto routing ends without both a provider and a model
def step_routing
  log.debug "[llm][executor] action=step_routing.enter requested_provider=#{@request.routing[:provider]} requested_model=#{@request.routing[:model]}"
  @timestamps[:routing_start] = Time.now
  # Innermost call runs first: request state -> local model pin -> proactive tier -> router resolution.
  state = resolve_routing_state(apply_proactive_tier_assignment(resolve_model_to_local_provider(routing_request_state)))
  auto_route = state[:auto_route] == true

  # A provider inferred from the model name is only kept when the state already
  # names a provider or the inferred provider is registered.
  inferred = state[:model] && Router.infer_provider_for_model(state[:model])
  inferred = nil unless state[:provider] || (inferred && Call::Registry.registered?(inferred))
  # Precedence: state provider, then inferred provider, then the configured
  # default — the default is skipped while auto routing.
  @resolved_provider = state[:provider] ||
                       inferred ||
                       (Legion::Settings[:llm][:default_provider] unless auto_route)
  @resolved_instance = resolve_provider_instance(state[:instance], @resolved_provider)

  # If the resolved provider differs from the model's natural provider, swap to the
  # provider's default model — sending "claude-sonnet-4-6" to vllm would fail.
  resolved_model = state[:model]
  if resolved_model && @resolved_provider
    model_natural = Router.infer_provider_for_model(resolved_model)
    # NOTE(review): this compares the two providers without to_sym normalization,
    # unlike local_provider? and fallback_model_for_resolved_provider — confirm a
    # String provider cannot meet a Symbol here and trigger a spurious swap.
    if model_natural && model_natural != @resolved_provider
      log.debug "[llm][executor] action=model_provider_mismatch model=#{resolved_model} " \
                "natural_provider=#{model_natural} resolved_provider=#{@resolved_provider} swapping"
      resolved_model = nil
    end
  end
  @resolved_model = resolved_model || fallback_model_for_resolved_provider(auto_route)
  raise ProviderError, 'Auto routing could not resolve an available LLM provider/model' if auto_route && (@resolved_provider.nil? || @resolved_model.nil?)

  @resolved_tier = state[:tier]&.to_sym || inferred_provider_tier(@resolved_provider)
  @resolved_offering_id = state[:offering_id]
  @resolved_offering_metadata = state[:offering_metadata]
  # Only record the forced-tier audit entry when no selection entry exists yet
  # (apply_routing_resolution writes one when the router produced a resolution).
  record_forced_tier_selection unless @audit[:'routing:provider_selection']

  log.info '[llm][inference] resolved ' \
           "provider=#{@resolved_provider} instance=#{@resolved_instance || 'default'} " \
           "model=#{@resolved_model} offering_id=#{@resolved_offering_id}"
  @timeline.record(
    category: :audit, key: 'routing:provider_selection',
    direction: :internal, detail: "routed to #{@resolved_provider}:#{@resolved_model}",
    from: 'router', to: 'pipeline'
  )
end

#step_tier_assignment ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/legion/llm/inference/executor/routing.rb', line 39

# Pipeline step that asks Steps::TierAssigner for a proactive tier assignment.
#
# When an assignment comes back it is stored in @proactive_tier_assignment,
# written to the audit trail under 'routing:tier_assignment' and recorded on the
# timeline. Errors never propagate: they are added to @warnings and reported
# through handle_exception.
#
# @return [Object, nil] not meaningful; the method is called for its side effects
def step_tier_assignment
  proposed = Steps::TierAssigner.assign(
    caller:          @request.caller,
    classification:  @enrichments['classification:scan'],
    priority:        @request.priority,
    gaia_hint:       @enrichments['gaia:routing_hint'],
    existing_tier:   @request.extra[:tier],
    existing_intent: @request.extra[:intent]
  )
  return unless proposed

  @proactive_tier_assignment = proposed
  tier = proposed[:tier]
  source = proposed[:source]
  @audit[:'routing:tier_assignment'] = {
    outcome:     :success,
    detail:      "proactive tier=#{tier} source=#{source}",
    data:        proposed,
    duration_ms: 0,
    timestamp:   Time.now
  }
  @timeline.record(
    category: :audit, key: 'routing:tier_assignment',
    direction: :internal,
    detail: "tier=#{tier} assigned by #{source}",
    from: 'tier_assigner', to: 'pipeline'
  )
rescue StandardError => e
  @warnings << "tier assignment error: #{e.message}"
  handle_exception(e, level: :warn, operation: 'llm.pipeline.step_tier_assignment')
end

#use_native_dispatch?(provider) ⇒ Boolean

Returns:

  • (Boolean)


448
449
450
451
452
453
454
455
456
# File 'lib/legion/llm/inference/executor/routing.rb', line 448

# Tells whether requests should go through the native dispatch layer.
#
# Requires Call::Dispatch to be loaded and a provider to be given (the provider
# is only checked for presence). The llm.provider_layer mode setting must then
# be 'native' or 'auto'; a missing mode counts as 'auto'.
#
# @param provider [Object, nil] resolved provider
# @return [Boolean]
def use_native_dispatch?(provider)
  return false unless defined?(Call::Dispatch) && provider

  layer = Legion::Settings.dig(:llm, :provider_layer) || {}
  mode = (layer[:mode] || 'auto').to_s
  mode == 'native' || mode == 'auto'
end