Module: Legion::LLM::Router

Extended by:
Candidates, RegistryLookup, Legion::Logging::Helper
Defined in:
lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/candidates.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/availability.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/registry_lookup.rb,
lib/legion/llm/router/escalation/chain.rb

Defined Under Namespace

Modules: Arbitrage, Availability, Candidates, RegistryLookup

Classes: EscalationChain, HealthTracker, Resolution, Rule

Constant Summary collapse

# Tier assigned to each known provider. Lookups in this module use
# PROVIDER_TIER.fetch(provider, :frontier), so a provider missing from this
# map is treated as :frontier.
PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier,
gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
# Ordered list of provider names.
# NOTE(review): the consumer is not visible in this section; presumably a
# preference order — confirm against the callers.
PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
# The tiers grouped as "external": :cloud and :frontier.
# NOTE(review): presumably what external_tier? checks — confirm.
TIER_EXTERNAL =
Set[:cloud, :frontier].freeze
# Built-in tier ordering. tier_priority appends these keys to any configured
# order and returns them alone when nothing is configured.
TIER_RANK =
{ local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
# Alternate capability names mapped to their canonical name
# (:tools or :streaming).
CAPABILITY_ALIASES =
{
  function_calling: :tools,
  functions:        :tools,
  tool:             :tools,
  tool_use:         :tools,
  stream:           :streaming,
  stream_chat:      :streaming
}.freeze
# Canonical effort level names.
CANONICAL_EFFORT_LEVELS =
%i[low moderate high reasoning].freeze
# Accepted alternate effort names mapped to their canonical level.
EFFORT_ALIASES =
{ medium: :moderate }.freeze
# Every accepted effort name: the canonical levels followed by the alias keys.
EFFORT_LEVELS =
(CANONICAL_EFFORT_LEVELS + EFFORT_ALIASES.keys).freeze
# Numeric rank per canonical effort level. Aliases such as :medium have no
# entry here.
EFFORT_RANK =
{ low: 0, moderate: 1, high: 2, reasoning: 3 }.freeze
# Canonical operation names.
OPERATIONS =
%i[chat stream embed image structured_output].freeze
# Alternate operation names mapped to their canonical operation.
OPERATION_ALIASES =
{ completion: :chat, stream_chat: :stream, embedding: :embed }.freeze
# Default operation name.
# NOTE(review): presumably applied when a request names no operation — confirm.
DEFAULT_OPERATION =
:chat
# Default effort level name.
# NOTE(review): presumably applied when a request names no effort — confirm.
DEFAULT_EFFORT =
:moderate
# Matches any string containing ':' or '/'. infer_provider_for_model uses it
# as its last heuristic and classifies a matching model name as :ollama.
OLLAMA_MODEL_PATTERN =
%r{[:/]}

Class Method Summary collapse

Class Method Details

.auto_rules_populated? ⇒ Boolean

Returns:

  • (Boolean)


175
176
177
# File 'lib/legion/llm/router.rb', line 175

# Reports whether auto-generated routing rules have been loaded.
#
# The flag is set to +true+ by populate_auto_rules and back to +false+ by
# reset!; an unset flag counts as not populated.
#
# @return [Boolean] +true+ only when the flag is exactly +true+
def auto_rules_populated?
  @auto_rules_populated.equal?(true)
end

.build_escalation_chain(provider:, model:, tier:, instance: nil, max_attempts: nil, estimated_tokens: nil, required_capabilities: []) ⇒ Object



284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/legion/llm/router.rb', line 284

# Builds an escalation chain that starts with the explicitly requested target
# and continues with fallback resolutions.
#
# @param provider [Symbol, String, nil] requested provider; also the provider excluded from fallbacks
# @param model [String, nil] requested model
# @param tier [Symbol, String, nil] requested tier; also used to order the fallbacks
# @param instance [Symbol, String, nil] requested provider instance
# @param max_attempts [Integer, nil] attempt cap; escalation_max_attempts is used when nil
# @param estimated_tokens [Integer, nil] forwarded to filter_chain_resolutions
# @param required_capabilities [Array] forwarded to filter_chain_resolutions
# @return [EscalationChain]
def build_escalation_chain(provider:, model:, tier:, instance: nil, max_attempts: nil,
                           estimated_tokens: nil, required_capabilities: [])
  head = explicit_resolution(tier, provider, model, instance)
  tail = build_fallback_resolutions(exclude_provider: provider, exclude_instance: instance, primary_tier: tier)

  # Drop nils, then keep only the first resolution for each
  # provider/instance/model triple so the primary always wins a tie.
  ordered = [head].concat(tail).compact.uniq do |res|
    [res.provider, res.instance, res.model]
  end
  usable = filter_chain_resolutions(
    ordered,
    estimated_tokens:      estimated_tokens,
    required_capabilities: required_capabilities
  )

  EscalationChain.new(resolutions: usable, max_attempts: max_attempts || escalation_max_attempts)
end

.build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil) ⇒ Object



299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# File 'lib/legion/llm/router.rb', line 299

# Builds the ordered list of fallback resolutions from every registered
# provider instance.
#
# An entry is skipped when it belongs to +exclude_provider+ and either no
# +exclude_instance+ was given (the whole provider is excluded) or its
# instance equals +exclude_instance+. Entries for which no default model can
# be found are skipped as well.
#
# Ordering: entries on the primary tier first, then higher-ranked tiers, then
# lower-ranked tiers; within each group by tier rank, and within the same rank
# in registry order. The registry position is an explicit tiebreaker because
# +sort_by+ is not guaranteed to be stable.
#
# @param exclude_provider [Symbol, String, nil] provider to leave out
# @param exclude_instance [Symbol, String, nil] restricts the exclusion to one instance
# @param primary_tier [Symbol, String, nil] tier of the primary resolution
# @return [Array<Resolution>] fallbacks tagged with rule 'escalation_fallback'
def build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil)
  ranks = tier_rank
  primary_rank = primary_tier ? ranks.fetch(primary_tier.to_sym, 99) : 99
  skip_provider = exclude_provider&.to_sym
  skip_instance = exclude_instance&.to_sym

  candidates = Call::Registry.all_instances.filter_map do |entry|
    next if entry[:provider] == skip_provider &&
            (skip_instance.nil? || entry[:instance] == skip_instance)

    # Source from Inventory (SSOT) when the instance has no configured
    # registry default — e.g. a whitelist-restricted instance whose
    # policy-aware default resolved to nil. Without this, such a sibling
    # instance (a second account offering the same model) is dropped from
    # the escalation chain entirely.
    model = registry_default_model(entry) || inventory_default_model(entry[:provider], entry[:instance])
    next unless model

    Resolution.new(
      tier:     PROVIDER_TIER.fetch(entry[:provider], :frontier),
      provider: entry[:provider],
      instance: entry[:instance],
      model:    model,
      rule:     'escalation_fallback'
    )
  end

  ordered = candidates.each_with_index.sort_by do |resolution, position|
    rank = ranks.fetch(resolution.tier, 99)
    bucket = case rank <=> primary_rank
             when 0 then 0 # same tier as the primary
             when 1 then 1 # higher-ranked tier
             else 2        # lower-ranked tier
             end
    [bucket, rank, position]
  end
  ordered.map(&:first)
end

.discover_provider_for_model(model) ⇒ Object



69
70
71
72
73
74
75
76
77
78
# File 'lib/legion/llm/router.rb', line 69

# Looks up which provider offers +model+ according to the cached discovery
# results.
#
# A discovered entry matches when its model name equals +model+ or starts
# with "<model>:" (i.e. the same base name with a tag suffix).
#
# A nil or empty +model+ never matches: without the guard it would be
# compared as "" and wrongly match any discovered entry whose name is blank or
# begins with ":".
#
# @param model [String, Symbol, nil] model name to look up
# @return [Object, nil] the +:provider+ value of the first matching entry, or
#   nil when Discovery is unavailable, the model is blank, or nothing matches
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  return nil if wanted.empty?

  tagged_prefix = "#{wanted}:"
  entry = Array(Discovery.cached_discovered_models).find do |discovered|
    name = discovered[:model].to_s
    name == wanted || name.start_with?(tagged_prefix)
  end
  entry&.dig(:provider)
end

.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/legion/llm/router.rb', line 229

# Builds a Resolution directly from caller-supplied hints, without consulting
# routing rules.
#
# @param tier [Symbol, String, nil] tier hint
# @param provider [Symbol, String, nil] provider hint; ignored when it is not registered
# @param model [String, nil] model hint
# @param instance [Symbol, String, nil] provider instance hint
# @return [Resolution] resolution tagged with rule 'explicit'; the matching
#   registry entry (if any) is passed as its metadata
def explicit_resolution(tier, provider, model, instance = nil)
  # Remember whether the caller named a provider before validation can clear the hint.
  caller_named_provider = !provider.nil?

  # A hinted provider that is not registered is dropped so resolution falls
  # through to the tier-based or default path rather than committing to a dead end.
  if provider && !Call::Registry.registered?(provider.to_sym)
    log.debug "[llm][router] action=explicit_resolution.provider_not_registered provider=#{provider} falling_back"
    provider = nil
  end

  if provider
    registry_entry = registry_entry_for_provider(provider.to_sym, instance: instance&.to_sym)
    resolved_provider = provider.to_sym
  else
    registry_entry = tier ? registry_entry_for_tier(tier) : nil
    resolved_provider = registry_entry&.[](:provider) ||
                        (tier && default_provider_for_tier(tier)) ||
                        Legion::Settings[:llm][:default_provider]&.to_sym ||
                        :anthropic
  end

  # Sending a model to a provider other than its natural one would fail
  # (e.g. "claude-sonnet-4-6" to vllm), so the model is dropped and replaced by
  # a provider default below. This only happens when the caller named a
  # provider AND the model's natural provider is positively known; models
  # with an unknown pattern pass through because they may be custom/registry models.
  natural_provider = model && infer_provider_for_model(model)
  mismatch = caller_named_provider && model && resolved_provider && natural_provider &&
             natural_provider != resolved_provider
  if mismatch
    log.debug "[llm][router] action=explicit_resolution.model_provider_mismatch model=#{model} " \
              "natural_provider=#{natural_provider} resolved_provider=#{resolved_provider}"
    model = nil
  end

  resolved_instance = registry_entry&.[](:instance) || instance

  # Model precedence: caller's model, Inventory default, registry default, tier default.
  resolved_model = model
  resolved_model ||= inventory_default_model(resolved_provider, resolved_instance)
  resolved_model ||= registry_default_model(registry_entry)
  resolved_model ||= tier && default_model_for_tier(tier)

  resolved_tier = tier || PROVIDER_TIER.fetch(resolved_provider, :frontier)

  Resolution.new(
    tier:     resolved_tier,
    provider: resolved_provider,
    model:    resolved_model,
    instance: resolved_instance,
    rule:     'explicit',
    metadata: registry_entry
  )
end

.health_tracker ⇒ Object



167
168
169
# File 'lib/legion/llm/router.rb', line 167

# Returns the memoized health tracker, building it on first use (and again
# after reset! clears it).
#
# @return [Object] whatever build_health_tracker returns
def health_tracker
  return @health_tracker if @health_tracker

  @health_tracker = build_health_tracker
end

.infer_provider_for_model(model) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/legion/llm/router.rb', line 52

# Infers the provider a model name naturally belongs to.
#
# Discovery results take precedence; otherwise the name is classified by
# pattern, first match wins:
#   * Bedrock inference-profile prefix ("us.", "us-gov.", "eu.", "apac.",
#     "global.") or Bedrock vendor prefix ("anthropic.", "meta.", ...) => :bedrock
#   * "gpt-", "o1-", "o3-", "o4-"                                      => :openai
#   * "claude-"                                                        => :anthropic
#   * "gemini-"                                                        => :gemini
#   * anything containing ":" or "/" (OLLAMA_MODEL_PATTERN)            => :ollama
#
# The non-US inference-profile prefixes must be checked before the Ollama
# pattern: such IDs end in a ":<version>" suffix and would otherwise be
# classified as :ollama.
#
# @param model [String, Symbol, nil] model name
# @return [Symbol, Object, nil] the discovered provider value, an inferred
#   provider symbol, or nil when the model is blank or matches no pattern
def infer_provider_for_model(model)
  return nil if model.nil? || model.to_s.empty?

  discovered = discover_provider_for_model(model)
  return discovered if discovered

  case model.to_s
  when /\A(?:us|us-gov|eu|apac|global)\./, /\A(anthropic|meta|mistral|cohere|amazon|ai21)\./i
    :bedrock
  when /\Agpt-|\Ao[134]-/ then :openai
  when /\Aclaude-/ then :anthropic
  when /\Agemini-/ then :gemini
  when OLLAMA_MODEL_PATTERN then :ollama
  end
end

.inventory_default_model(provider, instance = nil) ⇒ Object

The provider’s own default model from Inventory — the single source of truth (already whitelist/blacklist-filtered and discovery-fed). Sourcing a model here guarantees an explicit provider is paired only with a model it actually offers: anthropic resolves to its own offered model, never a stale registry default or a global default that belongs to a different provider (the anthropic->qwen pairing class). Returns nil when Inventory has no catalog for the provider (cold boot), so callers fall through to their existing fallbacks.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/legion/llm/router.rb', line 88

# Returns the default model Inventory offers for a provider.
#
# The offering whose instance id matches +instance+ (or "default" when no
# instance is given) is preferred; when none matches, the provider's first
# offering is used instead.
#
# @param provider [Symbol, String, nil] provider to look up
# @param instance [Symbol, String, nil] preferred provider instance
# @return [String, nil] the offering's model (or its canonical alias) as a
#   String; nil when no provider is given, Inventory is not defined, the
#   provider has no candidates, or the lookup raises (the error is reported
#   through handle_exception at debug level)
def inventory_default_model(provider, instance = nil)
  return nil unless provider && defined?(Inventory)

  offerings = Inventory.routing_candidates(provider: provider.to_sym)
  return nil if offerings.nil? || offerings.empty?

  wanted = (instance || :default).to_s
  chosen = offerings.find do |offering|
    (offering[:instance_id] || offering[:provider_instance]).to_s == wanted
  end
  chosen ||= offerings.first

  (chosen[:model] || chosen[:canonical_model_alias])&.to_s
rescue StandardError => e
  handle_exception(e, level: :debug, handled: true, operation: 'router.inventory_default_model')
  nil
end

.populate_auto_rules(discovered_instances) ⇒ Object



179
180
181
182
183
184
# File 'lib/legion/llm/router.rb', line 179

# Generates routing rules from discovered instances and stores them as the
# module's auto rules, marking them as populated.
#
# @param discovered_instances [Object] passed unchanged to Discovery::RuleGenerator.generate
# @return [Object] the result of the info-level log call
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)
  # Rule.from_hash is always given symbol keys, whatever the generator emitted.
  @auto_rules = generated.map do |attributes|
    Rule.from_hash(attributes.transform_keys(&:to_sym))
  end
  @auto_rules_populated = true
  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end

.reset! ⇒ Object



186
187
188
189
190
# File 'lib/legion/llm/router.rb', line 186

# Clears the module's cached state: empties the auto rules, drops the
# memoized health tracker, and marks auto rules as not populated.
#
# @return [false] the value of the final assignment
def reset!
  @auto_rules = []
  @health_tracker = nil
  @auto_rules_populated = false
end

.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil) ⇒ Resolution?

Resolve an LLM routing intent to a tier/provider/model decision.

Model, provider, and tier are treated as preference hints — they bias scoring toward matching candidates but do not bypass rule evaluation. This allows the router to apply policy (cost, privacy, health) and fall back to a better local match when the hinted provider is unavailable.

Parameters:

  • intent (Hash, nil) (defaults to: nil)

    routing intent (capability, privacy, etc.)

  • tier (Symbol, nil) (defaults to: nil)

    tier preference hint

  • model (String, nil) (defaults to: nil)

    model preference hint

  • provider (Symbol, nil) (defaults to: nil)

    provider preference hint

  • estimated_tokens (Integer, nil) (defaults to: nil)

    estimated total token count for context window filtering

Returns:

  • (Resolution, nil)



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/legion/llm/router.rb', line 116

# Resolves a routing intent to a tier/provider/model decision.
#
# Rules are evaluated first, with tier/provider/model acting as scoring hints.
# When no rule wins — or the winning rule targets a different provider than a
# registered, explicitly hinted one — the hints are resolved directly through
# explicit_resolution, with arbitrage_fallback as the last resort.
#
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] tier preference hint
# @param model [String, nil] model preference hint
# @param provider [Symbol, String, nil] provider preference hint
# @param instance [Symbol, String, nil] instance hint, used only by the explicit fallback
# @param exclude [Hash] forwarded to select_candidates
# @param estimated_tokens [Integer, nil] forwarded to select_candidates
# @return [Resolution, nil]
def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil)
  log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} instance=#{instance} estimated_tokens=#{estimated_tokens}"

  effective_intent = merge_defaults(intent)
  rule_set = load_rules
  matches = select_candidates(rule_set, effective_intent, exclude: exclude, estimated_tokens: estimated_tokens)
  winner = pick_best(matches, intent: effective_intent, hints: { tier: tier, provider: provider, model: model })
  resolution = winner&.to_resolution

  # A provider hint only boosts rules that target that provider. If the best
  # rule targets a different one and the hinted provider is registered, the
  # rule result is discarded so the hint is honored by explicit_resolution.
  # Otherwise auto-rules for discoverable providers (vllm/ollama) would always
  # beat providers that have no auto-generated rules (bedrock/anthropic).
  hint_bypassed = resolution && provider && resolution.provider.to_sym != provider.to_sym &&
                  Call::Registry.registered?(provider.to_sym)
  if hint_bypassed
    log.info "[llm][router] action=resolve.hint_mismatch hinted_provider=#{provider} " \
             "matched_provider=#{resolution.provider} falling_through_to_explicit"
    resolution = nil
  end

  if resolution
    log.info "[llm][router] action=resolve.matched tier=#{resolution.tier} provider=#{resolution.provider} " \
             "model=#{resolution.model} rule=#{resolution.rule}"
    return resolution
  end

  # No rule matched (or the hint was bypassed): report the non-zero rejection
  # counters, then resolve from the hints, then fall back to arbitrage.
  rejections = (@last_candidate_trace || {}).reject { |_, count| count.zero? }
  log.warn "[llm][router] action=resolve.no_rules_matched intent=#{effective_intent} candidates_evaluated=#{rule_set.size} " \
           "rejections=#{rejections}"

  explicit_resolution(tier, provider, model, instance) || arbitrage_fallback(intent)
end

.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/legion/llm/router.rb', line 153

# Resolves an escalation chain either from routing rules or from defaults.
#
# The rule-based path (chain_from_intent) is taken only when routing is
# enabled and an intent was supplied; otherwise chain_from_defaults is used.
#
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] tier hint
# @param model [String, nil] model hint
# @param provider [Symbol, nil] provider hint
# @param instance [Symbol, nil] instance hint
# @param max_escalations [Integer, nil] attempt cap; escalation_max_attempts is used when nil
# @param exclude [Hash] forwarded to chain_from_intent
# @param allow_default_fallback [Boolean] forwarded to the chain builder
# @param estimated_tokens [Integer, nil] forwarded to chain_from_intent
# @return [Object] whatever the selected chain builder returns
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil,
                  exclude: {}, allow_default_fallback: true, estimated_tokens: nil)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} max_escalations=#{max_escalations} estimated_tokens=#{estimated_tokens}"
  attempts = max_escalations || escalation_max_attempts

  rule_based = routing_enabled? && intent
  unless rule_based
    return chain_from_defaults(model, provider, attempts,
                               hints:                  { tier: tier, instance: instance },
                               allow_default_fallback: allow_default_fallback)
  end

  chain_from_intent(intent, attempts,
                    hints:                  { tier: tier, provider: provider, model: model, instance: instance },
                    exclude:                exclude,
                    allow_default_fallback: allow_default_fallback,
                    estimated_tokens:       estimated_tokens)
end

.routing_enabled? ⇒ Boolean

Returns:

  • (Boolean)


171
172
173
# File 'lib/legion/llm/router.rb', line 171

# Reports whether rule-based routing can be used: the llm.routing.enabled
# setting must be exactly +true+ and auto rules must have been populated.
#
# @return [Boolean]
def routing_enabled?
  return false unless Legion::Settings.dig(:llm, :routing, :enabled) == true

  auto_rules_populated?
end

.tier_available?(tier) ⇒ Boolean

Check whether a tier can be used right now:

  • :local — always available
  • :direct — always available (remote self-hosted instances)
  • :fleet — available when Legion::Transport is loaded
  • :cloud — available unless privacy mode is enabled
  • :frontier — available unless privacy mode is enabled

Returns:

  • (Boolean)


214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/legion/llm/router.rb', line 214

# Checks whether a tier can be used right now.
#
# External tiers are unavailable while privacy mode is on; the :fleet tier
# requires the Legion::Transport constant to be defined directly on Legion;
# every other tier is available.
#
# @param tier [Symbol, String] tier name
# @return [Boolean]
def tier_available?(tier)
  name = tier.to_sym

  if external_tier?(name) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{name} available=false reason=privacy_mode"
    return false
  end
  return true unless name == :fleet

  # `false` disables ancestor lookup: only a constant defined on Legion itself counts.
  transport_loaded = Legion.const_defined?('Transport', false)
  log.debug "[llm][router] action=tier_available tier=fleet available=#{transport_loaded}"
  transport_loaded
end

.tier_priority ⇒ Object



192
193
194
195
196
197
198
199
200
201
202
# File 'lib/legion/llm/router.rb', line 192

# Returns the tier ordering to use, most preferred first.
#
# The first non-empty value among llm.tier_order, llm.routing.tier_order and
# llm.routing.tier_priority is taken as the configured order. Entries that do
# not respond to +to_sym+ are dropped, and every TIER_RANK tier missing from
# the configuration is appended in its built-in order, so the result always
# covers all known tiers.
#
# Settings are read nil-safely: a missing :llm or :routing section simply
# yields the built-in order instead of raising and going through the rescue
# path (which would report a warning on every call).
#
# @return [Array<Symbol>] unique tier names; TIER_RANK.keys when nothing is
#   configured or when reading the settings raises
def tier_priority
  llm = Legion::Settings[:llm] || {}
  routing = llm[:routing] || {}

  configured = [llm[:tier_order], routing[:tier_order], routing[:tier_priority]]
               .find { |value| !Array(value).empty? }
  normalized = Array(configured).filter_map { |tier| tier.to_sym if tier.respond_to?(:to_sym) }

  (normalized + TIER_RANK.keys).uniq
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end

.tier_rank ⇒ Object



204
205
206
# File 'lib/legion/llm/router.rb', line 204

# Maps each tier returned by tier_priority to its zero-based position in
# that ordering.
#
# @return [Hash] tier => position
def tier_rank
  tier_priority.each_with_index.each_with_object({}) do |(tier, position), ranks|
    ranks[tier] = position
  end
end