Module: Legion::LLM::Router

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/escalation/chain.rb

Defined Under Namespace

Modules: Arbitrage

Classes: EscalationChain, HealthTracker, Resolution, Rule

Constant Summary collapse

# Tier assigned to each known provider. explicit_resolution falls back to
# :frontier for a provider that has no entry here.
PROVIDER_TIER = {
  bedrock:   :cloud,
  anthropic: :frontier,
  openai:    :frontier,
  gemini:    :cloud,
  azure:     :cloud,
  ollama:    :local,
  vllm:      :fleet
}.freeze

# The same seven providers as an ordered list.
# NOTE(review): presumably the preference order — confirm against callers.
PROVIDER_ORDER = %i[ollama vllm bedrock azure gemini anthropic openai].freeze

# Tiers that make up the "external" set.
# NOTE(review): presumably what external_tier? consults — confirm.
TIER_EXTERNAL = Set.new(%i[cloud frontier]).freeze

# Numeric rank per tier. tier_priority uses the key order of this hash as its
# default and fallback ordering.
TIER_RANK = {
  local:    0,
  direct:   1,
  fleet:    2,
  cloud:    3,
  frontier: 4
}.freeze

# Alternate capability names mapped to their canonical form (:tools or :streaming).
CAPABILITY_ALIASES = {
  function_calling: :tools,
  functions:        :tools,
  tool:             :tools,
  tool_use:         :tools,
  stream:           :streaming,
  stream_chat:      :streaming
}.freeze

# Matches any string containing ':' or '/'. infer_provider_for_model treats a
# match as :ollama once every other naming heuristic has failed.
OLLAMA_MODEL_PATTERN = %r{[:/]}

Class Method Summary collapse

Class Method Details

.auto_rules_populated? ⇒ Boolean

Returns:

  • (Boolean)


123
124
125
# File 'lib/legion/llm/router.rb', line 123

# Reports whether the auto-rules flag has been raised.
#
# The flag is set to +true+ by populate_auto_rules and back to +false+ by reset!.
#
# @return [Boolean] true only when @auto_rules_populated is +true+
def auto_rules_populated?
  @auto_rules_populated.equal?(true)
end

.discover_provider_for_model(model) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/legion/llm/router.rb', line 54

# Looks a model up in Discovery's cached model list and returns its provider.
#
# An entry matches when its :model value (as a string) is exactly the requested
# name, or begins with the requested name followed by ":".
#
# @param model [#to_s] model identifier to look for
# @return [Object, nil] the :provider value of the first matching entry; nil when
#   Discovery is not defined, does not respond to cached_discovered_models, or
#   holds no matching entry
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  tagged_prefix = "#{wanted}:"
  catalogue = Array(Discovery.cached_discovered_models)

  match = catalogue.find do |candidate|
    name = candidate[:model].to_s
    name == wanted || name.start_with?(tagged_prefix)
  end

  match&.dig(:provider)
end

.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/legion/llm/router.rb', line 177

# Builds a Resolution straight from caller-supplied hints, without consulting rules.
#
# @param tier [Symbol, nil] tier hint
# @param provider [Symbol, String, nil] provider hint; dropped when the registry does not know it
# @param model [String, nil] model hint; dropped when an explicitly named provider
#   differs from the provider inferred for the model
# @param instance [Symbol, String, nil] instance hint
# @return [Resolution] a resolution whose rule is 'explicit'
def explicit_resolution(tier, provider, model, instance = nil)
  # Captured up front: the registry check below may clear +provider+, but the
  # model/provider mismatch rule still needs to know the caller named one.
  caller_named_provider = !provider.nil?

  # A hinted provider that is not registered is discarded so resolution can
  # continue from the tier or the configured defaults instead of dead-ending.
  if provider && !Call::Registry.registered?(provider.to_sym)
    log.debug "[llm][router] action=explicit_resolution.provider_not_registered provider=#{provider} falling_back"
    provider = nil
  end

  if provider
    chosen_provider = provider.to_sym
    registry_entry  = registry_entry_for_provider(chosen_provider, instance: instance&.to_sym)
  else
    registry_entry  = tier ? registry_entry_for_tier(tier) : nil
    # First truthy source wins: registry entry, tier default, configured default, :anthropic.
    chosen_provider = registry_entry&.[](:provider)
    chosen_provider ||= default_provider_for_tier(tier) if tier
    chosen_provider ||= Legion::Settings[:llm][:default_provider]&.to_sym
    chosen_provider ||= :anthropic
  end

  # Sending a model to a provider that does not serve it (e.g. "claude-sonnet-4-6"
  # to vllm) would fail, so the model is dropped in favour of the provider's
  # default — but only when the caller named a provider AND the model's own
  # provider can be positively identified. Models with no inferable provider
  # (nil) pass through untouched; they may be custom or registry models.
  natural_provider = model && infer_provider_for_model(model)
  mismatched = caller_named_provider && model && chosen_provider &&
               natural_provider && natural_provider != chosen_provider
  if mismatched
    log.debug "[llm][router] action=explicit_resolution.model_provider_mismatch model=#{model} " \
              "natural_provider=#{natural_provider} resolved_provider=#{chosen_provider}"
    model = nil
  end

  chosen_model    = model || registry_default_model(registry_entry) || (tier && default_model_for_tier(tier))
  chosen_instance = registry_entry&.[](:instance) || instance
  chosen_tier     = tier || PROVIDER_TIER.fetch(chosen_provider, :frontier)

  Resolution.new(
    tier:     chosen_tier,
    provider: chosen_provider,
    model:    chosen_model,
    instance: chosen_instance,
    rule:     'explicit',
    metadata: registry_entry
  )
end

.health_tracker ⇒ Object



115
116
117
# File 'lib/legion/llm/router.rb', line 115

# Returns the memoized health tracker, building it through build_health_tracker
# the first time it is requested (or again after reset! clears it).
#
# @return [Object] whatever build_health_tracker produced
def health_tracker
  return @health_tracker if @health_tracker

  @health_tracker = build_health_tracker
end

.infer_provider_for_model(model) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/legion/llm/router.rb', line 37

# Infers which provider natively serves a model identifier.
#
# Discovery's cached model list is consulted first (through
# discover_provider_for_model). Only when it has no answer are the naming
# heuristics below applied, in order; the first one that matches wins.
#
# @param model [#to_s, nil] model identifier
# @return [Symbol, Object, nil] the discovered provider value if any, otherwise
#   one of :bedrock, :openai, :anthropic, :gemini, :ollama, or nil when the
#   model is blank or matches no heuristic
def infer_provider_for_model(model)
  return nil if model.nil? || model.to_s.empty?

  discovered = discover_provider_for_model(model)
  return discovered if discovered

  model_s = model.to_s
  return :bedrock if model_s.start_with?('us.')

  # Bedrock ids have the form "<vendor>.<model>", optionally preceded by an
  # inference-profile scope such as "eu.", "apac.", "us-gov." or "global.".
  # The scoped form must be recognised here: these ids normally end in a
  # ":<n>" version suffix, so anything that slips past this check would be
  # picked up by OLLAMA_MODEL_PATTERN below and misreported as :ollama.
  bedrock_id = /\A(?:[a-z]+(?:-[a-z]+)*\.)?(?:anthropic|meta|mistral|cohere|amazon|ai21)\./i
  return :bedrock if model_s.match?(bedrock_id)

  return :openai if model_s.match?(/\Agpt-|\Ao[134]-/)
  return :anthropic if model_s.start_with?('claude-')
  return :gemini if model_s.start_with?('gemini-')
  # Last resort: anything containing ':' or '/' is taken to be an Ollama-style name.
  return :ollama if model_s.match?(OLLAMA_MODEL_PATTERN)

  nil
end

.populate_auto_rules(discovered_instances) ⇒ Object



127
128
129
130
131
132
# File 'lib/legion/llm/router.rb', line 127

# Replaces the auto-generated rule set with rules derived from discovered instances.
#
# Discovery::RuleGenerator produces one hash per rule; each hash has its keys
# symbolized and is turned into a Rule. The populated flag is then raised and
# the number of rules is logged at info level.
#
# @param discovered_instances [Object] passed unchanged to Discovery::RuleGenerator.generate
# @return [Object] the return value of the final log.info call
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)

  @auto_rules = generated.map do |attributes|
    Rule.from_hash(attributes.transform_keys(&:to_sym))
  end
  @auto_rules_populated = true

  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end

.reset! ⇒ Object



134
135
136
137
138
# File 'lib/legion/llm/router.rb', line 134

# Clears the router's memoized state: the auto-generated rules are emptied, the
# health tracker is dropped (health_tracker rebuilds it on next use), and the
# populated flag is lowered.
#
# @return [false] the value of the final assignment
def reset!
  @auto_rules = []
  @health_tracker = nil
  @auto_rules_populated = false
end

.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil, **_opts) ⇒ Resolution?

Resolve an LLM routing intent to a tier/provider/model decision.

Model, provider, and tier are treated as preference hints — they bias scoring toward matching candidates but do not bypass rule evaluation. This allows the router to apply policy (cost, privacy, health) and fall back to a better local match when the hinted provider is unavailable.

Parameters:

  • intent (Hash, nil) (defaults to: nil)

    routing intent (capability, privacy, etc.)

  • tier (Symbol, nil) (defaults to: nil)

    tier preference hint

  • model (String, nil) (defaults to: nil)

    model preference hint

  • provider (Symbol, nil) (defaults to: nil)

    provider preference hint

  • estimated_tokens (Integer, nil) (defaults to: nil)

    estimated total token count for context window filtering

Returns:

  • (Resolution, nil)



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/legion/llm/router.rb', line 78

# Resolves a routing intent to a tier/provider/model decision.
#
# Rules are loaded, filtered into candidates against the intent merged with
# defaults, and the best candidate is picked with tier/provider/model acting
# as hints. When no candidate wins, the hints are turned into a Resolution by
# explicit_resolution; arbitrage_fallback is the final fallback.
#
# NOTE(review): explicit_resolution as written always returns a Resolution, so
# the arbitrage fallback looks unreachable from here — confirm.
#
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] tier preference hint
# @param model [String, nil] model preference hint
# @param provider [Symbol, nil] provider preference hint
# @param instance [Symbol, nil] instance hint, used only by the explicit fallback
# @param exclude [Hash] forwarded to candidate selection
# @param estimated_tokens [Integer, nil] forwarded to candidate selection
# @return [Resolution, nil]
def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil, **_opts)
  log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} instance=#{instance} estimated_tokens=#{estimated_tokens}"

  effective_intent = merge_defaults(intent)
  rule_set = load_rules
  eligible = select_candidates(rule_set, effective_intent, exclude: exclude, estimated_tokens: estimated_tokens)
  winner = pick_best(eligible, hints: { tier: tier, provider: provider, model: model })
  decision = winner&.to_resolution

  if decision
    log.info "[llm][router] action=resolve.matched tier=#{decision.tier} provider=#{decision.provider} " \
             "model=#{decision.model} rule=#{decision.rule}"
  else
    # No rule matched: build a resolution from the caller's hints instead.
    log.warn "[llm][router] action=resolve.no_rules_matched intent=#{effective_intent} candidates_evaluated=#{rule_set.size}"
    decision = explicit_resolution(tier, provider, model, instance)
  end

  decision || arbitrage_fallback(intent)
end

.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil, **_opts) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/legion/llm/router.rb', line 101

# Builds an escalation chain, either from routing rules or from defaults.
#
# The rule-based path (chain_from_intent) is taken only when routing is enabled
# and an intent was supplied; every other call goes through chain_from_defaults.
#
# @param intent [Hash, nil] routing intent
# @param tier [Symbol, nil] tier hint
# @param model [String, nil] model hint
# @param provider [Symbol, nil] provider hint
# @param max_escalations [Integer, nil] attempt cap; escalation_max_attempts when nil
# @param exclude [Hash] forwarded to chain_from_intent
# @param allow_default_fallback [Boolean] forwarded to whichever builder runs
# @param estimated_tokens [Integer, nil] forwarded to chain_from_intent
# @return [Object] whatever the selected chain builder returns
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil,
                  exclude: {}, allow_default_fallback: true, estimated_tokens: nil, **_opts)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} max_escalations=#{max_escalations} estimated_tokens=#{estimated_tokens}"
  attempt_cap = max_escalations || escalation_max_attempts

  rule_based = routing_enabled? && intent
  unless rule_based
    return chain_from_defaults(model, provider, attempt_cap,
                               hints: { tier: tier }, allow_default_fallback: allow_default_fallback)
  end

  chain_from_intent(intent, attempt_cap,
                    hints:                  { tier: tier, provider: provider, model: model },
                    exclude:                exclude,
                    allow_default_fallback: allow_default_fallback,
                    estimated_tokens:       estimated_tokens)
end

.routing_enabled? ⇒ Boolean

Returns:

  • (Boolean)


119
120
121
# File 'lib/legion/llm/router.rb', line 119

# Reports whether rule-based routing can be used.
#
# Both conditions must hold: the llm.routing.enabled setting is exactly +true+,
# and auto rules have been populated.
#
# @return [Boolean]
def routing_enabled?
  return false unless Legion::Settings.dig(:llm, :routing, :enabled) == true

  auto_rules_populated?
end

.tier_available?(tier) ⇒ Boolean

Check whether a tier can be used right now.

  • :local — always available
  • :direct — always available (remote self-hosted instances)
  • :fleet — available when Legion::Transport is loaded
  • :cloud — available unless privacy mode
  • :frontier — available unless privacy mode

Returns:

  • (Boolean)


162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/legion/llm/router.rb', line 162

# Checks whether a tier can be used right now.
#
# External tiers are refused while privacy mode is on. The :fleet tier
# additionally requires the Legion::Transport constant to be defined directly
# on Legion. Every other tier is available.
#
# @param tier [#to_sym] tier name
# @return [Boolean]
def tier_available?(tier)
  name = tier.to_sym
  blocked_by_privacy = external_tier?(name) && privacy_mode?

  if blocked_by_privacy
    log.debug "[llm][router] action=tier_available tier=#{name} available=false reason=privacy_mode"
    false
  elsif name == :fleet
    # Second argument +false+: do not look the constant up through ancestors.
    Legion.const_defined?('Transport', false).tap do |present|
      log.debug "[llm][router] action=tier_available tier=fleet available=#{present}"
    end
  else
    true
  end
end

.tier_priority ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/legion/llm/router.rb', line 140

# Returns the tier ordering to use, most preferred first.
#
# The configured order is read from the first non-empty of llm.tier_order,
# llm.routing.tier_order and llm.routing.tier_priority. Entries are converted
# to symbols (entries that cannot be are dropped), and any tier from TIER_RANK
# that was not listed is appended in TIER_RANK's own order. Any error while
# reading settings is reported through handle_exception and the TIER_RANK
# order is returned instead.
#
# @return [Array<Symbol>]
def tier_priority
  blank = ->(value) { value.nil? || Array(value).empty? }

  chosen = Legion::Settings[:llm][:tier_order]
  chosen = Legion::Settings[:llm][:routing][:tier_order] if blank.call(chosen)
  chosen = Legion::Settings[:llm][:routing][:tier_priority] if blank.call(chosen)

  tiers = Array(chosen).filter_map { |entry| entry.to_sym if entry.respond_to?(:to_sym) }
  tiers = TIER_RANK.keys if tiers.empty?
  # Array#| concatenates and de-duplicates while keeping first-seen order.
  tiers | TIER_RANK.keys
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end

.tier_rank ⇒ Object



152
153
154
# File 'lib/legion/llm/router.rb', line 152

# Maps each tier to its zero-based position in tier_priority.
#
# @return [Hash] tier => index, in tier_priority order
def tier_rank
  tier_priority.each_with_index.with_object({}) do |(tier, position), ranks|
    ranks[tier] = position
  end
end