Module: Legion::LLM::Router
- Extended by:
- Candidates, RegistryLookup, Legion::Logging::Helper
- Defined in:
- lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/candidates.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/availability.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/registry_lookup.rb,
lib/legion/llm/router/escalation/chain.rb
Defined Under Namespace
Modules: Arbitrage, Availability, Candidates, RegistryLookup
Classes: EscalationChain, HealthTracker, Resolution, Rule
Constant Summary collapse
- PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier, gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
- PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
- TIER_EXTERNAL =
Set[:cloud, :frontier].freeze
- TIER_RANK =
{ local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
- CAPABILITY_ALIASES =
{ function_calling: :tools, functions: :tools, tool: :tools, tool_use: :tools, stream: :streaming, stream_chat: :streaming }.freeze
- CANONICAL_EFFORT_LEVELS =
%i[low moderate high reasoning].freeze
- EFFORT_ALIASES =
{ medium: :moderate }.freeze
- EFFORT_LEVELS =
(CANONICAL_EFFORT_LEVELS + EFFORT_ALIASES.keys).freeze
- EFFORT_RANK =
{ low: 0, moderate: 1, high: 2, reasoning: 3 }.freeze
- OPERATIONS =
%i[chat stream embed image structured_output].freeze
- OPERATION_ALIASES =
{ completion: :chat, stream_chat: :stream, embedding: :embed }.freeze
- DEFAULT_OPERATION =
:chat
- DEFAULT_EFFORT =
:moderate
- OLLAMA_MODEL_PATTERN =
%r{[:/]}
Class Method Summary collapse
- .auto_rules_populated? ⇒ Boolean
- .build_escalation_chain(provider:, model:, tier:, instance: nil, max_attempts: nil, estimated_tokens: nil, required_capabilities: []) ⇒ Object
- .build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil) ⇒ Object
- .discover_provider_for_model(model) ⇒ Object
- .explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
- .health_tracker ⇒ Object
- .infer_provider_for_model(model) ⇒ Object
-
.inventory_default_model(provider, instance = nil) ⇒ Object
The provider’s own default model from Inventory — the single source of truth (already whitelist/blacklist-filtered and discovery-fed).
- .populate_auto_rules(discovered_instances) ⇒ Object
- .reset! ⇒ Object
-
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
- .resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil) ⇒ Object
- .routing_enabled? ⇒ Boolean
-
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
- .tier_priority ⇒ Object
- .tier_rank ⇒ Object
Class Method Details
.auto_rules_populated? ⇒ Boolean
175 176 177 |
# File 'lib/legion/llm/router.rb', line 175 def auto_rules_populated? @auto_rules_populated == true end |
.build_escalation_chain(provider:, model:, tier:, instance: nil, max_attempts: nil, estimated_tokens: nil, required_capabilities: []) ⇒ Object
284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
# File 'lib/legion/llm/router.rb', line 284
# Builds an EscalationChain whose first entry is the explicit resolution for the
# given provider/model/tier, followed by fallback resolutions from other instances.
#
# @param provider [Symbol, String] provider of the primary resolution
# @param model [String, nil] model of the primary resolution
# @param tier [Symbol, String, nil] tier of the primary resolution
# @param instance [Symbol, String, nil] provider instance of the primary resolution
# @param max_attempts [Integer, nil] attempt cap; falls back to escalation_max_attempts
# @param estimated_tokens [Integer, nil] forwarded to filter_chain_resolutions
# @param required_capabilities [Array] forwarded to filter_chain_resolutions
# @return [EscalationChain]
def build_escalation_chain(provider:, model:, tier:, instance: nil, max_attempts: nil, estimated_tokens: nil, required_capabilities: [])
  head = explicit_resolution(tier, provider, model, instance)
  tail = build_fallback_resolutions(exclude_provider: provider, exclude_instance: instance, primary_tier: tier)

  # Drop nils and keep only the first entry for each provider/instance/model triple.
  ordered = ([head] + tail).compact.uniq { |res| [res.provider, res.instance, res.model] }
  usable = filter_chain_resolutions(ordered, estimated_tokens: estimated_tokens,
                                             required_capabilities: required_capabilities)

  EscalationChain.new(resolutions: usable, max_attempts: max_attempts || escalation_max_attempts)
end
.build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil) ⇒ Object
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 |
# File 'lib/legion/llm/router.rb', line 299 def build_fallback_resolutions(exclude_provider: nil, exclude_instance: nil, primary_tier: nil) ranks = tier_rank primary_rank = primary_tier ? (ranks[primary_tier.to_sym] || 99) : 99 candidates = Call::Registry.all_instances.filter_map do |entry| next if entry[:provider] == exclude_provider&.to_sym && (exclude_instance.nil? || entry[:instance] == (exclude_instance&.to_sym || :default)) # Source from Inventory (SSOT) when the instance has no configured # registry default — e.g. a whitelist-restricted instance whose # policy-aware default resolved to nil. Without this, such a sibling # instance (a second account offering the same model) is dropped from # the escalation chain entirely. model = registry_default_model(entry) || inventory_default_model(entry[:provider], entry[:instance]) next unless model entry_tier = PROVIDER_TIER.fetch(entry[:provider], :frontier) Resolution.new( tier: entry_tier, provider: entry[:provider], instance: entry[:instance], model: model, rule: 'escalation_fallback' ) end candidates.sort_by do |r| r_rank = ranks[r.tier] || 99 rank_diff = r_rank - primary_rank bucket = if rank_diff.zero? 0 elsif rank_diff.positive? 1 else 2 end [bucket, r_rank] end end |
.discover_provider_for_model(model) ⇒ Object
69 70 71 72 73 74 75 76 77 78 |
# File 'lib/legion/llm/router.rb', line 69
# Looks up the provider of a model among the cached discovered models.
#
# A discovered entry matches when its :model equals the requested name or starts
# with "<name>:" (the requested name followed by a colon-separated suffix).
#
# @param model [#to_s] model name to look up
# @return [Object, nil] the :provider value of the first matching entry, or nil
#   when Discovery is unavailable or nothing matches
def discover_provider_for_model(model)
  return nil unless defined?(Discovery)
  return nil unless Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  tagged_prefix = "#{wanted}:"
  match = Array(Discovery.cached_discovered_models).find do |candidate|
    name = candidate[:model].to_s
    name == wanted || name.start_with?(tagged_prefix)
  end
  match&.dig(:provider)
end
.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/legion/llm/router.rb', line 229
# Builds a Resolution directly from caller hints, without rule evaluation.
#
# Provider precedence: registered provider hint, then the registry entry for the
# tier, then the tier default, then the configured llm default_provider, then :anthropic.
# Model precedence: caller's model (unless swapped out, see below), then the Inventory
# default, then the registry default, then the tier default.
#
# @param tier [Symbol, String, nil] tier hint
# @param provider [Symbol, String, nil] provider hint; ignored when not registered
# @param model [String, nil] model hint
# @param instance [Symbol, String, nil] provider instance hint
# @return [Resolution] resolution tagged with rule 'explicit'
def explicit_resolution(tier, provider, model, instance = nil)
  # Track whether the caller explicitly specified a provider (before validation may clear it)
  provider_explicit = !provider.nil?

  # Validate provider hint against registry — if the hinted provider isn't registered,
  # fall through to tier-based or default resolution rather than committing to a dead end.
  if provider && !Call::Registry.registered?(provider.to_sym)
    log.debug "[llm][router] action=explicit_resolution.provider_not_registered provider=#{provider} falling_back"
    provider = nil
  end

  registry_entry = if provider
                     registry_entry_for_provider(provider.to_sym, instance: instance&.to_sym)
                   elsif tier
                     registry_entry_for_tier(tier)
                   end

  resolved_provider = if provider
                        provider.to_sym
                      else
                        registry_entry&.[](:provider) ||
                          (tier && default_provider_for_tier(tier)) ||
                          Legion::Settings[:llm][:default_provider]&.to_sym ||
                          :anthropic
                      end

  # If the resolved provider differs from the model's natural provider, swap to the
  # provider's default model — sending "claude-sonnet-4-6" to vllm would fail.
  # Only swap when the provider was explicitly specified AND we can positively identify
  # the model's natural provider. If the provider was auto-resolved from tier/defaults,
  # trust the caller's model choice. Unknown model patterns (nil) are allowed through
  # since they may be custom/registry models.
  # Note: provider_explicit was captured before the registry check above, so a hinted
  # but unregistered provider still counts as explicit here.
  model_natural_provider = model && infer_provider_for_model(model)
  if provider_explicit && model && resolved_provider && model_natural_provider && model_natural_provider != resolved_provider
    log.debug "[llm][router] action=explicit_resolution.model_provider_mismatch model=#{model} " \
              "natural_provider=#{model_natural_provider} resolved_provider=#{resolved_provider}"
    model = nil
  end

  # The registry entry's instance wins over the caller's instance hint.
  resolved_instance = registry_entry&.[](:instance) || instance
  resolved_model = model ||
                   inventory_default_model(resolved_provider, resolved_instance) ||
                   registry_default_model(registry_entry) ||
                   (tier && default_model_for_tier(tier))
  # Without a tier hint, derive the tier from the provider (unknown providers => :frontier).
  resolved_tier = tier || PROVIDER_TIER.fetch(resolved_provider, :frontier)

  Resolution.new(
    tier: resolved_tier,
    provider: resolved_provider,
    model: resolved_model,
    instance: resolved_instance,
    rule: 'explicit',
    metadata: (registry_entry)
  )
end
.health_tracker ⇒ Object
167 168 169 |
# File 'lib/legion/llm/router.rb', line 167 def health_tracker @health_tracker ||= build_health_tracker end |
.infer_provider_for_model(model) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/legion/llm/router.rb', line 52
# Infers which provider naturally serves a model name.
#
# Discovery results take precedence; otherwise the name is matched against
# known prefixes and patterns, checked in a fixed order.
#
# @param model [#to_s, nil] model name
# @return [Object, nil] the discovered provider, one of :bedrock, :openai,
#   :anthropic, :gemini, :ollama, or nil when the name is blank or unrecognized
def infer_provider_for_model(model)
  name = model.to_s
  return nil if model.nil? || name.empty?

  if (known = discover_provider_for_model(model))
    known
  elsif name.start_with?('us.') || name.match?(/\A(anthropic|meta|mistral|cohere|amazon|ai21)\./i)
    :bedrock
  elsif name.match?(/\Agpt-|\Ao[134]-/)
    :openai
  elsif name.start_with?('claude-')
    :anthropic
  elsif name.start_with?('gemini-')
    :gemini
  elsif name.match?(OLLAMA_MODEL_PATTERN)
    :ollama
  end
end
.inventory_default_model(provider, instance = nil) ⇒ Object
The provider’s own default model from Inventory — the single source of truth (already whitelist/blacklist-filtered and discovery-fed). Sourcing a model here guarantees an explicit provider is paired only with a model it actually offers: anthropic resolves to its own offered model, never a stale registry default or a global default that belongs to a different provider (the anthropic->qwen pairing class). Returns nil when Inventory has no catalog for the provider (cold boot), so callers fall through to their existing fallbacks.
88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/legion/llm/router.rb', line 88
# Picks the default model for a provider from Inventory's routing candidates.
#
# Prefers the offering whose :instance_id (or :provider_instance) matches the
# requested instance (:default when none is given); otherwise uses the first
# offering. Any StandardError is reported through handle_exception and yields nil.
#
# @param provider [Symbol, String, nil] provider to look up
# @param instance [Symbol, String, nil] provider instance; defaults to :default
# @return [String, nil] the offering's :model (or :canonical_model_alias) as a
#   string, or nil when Inventory is unavailable or has no candidates
def inventory_default_model(provider, instance = nil)
  return nil unless provider
  return nil unless defined?(Inventory)

  offerings = Inventory.routing_candidates(provider: provider.to_sym)
  return nil if offerings.nil? || offerings.empty?

  wanted_instance = (instance || :default).to_s
  chosen = offerings.find do |offering|
    (offering[:instance_id] || offering[:provider_instance]).to_s == wanted_instance
  end
  chosen ||= offerings.first

  name = chosen[:model] || chosen[:canonical_model_alias]
  name&.to_s
rescue StandardError => e
  handle_exception(e, level: :debug, handled: true, operation: 'router.inventory_default_model')
  nil
end
.populate_auto_rules(discovered_instances) ⇒ Object
179 180 181 182 183 184 |
# File 'lib/legion/llm/router.rb', line 179
# Generates routing rules from discovered instances and stores them as the
# auto rules, marking auto rules as populated.
#
# @param discovered_instances [Object] passed through to Discovery::RuleGenerator.generate
# @return [Object] the result of the final log.info call
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)
  @auto_rules = generated.map do |attrs|
    # Rule.from_hash is given symbol keys regardless of the generator's key type.
    Rule.from_hash(attrs.transform_keys(&:to_sym))
  end
  @auto_rules_populated = true
  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end
.reset! ⇒ Object
186 187 188 189 190 |
# File 'lib/legion/llm/router.rb', line 186 def reset! @health_tracker = nil @auto_rules = [] @auto_rules_populated = false end |
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
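Model, provider, and tier are treated as preference hints — they bias scoring toward matching candidates but do not bypass rule evaluation. This allows the router to apply policy (cost, privacy, health) and fall back to a better local match when the hinted provider is unavailable. One exception: when the best-matching rule targets a different provider than the hinted one and the hinted provider is registered, the router discards the rule match and falls through to explicit resolution, which honors the hint directly.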
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/legion/llm/router.rb', line 116 def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil) log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} instance=#{instance} estimated_tokens=#{estimated_tokens}" merged = merge_defaults(intent) rules = load_rules candidates = select_candidates(rules, merged, exclude: exclude, estimated_tokens: estimated_tokens) best = pick_best(candidates, intent: merged, hints: { tier: tier, provider: provider, model: model }) resolution = best&.to_resolution # When a provider hint is explicitly passed but the best rule targets a DIFFERENT provider, # the hint has no matching rule to boost. If the hinted provider is registered (can actually # serve requests), fall through to explicit_resolution which honors the hint directly. # Without this, auto-rules for discoverable providers (vllm/ollama) always win over # non-discoverable providers (bedrock/anthropic) that have no auto-generated rules. if resolution && provider && resolution.provider.to_sym != provider.to_sym && Call::Registry.registered?(provider.to_sym) log.info "[llm][router] action=resolve.hint_mismatch hinted_provider=#{provider} " \ "matched_provider=#{resolution.provider} falling_through_to_explicit" resolution = nil end if resolution log.info "[llm][router] action=resolve.matched tier=#{resolution.tier} provider=#{resolution.provider} " \ "model=#{resolution.model} rule=#{resolution.rule}" end # If no rules matched (or hint mismatch), fall back to explicit resolution from hints, then arbitrage. unless resolution trace_info = (@last_candidate_trace || {}).reject { |_, v| v.zero? } log.warn "[llm][router] action=resolve.no_rules_matched intent=#{merged} candidates_evaluated=#{rules.size} " \ "rejections=#{trace_info}" resolution = explicit_resolution(tier, provider, model, instance) end resolution || arbitrage_fallback(intent) end |
.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/legion/llm/router.rb', line 153
# Builds an escalation chain, either from the routing intent (when routing is
# enabled and an intent is given) or from the default model/provider hints.
#
# @param intent [Object, nil] routing intent
# @param tier [Symbol, String, nil] tier hint
# @param model [String, nil] model hint
# @param provider [Symbol, String, nil] provider hint
# @param instance [Symbol, String, nil] instance hint
# @param max_escalations [Integer, nil] attempt cap; falls back to escalation_max_attempts
# @param exclude [Hash] forwarded to chain_from_intent only
# @param allow_default_fallback [Boolean] forwarded to both chain builders
# @param estimated_tokens [Integer, nil] forwarded to chain_from_intent only
# @return [Object] the result of chain_from_intent or chain_from_defaults
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} max_escalations=#{max_escalations} estimated_tokens=#{estimated_tokens}"
  attempts = max_escalations || escalation_max_attempts

  unless routing_enabled? && intent
    return chain_from_defaults(model, provider, attempts,
                               hints: { tier: tier, instance: instance },
                               allow_default_fallback: allow_default_fallback)
  end

  intent_hints = { tier: tier, provider: provider, model: model, instance: instance }
  chain_from_intent(intent, attempts,
                    hints: intent_hints,
                    exclude: exclude,
                    allow_default_fallback: allow_default_fallback,
                    estimated_tokens: estimated_tokens)
end
.routing_enabled? ⇒ Boolean
171 172 173 |
# File 'lib/legion/llm/router.rb', line 171 def routing_enabled? Legion::Settings.dig(:llm, :routing, :enabled) == true && auto_rules_populated? end |
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
- :local — always available
- :direct — always available (remote self-hosted instances)
- :fleet — available when Legion::Transport is loaded
- :cloud — available unless privacy mode is enabled
- :frontier — available unless privacy mode is enabled
214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/legion/llm/router.rb', line 214
# Checks whether a tier can be used right now.
#
# External tiers are unavailable in privacy mode; :fleet requires the
# Legion::Transport constant to be defined; every other tier is available.
#
# @param tier [Symbol, String] tier name
# @return [Boolean]
def tier_available?(tier)
  name = tier.to_sym

  if external_tier?(name) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{name} available=false reason=privacy_mode"
    false
  elsif name == :fleet
    # Only constants defined directly on Legion count (no ancestor lookup).
    Legion.const_defined?('Transport', false).tap do |loaded|
      log.debug "[llm][router] action=tier_available tier=fleet available=#{loaded}"
    end
  else
    true
  end
end
.tier_priority ⇒ Object
192 193 194 195 196 197 198 199 200 201 202 |
# File 'lib/legion/llm/router.rb', line 192
# Returns the tier ordering used for ranking, most preferred first.
#
# The first non-empty setting wins, checked in this order:
# llm.tier_order, llm.routing.tier_order, llm.routing.tier_priority.
# Entries that cannot be converted with to_sym are dropped, and any tier from
# TIER_RANK missing from the configured list is appended in TIER_RANK order.
#
# Settings are read with dig so that an absent `routing` (or `llm`) section
# simply yields the default order instead of raising NoMethodError and being
# reported through the rescue path on every call.
#
# @return [Array<Symbol>] configured tiers followed by any remaining TIER_RANK tiers;
#   TIER_RANK.keys when nothing is configured or reading settings fails
def tier_priority
  setting_paths = [%i[llm tier_order], %i[llm routing tier_order], %i[llm routing tier_priority]]

  # Lazy so later settings are only read when earlier ones are nil/empty.
  configured = setting_paths.lazy
                            .map { |path| Legion::Settings.dig(*path) }
                            .find { |value| !Array(value).empty? }

  normalized = Array(configured).filter_map { |tier| tier.to_sym if tier.respond_to?(:to_sym) }
  (normalized + TIER_RANK.keys).uniq
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end
.tier_rank ⇒ Object
204 205 206 |
# File 'lib/legion/llm/router.rb', line 204 def tier_rank tier_priority.each_with_index.to_h end |