Module: Legion::LLM::Router
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/escalation/chain.rb
Defined Under Namespace
Modules: Arbitrage. Classes: EscalationChain, HealthTracker, Resolution, Rule.
Constant Summary collapse
- PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier, gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
- PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
- TIER_EXTERNAL =
Set[:cloud, :frontier].freeze
- TIER_RANK =
{ local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
- CAPABILITY_ALIASES =
{ function_calling: :tools, functions: :tools, tool: :tools, tool_use: :tools, stream: :streaming, stream_chat: :streaming }.freeze
- OLLAMA_MODEL_PATTERN =
%r{[:/]}
Class Method Summary collapse
- .auto_rules_populated? ⇒ Boolean
- .discover_provider_for_model(model) ⇒ Object
- .explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
- .health_tracker ⇒ Object
- .infer_provider_for_model(model) ⇒ Object
- .populate_auto_rules(discovered_instances) ⇒ Object
- .reset! ⇒ Object
-
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil, **_opts) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
- .resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil, **_opts) ⇒ Object
- .routing_enabled? ⇒ Boolean
-
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
- .tier_priority ⇒ Object
- .tier_rank ⇒ Object
Class Method Details
.auto_rules_populated? ⇒ Boolean
123 124 125 |
# File 'lib/legion/llm/router.rb', line 123

# Report whether auto-generated routing rules have been loaded.
#
# @return [Boolean] true only when the populated flag is exactly +true+
def auto_rules_populated?
  populated = @auto_rules_populated
  populated == true
end
.discover_provider_for_model(model) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/legion/llm/router.rb', line 54

# Look up the provider that advertises +model+ in the discovery cache.
#
# An entry matches when its +:model+ equals the requested name, or begins with
# the requested name followed by ":" (so "llama3" matches "llama3:8b").
#
# @param model [String, Symbol, nil] model name to look up
# @return [Object, nil] the matching entry's +:provider+ value, or nil when
#   Discovery is unavailable, the model name is blank, or nothing matches
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  # A blank name would otherwise "match" any cached entry whose :model is blank
  # or starts with ":"; this mirrors the blank guard in infer_provider_for_model.
  return nil if wanted.empty?

  tagged_prefix = "#{wanted}:"
  entry = Array(Discovery.cached_discovered_models).find do |candidate|
    name = candidate[:model].to_s
    name == wanted || name.start_with?(tagged_prefix)
  end
  entry&.dig(:provider)
end
.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/legion/llm/router.rb', line 177

# Build an 'explicit' Resolution straight from caller hints, without rule scoring.
#
# @param tier [Symbol, nil] tier hint
# @param provider [Symbol, String, nil] provider hint; dropped when not registered
# @param model [String, nil] model hint; dropped when it is positively identified
#   as belonging to a different provider than the one resolved for an explicit hint
# @param instance [Symbol, String, nil] provider instance hint
# @return [Resolution] resolution tagged with rule 'explicit'
def explicit_resolution(tier, provider, model, instance = nil)
  # Captured up front: the registry check below may null out +provider+, but the
  # model/provider mismatch logic still needs to know the caller named one.
  caller_named_provider = !provider.nil?

  # A hinted provider that is missing from the registry is discarded so we
  # resolve via tier or defaults instead of committing to a dead end.
  if provider && !Call::Registry.registered?(provider.to_sym)
    log.debug "[llm][router] action=explicit_resolution.provider_not_registered provider=#{provider} falling_back"
    provider = nil
  end

  entry = nil
  if provider
    entry = registry_entry_for_provider(provider.to_sym, instance: instance&.to_sym)
  elsif tier
    entry = registry_entry_for_tier(tier)
  end

  target_provider =
    if provider
      provider.to_sym
    else
      entry&.[](:provider) ||
        (tier && default_provider_for_tier(tier)) ||
        Legion::Settings[:llm][:default_provider]&.to_sym ||
        :anthropic
    end

  # Sending e.g. "claude-sonnet-4-6" to vllm would fail, so the model hint is
  # cleared (letting the provider default apply) when an explicitly requested
  # provider differs from the model's natural provider. Models whose natural
  # provider cannot be inferred (nil) pass through untouched, since they may be
  # custom/registry models; auto-resolved providers never trigger the swap.
  natural_provider = model && infer_provider_for_model(model)
  mismatch = caller_named_provider && model && target_provider &&
             natural_provider && natural_provider != target_provider
  if mismatch
    log.debug "[llm][router] action=explicit_resolution.model_provider_mismatch model=#{model} " \
              "natural_provider=#{natural_provider} resolved_provider=#{target_provider}"
    model = nil
  end

  Resolution.new(
    tier: tier || PROVIDER_TIER.fetch(target_provider, :frontier),
    provider: target_provider,
    model: model || registry_default_model(entry) || (tier && default_model_for_tier(tier)),
    instance: entry&.[](:instance) || instance,
    rule: 'explicit',
    metadata: entry
  )
end
.health_tracker ⇒ Object
115 116 117 |
# File 'lib/legion/llm/router.rb', line 115

# Memoized accessor for the router's health tracker.
#
# @return [Object] the cached tracker, built via build_health_tracker on first use
def health_tracker
  return @health_tracker if @health_tracker

  @health_tracker = build_health_tracker
end
.infer_provider_for_model(model) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/legion/llm/router.rb', line 37

# Infer which provider a model name naturally belongs to.
#
# The discovery cache is consulted first; otherwise the name is matched against
# known naming conventions, most specific first. The order matters: Bedrock ids
# usually contain ":" (e.g. "...-v1:0") and would otherwise be caught by the
# catch-all OLLAMA_MODEL_PATTERN at the end.
#
# @param model [String, Symbol, nil] model name
# @return [Symbol, Object, nil] provider (whatever discovery reports, or one of
#   :bedrock, :openai, :anthropic, :gemini, :ollama), or nil when unknown/blank
def infer_provider_for_model(model)
  model_s = model.to_s
  return nil if model_s.empty?

  discovered = discover_provider_for_model(model)
  return discovered if discovered

  return :bedrock if model_s.start_with?('us.')
  # Vendor-prefixed Bedrock ids, optionally behind a geographic inference-profile
  # prefix such as "eu.", "apac.", "us-gov." or "global.".
  return :bedrock if model_s.match?(/\A(?:[a-z]{2,6}(?:-[a-z]+)?\.)?(?:anthropic|meta|mistral|cohere|amazon|ai21)\./i)
  # "gpt-*" plus the o-series, both bare ("o1", "o3") and suffixed ("o3-mini").
  return :openai if model_s.match?(/\A(?:gpt-|o[134](?:-|\z))/)
  return :anthropic if model_s.start_with?('claude-')
  return :gemini if model_s.start_with?('gemini-')
  return :ollama if model_s.match?(OLLAMA_MODEL_PATTERN)

  nil
end
.populate_auto_rules(discovered_instances) ⇒ Object
127 128 129 130 131 132 |
# File 'lib/legion/llm/router.rb', line 127

# Generate routing rules from discovered instances and mark them as populated.
#
# @param discovered_instances [Object] passed through to Discovery::RuleGenerator.generate
# @return [Object] the result of the final log.info call
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)
  @auto_rules = generated.map do |attributes|
    # Rule.from_hash is fed symbol keys regardless of how the generator keyed them.
    symbolized = attributes.transform_keys { |key| key.to_sym }
    Rule.from_hash(symbolized)
  end
  @auto_rules_populated = true
  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end
.reset! ⇒ Object
134 135 136 137 138 |
# File 'lib/legion/llm/router.rb', line 134

# Clear cached router state: auto rules, the populated flag, and the memoized
# health tracker (rebuilt on next access).
#
# @return [false] the value of the last assignment
def reset!
  @auto_rules = []
  @health_tracker = nil
  @auto_rules_populated = false
end
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil, **_opts) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
Model, provider, and tier are treated as preference hints — they bias scoring toward matching candidates but do not bypass rule evaluation. This allows the router to apply policy (cost, privacy, health) and fall back to a better local match when the hinted provider is unavailable.
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/legion/llm/router.rb', line 78

# Resolve a routing intent to a tier/provider/model decision.
#
# tier, provider and model are passed to pick_best as hints; they do not skip
# rule evaluation. When no rule produces a resolution, the hints are handed to
# explicit_resolution, and arbitrage_fallback is used only if that yields nothing.
#
# @param intent [Object, nil] routing intent, merged with defaults before matching
# @param tier [Symbol, nil] tier hint
# @param model [String, nil] model hint
# @param provider [Symbol, String, nil] provider hint
# @param instance [Symbol, String, nil] instance hint (used by the explicit fallback)
# @param exclude [Hash] exclusions forwarded to candidate selection
# @param estimated_tokens [Integer, nil] forwarded to candidate selection
# @return [Resolution, nil]
def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}, estimated_tokens: nil, **_opts)
  log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} " \
            "instance=#{instance} estimated_tokens=#{estimated_tokens}"

  effective_intent = merge_defaults(intent)
  rule_set = load_rules
  matching = select_candidates(rule_set, effective_intent, exclude: exclude, estimated_tokens: estimated_tokens)
  winner = pick_best(matching, hints: { tier: tier, provider: provider, model: model })
  resolution = winner&.to_resolution

  if resolution
    log.info "[llm][router] action=resolve.matched tier=#{resolution.tier} provider=#{resolution.provider} " \
             "model=#{resolution.model} rule=#{resolution.rule}"
  else
    # No rule matched: fall back to explicit resolution from the hints.
    log.warn "[llm][router] action=resolve.no_rules_matched intent=#{effective_intent} candidates_evaluated=#{rule_set.size}"
    resolution = explicit_resolution(tier, provider, model, instance)
  end

  resolution || arbitrage_fallback(intent)
end
.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true, estimated_tokens: nil, **_opts) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/legion/llm/router.rb', line 101

# Build an escalation chain, either from the routing intent (when routing is
# enabled and an intent is given) or from the plain model/provider defaults.
#
# @param intent [Object, nil] routing intent
# @param tier [Symbol, nil] tier hint
# @param model [String, nil] model hint
# @param provider [Symbol, String, nil] provider hint
# @param max_escalations [Integer, nil] overrides escalation_max_attempts when given
# @param exclude [Hash] exclusions, used only on the intent path
# @param allow_default_fallback [Boolean] forwarded to the chain builder
# @param estimated_tokens [Integer, nil] used only on the intent path
# @return [Object] whatever chain_from_intent / chain_from_defaults returns
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, max_escalations: nil, exclude: {},
                  allow_default_fallback: true, estimated_tokens: nil, **_opts)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} " \
            "max_escalations=#{max_escalations} estimated_tokens=#{estimated_tokens}"

  attempt_limit = max_escalations || escalation_max_attempts

  unless routing_enabled? && intent
    return chain_from_defaults(model, provider, attempt_limit,
                               hints: { tier: tier },
                               allow_default_fallback: allow_default_fallback)
  end

  chain_from_intent(intent, attempt_limit,
                    hints: { tier: tier, provider: provider, model: model },
                    exclude: exclude,
                    allow_default_fallback: allow_default_fallback,
                    estimated_tokens: estimated_tokens)
end
.routing_enabled? ⇒ Boolean
119 120 121 |
# File 'lib/legion/llm/router.rb', line 119

# Routing is active only when the llm.routing.enabled setting is exactly +true+
# and auto rules have been populated.
#
# @return [Boolean]
def routing_enabled?
  return false unless Legion::Settings.dig(:llm, :routing, :enabled) == true

  auto_rules_populated?
end
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now. `:local` — always available; `:direct` — always available (remote self-hosted instances); `:fleet` — available when Legion::Transport is loaded; `:cloud` — available unless privacy mode is active; `:frontier` — available unless privacy mode is active.
162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/legion/llm/router.rb', line 162

# Check whether a tier can be used right now.
#
# External tiers are refused in privacy mode; :fleet requires the
# Legion::Transport constant to be defined; every other tier is available.
#
# @param tier [Symbol, String] tier name (anything responding to to_sym)
# @return [Boolean]
def tier_available?(tier)
  tier_sym = tier.to_sym

  if external_tier?(tier_sym) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{tier_sym} available=false reason=privacy_mode"
    return false
  end

  return true unless tier_sym == :fleet

  # Only constants defined directly on Legion count (inherit = false).
  transport_loaded = Legion.const_defined?('Transport', false)
  log.debug "[llm][router] action=tier_available tier=fleet available=#{transport_loaded}"
  transport_loaded
end
.tier_priority ⇒ Object
140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/legion/llm/router.rb', line 140

# Ordered list of tiers, most preferred first.
#
# The first non-empty value among the settings llm.tier_order,
# llm.routing.tier_order and llm.routing.tier_priority is used; any tier from
# TIER_RANK that is not mentioned is appended in TIER_RANK order.
#
# Settings are read with nil-safe +dig+ so that an absent section (for example
# no +routing+ block) simply means "not configured" instead of raising and
# being reported through the rescue below on every call.
#
# @return [Array<Symbol>] configured tiers followed by the remaining TIER_RANK keys;
#   TIER_RANK.keys alone when nothing is configured or an unexpected error occurs
def tier_priority
  setting_paths = [%i[llm tier_order], %i[llm routing tier_order], %i[llm routing tier_priority]]
  # Lazy so that later settings are only read when earlier ones are blank.
  configured = setting_paths.lazy
                            .map { |path| Legion::Settings.dig(*path) }
                            .find { |value| !Array(value).empty? }

  # Entries that cannot be symbolized are skipped rather than raising.
  preferred = Array(configured).filter_map { |tier| tier.to_sym if tier.respond_to?(:to_sym) }
  (preferred + TIER_RANK.keys).uniq
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end
.tier_rank ⇒ Object
152 153 154 |
# File 'lib/legion/llm/router.rb', line 152

# Map each tier in tier_priority to its position in that list (0 = most preferred).
#
# @return [Hash] tier => index
def tier_rank
  ranks = {}
  tier_priority.each_with_index { |tier, position| ranks[tier] = position }
  ranks
end