Module: Legion::LLM::Router
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/escalation/chain.rb
Defined Under Namespace
Modules: Arbitrage · Classes: EscalationChain, HealthTracker, Resolution, Rule
Constant Summary
- PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier, gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
- PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
- TIER_EXTERNAL =
Set[:cloud, :frontier, :openai_compat].freeze
- TIER_RANK =
{ local: 0, direct: 1, fleet: 2, openai_compat: 3, cloud: 4, frontier: 5 }.freeze
- CAPABILITY_ALIASES =
{ function_calling: :tools, functions: :tools, tool: :tools, tool_use: :tools, stream: :streaming, stream_chat: :streaming }.freeze
- OLLAMA_MODEL_PATTERN =
%r{[:/]}
Class Method Summary
- .auto_rules_populated? ⇒ Boolean
- .discover_provider_for_model(model) ⇒ Object
- .explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
- .health_tracker ⇒ Object
- .infer_provider_for_model(model) ⇒ Object
- .populate_auto_rules(discovered_instances) ⇒ Object
- .reset! ⇒ Object
-
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
- .resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true) ⇒ Object
- .routing_enabled? ⇒ Boolean
-
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
- .tier_priority ⇒ Object
- .tier_rank ⇒ Object
Class Method Details
.auto_rules_populated? ⇒ Boolean
114 115 116 |
# File 'lib/legion/llm/router.rb', line 114

# Whether Discovery has populated the auto-generated routing rules.
# Flipped to true by #populate_auto_rules and back to false by #reset!.
#
# @return [Boolean] true only when the flag is literally true
def auto_rules_populated?
  # Strict identity check: nil, false, or any truthy junk all count as "not populated".
  true.equal?(@auto_rules_populated)
end
.discover_provider_for_model(model) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/legion/llm/router.rb', line 54

# Look up which provider serves +model+ according to the Discovery cache.
# Matches either the exact model name or a tagged variant ("name:tag").
#
# @param model [#to_s] model identifier to look up
# @return [Symbol, nil] the provider symbol, or nil when Discovery is
#   unavailable or no cached entry matches
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  tagged_prefix = "#{wanted}:"
  hit = Array(Discovery.cached_discovered_models).detect do |candidate|
    name = candidate[:model].to_s
    name == wanted || name.start_with?(tagged_prefix)
  end
  hit && hit[:provider]
end
.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/legion/llm/router.rb', line 174 def explicit_resolution(tier, provider, model, instance = nil) registry_entry = if provider registry_entry_for_provider(provider.to_sym, instance: instance&.to_sym) elsif tier registry_entry_for_tier(tier) end resolved_provider = if provider provider.to_sym else registry_entry&.[](:provider) || (tier && default_provider_for_tier(tier)) || default_settings_provider&.to_sym || :anthropic end resolved_model = model || registry_default_model(registry_entry) || (tier && default_model_for_tier(tier)) resolved_instance = registry_entry&.[](:instance) || instance resolved_tier = tier || PROVIDER_TIER.fetch(resolved_provider, :frontier) Resolution.new( tier: resolved_tier, provider: resolved_provider, model: resolved_model, instance: resolved_instance, rule: 'explicit', metadata: (registry_entry) ) end |
.health_tracker ⇒ Object
103 104 105 |
# File 'lib/legion/llm/router.rb', line 103

# Lazily constructed, memoized health tracker for provider/tier health.
# Cleared by #reset!.
#
# @return [Object] the shared tracker instance built by #build_health_tracker
def health_tracker
  # Build on first (or post-reset) access; matches ||= semantics exactly,
  # including rebuilding when the cached value is falsy.
  @health_tracker = build_health_tracker unless @health_tracker
  @health_tracker
end
.infer_provider_for_model(model) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/legion/llm/router.rb', line 37

# Guess the provider for a model name. Discovery cache wins; otherwise a
# series of naming-convention heuristics is tried in fixed priority order.
#
# @param model [#to_s, nil] model identifier
# @return [Symbol, nil] inferred provider, or nil when nothing matches
def infer_provider_for_model(model)
  name = model.to_s
  return nil if name.empty?

  discovered = discover_provider_for_model(model)
  return discovered if discovered

  # Heuristics, checked in priority order (first match wins).
  case name
  when /\Aus\./, /\A(anthropic|meta|mistral|cohere|amazon|ai21)\./i
    :bedrock
  when /\Agpt-|\Ao[134]-/
    :openai
  when /\Aclaude-/
    :anthropic
  when /\Agemini-/
    :gemini
  when OLLAMA_MODEL_PATTERN
    :ollama
  end
end
.populate_auto_rules(discovered_instances) ⇒ Object
118 119 120 121 122 123 |
# File 'lib/legion/llm/router.rb', line 118

# Convert discovered provider instances into routing Rule objects and mark
# auto-rules as populated (which in turn enables #routing_enabled?).
#
# @param discovered_instances [Object] payload accepted by Discovery::RuleGenerator
# @return [void]
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)
  @auto_rules = generated.map do |attrs|
    Rule.from_hash(attrs.transform_keys(&:to_sym))
  end
  @auto_rules_populated = true
  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end
.reset! ⇒ Object
125 126 127 128 129 |
# File 'lib/legion/llm/router.rb', line 125

# Clear all cached router state: auto-rules, the populated flag, and the
# memoized health tracker. Primarily for tests and re-discovery cycles.
#
# @return [void]
def reset!
  @auto_rules = []
  @auto_rules_populated = false
  @health_tracker = nil
end
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/legion/llm/router.rb', line 72

# Resolve an LLM routing intent to a tier/provider/model decision.
# Explicit tier/provider/instance hints short-circuit rule matching; when
# routing is disabled or no intent is given, nil is returned. If no rule
# matches, arbitrage is consulted as a last resort.
#
# @return [Resolution, nil]
def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {})
  log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} instance=#{instance}"
  return explicit_resolution(tier, provider, model, instance) if tier || provider || instance
  return nil unless routing_enabled? && intent

  criteria = merge_defaults(intent)
  matches = select_candidates(load_rules, criteria, exclude: exclude)
  winner = pick_best(matches)&.to_resolution

  if winner
    log.info("Routed to tier=#{winner.tier} provider=#{winner.provider} model=#{winner.model} via rule='#{winner.rule}'")
    winner
  else
    log.debug('Router: no rules matched, resolution is nil')
    arbitrage_fallback(intent)
  end
end
.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true) ⇒ Object
93 94 95 96 97 98 99 100 101 |
# File 'lib/legion/llm/router.rb', line 93

# Like #resolve, but returns an EscalationChain of ordered fallback
# resolutions. Explicit hints produce a single-entry chain; with routing
# disabled (or no intent) the chain comes from configured defaults.
#
# @return [EscalationChain]
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} max_escalations=#{max_escalations}"
  attempts = max_escalations || escalation_max_attempts

  if tier || provider || instance
    pinned = explicit_resolution(tier, provider, model, instance)
    return EscalationChain.new(resolutions: [pinned], max_attempts: attempts)
  end

  unless routing_enabled? && intent
    return chain_from_defaults(model, provider, attempts, allow_default_fallback: allow_default_fallback)
  end

  chain_from_intent(intent, attempts, exclude: exclude, allow_default_fallback: allow_default_fallback)
end
.routing_enabled? ⇒ Boolean
107 108 109 110 111 112 |
# File 'lib/legion/llm/router.rb', line 107

# Routing is on only when settings exist, explicitly enable it, and the
# Discovery-generated auto-rules have been populated.
#
# @return [Boolean]
def routing_enabled?
  config = routing_settings
  return false if config.nil? || config.empty?
  return false unless config[:enabled] == true

  auto_rules_populated?
end
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
- :local — always available
- :direct — always available (remote self-hosted instances)
- :fleet — available when Legion::Transport is loaded
- :openai_compat — available when OpenAI-compatible provider instances are registered
- :cloud — available unless privacy mode
- :frontier — available unless privacy mode
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/legion/llm/router.rb', line 154

# Check whether a tier can be used right now.
# External tiers (cloud/frontier/openai_compat) are blocked by privacy mode;
# :fleet requires Legion::Transport to be loaded; :openai_compat requires
# registered OpenAI-compatible instances; everything else is always on.
#
# @param tier [Symbol, String]
# @return [Boolean]
def tier_available?(tier)
  sym = tier.to_sym

  if external_tier?(sym) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{sym} available=false reason=privacy_mode"
    return false
  end

  case sym
  when :fleet
    available = Legion.const_defined?('Transport', false)
    log.debug "[llm][router] action=tier_available tier=fleet available=#{available}"
    available
  when :openai_compat
    available = openai_compat_available?
    log.debug "[llm][router] action=tier_available tier=openai_compat available=#{available}"
    available
  else
    true
  end
end
.tier_priority ⇒ Object
131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/legion/llm/router.rb', line 131

# Ordered list of tiers to try. Sources, in priority order: the
# :tier_order setting, then routing_settings[:tier_order], then
# routing_settings[:tier_priority]. Any tiers missing from the configured
# order are appended in TIER_RANK order, so the result always covers every
# known tier. Falls back to TIER_RANK.keys on any error.
#
# @return [Array<Symbol>]
def tier_priority
  order = Legion::LLM::Settings.value(:tier_order, default: nil)
  order = routing_settings[:tier_order] if order.nil? || Array(order).empty?
  order = routing_settings[:tier_priority] if order.nil? || Array(order).empty?

  tiers = Array(order).filter_map { |entry| entry.to_sym if entry.respond_to?(:to_sym) }
  tiers = TIER_RANK.keys if tiers.empty?
  (tiers + TIER_RANK.keys).uniq
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end
.tier_rank ⇒ Object
143 144 145 |
# File 'lib/legion/llm/router.rb', line 143

# Map each tier to its position in the effective priority order, so lower
# rank means higher preference.
#
# @return [Hash{Symbol=>Integer}]
def tier_rank
  ranks = {}
  tier_priority.each_with_index { |tier, position| ranks[tier] = position }
  ranks
end