Module: Legion::LLM::Router
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/router.rb,
lib/legion/llm/router/rule.rb,
lib/legion/llm/router/arbitrage.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/health_tracker.rb,
lib/legion/llm/router/escalation/chain.rb
Defined Under Namespace
Modules: Arbitrage — Classes: EscalationChain, HealthTracker, Resolution, Rule
Constant Summary collapse
- PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier, gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
- PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
- TIER_EXTERNAL =
Set[:cloud, :frontier, :openai_compat].freeze
- TIER_RANK =
{ local: 0, direct: 1, fleet: 2, openai_compat: 3, cloud: 4, frontier: 5 }.freeze
- OLLAMA_MODEL_PATTERN =
%r{[:/]}
Class Method Summary collapse
- .auto_rules_populated? ⇒ Boolean
- .discover_provider_for_model(model) ⇒ Object
- .explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
- .health_tracker ⇒ Object
- .infer_provider_for_model(model) ⇒ Object
- .populate_auto_rules(discovered_instances) ⇒ Object
- .reset! ⇒ Object
-
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
- .resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true) ⇒ Object
- .routing_enabled? ⇒ Boolean
-
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now.
Class Method Details
.auto_rules_populated? ⇒ Boolean
106 107 108 |
# File 'lib/legion/llm/router.rb', line 106

# Whether populate_auto_rules has completed at least once.
# Strict identity check against +true+ so a nil/unset flag reads as false.
def auto_rules_populated?
  true.equal?(@auto_rules_populated)
end
.discover_provider_for_model(model) ⇒ Object
46 47 48 49 50 51 52 53 54 55 |
# File 'lib/legion/llm/router.rb', line 46

# Look up a provider for +model+ from the Discovery cache, if available.
# Matches an exact model name or a tagged variant ("name:tag").
# Returns the provider symbol, or nil when Discovery is absent or no entry matches.
def discover_provider_for_model(model)
  return nil unless defined?(Discovery) && Discovery.respond_to?(:cached_discovered_models)

  wanted = model.to_s
  match = Array(Discovery.cached_discovered_models).find do |candidate|
    discovered_name = candidate[:model].to_s
    discovered_name == wanted || discovered_name.start_with?("#{wanted}:")
  end
  match && match[:provider]
end
.explicit_resolution(tier, provider, model, instance = nil) ⇒ Object
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# File 'lib/legion/llm/router.rb', line 150

# Build a Resolution from explicit caller hints (tier/provider/model/instance),
# bypassing the rule engine. Missing pieces are filled from the provider
# registry, then tier defaults, then settings, then the :anthropic/:frontier
# hard fallback.
def explicit_resolution(tier, provider, model, instance = nil)
  # Look up a registry entry from the most specific hint supplied.
  entry =
    if provider
      registry_entry_for_provider(provider.to_sym, instance: instance&.to_sym)
    elsif tier
      registry_entry_for_tier(tier)
    end

  chosen_provider =
    if provider
      provider.to_sym
    else
      entry&.[](:provider) ||
        (tier && default_provider_for_tier(tier)) ||
        default_settings_provider&.to_sym ||
        :anthropic
    end

  chosen_model = model || registry_default_model(entry) || (tier && default_model_for_tier(tier))
  chosen_instance = entry&.[](:instance) || instance
  chosen_tier = tier || PROVIDER_TIER.fetch(chosen_provider, :frontier)

  Resolution.new(
    tier: chosen_tier,
    provider: chosen_provider,
    model: chosen_model,
    instance: chosen_instance,
    rule: 'explicit',
    metadata: entry
  )
end
.health_tracker ⇒ Object
95 96 97 |
# File 'lib/legion/llm/router.rb', line 95

# Lazily build and memoize the shared HealthTracker instance.
def health_tracker
  @health_tracker = build_health_tracker unless @health_tracker
  @health_tracker
end
.infer_provider_for_model(model) ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/legion/llm/router.rb', line 29

# Infer the provider for a model name. Discovery cache wins; otherwise fall
# back to well-known naming conventions. Bedrock patterns are checked before
# vendor prefixes so "anthropic.claude..." routes to :bedrock, not :anthropic.
# Returns nil when nothing matches.
def infer_provider_for_model(model)
  name = model.to_s
  return nil if name.empty?

  from_discovery = discover_provider_for_model(model)
  return from_discovery if from_discovery

  case name
  when /\Aus\./, /\A(anthropic|meta|mistral|cohere|amazon|ai21)\./i then :bedrock
  when /\Agpt-/, /\Ao[134]-/ then :openai
  when /\Aclaude-/ then :anthropic
  when /\Agemini-/ then :gemini
  when OLLAMA_MODEL_PATTERN then :ollama
  end
end
.populate_auto_rules(discovered_instances) ⇒ Object
110 111 112 113 114 115 |
# File 'lib/legion/llm/router.rb', line 110

# Generate routing rules from discovered provider instances and install them,
# flipping the populated flag so routing_enabled? can turn on.
def populate_auto_rules(discovered_instances)
  generated = Discovery::RuleGenerator.generate(discovered_instances)
  @auto_rules = generated.map do |rule_hash|
    # Rule.from_hash expects symbol keys regardless of how generation emitted them.
    Rule.from_hash(rule_hash.transform_keys(&:to_sym))
  end
  @auto_rules_populated = true
  log.info("[llm][router] auto_rules_populated count=#{@auto_rules.size}")
end
.reset! ⇒ Object
117 118 119 120 121 |
# File 'lib/legion/llm/router.rb', line 117

# Drop all cached router state (auto rules, populated flag, health tracker)
# so the next use rebuilds from scratch. Returns false (the cleared flag).
def reset!
  @auto_rules = []
  @health_tracker = nil
  @auto_rules_populated = false
end
.resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {}) ⇒ Resolution?
Resolve an LLM routing intent to a tier/provider/model decision.
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/legion/llm/router.rb', line 64

# Resolve an LLM routing intent to a tier/provider/model decision.
#
# Explicit hints (tier/provider/instance) bypass the rule engine entirely;
# otherwise matching rules are scored and the winner converted to a
# Resolution, with arbitrage as the final fallback when nothing matches.
# Returns a Resolution or nil.
def resolve(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, exclude: {})
  log.debug "[llm][router] action=resolve.enter intent=#{intent} tier=#{tier} model=#{model} provider=#{provider} instance=#{instance}"

  return explicit_resolution(tier, provider, model, instance) if tier || provider || instance
  return nil unless routing_enabled? && intent

  intent_spec = merge_defaults(intent)
  matching = select_candidates(load_rules, intent_spec, exclude: exclude)
  decision = pick_best(matching)&.to_resolution

  if decision.nil?
    log.debug('Router: no rules matched, resolution is nil')
  else
    log.info("Routed to tier=#{decision.tier} provider=#{decision.provider} model=#{decision.model} via rule='#{decision.rule}'")
  end

  decision || arbitrage_fallback(intent)
end
.resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true) ⇒ Object
85 86 87 88 89 90 91 92 93 |
# File 'lib/legion/llm/router.rb', line 85

# Resolve to an EscalationChain rather than a single Resolution.
# Explicit hints pin a single-entry chain; when routing is off (or there is
# no intent) the chain comes from configured defaults; otherwise the intent
# drives chain construction.
def resolve_chain(intent: nil, tier: nil, model: nil, provider: nil, instance: nil, max_escalations: nil, exclude: {}, allow_default_fallback: true)
  log.debug "[llm][router] action=resolve_chain.enter intent=#{intent} tier=#{tier} max_escalations=#{max_escalations}"

  attempts = max_escalations || escalation_max_attempts

  if tier || provider || instance
    pinned = explicit_resolution(tier, provider, model, instance)
    return EscalationChain.new(resolutions: [pinned], max_attempts: attempts)
  end

  unless routing_enabled? && intent
    return chain_from_defaults(model, provider, attempts, allow_default_fallback: allow_default_fallback)
  end

  chain_from_intent(intent, attempts, exclude: exclude, allow_default_fallback: allow_default_fallback)
end
.routing_enabled? ⇒ Boolean
99 100 101 102 103 104 |
# File 'lib/legion/llm/router.rb', line 99

# Routing is on only when settings exist, explicitly enable it, and the
# auto-generated rules have been populated.
def routing_enabled?
  config = routing_settings
  return false if config.nil? || config.empty?

  config[:enabled] == true && auto_rules_populated?
end
.tier_available?(tier) ⇒ Boolean
Check whether a tier can be used right now. :local — always available :direct — always available (remote self-hosted instances) :fleet — available when Legion::Transport is loaded :openai_compat — available when OpenAI-compatible provider instances are registered :cloud — available unless privacy mode :frontier — available unless privacy mode
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/legion/llm/router.rb', line 130

# Check whether a tier can be used right now.
#   :local / :direct    — always available
#   :fleet              — requires Legion::Transport to be loaded
#   :openai_compat      — requires registered OpenAI-compatible instances
#   :cloud / :frontier  — blocked by privacy mode
def tier_available?(tier)
  sym = tier.to_sym

  # Privacy mode vetoes every external tier before any other check.
  if external_tier?(sym) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{sym} available=false reason=privacy_mode"
    return false
  end

  case sym
  when :fleet
    available = Legion.const_defined?('Transport', false)
    log.debug "[llm][router] action=tier_available tier=fleet available=#{available}"
    available
  when :openai_compat
    available = openai_compat_available?
    log.debug "[llm][router] action=tier_available tier=openai_compat available=#{available}"
    available
  else
    true
  end
end