Module: Legion::LLM::Router

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/router.rb,
lib/legion/llm/router/resolution.rb,
lib/legion/llm/router/availability.rb,
lib/legion/llm/router/health_tracker.rb

Defined Under Namespace

Modules: Availability Classes: HealthTracker, Resolution

Constant Summary collapse

# Routing tier assigned to each known provider.
PROVIDER_TIER =
{ bedrock: :cloud, anthropic: :frontier, openai: :frontier,
gemini: :cloud, azure: :cloud, ollama: :local, vllm: :fleet }.freeze
# The same providers as PROVIDER_TIER, as an ordered list.
PROVIDER_ORDER =
%i[ollama vllm bedrock azure gemini anthropic openai].freeze
# The :cloud and :frontier tiers.
# NOTE(review): presumably the set external_tier? tests against — confirm.
TIER_EXTERNAL =
Set[:cloud, :frontier].freeze
# Default tier ordering. tier_priority uses these keys, in this order, both as
# its fallback and to complete a partially configured tier list.
TIER_RANK =
{ local: 0, direct: 1, fleet: 2, cloud: 3, frontier: 4 }.freeze
# Alternate capability names mapped to their canonical name (:tools or :streaming).
CAPABILITY_ALIASES =
{
  function_calling: :tools,
  functions:        :tools,
  tool:             :tools,
  tool_use:         :tools,
  stream:           :streaming,
  stream_chat:      :streaming
}.freeze
# Canonical effort levels, listed in the same ascending order as EFFORT_RANK.
CANONICAL_EFFORT_LEVELS =
%i[low moderate high reasoning].freeze
# Alias for an effort level: :medium maps to :moderate.
EFFORT_ALIASES =
{ medium: :moderate }.freeze
# Every effort name: the canonical levels followed by the alias keys.
EFFORT_LEVELS =
(CANONICAL_EFFORT_LEVELS + EFFORT_ALIASES.keys).freeze
# Numeric rank of each canonical effort level (alias names are not keys here).
EFFORT_RANK =
{ low: 0, moderate: 1, high: 2, reasoning: 3 }.freeze
# Canonical operation names.
OPERATIONS =
%i[chat stream embed image structured_output].freeze
# Alternate operation names mapped to an entry of OPERATIONS.
OPERATION_ALIASES =
{ completion: :chat, stream_chat: :stream, embedding: :embed }.freeze
# NOTE(review): presumably the operation assumed when a request names none — confirm.
DEFAULT_OPERATION =
:chat
# NOTE(review): presumably the effort assumed when a request names none — confirm.
DEFAULT_EFFORT =
:moderate
# Matches any string containing ':' or '/'. infer_provider_for_model treats a
# model name matching this as an Ollama model once every other check has failed.
OLLAMA_MODEL_PATTERN =
%r{[:/]}

Class Method Summary collapse

Class Method Details

.auto_rules_populated?Boolean

Returns:

  • (Boolean)


131
132
133
# File 'lib/legion/llm/router.rb', line 131

# Reports whether the auto-rules flag has been set.
#
# Only the literal value +true+ counts: nil, false and any other truthy
# object stored in the flag all yield false.
#
# @return [Boolean]
def auto_rules_populated?
  flag = @auto_rules_populated
  flag == true
end

.health_trackerObject



123
124
125
# File 'lib/legion/llm/router.rb', line 123

# Returns the memoized health tracker, building it via build_health_tracker
# the first time it is requested (or again after the cached value is cleared).
#
# @return [Object] whatever build_health_tracker produced
def health_tracker
  return @health_tracker if @health_tracker

  @health_tracker = build_health_tracker
end

.infer_provider_for_model(model) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/legion/llm/router.rb', line 86

# Guesses which provider serves +model+ purely from the shape of its name.
#
# Checks run in this order and the first match wins:
#   1. Bedrock  — a 'us.' prefix, or a vendor namespace (anthropic., meta.,
#                 mistral., cohere., amazon., ai21.) optionally preceded by a
#                 cross-region geography prefix such as 'eu.' or 'apac.'
#   2. OpenAI   — 'gpt-…', or an o1/o3/o4 id, bare or with a '-suffix'
#   3. Anthropic — 'claude-…'
#   4. Gemini   — 'gemini-…'
#   5. Ollama   — anything matching OLLAMA_MODEL_PATTERN (contains ':' or '/')
#
# @param model [String, Symbol, nil] model identifier
# @return [Symbol, nil] provider symbol, or nil when the name is blank or
#   matches none of the patterns
def infer_provider_for_model(model)
  model_s = model.to_s # nil.to_s is "", so this also covers a nil model
  return nil if model_s.empty?

  return :bedrock if model_s.start_with?('us.')
  # Geography-prefixed Bedrock ids usually end in ":0"; without the optional
  # prefix here they would fall through to the Ollama ':' check below.
  return :bedrock if model_s.match?(/\A(?:[a-z][a-z-]*\.)?(?:anthropic|meta|mistral|cohere|amazon|ai21)\./i)
  # Accept bare "o1"/"o3" as well as suffixed ids such as "o3-mini".
  return :openai if model_s.match?(/\Agpt-|\Ao[134](?:-|\z)/)
  return :anthropic if model_s.start_with?('claude-')
  return :gemini if model_s.start_with?('gemini-')
  return :ollama if model_s.match?(OLLAMA_MODEL_PATTERN)

  nil
end

.inventory_default_model(provider, instance = nil) ⇒ Object

Returns the provider’s own default model from Inventory, the single source of truth (already whitelist/blacklist-filtered and discovery-fed). Sourcing the model here guarantees that an explicit provider is paired only with a model it actually offers: anthropic resolves to a model anthropic itself offers, never to a stale registry default or to a global default that belongs to a different provider (the class of bug in which anthropic was paired with a qwen model). Returns nil when Inventory has no catalog for the provider (cold boot), so callers fall through to their existing fallbacks.



108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/legion/llm/router.rb', line 108

# Looks up the default inference model that Inventory lists for +provider+.
#
# The lane whose :instance_id (or, failing that, :provider_instance) equals
# +instance+ is preferred; when none matches, the first lane is used. The
# model comes from the lane's :model, falling back to :canonical_model_alias.
#
# @param provider [#to_sym, nil] provider name
# @param instance [#to_s, nil] instance id; nil is treated as :default
# @return [String, nil] model name, or nil when the provider is blank,
#   Inventory is not defined, no lanes exist, the chosen lane names no model,
#   or any StandardError is raised (it is passed to handle_exception)
def inventory_default_model(provider, instance = nil)
  return nil unless provider && defined?(Inventory)

  lanes = Inventory.lanes_for(provider: provider.to_sym, type: :inference)
  return nil if lanes.nil? || lanes.empty?

  wanted = (instance || :default).to_s
  matching = lanes.find do |lane|
    lane_instance = lane[:instance_id] || lane[:provider_instance]
    lane_instance.to_s == wanted
  end
  chosen = matching || lanes.first

  name = chosen[:model] || chosen[:canonical_model_alias]
  name.nil? ? nil : name.to_s
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.inventory_default_model')
  nil
end

.populate_auto_rules(_discovered_instances = nil) ⇒ Object

DEPRECATED in v0.14.0; delete in v0.15.0. See GitHub issues:

#155 — remove this stub in v0.15.0 (blocked-by #154)
#154 — drop call sites from 9 lex-llm-* gems


139
140
141
142
143
144
145
# File 'lib/legion/llm/router.rb', line 139

# Deprecated no-op kept so existing callers do not break.
#
# All arguments are accepted and ignored. The first call logs a single
# deprecation warning; later calls do nothing until the warned flag is cleared.
#
# @param _discovered_instances [Object, nil] ignored
# @return [Object, nil] the result of log.warn on the first call, nil afterwards
def populate_auto_rules(_discovered_instances = nil, **)
  unless @populate_auto_rules_warned
    @populate_auto_rules_warned = true
    log.warn '[llm][router] populate_auto_rules is deprecated and is a no-op as of v0.14.0; ' \
             'lex-llm-* gems should drop this call (RANKING v2 replaces auto-rules with lane weights)'
  end
end

.request_lane(type:, tiers: [], providers: [], instances: [], models: [], capabilities: [], thinking: :any, privacy: :normal, estimated_context: nil, tried_lanes: [], rng: default_rng) ⇒ Object

Stateless lane selection — pure function of (Inventory snapshot, routing payload). Returns one lane Hash or nil (caller raises NoLaneAvailable / EscalationExhausted).

M1: when the filters narrow to a single provider (and at most one instance), the indexed read (Inventory.lanes_for) is used instead of full enumeration — same semantics, cheaper. B-E / sonnet W2: lanes are Hashes, so read the weight with a block such as { _1[:lane_weight] }, NOT &:lane_weight.



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/legion/llm/router.rb', line 52

# Picks one lane for a request from the current Inventory contents.
#
# Candidates come from the indexed read Inventory.lanes_for when exactly one
# provider and at most one instance are requested, otherwise from
# Inventory.lanes. Each candidate must pass lane_passes_hard_filters?, must
# not be listed in +tried_lanes+ (by :id) and must have a positive
# :lane_weight. Among the survivors, one lane of the highest weight is drawn
# at random using +rng+.
#
# @param type [Symbol] lane type, forwarded to the filters and the indexed read
# @param tiers [Array] forwarded to lane_passes_hard_filters?
# @param providers [Array] provider filter; a single entry enables the indexed read
# @param instances [Array] instance filter; at most one entry for the indexed read
# @param models [Array] forwarded to lane_passes_hard_filters?
# @param capabilities [Array] forwarded to lane_passes_hard_filters?
# @param thinking [Symbol] forwarded to lane_passes_hard_filters?
# @param privacy [Symbol] forwarded to lane_passes_hard_filters?
# @param estimated_context [Integer, nil] forwarded to lane_passes_hard_filters?
# @param tried_lanes [Array] lane ids to exclude
# @param rng [Random] random source handed to Array#sample
# @return [Hash, nil] the chosen lane, or nil when no lane is eligible
def request_lane(
  type:,
  tiers: [], providers: [], instances: [], models: [],
  capabilities: [], thinking: :any, privacy: :normal,
  estimated_context: nil, tried_lanes: [],
  rng: default_rng,
  **
)
  candidates = if providers.size == 1 && instances.size <= 1
                 Legion::LLM::Inventory.lanes_for(
                   provider: providers.first, instance: instances.first, type: type
                 )
               else
                 Legion::LLM::Inventory.lanes
               end
  # inventory_default_model already guards against a nil read from Inventory;
  # do the same here so an empty catalog means "no lane" instead of raising
  # NoMethodError from the select below.
  return nil if candidates.nil?

  passing = candidates.select do |lane|
    lane_passes_hard_filters?(
      lane: lane, type: type, tiers: tiers, providers: providers, instances: instances,
      models: models, capabilities: capabilities, thinking: thinking, privacy: privacy,
      estimated_context: estimated_context
    )
  end
  # Drop lanes already attempted and lanes whose weight is nil, zero or negative.
  eligible = passing.reject { |lane| tried_lanes.include?(lane[:id]) || lane[:lane_weight].to_i <= 0 }

  return nil if eligible.empty?

  # Keep only the highest-weight group, then break ties randomly.
  eligible
    .group_by { |lane| lane[:lane_weight] }
    .max_by { |weight, _| weight }
    .last
    .sample(random: rng)
end

.reset!Object



147
148
149
150
151
152
# File 'lib/legion/llm/router.rb', line 147

# Clears the module-level router state: drops the memoized health tracker,
# empties the auto-rules list and lowers both the populated and the
# deprecation-warned flags.
#
# @return [false] the value of the last assignment
def reset!
  @auto_rules = []
  @health_tracker = nil
  @populate_auto_rules_warned = @auto_rules_populated = false
end

.routing_enabled?Boolean

Returns:

  • (Boolean)


127
128
129
# File 'lib/legion/llm/router.rb', line 127

# Reports whether routing is enabled; this implementation unconditionally
# answers false.
# NOTE(review): presumably retained so existing callers keep working — confirm.
#
# @return [Boolean] always false
def routing_enabled?
  false
end

.tier_available?(tier) ⇒ Boolean

Check whether a tier can be used right now:

  • :local — always available
  • :direct — always available (remote self-hosted instances)
  • :fleet — available when Legion::Transport is loaded
  • :cloud — available unless privacy mode is on
  • :frontier — available unless privacy mode is on

Returns:

  • (Boolean)


176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/legion/llm/router.rb', line 176

# Decides whether +tier+ may be used at this moment.
#
# An external tier is refused while privacy mode is on. The :fleet tier is
# usable only when the Transport constant is defined directly on Legion.
# Every other tier is usable. The privacy refusal and the fleet decision are
# logged at debug level.
#
# @param tier [#to_sym] tier name
# @return [Boolean]
def tier_available?(tier)
  name = tier.to_sym

  if external_tier?(name) && privacy_mode?
    log.debug "[llm][router] action=tier_available tier=#{name} available=false reason=privacy_mode"
    return false
  end
  return true unless name == :fleet

  transport_loaded = Legion.const_defined?('Transport', false)
  log.debug "[llm][router] action=tier_available tier=fleet available=#{transport_loaded}"
  transport_loaded
end

.tier_priorityObject



154
155
156
157
158
159
160
161
162
163
164
# File 'lib/legion/llm/router.rb', line 154

# Resolves the tier ordering to use.
#
# Settings are consulted in this order, moving on only while the value found
# is nil or empty: llm.tier_order, llm.routing.tier_order,
# llm.routing.tier_priority. Entries that cannot be converted with to_sym are
# dropped, and any TIER_RANK tier not mentioned is appended in TIER_RANK
# order, so the result always contains every known tier exactly once.
#
# @return [Array<Symbol>] tiers in priority order; TIER_RANK.keys when nothing
#   usable is configured or when a StandardError is raised (it is passed to
#   handle_exception)
def tier_priority
  blank = ->(value) { value.nil? || Array(value).empty? }

  order = Legion::Settings[:llm][:tier_order]
  order = Legion::Settings[:llm][:routing][:tier_order] if blank.call(order)
  order = Legion::Settings[:llm][:routing][:tier_priority] if blank.call(order)

  tiers = Array(order).each_with_object([]) do |entry, kept|
    next unless entry.respond_to?(:to_sym)

    sym = entry.to_sym
    kept << sym if sym
  end

  # The union de-duplicates and fills in missing tiers; an empty list therefore
  # collapses to TIER_RANK.keys.
  tiers | TIER_RANK.keys
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'router.tier_priority')
  TIER_RANK.keys
end

.tier_rankObject



166
167
168
# File 'lib/legion/llm/router.rb', line 166

# Maps each tier returned by tier_priority to its zero-based position in
# that list.
#
# @return [Hash] tier => index
def tier_rank
  tier_priority.each_with_index.each_with_object({}) do |(tier, position), ranks|
    ranks[tier] = position
  end
end