Module: Legion::LLM::Inference::Prompt

Extended by:: Legion::Logging::Helper

Defined in:: lib/legion/llm/inference/prompt.rb

Class Method Summary

.decide(question, options:, tools: []) ⇒ Object

Pick from a set of options with reasoning.
.dispatch(message, intent: nil, tier: nil, provider: nil, model: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

Auto-routed: Router picks the best provider+model based on intent.
.extract(text, schema:, tools: []) ⇒ Object

Extract structured data from unstructured text.
.request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

Pinned: caller specifies exact provider+model.
.summarize(messages, tools: []) ⇒ Object

Condense a conversation or feedback history into a shorter form.

Class Method Details

.decide(question, options:, tools: []) ⇒ `Object`

Pick from a set of options with reasoning.

# File 'lib/legion/llm/inference/prompt.rb', line 129

# Asks the model to pick from a set of options for the given question.
#
# The prompt text comes from build_decide_prompt; the call itself is handed to
# dispatch, so any extra keyword arguments are forwarded there untouched.
#
# @param question [Object] passed to build_decide_prompt as its first argument
# @param options [Object] candidate choices, passed to build_decide_prompt
# @param tools [Array] forwarded to dispatch as +tools:+ (defaults to +[]+)
# @return [Object] whatever dispatch returns
def decide(question, options:, tools: [], **)
  dispatch(build_decide_prompt(question, options), tools:, **)
end

.dispatch(message, intent: nil, tier: nil, provider: nil, model: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ `Object`

Auto-routed: Router picks the best provider+model based on intent. Primary entry point for most LLM calls. When provider/model are passed explicitly, they take precedence over routing.

# File 'lib/legion/llm/inference/prompt.rb', line 16

# Auto-routed entry point: settles on a provider and model, then delegates to
# request with every other option passed through unchanged.
#
# Resolution runs in this order:
# 1. When Inference::Request.auto_routing_model? accepts the model, the
#    provider is cleared and +intent+ falls back to
#    Inference::Request.default_auto_routing_intent if none was given.
# 2. Otherwise, a model without a provider asks Router to infer the provider.
# 3. With neither provider nor model set and a +tier+ given, the lane returned
#    by Router.request_lane supplies both.
# 4. With neither set, no auto-routing, and no inventory lanes, the
#    +default_provider+ / +default_model+ entries of Legion::Settings[:llm]
#    are used.
# Steps 2 and 3 are skipped when the Router constant is not defined.
#
# @param message [Object] prompt payload, forwarded to request as-is
# @param provider [Object, nil] explicit provider; nil leaves it to resolution
# @param model [Object, nil] explicit model; nil leaves it to resolution
# @param tier [#to_sym, nil] tier used for the lane lookup and forwarded on
# @param intent [Object, nil] routing intent, forwarded on
# @return [Object] the value returned by request
def dispatch(message, intent: nil, tier: nil, provider: nil, model: nil, schema: nil, tools: nil,
             escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil,
             tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil, **)
  # Snapshot which routing inputs the caller actually supplied, before any of
  # them are filled in below; request receives this as +routing_explicit:+.
  routing_explicit = { provider: !provider.nil?, model: !model.nil?, tier: !tier.nil? }

  target_model = model
  auto_route = Inference::Request.auto_routing_model?(target_model)

  # Auto-routing discards any caller-supplied provider.
  target_provider = auto_route ? nil : provider
  intent ||= Inference::Request.default_auto_routing_intent if auto_route

  if !auto_route && target_provider.nil? && target_model && defined?(Router)
    target_provider = Router.infer_provider_for_model(target_model)
  end

  nothing_chosen = target_provider.nil? && target_model.nil?
  if nothing_chosen && defined?(Router) && tier
    lane = Router.request_lane(type: :inference, tiers: [tier.to_sym])
    # The lane may be nil, in which case both stay nil.
    target_provider = lane&.dig(:provider_family)
    target_model    = lane&.dig(:model)
  end

  # Re-evaluated on purpose: the lane lookup above may have filled either value.
  still_nothing_chosen = target_provider.nil? && target_model.nil?
  if !auto_route && still_nothing_chosen && Legion::LLM::Inventory.lanes.none?
    target_provider = Legion::Settings[:llm][:default_provider]
    target_model = Legion::Settings[:llm][:default_model]
  end

  request(message, provider: target_provider, model: target_model,
                   intent:, tier:, schema:, tools:, escalate:, max_escalations:,
                   thinking:, temperature:, max_tokens:, tracing:, agent:, caller:,
                   cache:, quality_check:, routing_explicit:, **)
end

.extract(text, schema:, tools: []) ⇒ `Object`

Extract structured data from unstructured text.

# File 'lib/legion/llm/inference/prompt.rb', line 123

# Extracts structured data from unstructured text.
#
# The prompt text comes from build_extract_prompt; +schema+, +tools+ and any
# extra keyword arguments are forwarded to dispatch.
#
# @param text [Object] source text, passed to build_extract_prompt
# @param schema [Object] forwarded to dispatch as +schema:+
# @param tools [Array] forwarded to dispatch as +tools:+ (defaults to +[]+)
# @return [Object] whatever dispatch returns
def extract(text, schema:, tools: [], **)
  dispatch(build_extract_prompt(text), schema:, tools:, **)
end

.request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ `Object`

Pinned: caller specifies exact provider+model. Full pipeline runs in-process.

# File 'lib/legion/llm/inference/prompt.rb', line 79

# Pinned entry point: the caller names the provider and model directly.
#
# Builds a pipeline request via build_pipeline_request and runs it through
# Inference::Executor.
#
# @param message [Object] prompt payload, forwarded to build_pipeline_request
# @param provider [Object, nil] provider to use
# @param model [Object, nil] model to use
# @return [Object] the result of Inference::Executor#call
# @raise [LLMError] when provider or model is nil, the model is not an
#   auto-routing model, and Legion::LLM::Inventory has no lanes
def request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil,
            escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil,
            tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil, **)
  auto_route = Inference::Request.auto_routing_model?(model)
  target_incomplete = provider.nil? || model.nil?

  # An incomplete target is only fatal when nothing downstream could fill it
  # in: no auto-routing model and no inventory lanes.
  if target_incomplete && !auto_route && Legion::LLM::Inventory.lanes.none?
    raise LLMError, "Prompt.request: provider and model must be set (got provider=#{provider.inspect}, model=#{model.inspect}). " \
                    'Configure Legion::Settings[:llm][:default_provider] and [:default_model], or pass them explicitly.'
  end

  Inference::Executor.new(
    build_pipeline_request(message, provider:, model:, intent:, tier:, schema:, tools:,
                                    escalate:, max_escalations:, thinking:, temperature:,
                                    max_tokens:, tracing:, agent:, caller:, cache:,
                                    quality_check:, **)
  ).call
end

.summarize(messages, tools: []) ⇒ `Object`

Condense a conversation or feedback history into a shorter form.

# File 'lib/legion/llm/inference/prompt.rb', line 117

# Condenses a conversation or feedback history into a shorter form.
#
# The prompt text comes from build_summarize_prompt; +tools+ and any extra
# keyword arguments are forwarded to dispatch.
#
# @param messages [Object] history to condense, passed to build_summarize_prompt
# @param tools [Array] forwarded to dispatch as +tools:+ (defaults to +[]+)
# @return [Object] whatever dispatch returns
def summarize(messages, tools: [], **)
  dispatch(build_summarize_prompt(messages), tools:, **)
end

Module: Legion::LLM::Inference::Prompt

Class Method Summary

Class Method Details

.decide(question, options:, tools: []) ⇒ Object

.dispatch(message, intent: nil, tier: nil, provider: nil, model: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

.extract(text, schema:, tools: []) ⇒ Object

.request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

.summarize(messages, tools: []) ⇒ Object

.decide(question, options:, tools: []) ⇒ `Object`

.dispatch(message, intent: nil, tier: nil, provider: nil, model: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ `Object`

.extract(text, schema:, tools: []) ⇒ `Object`

.request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ `Object`

.summarize(messages, tools: []) ⇒ `Object`