Module: Legion::LLM::Inference::Prompt

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/inference/prompt.rb

Class Method Summary

Class Method Details

.decide(question, options:, tools: []) ⇒ Object

Pick from a set of options with reasoning.



# File 'lib/legion/llm/inference/prompt.rb', line 135

def decide(question, options:, tools: [], **)
  prompt = build_decide_prompt(question, options)
  dispatch(prompt, tools: tools, **)
end
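
A minimal usage sketch, assuming the question and option strings are placeholders; extra keyword arguments are forwarded to .dispatch, so routing hints such as intent: or tier: can be passed as well.

# Hypothetical usage: the option values are placeholders.
choice = Legion::LLM::Inference::Prompt.decide(
  'Which queue should handle this job?',
  options: %w[critical default low]
)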

.dispatch(message, intent: nil, tier: nil, exclude: {}, provider: nil, model: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

Auto-routed: Router picks the best provider+model based on intent. Primary entry point for most LLM calls. When provider/model are passed explicitly, they take precedence over routing.



# File 'lib/legion/llm/inference/prompt.rb', line 23

def dispatch(message, # rubocop:disable Metrics/ParameterLists
             intent: nil,
             tier: nil,
             exclude: {},
             provider: nil,
             model: nil,
             schema: nil,
             tools: nil,
             escalate: nil,
             max_escalations: 3,
             thinking: nil,
             temperature: nil,
             max_tokens: nil,
             tracing: nil,
             agent: nil,
             caller: nil,
             cache: nil,
             quality_check: nil,
             **)
  routing_explicit = { provider: !provider.nil?, model: !model.nil?, tier: !tier.nil? }
  resolved_provider = provider
  resolved_model = model
  auto_route = Inference::Request.auto_routing_model?(resolved_model)

  if auto_route
    resolved_provider = nil
    intent ||= Inference::Request.default_auto_routing_intent
  elsif resolved_provider.nil? && resolved_model && defined?(Router)
    resolved_provider = Router.infer_provider_for_model(resolved_model)
  end

  if resolved_provider.nil? && resolved_model.nil? && defined?(Router) && Router.routing_enabled? && (intent || tier)
    resolution = Router.resolve(intent: intent, tier: tier, exclude: exclude)
    resolved_provider = resolution&.provider
    resolved_model = resolution&.model
  end

  resolved_provider ||= llm_setting(:default_provider) unless auto_route
  resolved_model ||= llm_setting(:default_model) unless auto_route

  request(message,
          provider:         resolved_provider,
          model:            resolved_model,
          intent:           intent,
          tier:             tier,
          schema:           schema,
          tools:            tools,
          escalate:         escalate,
          max_escalations:  max_escalations,
          thinking:         thinking,
          temperature:      temperature,
          max_tokens:       max_tokens,
          tracing:          tracing,
          agent:            agent,
          caller:           caller,
          cache:            cache,
          quality_check:    quality_check,
          routing_explicit: routing_explicit,
          **)
end
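
Two hedged sketches of the routing behavior described above: an auto-routed call driven by intent/tier, and a pinned call where explicit provider/model bypass the Router. The intent, tier, provider, and model values shown are assumptions, not documented names.

# Auto-routed call: the Router resolves provider+model from intent/tier hints.
# The intent and tier symbols below are placeholders.
answer = Legion::LLM::Inference::Prompt.dispatch(
  'Summarize the last deployment failure.',
  intent: :summarization,
  tier: :fast,
  max_tokens: 512
)

# Pinned call: explicit provider/model take precedence over routing.
answer = Legion::LLM::Inference::Prompt.dispatch(
  'Summarize the last deployment failure.',
  provider: :openai,        # placeholder provider name
  model: 'example-model-v1' # placeholder model name
)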

.extract(text, schema:, tools: []) ⇒ Object

Extract structured data from unstructured text.



# File 'lib/legion/llm/inference/prompt.rb', line 129

def extract(text, schema:, tools: [], **)
  prompt = build_extract_prompt(text)
  dispatch(prompt, schema: schema, tools: tools, **)
end
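
A hedged sketch; the schema hash shown is an assumed JSON-Schema-style shape, since this file only forwards schema: to .dispatch and does not define the expected format.

# Hypothetical schema: a JSON-Schema-style hash is assumed here.
contact_schema = {
  type: 'object',
  properties: {
    name:  { type: 'string' },
    email: { type: 'string' }
  }
}

Legion::LLM::Inference::Prompt.extract(
  'Reach out to Jane Doe at jane@example.com about the outage.',
  schema: contact_schema
)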

.llm_setting(key, default = nil) ⇒ Object

Read an LLM setting value with an optional fallback; on a Settings error, logs a warning and returns the default.



# File 'lib/legion/llm/inference/prompt.rb', line 13

def llm_setting(key, default = nil)
  Legion::LLM::Settings.value(key, default: default)
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.inference.prompt.llm_setting', key: key)
  default
end
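
An illustrative sketch; :default_provider and :default_model are the keys referenced by .dispatch in this file, while the :openai fallback value is a placeholder.

provider = Legion::LLM::Inference::Prompt.llm_setting(:default_provider, :openai) # fallback is a placeholder
model    = Legion::LLM::Inference::Prompt.llm_setting(:default_model)             # nil if unset or on error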

.request(message, provider:, model:, intent: nil, tier: nil, schema: nil, tools: nil, escalate: nil, max_escalations: 3, thinking: nil, temperature: nil, max_tokens: nil, tracing: nil, agent: nil, caller: nil, cache: nil, quality_check: nil) ⇒ Object

Pinned: caller specifies exact provider+model. Full pipeline runs in-process.



# File 'lib/legion/llm/inference/prompt.rb', line 85

def request(message, # rubocop:disable Metrics/ParameterLists
            provider:,
            model:,
            intent: nil,
            tier: nil,
            schema: nil,
            tools: nil,
            escalate: nil,
            max_escalations: 3,
            thinking: nil,
            temperature: nil,
            max_tokens: nil,
            tracing: nil,
            agent: nil,
            caller: nil,
            cache: nil,
            quality_check: nil,
            **)
  auto_route = Inference::Request.auto_routing_model?(model)
  if !auto_route && (provider.nil? || model.nil?)
    raise LLMError, "Prompt.request: provider and model must be set (got provider=#{provider.inspect}, model=#{model.inspect}). " \
                    'Configure Legion::Settings[:llm][:default_provider] and [:default_model], or pass them explicitly.'
  end

  pipeline_request = build_pipeline_request(
    message, provider: provider, model: model, intent: intent, tier: tier,
             schema: schema, tools: tools,
             escalate: escalate, max_escalations: max_escalations,
             thinking: thinking, temperature: temperature, max_tokens: max_tokens,
             tracing: tracing, agent: agent, caller: caller, cache: cache,
             quality_check: quality_check, **
  )

  executor = Inference::Executor.new(pipeline_request)
  executor.call
end
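
A minimal pinned-call sketch with placeholder provider and model values; unless the model is an auto-routing model, a nil provider or model raises LLMError before the pipeline request is built.

response = Legion::LLM::Inference::Prompt.request(
  'Explain the retry policy in one paragraph.',
  provider: :anthropic,      # placeholder provider name
  model: 'example-model-v1', # placeholder model name
  temperature: 0.2
)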

.summarize(messages, tools: []) ⇒ Object

Condense a conversation or feedback history into a shorter form.



# File 'lib/legion/llm/inference/prompt.rb', line 123

def summarize(messages, tools: [], **)
  prompt = build_summarize_prompt(messages)
  dispatch(prompt, tools: tools, **)
end
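
A sketch assuming messages is an array of prior conversation turns; the exact shape expected by build_summarize_prompt is not shown in this file, so the hash format below is an assumption.

# Assumed message format: role/content hashes for each prior turn.
history = [
  { role: 'user',      content: 'The deploy failed twice last night.' },
  { role: 'assistant', content: 'Both failures were timeout related.' }
]

summary = Legion::LLM::Inference::Prompt.summarize(history)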