Class: Legion::LLM::Inference::Executor

Inherits:

Object

Object
Legion::LLM::Inference::Executor

show all

Includes: ContextWindow, Escalation, Routing, ToolInjection, NativeToolLoop, RouteAttempts, Steps::Billing, Steps::Classification, Steps::ConfidenceScoring, Steps::Debate, Steps::GaiaAdvisory, Steps::KnowledgeCapture, Steps::Logging, Steps::Metering, Steps::PostResponse, Steps::PromptCache, Steps::RagContext, Steps::Rbac, Steps::SkillInjector, Steps::StickyPersist, Steps::StickyRunners, Steps::TokenBudget, Steps::ToolCalls, Steps::ToolDiscovery, Steps::ToolHistory, Steps::TriggerMatch, Legion::Logging::Helper

Defined in: lib/legion/llm/inference/executor.rb,
lib/legion/llm/inference/executor/routing.rb,
lib/legion/llm/inference/executor/escalation.rb,
lib/legion/llm/inference/executor/context_window.rb,
lib/legion/llm/inference/executor/tool_injection.rb,
lib/legion/llm/inference/executor/payload_builder.rb

Defined Under Namespace

Modules: ContextWindow, Escalation, PayloadBuilder, Routing, ToolInjection

Classes: ToolResultEvent

Constant Summary collapse

# Ordered step names that STEPS places ahead of :provider_call.
# NOTE(review): presumably iterated by execute_pre_provider_steps — confirm.
PRE_PROVIDER_STEPS =

%i[
  tracing_init idempotency conversation_uuid context_load
  rbac classification billing gaia_advisory tier_assignment rag_context
  trigger_match sticky_runners skill_injector tool_history_inject tool_discovery
  routing request_normalization token_budget
].freeze

# Ordered step names that STEPS places after :provider_call.
# NOTE(review): presumably iterated by execute_post_provider_steps — confirm.
POST_PROVIDER_STEPS =

%i[
  response_normalization post_response metering debate confidence_scoring
  tool_calls sticky_persist
  context_store knowledge_capture response_return
].freeze

# Complete ordered step list: the pre-provider steps, then :provider_call,
# then the post-provider steps.
STEPS =

(PRE_PROVIDER_STEPS + %i[provider_call] + POST_PROVIDER_STEPS).freeze

# Step names flagged as async-safe; each one also appears in POST_PROVIDER_STEPS.
# NOTE(review): presumably these may be scheduled on ASYNC_THREAD_POOL — confirm
# against the step runner.
ASYNC_SAFE_STEPS =

%i[post_response knowledge_capture response_return].freeze

# [opening_tag, closing_tag] string pairs for thinking-style markup.
# NOTE(review): presumably consumed by the thinking-stripping helpers in
# ContextWindow — confirm.
THINKING_TAG_PAIRS =

[
  ['<thinking>', '</thinking>'],
  ['<think>',    '</think>'],
  ['<thought>',  '</thought>']
].freeze

# Regexps identifying configuration-type errors. Only the first pattern is
# case-sensitive; the rest carry the /i flag.
# NOTE(review): presumably matched against provider error messages by
# Escalation#config_error? — confirm.
CONFIG_ERROR_PATTERNS =

[
  /AccessDeniedException/,
  /InvalidModel/i,
  /model.*not found/i,
  /not authorized/i,
  /AWS Marketplace/i
].freeze

# Regexps identifying errors about the shape of the request payload
# (schema fields, indexed tools/messages entries, validation failures).
# NOTE(review): presumably matched by Escalation#request_payload_error? — confirm.
REQUEST_PAYLOAD_ERROR_PATTERNS =

[
  /input_schema/i,
  /tools\.\d+/,
  /messages\.\d+/,
  /Field required/i,
  /ValidationException/
].freeze

# Case-insensitive regexps identifying context-length / input-token overflow errors.
# NOTE(review): presumably matched by Escalation#context_overflow_error? — confirm.
CONTEXT_OVERFLOW_ERROR_PATTERNS =

[
  /maximum context length/i,
  /context length.*input_tokens/i,
  /prompt contains at least \d+ input tokens/i
].freeze

# Fixed pool of 4 worker threads, built once when this constant is evaluated
# and therefore shared by every Executor instance.
# NOTE(review): per concurrent-ruby, :caller_runs executes a rejected task on
# the submitting thread instead of raising or discarding it — confirm this is
# the intended back-pressure behaviour.
ASYNC_THREAD_POOL =

Concurrent::FixedThreadPool.new(4, fallback_policy: :caller_runs)

Instance Attribute Summary collapse

#audit ⇒ Object readonly

Returns the value of attribute audit.
#confidence_score ⇒ Object readonly

Returns the value of attribute confidence_score.
#discovered_tools ⇒ Object readonly

Returns the value of attribute discovered_tools.
#enrichments ⇒ Object readonly

Returns the value of attribute enrichments.
#profile ⇒ Object readonly

Returns the value of attribute profile.
#request ⇒ Object readonly

Returns the value of attribute request.
#timeline ⇒ Object readonly

Returns the value of attribute timeline.
#tool_event_handler ⇒ Object

Returns the value of attribute tool_event_handler.
#tracing ⇒ Object readonly

Returns the value of attribute tracing.
#warnings ⇒ Object readonly

Returns the value of attribute warnings.

Instance Method Summary collapse

#call ⇒ Object
#call_responses(body:, stream: false, stream_observer: nil, &block) ⇒ Object

N×N: Delegates to the canonical execution path.
#call_stream(stream_observer: nil, &block) ⇒ Object
#context_accounting ⇒ Object
#initialize(request) ⇒ Executor constructor

A new instance of Executor.

Methods included from Steps::StickyPersist

#step_sticky_persist

Methods included from Steps::ToolHistory

#step_tool_history_inject

Methods included from Steps::StickyRunners

#step_sticky_runners

Methods included from Steps::Metering

build_event, content_fields, flush_spool, identity_fields, operational_fields, publish_event, publish_or_spool, timing_and_context, token_fields

Methods included from Steps::Debate

#debate_enabled?, #gaia_debate_trigger?, #run_debate, #step_debate

Methods included from Steps::PromptCache

#apply_cache_control, #apply_conversation_breakpoint, #sort_tools_deterministically

Methods included from Steps::TokenBudget

#step_token_budget

Methods included from Steps::ConfidenceScoring

#step_confidence_scoring

Methods included from Steps::KnowledgeCapture

#step_knowledge_capture

Methods included from Steps::ToolCalls

#step_tool_calls

Methods included from Steps::ToolDiscovery

#step_tool_discovery

Methods included from Steps::SkillInjector

#step_skill_injector

Methods included from Steps::TriggerMatch

#step_trigger_match

Methods included from Steps::RagContext

#step_rag_context

Methods included from Steps::PostResponse

#step_post_response

Methods included from Steps::GaiaAdvisory

#build_partner_context, #step_gaia_advisory

Methods included from Steps::Billing

#step_billing

Methods included from Steps::Classification

#step_classification

Methods included from Steps::Rbac

#step_rbac

Methods included from ToolInjection

#add_native_tool_definition, #add_pinned_special_tool_definitions, #add_registry_tool_definitions, #add_requested_deferred_tool_definitions_from_settings, #add_settings_extensions_tool_definitions, #client_tool_passthrough_allowed?, #client_tool_passthrough_enabled?, #client_tool_passthrough_list, #client_tool_passthrough_name_variants, #client_tool_policy_variants, #native_dispatch_chat_options, #native_dispatch_options, #native_dispatch_thinking, #native_dispatch_tools, #native_tool_definition_duplicate?, #native_tool_definition_name_variants, #native_tool_definitions, #native_tool_loop_continuation_prompt, #native_tool_loop_system, #non_executable_client_tool?, #record_system_accounting, #record_tool_accounting, #registry_tool_injection_requested?, #request_tool_names, #request_tool_source, #resolve_registry_tool_source

Methods included from ContextWindow

#compact_to_fit, #empty_assistant_message?, #enforce_context_window, #estimate_message_tokens, #last_user_message_index, #native_dispatch_messages, #resolved_context_window, #strip_leading_thinking_block, #strip_thinking_from_history, #tool_result_message?, #trim_oversized_tool_results

Methods included from Escalation

#account_specific_error?, #authentication_error?, #build_routing_payload_from_resolved, #classify_and_accumulate_exclusions, #classify_error, #client_stream_error?, #config_error?, #context_overflow_error?, #emit_error_audit, #emit_escalation_attempt_audit, #emit_escalation_attempt_metering, #error_metadata, #escalation_attempt_hash, #execute_provider_request, #execute_provider_request_native, #execute_provider_request_stream, #execute_provider_request_stream_native, #extract_retry_after, #internal_error?, #pipeline_escalation_enabled?, #pipeline_escalation_max_attempts, #record_escalation_failure, #record_provider_response, #report_provider_failure, #report_provider_health, #request_payload_error?, #run_provider_call_single, #run_provider_call_with_attempts, #select_next_lane, #step_provider_call, #step_provider_call_stream

Methods included from Routing

#apply_proactive_tier_assignment, #apply_routing_resolution, #body_routing_hints_enabled?, #chain_required_capabilities, #estimate_request_tokens, #fallback_model_for_resolved_provider, #inferred_provider_tier, #local_provider?, #merge_response_offering_metadata, #merge_routing_intent, #native_tools_requested_for_routing?, #normalize_offering_metadata, #normalize_required_capabilities, #provider_scoped_instance, #record_forced_tier_selection, #request_has_vision_content?, #request_requires_thinking?, #resolve_model_to_local_provider, #resolve_provider_instance, #resolve_routing_state, #routing_field_explicit?, #routing_intent_for_request, #routing_intent_present?, #routing_model_preference, #routing_request_state, #routing_resolution_for, #step_request_normalization, #step_routing, #step_tier_assignment, #use_native_dispatch?

Constructor Details

#initialize(request) ⇒ `Executor`

Returns a new instance of Executor.

# File 'lib/legion/llm/inference/executor.rb', line 105

# Builds an executor for a single request and resets all per-run state.
#
# Every instance variable the pipeline later fills in is given its empty or
# nil starting value here, so readers never observe an undefined ivar.
#
# @param request [Object] the inference request; must respond to #caller
#   (used to derive the profile) and, for logging in the call methods, #id
def initialize(request)
  # Request identity and derived profile.
  @request = request
  @profile = Profile.derive(request.caller)

  # Observability and bookkeeping collected while the steps run.
  @timeline = Timeline.new
  @tracing = nil
  @enrichments = {}
  @audit = {}
  @warnings = []
  @timestamps = { received: Time.now }
  @raw_response = nil
  @exchange_id = nil

  # Tool discovery results.
  @discovered_tools = []
  @triggered_tools = []

  # Routing resolution.
  @resolved_provider = nil
  @resolved_instance = nil
  @resolved_model = nil
  @resolved_tier = nil
  @resolved_offering_id = nil
  @resolved_offering_metadata = {}

  # Scoring and escalation tracking.
  @confidence_score = nil
  @escalation_history = []
  @route_attempts = []
  @current_escalation_context = nil
  @proactive_tier_assignment = nil

  # Streaming and tool-event hooks. @stream_observer is assigned by
  # #call_stream and #call_responses and reset to nil afterwards; starting it
  # at nil keeps it defined on the plain #call path as well.
  @tool_event_handler = nil
  @stream_observer = nil

  # Sticky/tool-history state.
  @sticky_turn_snapshot = nil
  @pending_tool_history = Concurrent::Array.new
  @pending_tool_history_mutex = Mutex.new
  @deferred_tool_audits = []
  @injected_tool_map = {}
  @native_tool_source_map = {}
  @freshly_triggered_keys = []

  @context_accounting = ContextAccounting.empty
end

Instance Attribute Details

#audit ⇒ `Object` (readonly)

Returns the value of attribute audit.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @audit, which the constructor initializes to an empty Hash.
def audit
  @audit
end

#confidence_score ⇒ `Object` (readonly)

Returns the value of attribute confidence_score.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @confidence_score, which is nil right after construction.
def confidence_score
  @confidence_score
end

#discovered_tools ⇒ `Object` (readonly)

Returns the value of attribute discovered_tools.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @discovered_tools, which the constructor initializes to an empty Array.
def discovered_tools
  @discovered_tools
end

#enrichments ⇒ `Object` (readonly)

Returns the value of attribute enrichments.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @enrichments, which the constructor initializes to an empty Hash.
def enrichments
  @enrichments
end

#profile ⇒ `Object` (readonly)

Returns the value of attribute profile.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @profile, which the constructor derives via Profile.derive(request.caller).
def profile
  @profile
end

#request ⇒ `Object` (readonly)

Returns the value of attribute request.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @request, the request object handed to the constructor.
def request
  @request
end

#timeline ⇒ `Object` (readonly)

Returns the value of attribute timeline.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @timeline, a Timeline instance created by the constructor.
def timeline
  @timeline
end

#tool_event_handler ⇒ `Object`

Returns the value of attribute tool_event_handler.



36
37
38

# File 'lib/legion/llm/inference/executor.rb', line 36

# Reader for @tool_event_handler, which is nil right after construction.
# NOTE(review): the attribute summary does not mark this one readonly, so a
# matching writer presumably exists — confirm.
def tool_event_handler
  @tool_event_handler
end

#tracing ⇒ `Object` (readonly)

Returns the value of attribute tracing.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @tracing, which is nil right after construction.
def tracing
  @tracing
end

#warnings ⇒ `Object` (readonly)

Returns the value of attribute warnings.



34
35
36

# File 'lib/legion/llm/inference/executor.rb', line 34

# Reader for @warnings, which the constructor initializes to an empty Array.
def warnings
  @warnings
end

Instance Method Details

#call ⇒ `Object`

# File 'lib/legion/llm/inference/executor.rb', line 140

# Runs the whole pipeline synchronously and returns the built response.
#
# While the steps run, Thread.current[:legion_llm_in_pipeline] is set to true.
# On exit the flag is put back to whatever it was on entry (nil when this is
# the outermost run) instead of being forced to nil, so a re-entrant
# invocation on the same thread does not erase the marker of an enclosing
# pipeline run. Exceptions from any step propagate after cleanup.
#
# @return [Object] the value returned by build_response
def call
  previously_in_pipeline = Thread.current[:legion_llm_in_pipeline]
  set_log_context
  Thread.current[:legion_llm_in_pipeline] = true
  log.debug "[llm][executor] action=call request_id=#{@request.id} profile=#{@profile}"
  execute_steps
  build_response
ensure
  # Restore rather than clear: see the method comment on re-entrancy.
  Thread.current[:legion_llm_in_pipeline] = previously_in_pipeline
  clear_log_context
end

#call_responses(body:, stream: false, stream_observer: nil, &block) ⇒ `Object`

N×N: Delegates to the canonical execution path. The API namespace translator has already parsed the Responses API format into canonical form. The provider adapter decides how to wire canonical requests internally — the executor is format-agnostic.

# File 'lib/legion/llm/inference/executor.rb', line 172

# Runs the pipeline for a Responses-API style request via the canonical path.
#
# The pre-provider steps run first, then either the streaming or the
# non-streaming provider call, then the post-provider steps. Streaming is used
# only when +stream+ is truthy AND a block is given.
#
# Thread.current[:legion_llm_in_pipeline] is true while the steps run and is
# restored to its entry value afterwards (not forced to nil), so a re-entrant
# invocation on the same thread does not erase an enclosing run's marker.
#
# @param body [Object] accepted for interface compatibility; not read here
# @param stream [Boolean] request streaming (honoured only with a block)
# @param stream_observer [Object, nil] held in @stream_observer for the
#   duration of the call, then reset to nil
# @yield forwarded to step_provider_call_stream when streaming
# @return [Object] the value returned by build_response
def call_responses(body:, stream: false, stream_observer: nil, &block) # rubocop:disable Lint/UnusedMethodArgument
  previously_in_pipeline = Thread.current[:legion_llm_in_pipeline]
  @stream_observer = stream_observer
  set_log_context
  Thread.current[:legion_llm_in_pipeline] = true
  log.debug "[llm][executor] action=call_responses->canonical request_id=#{@request.id} profile=#{@profile} stream=#{stream}"

  execute_pre_provider_steps
  if stream && block
    step_provider_call_stream(&block)
  else
    step_provider_call
  end
  execute_post_provider_steps
  build_response
ensure
  # Restore rather than clear: see the method comment on re-entrancy.
  Thread.current[:legion_llm_in_pipeline] = previously_in_pipeline
  @stream_observer = nil
  clear_log_context
end

#call_stream(stream_observer: nil, &block) ⇒ `Object`

# File 'lib/legion/llm/inference/executor.rb', line 151

# Runs the pipeline with a streaming provider call.
#
# Without a block there is nothing to stream to, so this delegates to #call.
# With a block, the pre-provider steps run, the block is forwarded to
# step_provider_call_stream, and the post-provider steps follow.
#
# Thread.current[:legion_llm_in_pipeline] is true while the steps run and is
# restored to its entry value afterwards (not forced to nil), so a re-entrant
# invocation on the same thread does not erase an enclosing run's marker.
#
# @param stream_observer [Object, nil] held in @stream_observer for the
#   duration of the call, then reset to nil
# @yield forwarded to step_provider_call_stream
# @return [Object] the value returned by build_response (or by #call when no
#   block is given)
def call_stream(stream_observer: nil, &block)
  previously_in_pipeline = Thread.current[:legion_llm_in_pipeline]
  @stream_observer = stream_observer
  return call unless block

  set_log_context
  Thread.current[:legion_llm_in_pipeline] = true
  log.debug "[llm][executor] action=call_stream request_id=#{@request.id} profile=#{@profile}"
  execute_pre_provider_steps
  step_provider_call_stream(&block)
  execute_post_provider_steps
  build_response
ensure
  @stream_observer = nil
  # Restore rather than clear: see the method comment on re-entrancy.
  Thread.current[:legion_llm_in_pipeline] = previously_in_pipeline
  clear_log_context
end

#context_accounting ⇒ `Object`



38
39
40

# File 'lib/legion/llm/inference/executor.rb', line 38

# Returns the current context accounting object, creating and remembering an
# empty one (ContextAccounting.empty) when none is set yet.
def context_accounting
  return @context_accounting if @context_accounting

  @context_accounting = ContextAccounting.empty
end

Class: Legion::LLM::Inference::Executor

Defined Under Namespace

Constant Summary collapse

Constants included from Steps::StickyPersist

Constants included from Steps::Debate

Constants included from Steps::KnowledgeCapture

Constants included from Steps::Classification

Constants included from NativeToolLoop