Module: Legion::LLM::ShadowEval

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/shadow_eval.rb

Constant Summary

MAX_HISTORY = 100

Class Method Summary

Class Method Details

.clear_history ⇒ Object



72
73
74
# File 'lib/legion/llm/shadow_eval.rb', line 72

# Discard every recorded shadow comparison.
#
# @return [Array] the fresh, empty history
def clear_history
  @history = []
end

.compare(primary, shadow, shadow_model) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/legion/llm/shadow_eval.rb', line 48

# Build a comparison record between the primary response and its shadow run.
#
# @param primary [Hash] the primary response (:model, :content, :usage keys read)
# @param shadow [Hash] the shadow response (:content, :usage keys read)
# @param shadow_model [String] identifier of the shadow model used
# @return [Hash] metrics including length ratio, per-response cost, and savings
def compare(primary, shadow, shadow_model)
  base_len   = primary[:content]&.length || 0
  mirror_len = shadow[:content]&.length || 0

  base_cost   = estimate_cost(primary[:model], primary[:usage])
  mirror_cost = estimate_cost(shadow_model, shadow[:usage])

  # Guard both ratios against division by zero; savings rounded to 4 places.
  ratio   = base_len.zero?  ? 0.0 : mirror_len.fdiv(base_len)
  savings = base_cost.zero? ? 0.0 : ((base_cost - mirror_cost) / base_cost).round(4)

  {
    primary_model:  primary[:model],
    shadow_model:   shadow_model,
    primary_tokens: primary[:usage],
    shadow_tokens:  shadow[:usage],
    length_ratio:   ratio,
    primary_cost:   base_cost,
    shadow_cost:    mirror_cost,
    cost_savings:   savings,
    evaluated_at:   Time.now.utc
  }
end

.enabled? ⇒ Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/legion/llm/shadow_eval.rb', line 12

# Whether shadow evaluation is switched on in settings.
# Requires the literal value true; any other value counts as disabled.
#
# @return [Boolean]
def enabled?
  flag = Legion::Settings.dig(:llm, :shadow, :enabled)
  flag == true
end

.evaluate(primary_response:, messages: nil, shadow_model: nil) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/legion/llm/shadow_eval.rb', line 23

# Replay the conversation against a cheaper shadow model, compare it with the
# primary response, and record/emit the resulting metrics.
#
# @param primary_response [Hash] response already produced by the primary model
# @param messages [Array, nil] conversation to replay against the shadow model
# @param shadow_model [String, nil] explicit shadow model; defaults to settings,
#   then 'gpt-4o-mini'
# @return [Hash] the comparison record, or { error:, shadow_model: } on failure
def evaluate(primary_response:, messages: nil, shadow_model: nil)
  shadow_model ||= Legion::Settings.dig(:llm, :shadow, :model) || 'gpt-4o-mini'
  log.info(
    "[llm][shadow] evaluate primary_model=#{primary_response[:model]} shadow_model=#{shadow_model}"
  )

  # NOTE(review): chat_single is reached via #send — presumably private on
  # Legion::LLM; confirm this bypass is intentional.
  mirror = Legion::LLM.send(:chat_single,
                            model: shadow_model, provider: nil,
                            messages: messages, intent: nil,
                            tier: nil)

  result = compare(primary_response, mirror, shadow_model)
  record(result)
  log.info(
    "[llm][shadow] recorded primary_model=#{result[:primary_model]} " \
    "shadow_model=#{result[:shadow_model]} cost_savings=#{result[:cost_savings]}"
  )
  Legion::Events.emit('llm.shadow_eval', result) if defined?(Legion::Events)
  result
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.shadow_eval.evaluate', shadow_model: shadow_model)
  log.error("[llm][shadow] evaluate_failed shadow_model=#{shadow_model} error=#{e.message}")
  { error: e.message, shadow_model: shadow_model }
end

.history ⇒ Object



68
69
70
# File 'lib/legion/llm/shadow_eval.rb', line 68

# Lazily-initialized list of recorded shadow comparisons.
#
# @return [Array] the mutable in-memory history
def history
  @history || (@history = [])
end

.should_sample? ⇒ Boolean

Returns:

  • (Boolean)


16
17
18
19
20
21
# File 'lib/legion/llm/shadow_eval.rb', line 16

# Decide whether this request should get a shadow evaluation.
# Always false when disabled; otherwise samples at the configured rate
# (defaulting to 10%).
#
# @return [Boolean]
def should_sample?
  return false unless enabled?

  threshold = Legion::Settings.dig(:llm, :shadow, :sample_rate) || 0.1
  rand < threshold
end

.summary ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/legion/llm/shadow_eval.rb', line 76

# Aggregate the recorded history into a single report hash.
#
# @return [Hash] totals and averages across all evaluations, or the
#   empty-summary placeholder when nothing has been recorded
def summary
  snapshot = history.dup
  return empty_summary if snapshot.empty?

  ratios  = snapshot.map { |entry| entry[:length_ratio] }
  savings = snapshot.map { |entry| entry[:cost_savings] }

  {
    total_evaluations:  snapshot.length,
    avg_length_ratio:   avg(ratios),
    avg_cost_savings:   avg(savings),
    total_primary_cost: snapshot.sum { |entry| entry[:primary_cost] }.round(6),
    total_shadow_cost:  snapshot.sum { |entry| entry[:shadow_cost] }.round(6),
    models_evaluated:   snapshot.map { |entry| entry[:shadow_model] }.uniq
  }
end