Module: Legion::LLM::ShadowEval

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/shadow_eval.rb

Constant Summary

MAX_HISTORY = 100

Class Method Summary

Class Method Details

.clear_history ⇒ Object



72
73
74
# File 'lib/legion/llm/shadow_eval.rb', line 72

# Discard every recorded shadow comparison.
#
# @return [Array] the fresh, empty history
def clear_history
  @history = []
end

.compare(primary, shadow, shadow_model) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/legion/llm/shadow_eval.rb', line 48

# Build a comparison record between the primary response and its shadow run.
#
# @param primary [Hash] the primary response (:model, :content, :usage keys read)
# @param shadow [Hash] the shadow response (:content, :usage keys read)
# @param shadow_model [String] identifier of the shadow model used
# @return [Hash] metrics including length ratio, per-response cost, and savings
def compare(primary, shadow, shadow_model)
  base_len   = primary[:content]&.length || 0
  mirror_len = shadow[:content]&.length || 0

  base_cost   = estimate_cost(primary[:model], primary[:usage])
  mirror_cost = estimate_cost(shadow_model, shadow[:usage])

  # Guard both ratios against division by zero; savings rounded to 4 places.
  ratio   = base_len.zero?  ? 0.0 : mirror_len.fdiv(base_len)
  savings = base_cost.zero? ? 0.0 : ((base_cost - mirror_cost) / base_cost).round(4)

  {
    primary_model:  primary[:model],
    shadow_model:   shadow_model,
    primary_tokens: primary[:usage],
    shadow_tokens:  shadow[:usage],
    length_ratio:   ratio,
    primary_cost:   base_cost,
    shadow_cost:    mirror_cost,
    cost_savings:   savings,
    evaluated_at:   Time.now.utc
  }
end

.enabled? ⇒ Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/legion/llm/shadow_eval.rb', line 12

# Whether shadow evaluation is switched on in settings.
# Requires the literal value true; any other value counts as disabled.
#
# @return [Boolean]
def enabled?
  flag = Legion::Settings.dig(:llm, :shadow, :enabled)
  flag == true
end

.evaluate(primary_response:, messages: nil, shadow_model: nil) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/legion/llm/shadow_eval.rb', line 23

# Replay the conversation against a cheaper shadow model, compare it with the
# primary response, and record/emit the resulting metrics.
#
# @param primary_response [Hash] response already produced by the primary model
# @param messages [Array, nil] conversation to replay against the shadow model
# @param shadow_model [String, nil] explicit shadow model; defaults to settings,
#   then 'gpt-4o-mini'
# @return [Hash] the comparison record, or { error:, shadow_model: } on failure
def evaluate(primary_response:, messages: nil, shadow_model: nil)
  shadow_model ||= Legion::Settings.dig(:llm, :shadow, :model) || 'gpt-4o-mini'
  log.info(
    "[llm][shadow] evaluate primary_model=#{primary_response[:model]} shadow_model=#{shadow_model}"
  )

  # NOTE(review): chat_single is reached via #send — presumably private on
  # Legion::LLM; confirm this bypass is intentional.
  mirror = Legion::LLM.send(:chat_single,
                            model: shadow_model, provider: nil,
                            messages: messages, intent: nil,
                            tier: nil)

  result = compare(primary_response, mirror, shadow_model)
  record(result)
  log.info(
    "[llm][shadow] recorded primary_model=#{result[:primary_model]} " \
    "shadow_model=#{result[:shadow_model]} cost_savings=#{result[:cost_savings]}"
  )
  Legion::Events.emit('llm.shadow_eval', result) if defined?(Legion::Events)
  result
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.shadow_eval.evaluate', shadow_model: shadow_model)
  log.error("[llm][shadow] evaluate_failed shadow_model=#{shadow_model} error=#{e.message}")
  { error: e.message, shadow_model: shadow_model }
end

.history ⇒ Object



68
69
70
# File 'lib/legion/llm/shadow_eval.rb', line 68

# Lazily-initialized list of recorded shadow comparisons.
#
# @return [Array] the mutable in-memory history
def history
  @history || (@history = [])
end

.should_sample? ⇒ Boolean

Returns:

  • (Boolean)


16
17
18
19
20
21
# File 'lib/legion/llm/shadow_eval.rb', line 16

# Decide whether this request should get a shadow evaluation.
# Always false when disabled; otherwise samples at the configured rate
# (defaulting to 10%).
#
# @return [Boolean]
def should_sample?
  return false unless enabled?

  threshold = Legion::Settings.dig(:llm, :shadow, :sample_rate) || 0.1
  rand < threshold
end

.summary ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/legion/llm/shadow_eval.rb', line 76

# Aggregate the recorded history into a single report hash.
#
# @return [Hash] totals and averages across all evaluations, or the
#   empty-summary placeholder when nothing has been recorded
def summary
  snapshot = history.dup
  return empty_summary if snapshot.empty?

  ratios  = snapshot.map { |entry| entry[:length_ratio] }
  savings = snapshot.map { |entry| entry[:cost_savings] }

  {
    total_evaluations:  snapshot.length,
    avg_length_ratio:   avg(ratios),
    avg_cost_savings:   avg(savings),
    total_primary_cost: snapshot.sum { |entry| entry[:primary_cost] }.round(6),
    total_shadow_cost:  snapshot.sum { |entry| entry[:shadow_cost] }.round(6),
    models_evaluated:   snapshot.map { |entry| entry[:shadow_model] }.uniq
  }
end