Module: Legion::LLM::ShadowEval
- Extended by:
- Legion::Logging::Helper
- Defined in:
- lib/legion/llm/shadow_eval.rb
Constant Summary
- MAX_HISTORY = 100
Class Method Summary
- .clear_history ⇒ Object
- .compare(primary, shadow, shadow_model) ⇒ Object
- .enabled? ⇒ Boolean
- .evaluate(primary_response:, messages: nil, shadow_model: nil) ⇒ Object
- .history ⇒ Object
- .should_sample? ⇒ Boolean
- .summary ⇒ Object
Class Method Details
.clear_history ⇒ Object
72 73 74 |
# File 'lib/legion/llm/shadow_eval.rb', line 72

# Drops every stored evaluation by pointing @history at a brand-new empty
# array. The previous array object is left untouched, so anything still
# holding a reference to it keeps its contents.
#
# @return [Array] the new, empty history array
def clear_history
  @history = []
end
.compare(primary, shadow, shadow_model) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/legion/llm/shadow_eval.rb', line 48

# Builds a side-by-side comparison of a primary response and a shadow response.
#
# Costs come from estimate_cost, which is defined outside this excerpt
# (presumably returns a numeric cost -- confirm against its definition).
#
# @param primary [Hash] primary response; :content, :model and :usage are read
# @param shadow [Hash] shadow response; :content and :usage are read
# @param shadow_model [Object] model identifier used for the shadow call
# @return [Hash] models, usage, length ratio, both costs, relative cost
#   savings (rounded to 4 places) and a UTC timestamp
def compare(primary, shadow, shadow_model)
  # A missing :content counts as zero characters.
  primary_chars = primary[:content]&.length || 0
  shadow_chars = shadow[:content]&.length || 0

  cost_of_primary = estimate_cost(primary[:model], primary[:usage])
  cost_of_shadow = estimate_cost(shadow_model, shadow[:usage])

  # Both ratios fall back to 0.0 rather than dividing by zero.
  ratio =
    if primary_chars.zero?
      0.0
    else
      shadow_chars.to_f / primary_chars
    end

  savings =
    if cost_of_primary.zero?
      0.0
    else
      ((cost_of_primary - cost_of_shadow) / cost_of_primary).round(4)
    end

  {
    primary_model: primary[:model],
    shadow_model: shadow_model,
    primary_tokens: primary[:usage],
    shadow_tokens: shadow[:usage],
    length_ratio: ratio,
    primary_cost: cost_of_primary,
    shadow_cost: cost_of_shadow,
    cost_savings: savings,
    evaluated_at: Time.now.utc
  }
end
.enabled? ⇒ Boolean
12 13 14 |
# File 'lib/legion/llm/shadow_eval.rb', line 12

# Reports whether shadow evaluation is switched on.
#
# The comparison against literal true is intentional: any other value
# stored under llm.shadow.enabled (nil, strings, numbers) yields false.
#
# @return [Boolean]
def enabled?
  flag = Legion::Settings.dig(:llm, :shadow, :enabled)
  flag == true
end
.evaluate(primary_response:, messages: nil, shadow_model: nil) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/legion/llm/shadow_eval.rb', line 23

# Sends the conversation to a shadow model, compares the result with the
# primary response, records the comparison and returns it.
#
# Any StandardError raised along the way is reported and turned into an
# error hash instead of propagating to the caller.
#
# @param primary_response [Hash] primary result; :model is logged and the
#   whole hash is handed to compare
# @param messages [Object, nil] conversation forwarded as-is to the shadow chat call
# @param shadow_model [String, nil] shadow model; defaults to the
#   llm.shadow.model setting, then to 'gpt-4o-mini'
# @return [Hash] the comparison on success, or
#   { error: <exception message>, shadow_model: <model> } on failure
def evaluate(primary_response:, messages: nil, shadow_model: nil)
  shadow_model ||= Legion::Settings.dig(:llm, :shadow, :model) || 'gpt-4o-mini'
  log.info(
    "[llm][shadow] evaluate primary_model=#{primary_response[:model]} shadow_model=#{shadow_model}"
  )

  # chat_single is reached through send; presumably it is not part of
  # Legion::LLM's public interface -- confirm against that module.
  shadow_response = Legion::LLM.send(:chat_single, model: shadow_model, provider: nil,
                                                   messages: messages, intent: nil, tier: nil)
  comparison = compare(primary_response, shadow_response, shadow_model)
  record(comparison)
  log.info(
    "[llm][shadow] recorded primary_model=#{comparison[:primary_model]} " \
    "shadow_model=#{comparison[:shadow_model]} cost_savings=#{comparison[:cost_savings]}"
  )

  # The event bus is optional; only emit when it has been loaded.
  Legion::Events.emit('llm.shadow_eval', comparison) if defined?(Legion::Events)
  comparison
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.shadow_eval.evaluate', shadow_model: shadow_model)
  log.error("[llm][shadow] evaluate_failed shadow_model=#{shadow_model} error=#{e.message}")
  { error: e.message, shadow_model: shadow_model }
end
.history ⇒ Object
68 69 70 |
# File 'lib/legion/llm/shadow_eval.rb', line 68

# Returns the stored evaluation history, creating an empty array the first
# time it is requested. The same array object is returned on later calls.
#
# @return [Array]
def history
  return @history if @history

  @history = []
end
.should_sample? ⇒ Boolean
16 17 18 19 20 21 |
# File 'lib/legion/llm/shadow_eval.rb', line 16

# Decides at random whether the current request should be shadow-evaluated.
#
# Always false when enabled? is false. Otherwise a fresh random draw is
# compared against the llm.shadow.sample_rate setting, which falls back
# to 0.1 when the setting is absent.
#
# @return [Boolean]
def should_sample?
  if enabled?
    threshold = Legion::Settings.dig(:llm, :shadow, :sample_rate) || 0.1
    rand < threshold
  else
    false
  end
end
.summary ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/legion/llm/shadow_eval.rb', line 76

# Aggregates the stored evaluations into a single report.
#
# Works on a shallow copy of the history. With no entries it returns
# whatever empty_summary produces; avg and empty_summary are defined
# outside this excerpt.
#
# @return [Hash] evaluation count, averaged length ratio and cost savings,
#   summed primary/shadow costs (rounded to 6 places) and the distinct
#   shadow models seen
def summary
  snapshot = history.dup
  return empty_summary if snapshot.empty?

  mean_ratio = avg(snapshot.map { |record| record[:length_ratio] })
  mean_savings = avg(snapshot.map { |record| record[:cost_savings] })
  primary_total = snapshot.sum { |record| record[:primary_cost] }
  shadow_total = snapshot.sum { |record| record[:shadow_cost] }
  shadow_models = snapshot.map { |record| record[:shadow_model] }

  {
    total_evaluations: snapshot.size,
    avg_length_ratio: mean_ratio,
    avg_cost_savings: mean_savings,
    total_primary_cost: primary_total.round(6),
    total_shadow_cost: shadow_total.round(6),
    models_evaluated: shadow_models.uniq
  }
end