Class: RubyLLM::Agents::Eval::EvalRun
- Inherits:
-
Object
- Object
- RubyLLM::Agents::Eval::EvalRun
- Defined in:
- lib/ruby_llm/agents/eval/eval_run.rb
Overview
Aggregate results from running an eval suite.
Provides score calculation, pass/fail counts, failure details, and a formatted summary string.
Instance Attribute Summary
-
#completed_at ⇒ Object
readonly
Returns the value of attribute completed_at.
-
#model ⇒ Object
readonly
Returns the value of attribute model.
-
#pass_threshold ⇒ Object
readonly
Returns the value of attribute pass_threshold.
-
#results ⇒ Object
readonly
Returns the value of attribute results.
-
#started_at ⇒ Object
readonly
Returns the value of attribute started_at.
-
#suite ⇒ Object
readonly
Returns the value of attribute suite.
Instance Method Summary
- #agent_class ⇒ Object
- #duration_ms ⇒ Object
- #errors ⇒ Object
- #failed ⇒ Object
- #failures ⇒ Object
-
#initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:) ⇒ EvalRun
constructor
A new instance of EvalRun.
- #passed ⇒ Object
-
#score ⇒ Object
Average score across all test cases (0.0 to 1.0).
- #score_pct ⇒ Object
- #summary ⇒ Object
- #to_h ⇒ Object
- #to_json(*args) ⇒ Object
- #total_cases ⇒ Object
- #total_cost ⇒ Object
Constructor Details
#initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:) ⇒ EvalRun
Returns a new instance of EvalRun.
14 15 16 17 18 19 20 21 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 14

# Builds an EvalRun by storing every keyword argument in the instance
# variable of the same name. Nothing is validated, copied, or frozen here.
#
# @param suite [Object] what was evaluated; #agent_class asks it for
#   `agent_class` when it responds to that message
# @param results [Enumerable] per-test-case result objects
# @param model [Object] model identifier, interpolated into #summary
# @param pass_threshold [Object] handed to each result's `passed?` / `failed?`
# @param started_at [Object, nil] start timestamp
#   (NOTE(review): presumably a Time — confirm against the caller)
# @param completed_at [Object, nil] end timestamp
#   (NOTE(review): presumably a Time — confirm against the caller)
def initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:)
  # What was run and how it is judged.
  @suite, @results, @model = suite, results, model
  @pass_threshold = pass_threshold

  # When it ran.
  @started_at, @completed_at = started_at, completed_at
end
Instance Attribute Details
#completed_at ⇒ Object (readonly)
Returns the value of attribute completed_at.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the end timestamp given to the constructor.
# NOTE(review): presumably a Time — #duration_ms subtracts #started_at
# from it and scales by 1000; confirm against the caller.
#
# @return [Object, nil] the current value of @completed_at
def completed_at
  @completed_at
end
#model ⇒ Object (readonly)
Returns the value of attribute model.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the model given to the constructor. It is
# interpolated into #summary and emitted under the :model key of #to_h.
#
# @return [Object] the current value of @model
def model
  @model
end
#pass_threshold ⇒ Object (readonly)
Returns the value of attribute pass_threshold.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the pass threshold given to the constructor.
# #passed, #failed and #failures forward it to each result's
# `passed?` / `failed?` predicate.
#
# @return [Object] the current value of @pass_threshold
def pass_threshold
  @pass_threshold
end
#results ⇒ Object (readonly)
Returns the value of attribute results.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the collection of per-test-case results given to
# the constructor. The same object is returned each time (no copy is made).
#
# @return [Object] the current value of @results
def results
  @results
end
#started_at ⇒ Object (readonly)
Returns the value of attribute started_at.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the start timestamp given to the constructor.
# NOTE(review): presumably a Time — #summary calls `strftime` on it and
# #duration_ms subtracts it from #completed_at; confirm against the caller.
#
# @return [Object, nil] the current value of @started_at
def started_at
  @started_at
end
#suite ⇒ Object (readonly)
Returns the value of attribute suite.
11 12 13 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Read-only accessor for the suite given to the constructor.
# #agent_class either asks it for `agent_class` or returns it as-is.
#
# @return [Object] the current value of @suite
def suite
  @suite
end
Instance Method Details
#agent_class ⇒ Object
23 24 25 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 23

# Resolves the agent this run belongs to.
#
# @return [Object] `suite.agent_class` when the suite responds to
#   `agent_class`; otherwise the suite object itself
def agent_class
  # Fall back to the suite itself when it does not expose an agent class.
  return suite unless suite.respond_to?(:agent_class)

  suite.agent_class
end
#duration_ms ⇒ Object
72 73 74 75 76 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 72

# Elapsed time of the run in whole milliseconds.
#
# The difference `completed_at - started_at` is multiplied by 1000 and
# truncated with `to_i`.
# NOTE(review): this assumes the difference is expressed in seconds
# (as with Time - Time) — confirm the timestamp type.
#
# @return [Integer] truncated milliseconds, or 0 when either timestamp
#   is nil/false
def duration_ms
  if started_at && completed_at
    elapsed = completed_at - started_at
    (elapsed * 1000).to_i
  else
    0
  end
end
#errors ⇒ Object
54 55 56 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 54

# Results that report themselves as errored.
#
# @return [Array] every element of #results whose `errored?` is truthy
def errors
  results.select { |result| result.errored? }
end
#failed ⇒ Object
46 47 48 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 46

# Number of results that fail at the run's pass threshold.
#
# @return [Integer] count of results whose `failed?(pass_threshold)`
#   is truthy
def failed
  results.count do |result|
    result.failed?(pass_threshold)
  end
end
#failures ⇒ Object
50 51 52 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 50

# The results that fail at the run's pass threshold (the objects
# themselves, where #failed returns only their count).
#
# @return [Array] every element of #results whose
#   `failed?(pass_threshold)` is truthy
def failures
  results.select do |result|
    result.failed?(pass_threshold)
  end
end
#passed ⇒ Object
42 43 44 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 42

# Number of results that pass at the run's pass threshold.
#
# @return [Integer] count of results whose `passed?(pass_threshold)`
#   is truthy
def passed
  results.count do |result|
    result.passed?(pass_threshold)
  end
end
#score ⇒ Object
Average score across all test cases (0.0 to 1.0).
28 29 30 31 32 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 28

# Arithmetic mean of `result.score.value` over all results.
# NOTE(review): the 0.0..1.0 range quoted in the page text depends on the
# individual score values; nothing here clamps the result.
#
# @return [Float] the mean score, or 0.0 when there are no results
#   (which also avoids dividing by zero)
def score
  return 0.0 if results.empty?

  total = results.sum { |result| result.score.value }
  case_count = results.size.to_f
  total / case_count
end
#score_pct ⇒ Object
34 35 36 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 34

# #score expressed as a percentage.
#
# @return [Numeric] `score * 100`, rounded to one decimal place
def score_pct
  percentage = score * 100
  percentage.round(1)
end
#summary ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 78

# Builds a human-readable, multi-line report for the run.
#
# Layout (lines joined with "\n"):
#   1. "<agent name> Eval — <start time as YYYY-MM-DD HH:MM>"
#   2. model, score percentage and passed/total counts
#   3. total cost (4 decimals) and duration in seconds (1 decimal)
#   4. optional "Failures:" section, one line per entry of #failures
#   5. optional "Errors:" section, one line per entry of #errors
#
# The agent name is `agent_class.name` when available, else `agent_class.to_s`.
#
# @return [String] the formatted summary
def summary
  agent_name = agent_class.respond_to?(:name) ? agent_class.name : agent_class.to_s

  # #duration_ms tolerates a missing start time, so do the same here
  # instead of raising NoMethodError on nil.strftime.
  started_label = started_at ? started_at.strftime("%Y-%m-%d %H:%M") : "n/a"

  lines = [
    "#{agent_name} Eval — #{started_label}",
    "Model: #{model} | Score: #{score_pct}% | #{passed}/#{total_cases} passed",
    "Cost: $#{format("%.4f", total_cost)} | Duration: #{(duration_ms / 1000.0).round(1)}s"
  ]

  failed_results = failures
  if failed_results.any?
    lines << "" << "Failures:"
    failed_results.each do |r|
      lines << " - #{r.test_case_name}: expected #{r.expected.inspect}, got #{r.actual.inspect} (#{r.score.reason})"
    end
  end

  errored_results = errors
  if errored_results.any?
    lines << "" << "Errors:"
    errored_results.each do |r|
      # The error may be an exception or a plain value; prefer its message
      # and fall back to the string form.
      error = r.error
      detail = error.respond_to?(:message) ? error.message : error.to_s
      lines << " - #{r.test_case_name}: #{detail}"
    end
  end

  lines.join("\n")
end
#to_h ⇒ Object
103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 103

# Hash representation of the run.
#
# Keys: :agent, :model, :score, :score_pct, :total_cases, :passed,
# :failed, :total_cost, :duration_ms and :results (each result converted
# with its own `to_h`). :agent is `agent_class.name` when available, else
# `agent_class.to_s`.
#
# @return [Hash{Symbol => Object}]
def to_h
  agent_name =
    if agent_class.respond_to?(:name)
      agent_class.name
    else
      agent_class.to_s
    end

  {
    agent: agent_name,
    model: model,
    score: score,
    score_pct: score_pct,
    total_cases: total_cases,
    passed: passed,
    failed: failed,
    total_cost: total_cost,
    duration_ms: duration_ms,
    results: results.map { |result| result.to_h }
  }
end
#to_json(*args) ⇒ Object
118 119 120 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 118

# Serializes the run by delegating to the `to_json` of its #to_h hash.
#
# @param args [Array] forwarded unchanged to `Hash#to_json`
# @return [Object] whatever `to_h.to_json(*args)` returns
#   (a JSON String with the standard json library)
def to_json(*args)
  payload = to_h
  payload.to_json(*args)
end
#total_cases ⇒ Object
38 39 40 |
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 38

# Total number of test-case results in this run, whether passed, failed
# or errored.
#
# @return [Integer] `results.size`
def total_cases
  case_count = results.size
  case_count
end