Class: RubyLLM::Agents::Eval::EvalRun

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/agents/eval/eval_run.rb

Overview

Aggregate results from running an eval suite.

Provides score calculation, pass/fail counts, failure details, and a formatted summary string.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:) ⇒ EvalRun

Returns a new instance of EvalRun.



14
15
16
17
18
19
20
21
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 14

# Captures everything needed to report on one eval run.
#
# Each keyword argument is stored unchanged in the instance variable of the
# same name; nothing is validated, copied or frozen here.
#
# @param suite [Object] what was evaluated (see #agent_class for how it is resolved)
# @param results [Object] collection of per-test-case results
# @param model [Object] model identifier, interpolated into the summary
# @param pass_threshold [Object] value handed to each result's passed?/failed?
# @param started_at [Object, nil] run start — presumably a Time; confirm with callers
# @param completed_at [Object, nil] run end — presumably a Time; confirm with callers
def initialize(suite:, results:, model:, pass_threshold:, started_at:, completed_at:)
  # What ran, and how it is judged.
  @model = model
  @suite = suite
  @pass_threshold = pass_threshold

  # What came out of it.
  @results = results

  # When it ran.
  @started_at = started_at
  @completed_at = completed_at
end

Instance Attribute Details

#completed_at ⇒ Object (readonly)

Returns the value of attribute completed_at.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @completed_at: returns the completed_at value the constructor stored.
def completed_at
  @completed_at
end

#model ⇒ Object (readonly)

Returns the value of attribute model.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @model: returns the model value the constructor stored.
def model
  @model
end

#pass_threshold ⇒ Object (readonly)

Returns the value of attribute pass_threshold.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @pass_threshold: returns the pass_threshold value the constructor stored.
def pass_threshold
  @pass_threshold
end

#results ⇒ Object (readonly)

Returns the value of attribute results.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @results: returns the results collection the constructor stored.
def results
  @results
end

#started_at ⇒ Object (readonly)

Returns the value of attribute started_at.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @started_at: returns the started_at value the constructor stored.
def started_at
  @started_at
end

#suite ⇒ Object (readonly)

Returns the value of attribute suite.



11
12
13
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 11

# Reader for @suite: returns the suite value the constructor stored.
def suite
  @suite
end

Instance Method Details

#agent_class ⇒ Object



23
24
25
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 23

# Resolves the thing that was evaluated.
#
# @return [Object] suite.agent_class when the suite publicly responds to
#   agent_class; otherwise the suite object itself
def agent_class
  return suite unless suite.respond_to?(:agent_class)

  suite.agent_class
end

#duration_ms ⇒ Object



72
73
74
75
76
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 72

# Elapsed time between started_at and completed_at, in whole milliseconds.
#
# The difference is multiplied by 1000 and truncated with to_i (not rounded).
#
# @return [Integer] elapsed milliseconds, or 0 when either timestamp is missing
def duration_ms
  if started_at && completed_at
    elapsed = completed_at - started_at
    (elapsed * 1000).to_i
  else
    0
  end
end

#errors ⇒ Object



54
55
56
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 54

# Results whose errored? predicate is truthy, in their original order.
#
# @return [Array] the errored subset of results
def errors
  results.select { |result| result.errored? }
end

#failed ⇒ Object



46
47
48
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 46

# Counts the results that report failure at the configured pass_threshold.
#
# @return [Integer] number of results for which failed?(pass_threshold) is truthy
def failed
  threshold = pass_threshold
  results.count { |result| result.failed?(threshold) }
end

#failures ⇒ Object



50
51
52
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 50

# Results that report failure at the configured pass_threshold, in original order.
#
# @return [Array] results for which failed?(pass_threshold) is truthy
def failures
  threshold = pass_threshold
  results.select { |result| result.failed?(threshold) }
end

#passed ⇒ Object



42
43
44
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 42

# Counts the results that report success at the configured pass_threshold.
#
# @return [Integer] number of results for which passed?(pass_threshold) is truthy
def passed
  threshold = pass_threshold
  results.count { |result| result.passed?(threshold) }
end

#score ⇒ Object

Average score across all test cases (0.0 to 1.0)



28
29
30
31
32
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 28

# Mean of score.value over every result.
#
# @return [Float] the average, or 0.0 when there are no results (which also
#   avoids dividing by zero)
def score
  if results.empty?
    0.0
  else
    points = results.sum { |result| result.score.value }
    # Float divisor keeps integer-valued scores from truncating.
    points / results.size.to_f
  end
end

#score_pct ⇒ Object



34
35
36
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 34

# The average score expressed as a percentage, rounded to one decimal place.
#
# @return [Float] score multiplied by 100, rounded to 1 decimal
def score_pct
  percentage = score * 100
  percentage.round(1)
end

#summary ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 78

# Renders a human-readable, multi-line report of the run.
#
# Layout:
#   1. header: agent name plus the start time (the time is omitted when
#      started_at is nil)
#   2. model, percentage score and passed/total counts
#   3. total cost and duration in seconds
#   4. optional "Failures:" section, one line per failing result
#   5. optional "Errors:" section, one line per errored result
#
# @return [String] the report, lines joined with "\n"
def summary
  agent_name = agent_class.respond_to?(:name) ? agent_class.name : agent_class.to_s

  # duration_ms already copes with a missing started_at; the header has to as
  # well, otherwise strftime on nil raises before anything is reported.
  header = "#{agent_name} Eval"
  header = "#{header} — #{started_at.strftime("%Y-%m-%d %H:%M")}" if started_at

  lines = [header]
  lines << "Model: #{model} | Score: #{score_pct}% | #{passed}/#{total_cases} passed"
  lines << "Cost: $#{"%.4f" % total_cost} | Duration: #{(duration_ms / 1000.0).round(1)}s"

  # failures filters results on every call, so filter once and reuse.
  failed_results = failures
  if failed_results.any?
    lines << ""
    lines << "Failures:"
    failed_results.each do |r|
      lines << "  - #{r.test_case_name}: expected #{r.expected.inspect}, got #{r.actual.inspect} (#{r.score.reason})"
    end
  end

  # Same for errors: one pass over results instead of two.
  errored_results = errors
  if errored_results.any?
    lines << ""
    lines << "Errors:"
    errored_results.each do |r|
      lines << "  - #{r.test_case_name}: #{r.error.message}"
    end
  end

  lines.join("\n")
end

#to_h ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 103

# Flattens the run into a plain Hash.
#
# The :agent entry is agent_class.name when agent_class responds to name,
# otherwise agent_class.to_s. Each result is converted with its own to_h.
#
# @return [Hash{Symbol => Object}] keys, in order: :agent, :model, :score,
#   :score_pct, :total_cases, :passed, :failed, :total_cost, :duration_ms, :results
def to_h
  agent_label =
    if agent_class.respond_to?(:name)
      agent_class.name
    else
      agent_class.to_s
    end

  {
    agent: agent_label,
    model: model,
    score: score,
    score_pct: score_pct,
    total_cases: total_cases,
    passed: passed,
    failed: failed,
    total_cost: total_cost,
    duration_ms: duration_ms,
    results: results.map { |result| result.to_h }
  }
end

#to_json(*args) ⇒ Object



118
119
120
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 118

# Serializes the run as JSON by encoding the Hash built by to_h.
#
# @param args [Array] passed through unchanged to the hash's to_json
# @return [Object] whatever the hash's to_json returns (a JSON String with the
#   standard json library loaded)
def to_json(*args)
  payload = to_h
  payload.to_json(*args)
end

#total_cases ⇒ Object



38
39
40
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 38

# Number of entries in results; simply returns results.size.
def total_cases
  results.size
end

#total_cost ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/ruby_llm/agents/eval/eval_run.rb', line 58

# Best-effort sum of the total_cost of the Execution record behind each result.
#
# A result contributes 0 when it has no execution_id, when no Execution is
# found for that id, or when the record's total_cost is nil. If the Execution
# constant is not defined at all, the total is 0.
#
# Deliberately forgiving: any StandardError raised while summing (for example
# a failing lookup) makes the whole method return 0 instead of propagating.
#
# NOTE(review): performs one Execution.find_by per result — worth batching if
# Execution supports it; confirm against the Execution API.
#
# @return [Numeric] summed cost, or 0 when nothing could be determined
def total_cost
  # The constant check does not depend on the result, so do it once up front.
  return 0 unless defined?(Execution)

  results.sum do |result|
    execution_id = result.execution_id

    if execution_id
      Execution.find_by(id: execution_id)&.total_cost || 0
    else
      0
    end
  end
rescue StandardError
  0
end