Class: RubyLLM::Contract::Eval::AggregatedReport

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/contract/eval/aggregated_report.rb

Overview

Wraps N Reports from repeated runs of the same eval to reduce sampling variance in live mode (temperature=1 on gpt-5 family). Exposes the same duck-type as Report — mean score, mean cost per run, mean latency.

pass_rate reports how many runs passed cleanly (x/N), not case-level pass rate, since the question is “does this candidate reliably pass?”.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(runs) ⇒ AggregatedReport

Returns a new instance of AggregatedReport.

Raises:

  • (ArgumentError)


15
16
17
18
19
20
21
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 15

# Builds an aggregate over the given runs.
#
# A shallow copy of +runs+ is frozen and stored, so the caller's own
# collection is left unfrozen and later changes to it cannot leak into this
# report. Every run's +results+ are flattened into one frozen list, and the
# instance itself is frozen afterwards.
#
# @param runs [Array] non-empty collection of run objects, each responding to +results+
# @raise [ArgumentError] if +runs+ is empty
def initialize(runs)
  raise ArgumentError, "runs must not be empty" if runs.empty?

  # dup before freeze: freezing the argument directly would mutate caller state.
  @runs = runs.dup.freeze
  @results = @runs.flat_map(&:results).freeze
  freeze
end

Instance Attribute Details

#results ⇒ Object (readonly)

Returns the value of attribute results.



13
14
15
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 13

# Reader for +@results+, which #initialize sets to the frozen, flattened
# concatenation of every run's +results+.
def results
  @results
end

#runs ⇒ Object (readonly)

Returns the value of attribute runs.



13
14
15
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 13

# Reader for +@runs+, the frozen collection of runs stored by #initialize.
def runs
  @runs
end

Instance Method Details

#avg_latency_ms ⇒ Object



47
48
49
50
51
52
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 47

# Mean of the per-run +avg_latency_ms+ values.
#
# Runs whose +avg_latency_ms+ is nil (or false) are left out of the mean.
#
# @return [Float, nil] nil when no run reports a latency
def avg_latency_ms
  reported = @runs.filter_map { |run| run.avg_latency_ms }
  reported.empty? ? nil : reported.sum / reported.length.to_f
end

#clean_passes ⇒ Object



82
83
84
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 82

# Number of runs that passed.
#
# @return [Integer] count of runs whose +passed?+ is truthy
def clean_passes
  @runs.count { |run| run.passed? }
end

#dataset_name ⇒ Object



23
24
25
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 23

# Dataset name, taken from the first run only.
#
# @return [Object] whatever the first run's +dataset_name+ returns
def dataset_name
  first_run = @runs.first
  first_run.dataset_name
end

#each(&block) ⇒ Object



62
63
64
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 62

# Iterates over the flattened results of all runs.
#
# The block (or its absence) is forwarded unchanged to +@results.each+, and
# that call's return value is returned.
def each(&block)
  collected = @results
  collected.each(&block)
end

#effective_cost ⇒ Object



108
109
110
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 108

# Effective cost of the aggregate; this is simply #total_cost.
#
# @return [Object] the value returned by +total_cost+
def effective_cost
  mean_cost = total_cost
  mean_cost
end

#effective_latency_ms ⇒ Object



119
120
121
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 119

# Effective latency of the aggregate; this is simply #avg_latency_ms.
#
# @return [Object] the value returned by +avg_latency_ms+
def effective_latency_ms
  mean_latency = avg_latency_ms
  mean_latency
end

#escalation_rate ⇒ Object



94
95
96
97
98
99
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 94

# Mean of the per-run +escalation_rate+ values.
#
# Runs whose +escalation_rate+ is nil (or false) do not take part.
#
# @return [Float, nil] nil when no run reports an escalation rate
def escalation_rate
  rates = @runs.filter_map { |run| run.escalation_rate }
  return if rates.empty?

  run_count = rates.length.to_f
  rates.sum / run_count
end

#failures ⇒ Object



86
87
88
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 86

# Failures from every run, concatenated in run order.
#
# @return [Array] flattened (one level) list of each run's +failures+
def failures
  @runs.flat_map { |run| run.failures }
end

#latency_percentiles ⇒ Object



123
124
125
126
127
128
129
130
131
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 123

# Averages each latency percentile across runs.
#
# Runs whose +latency_percentiles+ is nil (or false) are skipped. For each of
# the keys :p50, :p95 and :max the mean of the present values is taken; a key
# that no run supplies maps to nil.
#
# @return [Hash{Symbol => Float, nil}, nil] nil when no run reports percentiles
def latency_percentiles
  tables = @runs.filter_map { |run| run.latency_percentiles }
  return if tables.empty?

  %i[p50 p95 max].to_h do |key|
    samples = tables.filter_map { |table| table[key] }
    mean = samples.empty? ? nil : samples.sum / samples.length.to_f
    [key, mean]
  end
end

#pass_rate ⇒ Object



54
55
56
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 54

# Run-level pass rate rendered as "passed/total".
#
# @return [String] +clean_passes+ over the number of runs, e.g. "2/3"
def pass_rate
  passed = clean_passes
  total = @runs.length
  "#{passed}/#{total}"
end

#pass_rate_ratio ⇒ Object



58
59
60
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 58

# Run-level pass rate as a fraction.
#
# @return [Float] +clean_passes+ divided by the number of runs
def pass_rate_ratio
  passed = clean_passes.to_f
  passed / @runs.length
end

#passed? ⇒ Boolean

Returns:

  • (Boolean)


78
79
80
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 78

# Whether every run passed.
#
# @return [Boolean] false as soon as any run's +passed?+ is falsy
def passed?
  @runs.none? { |run| !run.passed? }
end


74
75
76
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 74

# Prints the summary of the first run only.
#
# @param io [IO] destination handed to the first run's +print_summary+ (defaults to $stdout)
# @return [Object] whatever the first run's +print_summary+ returns
def print_summary(io = $stdout)
  first_run = @runs.first
  first_run.print_summary(io)
end

#production_mode? ⇒ Boolean

Returns:

  • (Boolean)


90
91
92
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 90

# Whether at least one run reports production mode.
#
# @return [Boolean] true if any run's +production_mode?+ is truthy
def production_mode?
  @runs.any? { |run| run.production_mode? }
end

#score ⇒ Object



31
32
33
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 31

# Mean score across all runs.
#
# @return [Float] sum of every run's +score+ divided by the number of runs
def score
  total = @runs.sum { |run| run.score }
  total / @runs.length.to_f
end

#score_max ⇒ Object



39
40
41
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 39

# Highest score seen in any single run.
#
# @return [Object] maximum of the runs' +score+ values
def score_max
  scores = @runs.map { |run| run.score }
  scores.max
end

#score_min ⇒ Object



35
36
37
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 35

# Lowest score seen in any single run.
#
# @return [Object] minimum of the runs' +score+ values
def score_min
  scores = @runs.map { |run| run.score }
  scores.min
end

#single_shot_cost ⇒ Object



101
102
103
104
105
106
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 101

# Mean of the per-run +single_shot_cost+ values.
#
# Runs whose +single_shot_cost+ is nil (or false) are ignored.
#
# @return [Float, nil] nil when no run reports a single-shot cost
def single_shot_cost
  costs = @runs.filter_map { |run| run.single_shot_cost }
  costs.sum / costs.length.to_f unless costs.empty?
end

#single_shot_latency_ms ⇒ Object



112
113
114
115
116
117
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 112

# Mean of the per-run +single_shot_latency_ms+ values.
#
# Runs whose +single_shot_latency_ms+ is nil (or false) are ignored.
#
# @return [Float, nil] nil when no run reports a single-shot latency
def single_shot_latency_ms
  latencies = @runs.filter_map { |run| run.single_shot_latency_ms }
  if latencies.empty?
    nil
  else
    latencies.sum / latencies.length.to_f
  end
end

#step_name ⇒ Object



27
28
29
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 27

# Step name, taken from the first run only.
#
# @return [Object] whatever the first run's +step_name+ returns
def step_name
  first_run = @runs.first
  first_run.step_name
end

#summary ⇒ Object



66
67
68
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 66

# Summary of the first run only; later runs are not consulted.
#
# @return [Object] whatever the first run's +summary+ returns
def summary
  first_run = @runs.first
  first_run.summary
end

#to_s ⇒ Object



70
71
72
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 70

# String form of the first run only; later runs are not consulted.
#
# @return [Object] whatever the first run's +to_s+ returns
def to_s
  first_run = @runs.first
  first_run.to_s
end

#total_cost ⇒ Object



43
44
45
# File 'lib/ruby_llm/contract/eval/aggregated_report.rb', line 43

# Mean cost per run (not the sum over runs, despite the name).
#
# @return [Float] sum of every run's +total_cost+ divided by the number of runs
def total_cost
  combined = @runs.sum { |run| run.total_cost }
  combined / @runs.length.to_f
end