Class: RubyLLM::Contract::Eval::ReportStats

Inherits:
Object
Defined in:
lib/ruby_llm/contract/eval/report_stats.rb

Overview

Computes aggregate metrics for an eval report.

Instance Method Summary

Constructor Details

#initialize(results:) ⇒ ReportStats

Returns a new instance of ReportStats.



8
9
10
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 8

# Stores the collection of eval results that every metric on this object
# is computed from.
#
# @param results [Enumerable] the individual result objects of an eval run
def initialize(results:)
  # Kept as-is (no copy); the other methods read it through @results.
  @results = results
end

Instance Method Details

#avg_latency_ms ⇒ Object



48
49
50
51
52
53
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 48

# Arithmetic mean of the +duration_ms+ values found on the raw results.
# Results whose +duration_ms+ is nil/false are left out of the average.
#
# @return [Float, nil] the mean, or nil when no result carries a duration
def avg_latency_ms
  samples = @results.filter_map { |entry| entry.duration_ms }

  # Modifier form yields nil when there is nothing to average.
  samples.sum.to_f / samples.size unless samples.empty?
end

#effective_cost ⇒ Object



87
88
89
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 87

# Alias-style accessor: the effective cost is exactly the value reported
# by {#total_cost}.
#
# @return [Numeric] whatever {#total_cost} returns
def effective_cost
  # Pure delegation; no adjustment is applied here.
  total_cost
end

#effective_latency_ms ⇒ Object



100
101
102
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 100

# Alias-style accessor: the effective latency is exactly the value
# reported by {#avg_latency_ms}.
#
# @return [Float, nil] whatever {#avg_latency_ms} returns
def effective_latency_ms
  # Pure delegation; no adjustment is applied here.
  avg_latency_ms
end

#escalation_rate ⇒ Object



73
74
75
76
77
78
79
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 73

# Fraction of evaluated results that needed more than one attempt.
#
# A result counts as escalated when it exposes +attempts+ and that
# collection holds more than one entry. Results that do not respond to
# +attempts+ (or return nil for it) are counted as not escalated, which
# mirrors the +respond_to?+ guard used by {#production_mode?}.
#
# @return [Float, nil] ratio in 0.0..1.0, or nil outside production mode
def escalation_rate
  return nil unless production_mode?

  results = evaluated_results
  # Defensive guard against dividing by zero.
  return 0.0 if results.empty?

  escalated = results.count do |result|
    # Guard first: not every result type is guaranteed to expose #attempts.
    result.respond_to?(:attempts) && (result.attempts || []).length > 1
  end
  escalated.to_f / results.length
end

#evaluated_results ⇒ Object



61
62
63
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 61

# The results that were actually evaluated, i.e. every result whose
# +step_status+ is not +:skipped+. The filtered list is computed once and
# cached in @evaluated_results for subsequent calls.
#
# @return [Array] the non-skipped results
def evaluated_results
  @evaluated_results ||= @results.select do |entry|
    entry.step_status != :skipped
  end
end

#evaluated_results_count ⇒ Object



65
66
67
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 65

# Number of entries returned by {#evaluated_results}.
#
# @return [Integer]
def evaluated_results_count
  evaluated_results.size
end

#failed ⇒ Object



22
23
24
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 22

# Number of evaluated results that report +failed?+.
#
# @return [Integer]
def failed
  evaluated_results.count { |entry| entry.failed? }
end

#failures ⇒ Object



30
31
32
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 30

# The evaluated results that report +failed?+, in their original order.
#
# @return [Array] the failing results
def failures
  evaluated_results.select { |entry| entry.failed? }
end

#latency_percentiles ⇒ Object



104
105
106
107
108
109
110
111
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 104

# Latency distribution summary over the evaluated results.
#
# Durations are collected from +duration_ms+ (nil/false values dropped),
# sorted ascending, and handed to the +percentile+ helper for the 0.50 and
# 0.95 points; +:max+ is the largest duration as a Float.
#
# @return [Hash{Symbol => Object}, nil] hash with +:p50+, +:p95+ and +:max+,
#   or nil outside production mode or when no duration is available
def latency_percentiles
  return nil unless production_mode?

  sorted = evaluated_results.filter_map { |entry| entry.duration_ms }.sort
  return nil if sorted.empty?

  {
    p50: percentile(sorted, 0.50),
    p95: percentile(sorted, 0.95),
    # The list is sorted ascending, so the final element is the maximum.
    max: sorted[-1].to_f
  }
end

#pass_rate ⇒ Object



34
35
36
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 34

# Human-readable pass rate in the form "<passed>/<evaluated>".
#
# @return [String] e.g. "2/3"
def pass_rate
  passing = passed
  total = evaluated_results.length
  "#{passing}/#{total}"
end

#pass_rate_ratio ⇒ Object



38
39
40
41
42
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 38

# Pass rate as a fraction: {#passed} divided by the number of evaluated
# results.
#
# @return [Float] 0.0 when nothing was evaluated
def pass_rate_ratio
  total = evaluated_results.length
  # Short-circuit so an empty run never divides by zero.
  total.zero? ? 0.0 : passed.to_f / total
end

#passed ⇒ Object



18
19
20
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 18

# Number of evaluated results that report +passed?+.
#
# @return [Integer]
def passed
  evaluated_results.count { |entry| entry.passed? }
end

#passed? ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
58
59
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 55

# Whether the run as a whole passed: there must be at least one evaluated
# result and every one of them must report +passed?+.
#
# @return [Boolean] false when nothing was evaluated
def passed?
  results = evaluated_results
  # An empty run is treated as not passed rather than vacuously true.
  !results.empty? && results.all? { |entry| entry.passed? }
end

#production_mode? ⇒ Boolean

Returns:

  • (Boolean)


69
70
71
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 69

# True when at least one evaluated result responds to +attempts+ and
# returns a truthy value for it.
#
# @return [Boolean]
def production_mode?
  evaluated_results.any? do |entry|
    # Results that do not expose #attempts never qualify.
    next false unless entry.respond_to?(:attempts)

    entry.attempts
  end
end

#score ⇒ Object



12
13
14
15
16
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 12

# Mean of the +score+ values of the evaluated results.
#
# The sum is coerced to Float before dividing so that Integer scores
# (e.g. 0 and 1) are averaged exactly instead of being truncated by
# integer division; this also keeps the return type consistent with the
# 0.0 returned for an empty run.
#
# @return [Float] the average score, 0.0 when nothing was evaluated
def score
  results = evaluated_results
  return 0.0 if results.empty?

  results.sum(&:score).to_f / results.length
end

#single_shot_cost ⇒ Object



81
82
83
84
85
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 81

# Total cost over the evaluated results when only the first attempt of
# each is counted.
#
# For every result the value from the +first_attempt_cost+ helper is used;
# when that is nil/false the result's own +cost+ is used, and when that is
# also missing the result contributes 0.0.
#
# @return [Numeric, nil] the summed cost, or nil outside production mode
def single_shot_cost
  return nil unless production_mode?

  evaluated_results.sum do |entry|
    first_attempt_cost(entry) || entry.cost || 0.0
  end
end

#single_shot_latency_ms ⇒ Object



91
92
93
94
95
96
97
98
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 91

# Mean latency over the evaluated results when only the first attempt of
# each is counted.
#
# For every result the value from the +first_attempt_latency+ helper is
# used, falling back to the result's own +duration_ms+; results where both
# are nil/false are left out of the average.
#
# @return [Float, nil] the mean, or nil outside production mode or when no
#   latency is available
def single_shot_latency_ms
  return nil unless production_mode?

  samples = evaluated_results.filter_map do |entry|
    first_attempt_latency(entry) || entry.duration_ms
  end

  # Modifier form yields nil when there is nothing to average.
  samples.sum.to_f / samples.size unless samples.empty?
end

#skipped ⇒ Object



26
27
28
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 26

# Number of raw results whose +step_status+ is +:skipped+.
#
# @return [Integer]
def skipped
  @results.count do |entry|
    entry.step_status == :skipped
  end
end

#total_cost ⇒ Object



44
45
46
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 44

# Sum of +cost+ across all raw results (skipped ones included); a result
# without a cost contributes 0.0.
#
# @return [Numeric] the summed cost
def total_cost
  @results.sum do |entry|
    entry.cost || 0.0
  end
end