Class: RubyLLM::Contract::Eval::ReportStats
- Inherits: Object
  - Object
    - RubyLLM::Contract::Eval::ReportStats
- Defined in:
- lib/ruby_llm/contract/eval/report_stats.rb
Overview
Computes aggregate metrics for an eval report.
Instance Method Summary
- #avg_latency_ms ⇒ Object
- #effective_cost ⇒ Object
- #effective_latency_ms ⇒ Object
- #escalation_rate ⇒ Object
- #evaluated_results ⇒ Object
- #evaluated_results_count ⇒ Object
- #failed ⇒ Object
- #failures ⇒ Object
- #initialize(results:) ⇒ ReportStats (constructor) — A new instance of ReportStats.
- #latency_percentiles ⇒ Object
- #pass_rate ⇒ Object
- #pass_rate_ratio ⇒ Object
- #passed ⇒ Object
- #passed? ⇒ Boolean
- #production_mode? ⇒ Boolean
- #score ⇒ Object
- #single_shot_cost ⇒ Object
- #single_shot_latency_ms ⇒ Object
- #skipped ⇒ Object
- #total_cost ⇒ Object
Constructor Details
#initialize(results:) ⇒ ReportStats
Returns a new instance of ReportStats.
8 9 10 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 8
#
# Builds a stats object around a collection of eval results.
# The collection is stored as-is; no copying or validation happens here.
#
# @param results [Object] the result collection the metrics are computed from
#   (presumably an Array of per-case result objects — confirm against callers)
def initialize(results:)
  @results = results
end
Instance Method Details
#avg_latency_ms ⇒ Object
48 49 50 51 52 53 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 48
#
# Mean of the +duration_ms+ values across all results (skipped ones included),
# ignoring results whose +duration_ms+ is nil/false.
#
# @return [Float, nil] the average, or nil when no result carries a duration
def avg_latency_ms
  durations = @results.filter_map { |result| result.duration_ms }
  return if durations.empty?

  durations.sum.to_f / durations.size
end
#effective_cost ⇒ Object
87 88 89 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 87
#
# Cost figure reported as the "effective" cost; currently a straight
# delegation to #total_cost.
#
# @return [Numeric] whatever #total_cost returns
def effective_cost
  total_cost
end
#effective_latency_ms ⇒ Object
100 101 102 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 100
#
# Latency figure reported as the "effective" latency; currently a straight
# delegation to #avg_latency_ms.
#
# @return [Float, nil] whatever #avg_latency_ms returns
def effective_latency_ms
  avg_latency_ms
end
#escalation_rate ⇒ Object
73 74 75 76 77 78 79 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 73
#
# Fraction of evaluated results that needed more than one attempt.
# A result with a nil +attempts+ counts as not escalated.
#
# @return [Float, nil] nil outside production mode, 0.0 when nothing was
#   evaluated, otherwise escalated / evaluated
def escalation_rate
  return unless production_mode?

  evaluated = evaluated_results
  return 0.0 if evaluated.empty?

  retried = evaluated.count do |result|
    attempts = result.attempts || []
    attempts.length > 1
  end
  retried.to_f / evaluated.size
end
#evaluated_results ⇒ Object
61 62 63 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 61
#
# Results whose +step_status+ is not +:skipped+. The filtered list is
# computed on first call and cached in +@evaluated_results+ afterwards.
#
# @return [Array] the non-skipped results
def evaluated_results
  return @evaluated_results if @evaluated_results

  @evaluated_results = @results.reject do |result|
    result.step_status == :skipped
  end
end
#evaluated_results_count ⇒ Object
65 66 67 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 65
#
# Number of entries returned by #evaluated_results.
#
# @return [Integer]
def evaluated_results_count
  evaluated = evaluated_results
  evaluated.length
end
#failed ⇒ Object
22 23 24 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 22
#
# Number of evaluated results that report +failed?+.
#
# @return [Integer]
def failed
  evaluated_results.count { |result| result.failed? }
end
#failures ⇒ Object
30 31 32 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 30
#
# The evaluated results that report +failed?+, in their original order.
#
# @return [Array]
def failures
  evaluated_results.select { |result| result.failed? }
end
#latency_percentiles ⇒ Object
104 105 106 107 108 109 110 111 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 104
#
# Latency distribution over the evaluated results' +duration_ms+ values.
# Values are sorted ascending before being handed to the +percentile+ helper
# (defined elsewhere in the class; its interpolation rule is not visible here).
#
# @return [Hash{Symbol => Object}, nil] +{ p50:, p95:, max: }+, or nil when
#   not in production mode or when no evaluated result has a duration
def latency_percentiles
  return unless production_mode?

  sorted = evaluated_results.filter_map { |result| result.duration_ms }.sort
  return if sorted.empty?

  {
    p50: percentile(sorted, 0.50),
    p95: percentile(sorted, 0.95),
    max: sorted.last.to_f
  }
end
#pass_rate ⇒ Object
34 35 36 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 34
#
# Human-readable pass rate in "passed/evaluated" form, e.g. "2/3".
#
# @return [String]
def pass_rate
  [passed, evaluated_results.length].join("/")
end
#pass_rate_ratio ⇒ Object
38 39 40 41 42 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 38
#
# Pass rate as a fraction: passed count divided by evaluated count.
#
# @return [Float] 0.0 when nothing was evaluated
def pass_rate_ratio
  total = evaluated_results.length
  total.zero? ? 0.0 : passed.to_f / total
end
#passed ⇒ Object
18 19 20 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 18
#
# Number of evaluated results that report +passed?+.
#
# @return [Integer]
def passed
  evaluated_results.count { |result| result.passed? }
end
#passed? ⇒ Boolean
55 56 57 58 59 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 55
#
# Whether the report as a whole passed: at least one result was evaluated
# and every evaluated result reports +passed?+.
#
# @return [Boolean] false when there are no evaluated results
def passed?
  evaluated = evaluated_results
  !evaluated.empty? && evaluated.all? { |result| result.passed? }
end
#production_mode? ⇒ Boolean
69 70 71 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 69
#
# True when at least one evaluated result responds to +attempts+ and returns
# a truthy value for it. The other metrics use this as the switch for their
# attempt-based calculations.
#
# @return [Boolean]
def production_mode?
  evaluated_results.any? do |result|
    next false unless result.respond_to?(:attempts)

    result.attempts
  end
end
#score ⇒ Object
12 13 14 15 16 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 12
#
# Mean +score+ across the evaluated (non-skipped) results.
#
# The sum is coerced to Float before dividing so that Integer scores are not
# truncated by integer division (e.g. scores 1 and 0 average to 0.5, not 0).
# This matches the +.to_f+ division used by the other averages in this class.
#
# @return [Float] 0.0 when nothing was evaluated
def score
  return 0.0 if evaluated_results.empty?

  evaluated_results.sum(&:score).to_f / evaluated_results.length
end
#single_shot_cost ⇒ Object
81 82 83 84 85 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 81
#
# Total cost if each evaluated result had been charged only for its first
# attempt. Per result, the fallback order is: +first_attempt_cost+ (helper
# defined elsewhere in the class), then the result's own +cost+, then 0.0.
#
# @return [Numeric, nil] nil outside production mode
def single_shot_cost
  return unless production_mode?

  evaluated_results.sum do |result|
    first_attempt_cost(result) || result.cost || 0.0
  end
end
#single_shot_latency_ms ⇒ Object
91 92 93 94 95 96 97 98 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 91
#
# Mean latency if each evaluated result had stopped after its first attempt.
# Per result, +first_attempt_latency+ (helper defined elsewhere in the class)
# is preferred, falling back to +duration_ms+; results with neither are left
# out of the average.
#
# @return [Float, nil] nil outside production mode or when no latency exists
def single_shot_latency_ms
  return unless production_mode?

  samples = evaluated_results.filter_map do |result|
    first_attempt_latency(result) || result.duration_ms
  end
  return if samples.empty?

  samples.sum.to_f / samples.size
end
#skipped ⇒ Object
26 27 28 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 26
#
# Number of results (out of all results, not just evaluated ones) whose
# +step_status+ is +:skipped+.
#
# @return [Integer]
def skipped
  statuses = @results.map { |result| result.step_status }
  statuses.count(:skipped)
end
#total_cost ⇒ Object
44 45 46 |
# File 'lib/ruby_llm/contract/eval/report_stats.rb', line 44
#
# Sum of +cost+ over all results (skipped ones included); a missing cost
# contributes 0.0.
#
# @return [Numeric]
def total_cost
  @results.sum do |result|
    cost = result.cost
    cost || 0.0
  end
end