Class: RubyLLM::Contract::Eval::ModelComparison
- Inherits:
-
Object
- Object
- RubyLLM::Contract::Eval::ModelComparison
- Defined in:
- lib/ruby_llm/contract/eval/model_comparison.rb
Instance Attribute Summary collapse
-
#configs ⇒ Object
readonly
Returns the value of attribute configs.
-
#eval_name ⇒ Object
readonly
Returns the value of attribute eval_name.
-
#fallback ⇒ Object
readonly
Returns the value of attribute fallback.
-
#reports ⇒ Object
readonly
Returns the value of attribute reports.
Class Method Summary collapse
- .candidate_label(config) ⇒ Object
Instance Method Summary collapse
- #best_for(min_score: 0.0) ⇒ Object
- #cost_for(candidate) ⇒ Object
- #cost_per_point ⇒ Object
-
#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ ModelComparison
constructor
A new instance of ModelComparison.
- #models ⇒ Object
- #print_summary(io = $stdout) ⇒ Object
- #production_mode? ⇒ Boolean
- #production_mode_table ⇒ Object
- #score_for(candidate) ⇒ Object
- #table ⇒ Object
- #to_h ⇒ Object
Constructor Details
#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ ModelComparison
Returns a new instance of ModelComparison.
14 15 16 17 18 19 20 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 14

# Builds a frozen comparison over a set of per-candidate eval reports.
#
# @param eval_name [Object] name of the eval; interpolated into the summary heading
# @param reports [Hash] reports keyed by candidate label; a frozen shallow copy is kept
# @param configs [Object, nil] candidate configs; when nil (or false) they are
#   obtained from default_configs_from_reports
# @param fallback [Object, nil] fallback config; any non-nil value enables production mode
def initialize(eval_name:, reports:, configs: nil, fallback: nil)
  @eval_name = eval_name
  # @reports is assigned before the default-config lookup below, as in the original ordering.
  @reports = reports.dup.freeze

  configs ||= default_configs_from_reports
  @configs = configs.freeze

  @fallback = fallback
  # The instance itself is frozen, so no ivar can be reassigned afterwards.
  freeze
end
Instance Attribute Details
#configs ⇒ Object (readonly)
Returns the value of attribute configs.
7 8 9 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the candidate configs stored by the constructor.
#
# @return [Object] the value of @configs
def configs
  @configs
end
#eval_name ⇒ Object (readonly)
Returns the value of attribute eval_name.
7 8 9 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the eval name given to the constructor.
#
# @return [Object] the value of @eval_name
def eval_name
  @eval_name
end
#fallback ⇒ Object (readonly)
Returns the value of attribute fallback.
7 8 9 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the fallback config given to the constructor.
#
# @return [Object, nil] the value of @fallback (nil when none was supplied)
def fallback
  @fallback
end
#reports ⇒ Object (readonly)
Returns the value of attribute reports.
7 8 9 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the per-candidate reports stored by the constructor.
#
# @return [Object] the value of @reports
def reports
  @reports
end
Class Method Details
.candidate_label(config) ⇒ Object
9 10 11 12 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 9

# Builds the display label for a candidate config.
#
# @param config [#[]] config read via the :model and :reasoning_effort keys
# @return [Object] the :model value alone when :reasoning_effort is nil/false,
#   otherwise the String "<model> (effort: <effort>)"
def self.candidate_label(config)
  effort = config[:reasoning_effort]
  return config[:model] unless effort

  "#{config[:model]} (effort: #{effort})"
end
Instance Method Details
#best_for(min_score: 0.0) ⇒ Object
38 39 40 41 42 43 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 38

# Picks the lowest-cost candidate whose score clears the threshold.
#
# A report qualifies only when its score is strictly positive AND at least
# +min_score+, so a zero-scoring candidate never wins even with min_score: 0.0.
#
# @param min_score [Numeric] minimum acceptable score
# @return [Object, nil] the key of the cheapest qualifying report, or nil when none qualifies
def best_for(min_score: 0.0)
  qualifying = @reports.select do |_label, report|
    report.score > 0.0 && report.score >= min_score
  end

  # min_by yields nil for an empty collection, so the safe-navigation call
  # also covers the "nothing qualifies" case.
  cheapest = qualifying.min_by { |_label, report| report.total_cost }
  cheapest&.first
end
#cost_for(candidate) ⇒ Object
34 35 36 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 34

# Looks up the total cost recorded for a candidate.
#
# @param candidate [Object] candidate identifier; mapped to a report key by resolve_key
# @return [Object, nil] the report's total_cost, or nil when no report is stored under that key
def cost_for(candidate)
  report = @reports[resolve_key(candidate)]
  report&.total_cost
end
#cost_per_point ⇒ Object
45 46 47 48 49 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 45

# Computes cost divided by score for every candidate.
#
# @return [Hash] same keys as @reports; each value is total_cost / score, or
#   Float::INFINITY when the score is not positive (avoids dividing by zero)
def cost_per_point
  @reports.transform_values do |report|
    next Float::INFINITY unless report.score.positive?

    report.total_cost / report.score
  end
end
#models ⇒ Object
26 27 28 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 26

# Lists the candidates that have a report.
#
# @return [Array] the keys of @reports, in the hash's own order
def models
  @reports.keys
end
#print_summary(io = $stdout) ⇒ Object
123 124 125 126 127 128 129 130 131 132 133 134 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 123

# Writes a human-readable summary: heading, blank line, the comparison table,
# blank line, then up to two footer lines.
#
# NOTE(review): the leading whitespace inside the footer strings is copied from
# a listing whose runs of spaces may have been collapsed — confirm against the
# library file.
#
# @param io [#puts] destination stream (defaults to $stdout)
# @return [nil]
def print_summary(io = $stdout)
  io.puts "#{@eval_name} — model comparison"
  io.puts
  io.puts table
  io.puts

  # Each footer line is printed only when best_for finds a candidate at that
  # threshold: 0.0 => any positive score, 1.0 => a perfect score.
  footers = [
    [0.0, ->(winner) { " Best overall: #{winner}" }],
    [1.0, ->(winner) { " Cheapest at 100%: #{winner}" }]
  ]
  footers.each do |threshold, render|
    winner = best_for(min_score: threshold)
    io.puts render.call(winner) if winner
  end

  nil
end
#production_mode? ⇒ Boolean
22 23 24 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 22

# Whether a fallback was supplied.
#
# @return [Boolean] true for any non-nil @fallback (including false), false when it is nil
def production_mode?
  fallback_absent = @fallback.nil?
  !fallback_absent
end
#production_mode_table ⇒ Object
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 67

# Renders the comparison table used in production mode: one row per report,
# labelled via chain_label with the candidate and the fallback's label.
#
# Reads @fallback through candidate_label, so it expects a fallback config to
# be present.
#
# NOTE(review): the header format string is copied from a listing whose runs of
# spaces may have been collapsed — confirm column spacing against the library file.
#
# @return [String] header, rule and one formatted row per report, joined by newlines
def production_mode_table
  fallback_label = self.class.candidate_label(@fallback)

  entries = @reports.map do |label, report|
    chain = chain_label(label, fallback_label)
    same = chain_same_as_fallback?(label, fallback_label)
    { chain: chain, report: report, same: same }
  end

  # The first column is at least 20 characters wide, wider if any chain label needs it.
  longest_chain = entries.map { |entry| entry[:chain].length }.max || 0
  chain_width = [longest_chain, 20].max

  header = format(" %-#{chain_width}s %-11s %-10s %-14s %-9s %s",
                  "Chain", "single-shot", "escalation", "effective cost", "latency", "score")
  rule = " #{"-" * (chain_width + 60)}"
  body = entries.map { |entry| format_production_row(entry, chain_width) }

  [header, rule, *body].join("\n")
end
#score_for(candidate) ⇒ Object
30 31 32 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 30

# Looks up the score recorded for a candidate.
#
# @param candidate [Object] candidate identifier; mapped to a report key by resolve_key
# @return [Object, nil] the report's score, or nil when no report is stored under that key
def score_for(candidate)
  report = @reports[resolve_key(candidate)]
  report&.score
end
#table ⇒ Object
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 51

# Renders the comparison as a plain-text table.
#
# In production mode the work is delegated to production_mode_table; otherwise
# one row per report shows score, cost and average latency. A non-positive
# cost and a missing latency are both shown as "n/a".
#
# NOTE(review): the format strings are copied from a listing whose runs of
# spaces may have been collapsed — confirm column spacing against the library file.
#
# @return [String] header, rule and rows joined by newlines
def table
  return production_mode_table if production_mode?

  # The label column is at least 25 characters wide, wider if any label needs it.
  longest_label = @reports.keys.map(&:length).max || 0
  max_label = [longest_label, 25].max

  header = format(" %-#{max_label}s Score Cost Avg Latency", "Candidate")
  rule = " #{"-" * (max_label + 36)}"

  rows = @reports.map do |label, report|
    latency = report.avg_latency_ms ? "#{report.avg_latency_ms.round}ms" : "n/a"
    cost = report.total_cost.positive? ? "$#{format("%.4f", report.total_cost)}" : "n/a"
    format(" %-#{max_label}s %6.2f %10s %12s", label, report.score, cost, latency)
  end

  [header, rule, *rows].join("\n")
end
#to_h ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 136

# Exports the comparison as a hash keyed like @reports.
#
# For each report a base hash of metrics (:score, :total_cost, :avg_latency_ms,
# :pass_rate, :pass_rate_ratio, :passed) is built and handed to
# production_mode_metrics together with the report; whatever that helper
# returns becomes the value for the candidate.
#
# @return [Hash] candidate key => result of production_mode_metrics
def to_h
  @reports.transform_values do |report|
    production_mode_metrics(
      report,
      {
        score: report.score,
        total_cost: report.total_cost,
        avg_latency_ms: report.avg_latency_ms,
        pass_rate: report.pass_rate,
        pass_rate_ratio: report.pass_rate_ratio,
        passed: report.passed?
      }
    )
  end
end