Class: RubyLLM::Contract::Eval::ModelComparison

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/contract/eval/model_comparison.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ ModelComparison

Returns a new instance of ModelComparison.



14
15
16
17
18
19
20
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 14

# Builds a frozen comparison over a set of per-candidate reports.
#
# Both collections are stored as frozen shallow copies, so the objects the
# caller passed in remain mutable. Nested values are shared, not copied.
#
# @param eval_name [Object] stored as-is and exposed through #eval_name
# @param reports [Hash] reports keyed by candidate label
# @param configs [Object, nil] candidate configs; when nil (or false) the
#   result of default_configs_from_reports is used instead
# @param fallback [Object, nil] stored as-is; a non-nil value makes
#   #production_mode? return true
def initialize(eval_name:, reports:, configs: nil, fallback: nil)
  @eval_name = eval_name
  @reports = reports.dup.freeze
  # Freeze a copy, as is done for reports, instead of freezing the caller's object.
  @configs = (configs ? configs.dup : default_configs_from_reports).freeze
  @fallback = fallback
  freeze
end

Instance Attribute Details

#configs ⇒ Object (readonly)

Returns the value of attribute configs.



7
8
9
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for @configs. The constructor stores this frozen: either the
# configs it was given or, when none were given, whatever
# default_configs_from_reports returned.
#
# @return [Object] the stored configs
def configs
  @configs
end

#eval_name ⇒ Object (readonly)

Returns the value of attribute eval_name.



7
8
9
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for @eval_name, the value passed as eval_name: to the constructor.
#
# @return [Object] the stored eval name
def eval_name
  @eval_name
end

#fallback ⇒ Object (readonly)

Returns the value of attribute fallback.



7
8
9
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for @fallback, the optional fallback: argument given to the
# constructor. It is nil when no fallback was supplied.
#
# @return [Object, nil] the stored fallback
def fallback
  @fallback
end

#reports ⇒ Object (readonly)

Returns the value of attribute reports.



7
8
9
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for @reports, the frozen shallow copy of the reports collection
# that the constructor made from its reports: argument.
#
# @return [Object] the frozen reports collection
def reports
  @reports
end

Class Method Details

.candidate_label(config) ⇒ Object



9
10
11
12
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 9

# Builds the display label for a candidate config.
#
# @param config [#[]] config read through the :model and :reasoning_effort keys
# @return [Object] config[:model] unchanged when no reasoning effort is set,
#   otherwise the String "<model> (effort: <effort>)"
def self.candidate_label(config)
  effort = config[:reasoning_effort]
  model = config[:model]
  return model unless effort

  "#{model} (effort: #{effort})"
end

Instance Method Details

#best_for(min_score: 0.0) ⇒ Object



38
39
40
41
42
43
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 38

# Picks the cheapest candidate that clears a score threshold.
#
# A report qualifies only if its score is strictly greater than zero and
# at least min_score. Among qualifying reports the one with the lowest
# total_cost wins.
#
# @param min_score [Numeric] minimum score a report must reach
# @return [Object, nil] the key of the winning report, or nil when no
#   report qualifies
def best_for(min_score: 0.0)
  qualifying = @reports.select do |_label, report|
    score = report.score
    score > 0.0 && score >= min_score
  end

  # min_by yields nil for an empty collection, which &. passes through.
  cheapest = qualifying.min_by { |_label, report| report.total_cost }
  cheapest&.first
end

#cost_for(candidate) ⇒ Object



34
35
36
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 34

# Total cost recorded for one candidate.
#
# The candidate is first mapped to a reports key by resolve_key.
#
# @param candidate [Object] value handed to resolve_key
# @return [Object, nil] the matching report's total_cost, or nil when the
#   lookup yields nil
def cost_for(candidate)
  report = @reports[resolve_key(candidate)]
  report&.total_cost
end

#cost_per_point ⇒ Object



45
46
47
48
49
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 45

# Cost divided by score for every report.
#
# @return [Hash] the keys of @reports mapped to total_cost / score, or to
#   Float::INFINITY for any report whose score is not positive
def cost_per_point
  @reports.transform_values do |report|
    points = report.score
    # A non-positive score cannot be divided by meaningfully.
    next Float::INFINITY unless points.positive?

    report.total_cost / points
  end
end

#models ⇒ Object



26
27
28
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 26

# Lists the keys under which reports are stored (the candidate labels).
#
# @return [Array] the keys of @reports
def models
  @reports.keys
end


#print_summary(io = $stdout) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 123

# Writes a human-readable summary to io: a header line with the eval name,
# the comparison table, then up to two recommendation lines.
#
# The recommendation lines come from #best_for with thresholds 0.0 and 1.0.
# Each is printed only when #best_for returns a truthy value.
#
# @param io [#puts] destination stream, $stdout by default
# @return [nil]
def print_summary(io = $stdout)
  header = "#{@eval_name} — model comparison"
  io.puts header
  io.puts
  io.puts table
  io.puts

  if (overall = best_for(min_score: 0.0))
    io.puts "  Best overall: #{overall}"
  end

  if (perfect = best_for(min_score: 1.0))
    io.puts "  Cheapest at 100%: #{perfect}"
  end
end

#production_mode? ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 22

# Whether a fallback was configured, i.e. whether @fallback is anything
# other than nil.
#
# @return [Boolean]
def production_mode?
  !@fallback.nil?
end

#production_mode_table ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 67

# Renders the comparison as a text table with one row per report, each
# described as a chain that involves the fallback candidate.
#
# Chain text comes from chain_label and row text from format_production_row.
# The chain column is as wide as the longest chain, but never narrower than
# 20 characters.
#
# @return [String] header line, dashed rule and rows joined by newlines
def production_mode_table
  fb_label = self.class.candidate_label(@fallback)

  entries = @reports.map do |candidate, report|
    {
      chain: chain_label(candidate, fb_label),
      report: report,
      same: chain_same_as_fallback?(candidate, fb_label)
    }
  end

  widest_chain = entries.map { |entry| entry[:chain].length }.max || 0
  chain_width = [widest_chain, 20].max

  header = format("  %-#{chain_width}s  %-11s  %-10s  %-14s  %-9s  %s",
                  "Chain", "single-shot", "escalation", "effective cost", "latency", "score")
  rule = "  #{"-" * (chain_width + 60)}"
  body = entries.map { |entry| format_production_row(entry, chain_width) }

  [header, rule, *body].join("\n")
end

#score_for(candidate) ⇒ Object



30
31
32
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 30

# Score recorded for one candidate.
#
# The candidate is first mapped to a reports key by resolve_key.
#
# @param candidate [Object] value handed to resolve_key
# @return [Object, nil] the matching report's score, or nil when the
#   lookup yields nil
def score_for(candidate)
  report = @reports[resolve_key(candidate)]
  report&.score
end

#table ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 51

# Renders the comparison as a text table with one row per report.
#
# In production mode this delegates to #production_mode_table. Otherwise
# each row shows the label, the score to two decimals, the cost and the
# average latency. A cost that is not positive, or a missing latency, is
# shown as "n/a". The label column is at least 25 characters wide.
#
# @return [String] header line, dashed rule and rows joined by newlines
def table
  return production_mode_table if production_mode?

  label_width = [@reports.keys.map(&:length).max || 0, 25].max
  row_template = "  %-#{label_width}s %6.2f %10s %12s"

  header = format("  %-#{label_width}s  Score       Cost  Avg Latency", "Candidate")
  rule = "  #{"-" * (label_width + 36)}"

  rows = @reports.map do |label, report|
    latency_ms = report.avg_latency_ms
    latency_cell = latency_ms ? "#{latency_ms.round}ms" : "n/a"

    spend = report.total_cost
    cost_cell = spend.positive? ? "$#{format("%.4f", spend)}" : "n/a"

    format(row_template, label, report.score, cost_cell, latency_cell)
  end

  [header, rule, *rows].join("\n")
end

#to_h ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 136

# Converts the comparison to a Hash with one entry per report.
#
# For each report a base Hash is built with the keys :score, :total_cost,
# :avg_latency_ms, :pass_rate, :pass_rate_ratio and :passed, in that order.
# The base is then passed through production_mode_metrics, and that
# method's return value becomes the entry.
#
# @return [Hash] the keys of @reports mapped to the per-report metrics
def to_h
  @reports.transform_values do |report|
    snapshot = {}
    %i[score total_cost avg_latency_ms pass_rate pass_rate_ratio].each do |metric|
      snapshot[metric] = report.public_send(metric)
    end
    snapshot[:passed] = report.passed?

    production_mode_metrics(report, snapshot)
  end
end