Class: RubyLLM::Contract::Eval::ModelComparison

Inherits:

Object

Object
RubyLLM::Contract::Eval::ModelComparison

show all

Defined in: lib/ruby_llm/contract/eval/model_comparison.rb

Instance Attribute Summary collapse

#configs ⇒ Object readonly

Returns the value of attribute configs.
#eval_name ⇒ Object readonly

Returns the value of attribute eval_name.
#fallback ⇒ Object readonly

Returns the value of attribute fallback.
#reports ⇒ Object readonly

Returns the value of attribute reports.

Class Method Summary collapse

.candidate_label(config) ⇒ Object

Instance Method Summary collapse

#best_for(min_score: 0.0) ⇒ Object
#cost_for(candidate) ⇒ Object
#cost_per_point ⇒ Object
#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ ModelComparison constructor

A new instance of ModelComparison.
#models ⇒ Object
#print_summary(io = $stdout) ⇒ Object
#production_mode? ⇒ Boolean
#production_mode_table ⇒ Object
#score_for(candidate) ⇒ Object
#table ⇒ Object
#to_h ⇒ Object

Constructor Details

#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ `ModelComparison`

Returns a new instance of ModelComparison.

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 14

# Builds a frozen comparison object from a set of eval reports.
#
# @param eval_name [Object] name of the eval, stored as given
# @param reports [Hash] reports keyed by candidate label; a frozen shallow
#   copy is stored, so the caller's hash is left untouched
# @param configs [Object, nil] per-candidate configs; when nil a default is
#   derived via +default_configs_from_reports+
# @param fallback [Object, nil] fallback config; a non-nil value puts the
#   comparison in production mode
def initialize(eval_name:, reports:, configs: nil, fallback: nil)
  @eval_name = eval_name
  @reports = reports.dup.freeze
  # Copy caller-supplied configs before freezing (mirroring +reports+) so the
  # caller's own object is never frozen as a side effect of construction.
  @configs = (configs&.dup || default_configs_from_reports).freeze
  @fallback = fallback
  freeze
end

Instance Attribute Details

#configs ⇒ `Object` (readonly)

Returns the value of attribute configs.



7
8
9

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the frozen configs stored by the constructor: either the
# +configs+ argument or, when that was nil, the default derived from the
# reports.
def configs
  @configs
end

#eval_name ⇒ `Object` (readonly)

Returns the value of attribute eval_name.



7
8
9

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the eval name exactly as it was passed to the constructor.
def eval_name
  @eval_name
end

#fallback ⇒ `Object` (readonly)

Returns the value of attribute fallback.



7
8
9

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the fallback passed to the constructor (nil when none was
# given). A non-nil value is what makes #production_mode? return true.
def fallback
  @fallback
end

#reports ⇒ `Object` (readonly)

Returns the value of attribute reports.



7
8
9

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 7

# Reader for the frozen shallow copy of the reports hash that was passed to
# the constructor.
def reports
  @reports
end

Class Method Details

.candidate_label(config) ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 9

# Builds the display label for a candidate config.
#
# @param config [#[]] config exposing +:model+ and optionally +:reasoning_effort+
# @return [Object] the bare model when no reasoning effort is set, otherwise
#   a "model (effort: x)" string
def self.candidate_label(config)
  reasoning = config[:reasoning_effort]
  return config[:model] unless reasoning

  "#{config[:model]} (effort: #{reasoning})"
end

Instance Method Details

#best_for(min_score: 0.0) ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 38

# Picks the cheapest candidate whose score is strictly positive and at least
# +min_score+.
#
# @param min_score [Numeric] minimum acceptable score
# @return [Object, nil] the key of the lowest-total-cost qualifying report,
#   or nil when no report qualifies
def best_for(min_score: 0.0)
  qualifying = @reports.select do |_label, report|
    report.score > 0.0 && report.score >= min_score
  end

  # min_by yields nil for an empty collection; destructuring nil leaves
  # +label+ nil, which is the "no winner" result.
  label, _report = qualifying.min_by { |_label, report| report.total_cost }
  label
end

#cost_for(candidate) ⇒ `Object`



34
35
36

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 34

# Looks up the total cost recorded for a candidate.
#
# @param candidate [Object] candidate identifier, translated to a report key
#   by +resolve_key+
# @return [Object, nil] the report's total cost, or nil when no report exists
#   for the resolved key
def cost_for(candidate)
  key = resolve_key(candidate)
  report = @reports[key]
  report&.total_cost
end

#cost_per_point ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 45

# Computes total cost divided by score for every report.
#
# @return [Hash] report key => cost per score point; Float::INFINITY for any
#   report whose score is not positive
def cost_per_point
  @reports.each_with_object({}) do |(label, report), ratios|
    ratios[label] =
      if report.score.positive?
        report.total_cost / report.score
      else
        Float::INFINITY
      end
  end
end

#models ⇒ `Object`



26
27
28

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 26

# Lists the keys of the stored reports, in hash order.
#
# @return [Array] the report keys
def models
  @reports.each_key.to_a
end

#print_summary(io = $stdout) ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 123

# Writes a human-readable summary: a title line, the comparison table, and
# up to two highlight lines chosen via #best_for.
#
# @param io [#puts] destination stream (defaults to $stdout)
def print_summary(io = $stdout)
  io.puts "#{@eval_name} — model comparison"
  io.puts
  io.puts table
  io.puts

  # Each highlight is printed only when #best_for finds a candidate at that
  # minimum score.
  { "Best overall" => 0.0, "Cheapest at 100%" => 1.0 }.each do |heading, threshold|
    winner = best_for(min_score: threshold)
    io.puts "  #{heading}: #{winner}" if winner
  end
end

#production_mode? ⇒ `Boolean`

Returns:

(Boolean)



22
23
24

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 22

# Tells whether a fallback was supplied.
#
# @return [Boolean] false only when the stored fallback is nil
def production_mode?
  return false if @fallback.nil?

  true
end

#production_mode_table ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 67

# Renders the comparison as a text table of candidate/fallback chains.
#
# The first column is sized to the longest chain label, with a minimum of 20
# characters. Row rendering is delegated to +format_production_row+.
#
# @return [String] header, divider and one line per report, newline-joined
def production_mode_table
  fallback_name = self.class.candidate_label(@fallback)

  entries = @reports.map do |candidate, report|
    {
      chain: chain_label(candidate, fallback_name),
      report: report,
      same: chain_same_as_fallback?(candidate, fallback_name)
    }
  end

  # Seeding the list with 20 enforces the minimum column width and also
  # covers the case where there are no reports.
  width = [20, *entries.map { |entry| entry[:chain].length }].max

  header = format("  %-#{width}s  %-11s  %-10s  %-14s  %-9s  %s",
                  "Chain", "single-shot", "escalation", "effective cost", "latency", "score")
  divider = "  #{"-" * (width + 60)}"
  body = entries.map { |entry| format_production_row(entry, width) }

  [header, divider, *body].join("\n")
end

#score_for(candidate) ⇒ `Object`



30
31
32

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 30

# Looks up the score recorded for a candidate.
#
# @param candidate [Object] candidate identifier, translated to a report key
#   by +resolve_key+
# @return [Object, nil] the report's score, or nil when no report exists for
#   the resolved key
def score_for(candidate)
  key = resolve_key(candidate)
  report = @reports[key]
  report&.score
end

#table ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 51

# Renders the comparison as a plain-text table.
#
# Delegates to #production_mode_table in production mode. Otherwise prints
# one row per report with score, total cost and average latency; a
# non-positive cost or a missing latency is shown as "n/a". The label column
# is at least 25 characters wide.
#
# @return [String] newline-joined table
def table
  return production_mode_table if production_mode?

  width = [25, *@reports.keys.map(&:length)].max
  header = format("  %-#{width}s  Score       Cost  Avg Latency", "Candidate")
  divider = "  #{"-" * (width + 36)}"

  rows = @reports.map do |label, report|
    latency_ms = report.avg_latency_ms
    latency = latency_ms ? "#{latency_ms.round}ms" : "n/a"
    spend = report.total_cost
    cost = spend.positive? ? "$#{format("%.4f", spend)}" : "n/a"
    format("  %-#{width}s %6.2f %10s %12s", label, report.score, cost, latency)
  end

  [header, divider, *rows].join("\n")
end

#to_h ⇒ `Object`

# File 'lib/ruby_llm/contract/eval/model_comparison.rb', line 136

# Serialises every report into a hash of metrics.
#
# Each report's base metrics are passed through +production_mode_metrics+,
# whose return value becomes the entry for that report key.
#
# @return [Hash] report key => metrics
def to_h
  @reports.each_with_object({}) do |(label, report), summary|
    metrics = {
      score: report.score,
      total_cost: report.total_cost,
      avg_latency_ms: report.avg_latency_ms,
      pass_rate: report.pass_rate,
      pass_rate_ratio: report.pass_rate_ratio,
      passed: report.passed?
    }
    summary[label] = production_mode_metrics(report, metrics)
  end
end

Class: RubyLLM::Contract::Eval::ModelComparison

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(eval_name:, reports:, configs: nil, fallback: nil) ⇒ ModelComparison

Instance Attribute Details

#configs ⇒ Object (readonly)

#eval_name ⇒ Object (readonly)

#fallback ⇒ Object (readonly)

#reports ⇒ Object (readonly)

Class Method Details

.candidate_label(config) ⇒ Object

Instance Method Details

#best_for(min_score: 0.0) ⇒ Object

#cost_for(candidate) ⇒ Object

#cost_per_point ⇒ Object

#models ⇒ Object

#print_summary(io = $stdout) ⇒ Object

#production_mode? ⇒ Boolean

#production_mode_table ⇒ Object

#score_for(candidate) ⇒ Object

#table ⇒ Object

#to_h ⇒ Object