Class: RubyLLM::Contract::Eval::Report

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/contract/eval/report.rb

Constant Summary collapse

# Frozen pair of detail strings: "passed" and "not passed".
# NOTE(review): presumably the strings useful_details? treats as carrying no
# extra information when failures are printed — confirm against that helper.
GENERIC_DETAILS = ["passed", "not passed"].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dataset_name:, results:, step_name: nil) ⇒ Report

Returns a new instance of Report.



12
13
14
15
16
17
# File 'lib/ruby_llm/contract/eval/report.rb', line 12

# Builds a report and freezes it.
#
# @param dataset_name [Object] stored as-is
# @param results [#dup] collection of per-case results; a frozen shallow copy
#   is stored, so the caller's own collection is left unfrozen
# @param step_name [Object, nil] optional step label, stored as-is
def initialize(dataset_name:, results:, step_name: nil)
  @dataset_name, @step_name = dataset_name, step_name
  # Freeze a duplicate rather than the argument itself.
  @results = results.dup.freeze
  # Freezing the instance blocks any later reassignment of its ivars.
  freeze
end

Instance Attribute Details

#dataset_name ⇒ Object (readonly)

Returns the value of attribute dataset_name.



10
11
12
# File 'lib/ruby_llm/contract/eval/report.rb', line 10

# @return [Object] the current value of the @dataset_name instance variable
def dataset_name = @dataset_name

#results ⇒ Object (readonly)

Returns the value of attribute results.



10
11
12
# File 'lib/ruby_llm/contract/eval/report.rb', line 10

# @return [Object] the current value of the @results instance variable
def results = @results

Instance Method Details

#avg_latency_ms ⇒ Object



50
51
52
53
54
55
# File 'lib/ruby_llm/contract/eval/report.rb', line 50

# Mean of duration_ms over the results that report one; results whose
# duration_ms is nil or false are left out of both the sum and the count.
#
# @return [Float, nil] nil when no result carries a duration
def avg_latency_ms
  durations = results.filter_map { |result| result.duration_ms }
  durations.empty? ? nil : durations.sum.to_f / durations.length
end

#baseline_exists?(path: nil, model: nil) ⇒ Boolean

Returns:

  • (Boolean)


122
123
124
# File 'lib/ruby_llm/contract/eval/report.rb', line 122

# Reports whether a baseline file is present on disk.
#
# @param path [String, nil] explicit file location; when nil the location
#   comes from default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path when no path
#   is given
# @return [Boolean] result of File.exist? on the resolved location
def baseline_exists?(path: nil, model: nil)
  target = path || default_baseline_path(model: model)
  File.exist?(target)
end

#compare_with_baseline(path: nil, model: nil) ⇒ Object

Raises:

  • (ArgumentError)


110
111
112
113
114
115
116
117
118
119
120
# File 'lib/ruby_llm/contract/eval/report.rb', line 110

# Reads a saved baseline and pairs its cases with the current results.
#
# @param path [String, nil] baseline file; when nil the location comes from
#   default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path when no path
#   is given
# @return [BaselineDiff] built from the baseline's :cases entry and each
#   current result passed through serialize_case
# @raise [ArgumentError] when the resolved file does not exist
def compare_with_baseline(path: nil, model: nil)
  file = path || default_baseline_path(model: model)
  unless File.exist?(file)
    raise ArgumentError, "No baseline found at #{file}"
  end

  # Keys are symbolized so the payload can be indexed with :cases below.
  baseline = JSON.parse(File.read(file), symbolize_names: true)
  validate_baseline!(baseline)

  saved_cases = baseline[:cases]
  live_cases = results.map { |result| serialize_case(result) }
  BaselineDiff.new(baseline_cases: saved_cases, current_cases: live_cases)
end

#each ⇒ Object



64
65
66
# File 'lib/ruby_llm/contract/eval/report.rb', line 64

# Iterates over the results by handing the caller's block to results.each.
#
# @return [Object] whatever results.each returns for the given block (or for
#   no block at all)
def each(&block)
  results.each(&block)
end

#eval_history(path: nil, model: nil) ⇒ Object



98
99
100
101
# File 'lib/ruby_llm/contract/eval/report.rb', line 98

# Loads recorded eval history through EvalHistory.load.
#
# @param path [String, nil] history file; when nil the location comes from
#   default_history_path(model:)
# @param model [Object, nil] forwarded to default_history_path when no path
#   is given
# @return [Object] whatever EvalHistory.load returns for the resolved file
def eval_history(path: nil, model: nil)
  EvalHistory.load(path || default_history_path(model: model))
end

#failed ⇒ Object



30
31
32
# File 'lib/ruby_llm/contract/eval/report.rb', line 30

# @return [Integer] number of evaluated results whose failed? is truthy
def failed
  evaluated_results.count { |result| result.failed? }
end

#failures ⇒ Object



38
39
40
# File 'lib/ruby_llm/contract/eval/report.rb', line 38

# @return [Object] the evaluated results whose failed? is truthy, as selected
#   by evaluated_results.select
def failures
  evaluated_results.select { |result| result.failed? }
end

#pass_rate ⇒ Object



42
43
44
# File 'lib/ruby_llm/contract/eval/report.rb', line 42

# Renders the pass ratio as "<passed>/<evaluated>", for example "3/4".
#
# @return [String]
def pass_rate
  numerator = passed
  denominator = evaluated_results.length
  "#{numerator}/#{denominator}"
end

#passed ⇒ Object



26
27
28
# File 'lib/ruby_llm/contract/eval/report.rb', line 26

# @return [Integer] number of evaluated results whose passed? is truthy
def passed
  evaluated_results.count { |result| result.passed? }
end

#passed? ⇒ Boolean

Returns:

  • (Boolean)


57
58
59
60
61
62
# File 'lib/ruby_llm/contract/eval/report.rb', line 57

# Whether the run as a whole passed.
#
# @return [Boolean] false when there are no evaluated results; otherwise true
#   only if every evaluated result reports passed?
def passed?
  evaluated = evaluated_results
  # An empty run never counts as a pass, even though all? would be true.
  !evaluated.empty? && evaluated.all? { |result| result.passed? }
end


#print_summary(io = $stdout) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
# File 'lib/ruby_llm/contract/eval/report.rb', line 126

# Writes the summary line, a blank line, and then one line per result.
#
# Each result line shows the label and name, followed by the formatted cost
# and the latency in milliseconds when the result has them. A failed result
# whose details pass useful_details? gets an extra indented line.
#
# @param io [#puts] destination stream, $stdout by default
# @return [Object] whatever results.each returns
def print_summary(io = $stdout)
  io.puts summary
  io.puts
  results.each do |result|
    cost = result.cost
    latency = result.duration_ms

    line = +"  #{result.label}  #{result.name}"
    line << "  #{format_cost(cost)}" if cost
    line << "  #{latency}ms" if latency
    io.puts line

    next unless result.failed?

    details = result.details
    io.puts "        #{details}" if useful_details?(details)
  end
end

#save_baseline!(path: nil, model: nil) ⇒ Object



103
104
105
106
107
108
# File 'lib/ruby_llm/contract/eval/report.rb', line 103

# Writes the current run to disk as a pretty-printed JSON baseline.
#
# @param path [String, nil] destination file; when nil the location comes
#   from default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path when no path
#   is given
# @return [String] the file that was written
def save_baseline!(path: nil, model: nil)
  (path || default_baseline_path(model: model)).tap do |file|
    # Create any missing parent directories before writing.
    FileUtils.mkdir_p(File.dirname(file))
    File.write(file, JSON.pretty_generate(serialize_for_baseline))
  end
end

#save_history!(path: nil, model: nil) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/ruby_llm/contract/eval/report.rb', line 85

# Appends a snapshot of this run to the history file via EvalHistory.append.
#
# The snapshot holds today's date (YYYY-MM-DD), the score, the total cost,
# the pass rate and the number of evaluated cases.
#
# @param path [String, nil] history file; when nil the location comes from
#   default_history_path(model:)
# @param model [Object, nil] forwarded to default_history_path when no path
#   is given
# @return [String] the history file that was appended to
def save_history!(path: nil, model: nil)
  (path || default_history_path(model: model)).tap do |file|
    snapshot = {
      date: Time.now.strftime("%Y-%m-%d"),
      score: score,
      total_cost: total_cost,
      pass_rate: pass_rate,
      cases_count: evaluated_results.length
    }
    EvalHistory.append(file, snapshot)
  end
end

#score ⇒ Object



19
20
21
22
23
24
# File 'lib/ruby_llm/contract/eval/report.rb', line 19

# Mean per-case score across the evaluated results.
#
# @return [Float] 0.0 when nothing was evaluated; otherwise the sum of the
#   per-case scores divided by the number of evaluated results
def score
  evaluated = evaluated_results
  return 0.0 if evaluated.empty?

  # Convert to Float before dividing: with Integer per-case scores, plain
  # `/` would truncate (e.g. (1 + 0) / 2 => 0). Float sums are unaffected.
  evaluated.sum(&:score).to_f / evaluated.length
end

#skipped ⇒ Object



34
35
36
# File 'lib/ruby_llm/contract/eval/report.rb', line 34

# @return [Integer] number of results whose step_status equals :skipped
def skipped
  statuses = results.map(&:step_status)
  statuses.count(:skipped)
end

#summary ⇒ Object



68
69
70
71
72
73
# File 'lib/ruby_llm/contract/eval/report.rb', line 68

# One-line overview: "<dataset_name>: <pass_rate> checks passed", followed by
# the skipped count and the formatted total cost, each included only when
# positive. Segments are joined with ", ".
#
# @return [String]
def summary
  segments = ["#{dataset_name}: #{pass_rate} checks passed"]

  skipped_count = skipped
  segments << "#{skipped_count} skipped" if skipped_count.positive?

  cost = total_cost
  segments << format_cost(cost) if cost.positive?

  segments.join(", ")
end

#to_s ⇒ Object



77
78
79
80
81
82
83
# File 'lib/ruby_llm/contract/eval/report.rb', line 77

# Multi-line text form: the summary line, then one format_failure entry per
# failure, joined with newlines.
#
# @return [String]
def to_s
  header = summary
  failure_lines = failures.map { |result| format_failure(result) }
  [header, *failure_lines].join("\n")
end

#total_cost ⇒ Object



46
47
48
# File 'lib/ruby_llm/contract/eval/report.rb', line 46

# Sum of the per-result costs, counting a missing (nil/false) cost as 0.0.
#
# @return [Float] always a Float; with no results, `sum` alone would return
#   the Integer 0, so the total is normalised with to_f
# NOTE(review): assumes costs are Floats, as the 0.0 fallback suggests —
# confirm that no caller supplies BigDecimal or Rational costs.
def total_cost
  results.sum { |result| result.cost || 0.0 }.to_f
end