Class: RubyLLM::Contract::Eval::Report
- Inherits:
-
Object
- Object
- RubyLLM::Contract::Eval::Report
- Defined in:
- lib/ruby_llm/contract/eval/report.rb
Constant Summary collapse
- GENERIC_DETAILS =
["passed", "not passed"].freeze
Instance Attribute Summary collapse
-
#dataset_name ⇒ Object
readonly
Returns the value of attribute dataset_name.
-
#results ⇒ Object
readonly
Returns the value of attribute results.
Instance Method Summary collapse
- #avg_latency_ms ⇒ Object
- #baseline_exists?(path: nil, model: nil) ⇒ Boolean
- #compare_with_baseline(path: nil, model: nil) ⇒ Object
- #each ⇒ Object
- #eval_history(path: nil, model: nil) ⇒ Object
- #failed ⇒ Object
- #failures ⇒ Object
-
#initialize(dataset_name:, results:, step_name: nil) ⇒ Report
constructor
A new instance of Report.
- #pass_rate ⇒ Object
- #passed ⇒ Object
- #passed? ⇒ Boolean
- #print_summary(io = $stdout) ⇒ Object
- #save_baseline!(path: nil, model: nil) ⇒ Object
- #save_history!(path: nil, model: nil) ⇒ Object
- #score ⇒ Object
- #skipped ⇒ Object
- #summary ⇒ Object
- #to_s ⇒ Object
- #total_cost ⇒ Object
Constructor Details
#initialize(dataset_name:, results:, step_name: nil) ⇒ Report
Returns a new instance of Report.
12 13 14 15 16 17 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 12
# Builds a frozen report.
#
# The incoming +results+ collection is duplicated before being frozen, so the
# caller's own object is left unfrozen; the report itself is frozen afterwards.
#
# @param dataset_name [Object] value stored as-is and exposed via #dataset_name
# @param results [Object] collection of per-case results (must respond to #dup)
# @param step_name [Object, nil] optional value stored in @step_name
def initialize(dataset_name:, results:, step_name: nil)
  @dataset_name, @step_name = dataset_name, step_name
  frozen_copy = results.dup
  @results = frozen_copy.freeze
  freeze
end
Instance Attribute Details
#dataset_name ⇒ Object (readonly)
Returns the value of attribute dataset_name.
10 11 12 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 10
# Read-only accessor for the dataset name given to the constructor.
def dataset_name = @dataset_name
#results ⇒ Object (readonly)
Returns the value of attribute results.
10 11 12 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 10
# Read-only accessor for the frozen results collection held by this report.
def results = @results
Instance Method Details
#avg_latency_ms ⇒ Object
50 51 52 53 54 55 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 50
# Mean of the truthy +duration_ms+ values across all results.
#
# @return [Float, nil] the average, or nil when no result reports a duration
def avg_latency_ms
  # filter_map drops results whose duration_ms is nil/false.
  timings = results.filter_map { |result| result.duration_ms }
  timings.sum.to_f / timings.length unless timings.empty?
end
#baseline_exists?(path: nil, model: nil) ⇒ Boolean
122 123 124 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 122
# Tells whether a baseline file is present on disk.
#
# @param path [String, nil] explicit file location; when nil the location is
#   obtained from default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path
# @return [Boolean]
def baseline_exists?(path: nil, model: nil)
  target = path || default_baseline_path(model: model)
  File.exist?(target)
end
#compare_with_baseline(path: nil, model: nil) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 110
# Loads a stored baseline and builds a BaselineDiff against the current results.
#
# @param path [String, nil] explicit baseline file; when nil the location is
#   obtained from default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path
# @return [BaselineDiff]
# @raise [ArgumentError] when the baseline file does not exist
def compare_with_baseline(path: nil, model: nil)
  baseline_file = path || default_baseline_path(model: model)
  unless File.exist?(baseline_file)
    raise ArgumentError, "No baseline found at #{baseline_file}"
  end

  stored = JSON.parse(File.read(baseline_file), symbolize_names: true)
  validate_baseline!(stored)

  baseline_cases = stored[:cases]
  current_cases = results.map { |result| serialize_case(result) }
  BaselineDiff.new(baseline_cases: baseline_cases, current_cases: current_cases)
end
#each ⇒ Object
64 65 66 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 64
# Iterates over the results, forwarding the given block to results.each.
#
# @return [Object] whatever results.each returns for the given block
def each(&block)
  results.each(&block)
end
#eval_history(path: nil, model: nil) ⇒ Object
98 99 100 101 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 98
# Loads the evaluation history via EvalHistory.load.
#
# @param path [String, nil] explicit history file; when nil the location is
#   obtained from default_history_path(model:)
# @param model [Object, nil] forwarded to default_history_path
# @return [Object] the value returned by EvalHistory.load
def eval_history(path: nil, model: nil)
  EvalHistory.load(path || default_history_path(model: model))
end
#failed ⇒ Object
30 31 32 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 30
# Number of entries in evaluated_results whose failed? is truthy.
#
# @return [Integer]
def failed
  evaluated_results.count { |result| result.failed? }
end
#failures ⇒ Object
38 39 40 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 38
# The entries of evaluated_results whose failed? is truthy.
#
# @return [Array] the failing results
def failures
  evaluated_results.select { |result| result.failed? }
end
#pass_rate ⇒ Object
42 43 44 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 42
# Pass ratio rendered as "<passed>/<evaluated count>".
#
# @return [String]
def pass_rate
  passed_count = passed
  evaluated_count = evaluated_results.length
  "#{passed_count}/#{evaluated_count}"
end
#passed ⇒ Object
26 27 28 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 26
# Number of entries in evaluated_results whose passed? is truthy.
#
# @return [Integer]
def passed
  evaluated_results.count { |result| result.passed? }
end
#passed? ⇒ Boolean
57 58 59 60 61 62 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 57
# Overall verdict for the report.
#
# An empty set of evaluated results counts as not passed, so a run in which
# nothing was evaluated cannot be reported as a success.
#
# @return [Boolean] true only when at least one result was evaluated and all passed
def passed?
  checked = evaluated_results
  !checked.empty? && checked.all? { |result| result.passed? }
end
#print_summary(io = $stdout) ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 126
# Writes the summary line, a blank line, and then one line per result to +io+.
#
# Each result line carries the result's label and name, followed by the
# formatted cost and the duration in ms when those values are present. For a
# failed result whose details pass useful_details?, the details follow on an
# extra line.
#
# @param io [#puts] output target, defaults to $stdout
# @return [Object] the value returned by results.each
def print_summary(io = $stdout)
  io.puts summary
  io.puts

  results.each do |result|
    label = result.label
    cost_part =
      if result.cost
        " #{format_cost(result.cost)}"
      else
        ""
      end
    latency_part =
      if result.duration_ms
        " #{result.duration_ms}ms"
      else
        ""
      end

    io.puts " #{label} #{result.name}#{cost_part}#{latency_part}"

    next unless result.failed? && useful_details?(result.details)

    io.puts " #{result.details}"
  end
end
#save_baseline!(path: nil, model: nil) ⇒ Object
103 104 105 106 107 108 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 103
# Writes the serialized baseline as pretty-printed JSON, creating the parent
# directory first if needed.
#
# @param path [String, nil] explicit destination; when nil the location is
#   obtained from default_baseline_path(model:)
# @param model [Object, nil] forwarded to default_baseline_path
# @return [String] the path that was written
def save_baseline!(path: nil, model: nil)
  (path || default_baseline_path(model: model)).tap do |target|
    FileUtils.mkdir_p(File.dirname(target))
    payload = JSON.pretty_generate(serialize_for_baseline)
    File.write(target, payload)
  end
end
#save_history!(path: nil, model: nil) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 85
# Appends one run entry (date, score, total cost, pass rate, case count) to the
# history file through EvalHistory.append.
#
# @param path [String, nil] explicit history file; when nil the location is
#   obtained from default_history_path(model:)
# @param model [Object, nil] forwarded to default_history_path
# @return [String] the history file path that was appended to
def save_history!(path: nil, model: nil)
  target = path || default_history_path(model: model)

  EvalHistory.append(
    target,
    {
      date: Time.now.strftime("%Y-%m-%d"),
      score: score,
      total_cost: total_cost,
      pass_rate: pass_rate,
      cases_count: evaluated_results.length
    }
  )

  target
end
#score ⇒ Object
19 20 21 22 23 24 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 19
# Mean of the per-case scores over evaluated_results.
#
# The sum is converted to Float before dividing so that Integer case scores
# (for example 1 and 0) are averaged rather than truncated by integer
# division; this mirrors the coercion used by avg_latency_ms.
#
# @return [Float] the average score, or 0.0 when nothing was evaluated
def score
  evaluated = evaluated_results
  return 0.0 if evaluated.empty?

  evaluated.sum(&:score).to_f / evaluated.length
end
#skipped ⇒ Object
34 35 36 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 34
# Number of results whose step_status equals :skipped.
#
# @return [Integer]
def skipped
  results.count do |result|
    result.step_status == :skipped
  end
end
#summary ⇒ Object
68 69 70 71 72 73 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 68
# One-line overview: "<dataset>: <pass rate> checks passed", followed by the
# skipped count and the formatted total cost when each is positive. Segments
# are joined with ", ".
#
# @return [String]
def summary
  segments = ["#{dataset_name}: #{pass_rate} checks passed"]
  segments.push("#{skipped} skipped") if skipped.positive?
  segments.push(format_cost(total_cost)) if total_cost.positive?
  segments.join(", ")
end
#to_s ⇒ Object
77 78 79 80 81 82 83 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 77
# Multi-line text form: the summary followed by one format_failure entry per
# failing result, separated by newlines.
#
# @return [String]
def to_s
  [summary, *failures.map { |result| format_failure(result) }].join("\n")
end
#total_cost ⇒ Object
46 47 48 |
# File 'lib/ruby_llm/contract/eval/report.rb', line 46
# Sum of the cost of every result, counting a missing (nil/false) cost as 0.0.
#
# The sum is seeded with 0.0 so the return value is a Float even when results
# is empty; an unseeded sum would start from Integer 0 and return an Integer
# in that one case.
#
# @return [Float]
def total_cost
  results.sum(0.0) { |result| result.cost || 0.0 }
end