Class: Qualspec::Suite::Results
- Inherits:
-
Object
- Object
- Qualspec::Suite::Results
- Defined in:
- lib/qualspec/suite/runner.rb
Overview
Results container with multi-dimensional support
Instance Attribute Summary collapse
-
#candidate_models ⇒ Object
readonly
Returns the value of attribute candidate_models.
-
#costs ⇒ Object
readonly
Returns the value of attribute costs.
-
#evaluations ⇒ Object
readonly
Returns the value of attribute evaluations.
-
#finished_at ⇒ Object
readonly
Returns the value of attribute finished_at.
-
#prompts ⇒ Object
readonly
Returns the value of attribute prompts.
-
#responses ⇒ Object
readonly
Returns the value of attribute responses.
-
#started_at ⇒ Object
readonly
Returns the value of attribute started_at.
-
#suite_name ⇒ Object
readonly
Returns the value of attribute suite_name.
-
#timing ⇒ Object
readonly
Returns the value of attribute timing.
Instance Method Summary collapse
- #finish! ⇒ Object
-
#initialize(suite_name) ⇒ Results
constructor
A new instance of Results.
- #record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ Object
- #record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ Object
-
#scores_by_candidate ⇒ Object
Group scores by candidate, aggregating across all variants.
-
#scores_by_scenario ⇒ Object
Detailed breakdown by scenario, then by candidate (scores averaged across that candidate's variants and temperatures).
-
#scores_by_scenario_variant ⇒ Object
Cross-tabulation: scenario × variant.
-
#scores_by_temperature ⇒ Object
Temperature sensitivity analysis.
-
#scores_by_variant ⇒ Object
Group scores by variant.
- #timing_by_candidate ⇒ Object
- #to_h ⇒ Object
Constructor Details
#initialize(suite_name) ⇒ Results
Returns a new instance of Results.
229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/qualspec/suite/runner.rb', line 229
#
# Builds an empty results container for one suite run and stamps the start
# time with the current wall clock.
#
# @param suite_name [Object] stored unchanged in @suite_name
def initialize(suite_name)
  @suite_name = suite_name

  # Flat list of evaluation records.
  @evaluations = []
  # Nested: {candidate => {scenario => {variant => {temp => response}}}}
  @responses = {}

  # Per-candidate bookkeeping, all starting empty.
  @timing = {}
  @costs = {}
  @candidate_models = {} # {candidate_name => model_string}

  @prompts = {} # {scenario_name => prompt_string}

  @started_at = Time.now
  @finished_at = nil # no finish time recorded yet
end
Instance Attribute Details
#candidate_models ⇒ Object (readonly)
Returns the value of attribute candidate_models.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the candidate-to-model map.
#
# @return [Object] the value of @candidate_models (initialized to an empty
#   Hash documented as {candidate_name => model_string})
def candidate_models
  @candidate_models
end
#costs ⇒ Object (readonly)
Returns the value of attribute costs.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the accumulated costs.
#
# @return [Object] the value of @costs (initialized to an empty Hash;
#   #record_response adds positive costs under the candidate key)
def costs
  @costs
end
#evaluations ⇒ Object (readonly)
Returns the value of attribute evaluations.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the recorded evaluations.
#
# @return [Object] the value of @evaluations (initialized to an empty Array;
#   #record_evaluation appends one Hash per call)
def evaluations
  @evaluations
end
#finished_at ⇒ Object (readonly)
Returns the value of attribute finished_at.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the finish timestamp.
#
# @return [Object] the value of @finished_at (nil after construction; set to
#   Time.now by #finish!)
def finished_at
  @finished_at
end
#prompts ⇒ Object (readonly)
Returns the value of attribute prompts.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the prompt map.
#
# @return [Object] the value of @prompts (initialized to an empty Hash
#   documented as {scenario_name => prompt_string})
def prompts
  @prompts
end
#responses ⇒ Object (readonly)
Returns the value of attribute responses.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the recorded responses.
#
# @return [Object] the value of @responses (initialized to an empty Hash;
#   #record_response nests entries as
#   {candidate => {scenario => {variant => {temperature => {...}}}}})
def responses
  @responses
end
#started_at ⇒ Object (readonly)
Returns the value of attribute started_at.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the start timestamp.
#
# @return [Object] the value of @started_at (set to Time.now in the
#   constructor)
def started_at
  @started_at
end
#suite_name ⇒ Object (readonly)
Returns the value of attribute suite_name.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the suite name.
#
# @return [Object] the value of @suite_name, exactly as passed to the
#   constructor
def suite_name
  @suite_name
end
#timing ⇒ Object (readonly)
Returns the value of attribute timing.
226 227 228 |
# File 'lib/qualspec/suite/runner.rb', line 226
#
# Read-only accessor for the raw timing data.
#
# @return [Object] the value of @timing (initialized to an empty Hash;
#   #record_response stores durations as
#   {candidate => {"scenario/variant" => duration_ms}})
def timing
  @timing
end
Instance Method Details
#finish! ⇒ Object
278 279 280 |
# File 'lib/qualspec/suite/runner.rb', line 278
#
# Marks the run as finished by storing the current wall-clock time in
# @finished_at. Calling it again overwrites the previous timestamp.
#
# @return [Time] the timestamp that was just stored
def finish!
  stamp = Time.now
  @finished_at = stamp
end
#record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ Object
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/qualspec/suite/runner.rb', line 262
#
# Appends one flattened evaluation record to @evaluations.
#
# @param candidate [Object] candidate identifier
# @param scenario [Object] scenario identifier
# @param criteria [Object] stored as-is; its size after Array() coercion is
#   stored as :criteria_count
# @param evaluation [#score, #pass?, #reasoning, #error] judged result whose
#   four readers are copied into the record
# @param variant [Object] variant identifier (defaults to 'default')
# @param temperature [Object, nil] sampling temperature, if any
# @param winner [Object, nil] stored as-is under :winner
# @return [Array<Hash>] @evaluations, including the new record
def record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil,
                      winner: nil)
  # Where the evaluation happened.
  coordinates = { candidate: candidate, scenario: scenario, variant: variant, temperature: temperature }
  # What was judged.
  judged = { criteria: criteria, criteria_count: Array(criteria).size }
  # What the judge concluded.
  outcome = {
    score: evaluation.score,
    pass: evaluation.pass?,
    reasoning: evaluation.reasoning,
    error: evaluation.error,
    winner: winner
  }

  @evaluations << coordinates.merge(judged).merge(outcome)
end
#record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/qualspec/suite/runner.rb', line 241
#
# Stores one candidate response and, when supplied, its duration and cost.
#
# The response lands in @responses under
# candidate -> scenario -> variant -> temperature, replacing any entry already
# at that position.
#
# NOTE(review): the timing key is "scenario/variant" without the temperature,
# so a later call for the same scenario and variant at another temperature
# overwrites the earlier duration — confirm this is intended.
#
# @param candidate [Object] candidate identifier
# @param scenario [Object] scenario identifier
# @param response [Object] stored under :content
# @param variant [Object] variant identifier (defaults to 'default')
# @param temperature [Object, nil] innermost key of the nested store
# @param duration_ms [Numeric, nil] recorded only when truthy
# @param cost [Numeric, nil] added to the candidate's total only when positive
# @param variant_data [Object, nil] stored under :variant_data
# @return [Numeric, nil] the candidate's new cost total, or nil when no
#   positive cost was given
def record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil,
                    cost: nil, variant_data: nil)
  # Walk (and lazily create) the nested store one level at a time.
  per_scenario = (@responses[candidate] ||= {})
  per_variant = (per_scenario[scenario] ||= {})
  per_temperature = (per_variant[variant] ||= {})
  per_temperature[temperature] = { content: response, variant_data: variant_data }

  (@timing[candidate] ||= {})["#{scenario}/#{variant}"] = duration_ms if duration_ms

  return nil unless cost&.positive?

  @costs[candidate] = (@costs[candidate] || 0.0) + cost
end
#scores_by_candidate ⇒ Object
Group scores by candidate, aggregating across all variants
283 284 285 286 287 288 289 290 291 292 293 294 295 296 |
# File 'lib/qualspec/suite/runner.rb', line 283
#
# Summarizes the recorded evaluations per candidate, pooling every scenario,
# variant and temperature for that candidate.
#
# @return [Hash{Object => Hash}] candidate => {passed:, total:, pass_rate:,
#   avg_score:}; pass_rate is a percentage rounded to one decimal and
#   avg_score is rounded to two decimals
def scores_by_candidate
  per_candidate = @evaluations.group_by { |entry| entry[:candidate] }

  per_candidate.each_with_object({}) do |(candidate, entries), summary|
    total = entries.size
    passed = entries.count { |entry| entry[:pass] }
    mean = total.zero? ? 0 : entries.sum { |entry| entry[:score] }.to_f / total
    rate = total.zero? ? 0 : (passed.to_f / total * 100).round(1)

    summary[candidate] = { passed: passed, total: total, pass_rate: rate, avg_score: mean.round(2) }
  end
end
#scores_by_scenario ⇒ Object
Detailed breakdown by scenario, then by candidate (scores averaged across that candidate's variants and temperatures)
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 |
# File 'lib/qualspec/suite/runner.rb', line 338
#
# Breaks the evaluations down by scenario and, within each scenario, by
# candidate. All of a candidate's evaluations for the scenario (whatever their
# variant or temperature) are pooled: the score is their mean rounded to two
# decimals and :pass is true only if every one passed. :reasoning, :variant
# and :temperature are copied from the first pooled evaluation.
#
# @return [Hash{Object => Hash{Object => Hash}}] scenario => candidate =>
#   {score:, pass:, reasoning:, variant:, temperature:}
def scores_by_scenario
  report = {}

  @evaluations.group_by { |entry| entry[:scenario] }.each do |scenario, scenario_entries|
    report[scenario] = scenario_entries.group_by { |entry| entry[:candidate] }.transform_values do |entries|
      lead = entries.first
      mean = entries.sum { |entry| entry[:score] }.to_f / entries.size

      {
        score: mean.round(2),
        pass: entries.all? { |entry| entry[:pass] },
        reasoning: lead[:reasoning],
        variant: lead[:variant],
        temperature: lead[:temperature]
      }
    end
  end

  report
end
#scores_by_scenario_variant ⇒ Object
Cross-tabulation: scenario × variant
356 357 358 359 360 361 362 363 364 365 366 367 368 |
# File 'lib/qualspec/suite/runner.rb', line 356
#
# Cross-tabulates evaluations by [scenario, variant] and, inside each cell, by
# candidate.
#
# NOTE(review): only the FIRST evaluation recorded for a given
# scenario/variant/candidate is reported; later ones (for example other
# temperatures) are ignored rather than averaged — confirm this is intended.
#
# @return [Hash{Array => Hash{Object => Hash}}] [scenario, variant] =>
#   candidate => {score:, pass:, reasoning:, temperature:}
def scores_by_scenario_variant
  reported_fields = %i[score pass reasoning temperature]
  cells = @evaluations.group_by { |entry| entry.values_at(:scenario, :variant) }

  cells.transform_values do |cell_entries|
    cell_entries.group_by { |entry| entry[:candidate] }.transform_values do |entries|
      head = entries.first
      # values_at yields nil for an absent key, matching plain [] lookups.
      reported_fields.zip(head.values_at(*reported_fields)).to_h
    end
  end
end
#scores_by_temperature ⇒ Object
Temperature sensitivity analysis
315 316 317 318 319 320 321 322 323 |
# File 'lib/qualspec/suite/runner.rb', line 315
#
# Temperature sensitivity analysis: pools every evaluation recorded at the
# same temperature (nil is its own bucket) and reports the mean score and the
# percentage that passed.
#
# @return [Hash{Object => Hash}] temperature => {avg_score:, pass_rate:};
#   avg_score is rounded to two decimals, pass_rate to one
def scores_by_temperature
  @evaluations.group_by { |entry| entry[:temperature] }.each_with_object({}) do |(temperature, entries), table|
    sample_size = entries.size
    score_total = entries.sum { |entry| entry[:score] }
    pass_total = entries.count { |entry| entry[:pass] }

    table[temperature] = {
      avg_score: (score_total.to_f / sample_size).round(2),
      pass_rate: (pass_total.to_f / sample_size * 100).round(1)
    }
  end
end
#scores_by_variant ⇒ Object
Group scores by variant
299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/qualspec/suite/runner.rb', line 299
#
# Summarizes the recorded evaluations per variant, pooling every candidate,
# scenario and temperature that used the variant.
#
# @return [Hash{Object => Hash}] variant => {passed:, total:, pass_rate:,
#   avg_score:}; pass_rate is a percentage rounded to one decimal and
#   avg_score is rounded to two decimals
def scores_by_variant
  summary = {}

  @evaluations.group_by { |entry| entry[:variant] }.each do |variant, entries|
    total = entries.size
    passed = entries.count { |entry| entry[:pass] }

    if total.zero?
      mean = 0
      rate = 0
    else
      mean = entries.sum { |entry| entry[:score] }.to_f / total
      rate = (passed.to_f / total * 100).round(1)
    end

    summary[variant] = { passed: passed, total: total, pass_rate: rate, avg_score: mean.round(2) }
  end

  summary
end
#timing_by_candidate ⇒ Object
325 326 327 328 329 330 331 332 333 334 335 |
# File 'lib/qualspec/suite/runner.rb', line 325
#
# Aggregates the recorded durations per candidate.
#
# The average is computed in floating point and then rounded to the nearest
# integer. Dividing two Integers directly would truncate first (1 ms + 2 ms
# over 2 entries would report 1 instead of 2).
#
# @return [Hash{Object => Hash}] candidate => {total_ms:, avg_ms:, count:};
#   avg_ms is an Integer, and is 0 for a candidate with no recorded durations
def timing_by_candidate
  @timing.transform_values do |scenarios|
    durations = scenarios.values
    total_ms = durations.sum
    avg_ms = durations.empty? ? 0 : (total_ms.to_f / durations.size).round

    {
      total_ms: total_ms,
      avg_ms: avg_ms,
      count: durations.size
    }
  end
end
#to_h ⇒ Object
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 |
# File 'lib/qualspec/suite/runner.rb', line 370
#
# Serializes the whole run into a plain Hash: metadata, the aggregate views
# produced by the scores_by_* / timing_by_candidate methods, and the raw
# evaluation and response stores.
#
# @evaluations, @responses and @costs are embedded by reference, not copied.
#
# NOTE(review): Time#iso8601 is provided by the `time` standard library on
# older Rubies — confirm the file requires it.
#
# @return [Hash] keys, in order: :suite_name, :started_at, :finished_at,
#   :summary, :timing, :costs, :by_scenario, :by_scenario_variant,
#   :evaluations, :responses. :finished_at is nil while @finished_at is nil.
def to_h
  summary = {
    by_candidate: scores_by_candidate,
    by_variant: scores_by_variant,
    by_temperature: scores_by_temperature
  }

  header = {
    suite_name: @suite_name,
    started_at: @started_at.iso8601,
    finished_at: @finished_at&.iso8601,
    summary: summary
  }

  header.merge(
    timing: timing_by_candidate,
    costs: @costs,
    by_scenario: scores_by_scenario,
    by_scenario_variant: scores_by_scenario_variant,
    evaluations: @evaluations,
    responses: @responses
  )
end