Class: Qualspec::Suite::Results

Inherits:
Object
  • Object
show all
Defined in:
lib/qualspec/suite/runner.rb

Overview

Results container with multi-dimensional support

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(suite_name) ⇒ Results

Returns a new instance of Results.



229
230
231
232
233
234
235
236
237
238
239
# File 'lib/qualspec/suite/runner.rb', line 229

# Builds an empty results container for one suite run.
#
# Every collection starts out empty, the start timestamp is taken at
# construction time, and the finish timestamp is left nil.
#
# @param suite_name [Object] stored as-is in @suite_name
def initialize(suite_name)
  @suite_name = suite_name

  # Flat list of evaluation records.
  @evaluations = []
  # Nested lookup: candidate => scenario => variant => temperature => response
  @responses = {}
  # Per-candidate bookkeeping for durations and spend.
  @timing = {}
  @costs = {}
  # candidate_name => model_string
  @candidate_models = {}
  # scenario_name => prompt_string
  @prompts = {}

  # Run clock: started now, not finished yet.
  @started_at = Time.now
  @finished_at = nil
end

Instance Attribute Details

#candidate_models ⇒ Object (readonly)

Returns the value of attribute candidate_models.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the candidate model table.
#
# The constructor creates it as an empty Hash and annotates it as
# {candidate_name => model_string}.
#
# @return [Hash] the object held in @candidate_models (not a copy)
def candidate_models
  @candidate_models
end

#costs ⇒ Object (readonly)

Returns the value of attribute costs.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the per-candidate cost totals.
#
# @return [Hash] the object held in @costs (not a copy); #record_response
#   adds each positive cost to the entry for its candidate
def costs
  @costs
end

#evaluations ⇒ Object (readonly)

Returns the value of attribute evaluations.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the flat list of recorded evaluations.
#
# @return [Array<Hash>] the object held in @evaluations (not a copy);
#   #record_evaluation appends one Hash per call
def evaluations
  @evaluations
end

#finished_at ⇒ Object (readonly)

Returns the value of attribute finished_at.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the completion timestamp.
#
# @return [Time, nil] nil as set by the constructor, or the Time stored
#   by #finish!
def finished_at
  @finished_at
end

#prompts ⇒ Object (readonly)

Returns the value of attribute prompts.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the prompt table.
#
# The constructor creates it as an empty Hash and annotates it as
# {scenario_name => prompt_string}.
#
# @return [Hash] the object held in @prompts (not a copy)
def prompts
  @prompts
end

#responses ⇒ Object (readonly)

Returns the value of attribute responses.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the nested response store.
#
# @return [Hash] the object held in @responses (not a copy);
#   #record_response fills it as
#   candidate => scenario => variant => temperature => {content:, variant_data:}
def responses
  @responses
end

#started_at ⇒ Object (readonly)

Returns the value of attribute started_at.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the start timestamp.
#
# @return [Time] the object held in @started_at, which the constructor
#   sets from Time.now
def started_at
  @started_at
end

#suite_name ⇒ Object (readonly)

Returns the value of attribute suite_name.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the suite name.
#
# @return [Object] whatever was passed to the constructor as suite_name
def suite_name
  @suite_name
end

#timing ⇒ Object (readonly)

Returns the value of attribute timing.



226
227
228
# File 'lib/qualspec/suite/runner.rb', line 226

# Read-only access to the raw timing table.
#
# @return [Hash] the object held in @timing (not a copy); #record_response
#   fills it as candidate => {"scenario/variant" => duration_ms}
def timing
  @timing
end

Instance Method Details

#finish! ⇒ Object



278
279
280
# File 'lib/qualspec/suite/runner.rb', line 278

# Marks the run as finished by stamping the current time.
#
# Each call overwrites any previously stored finish time.
#
# @return [Time] the timestamp just stored in @finished_at
def finish!
  @finished_at = Time.now
end

#record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/qualspec/suite/runner.rb', line 262

# Appends one evaluation outcome to the flat evaluations list.
#
# The stored record combines the run dimensions (candidate, scenario,
# variant, temperature) with the verdict read from +evaluation+.
#
# @param candidate [Object] candidate identifier, stored as-is
# @param scenario [Object] scenario identifier, stored as-is
# @param criteria [Object] criteria stored as-is; Array(criteria).size is
#   stored alongside as :criteria_count
# @param evaluation [#score, #pass?, #reasoning, #error] verdict object;
#   only these four readers are called
# @param variant [Object] variant identifier (defaults to 'default')
# @param temperature [Object, nil] temperature of the run, if any
# @param winner [Object, nil] stored as-is under :winner
# @return [Array<Hash>] the evaluations array after the append
def record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil)
  dimensions = { candidate: candidate, scenario: scenario, variant: variant, temperature: temperature }
  verdict = {
    criteria: criteria,
    criteria_count: Array(criteria).size,
    score: evaluation.score,
    pass: evaluation.pass?,
    reasoning: evaluation.reasoning,
    error: evaluation.error,
    winner: winner
  }

  # merge keeps the dimension keys first, so the key order of the stored record is unchanged.
  @evaluations.push(dimensions.merge(verdict))
end

#record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/qualspec/suite/runner.rb', line 241

# Stores one candidate response plus its optional duration and cost.
#
# The response goes into the nested @responses structure under
# candidate => scenario => variant => temperature. A duration is stored
# whenever duration_ms is truthy (0 counts), and a cost is accumulated only
# when it is strictly positive.
#
# NOTE(review): the timing key is built from scenario and variant only, so a
# later call for the same pair at a different temperature replaces the earlier
# duration, while responses and costs keep both — confirm this is intended.
#
# @param candidate [Object] candidate identifier
# @param scenario [Object] scenario identifier
# @param response [Object] stored under :content
# @param variant [Object] variant identifier (defaults to 'default')
# @param temperature [Object, nil] innermost key of the nested structure
# @param duration_ms [Numeric, nil] duration to store, skipped when nil/false
# @param cost [Numeric, nil] cost to add to the candidate's running total
# @param variant_data [Object, nil] stored under :variant_data
# @return [Numeric, nil] the candidate's updated cost total, or nil when no
#   positive cost was given
def record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil)
  # Walk down the nested hash, creating each missing level on the way.
  per_scenario = (@responses[candidate] ||= {})
  per_variant = (per_scenario[scenario] ||= {})
  per_temperature = (per_variant[variant] ||= {})
  per_temperature[temperature] = { content: response, variant_data: variant_data }

  (@timing[candidate] ||= {})["#{scenario}/#{variant}"] = duration_ms if duration_ms

  return unless cost&.positive?

  @costs[candidate] = (@costs[candidate] || 0.0) + cost
end

#scores_by_candidate ⇒ Object

Group scores by candidate, aggregating across all variants



283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/qualspec/suite/runner.rb', line 283

# Summarises all recorded evaluations per candidate, across every
# scenario, variant and temperature.
#
# @return [Hash{Object => Hash}] candidate => {passed:, total:, pass_rate:, avg_score:}
#   where pass_rate is a percentage rounded to one decimal and avg_score is
#   the mean score rounded to two decimals
def scores_by_candidate
  grouped = @evaluations.group_by { |entry| entry[:candidate] }

  grouped.transform_values do |entries|
    count = entries.size
    pass_count = entries.count { |entry| entry[:pass] }
    mean = count.zero? ? 0 : entries.sum { |entry| entry[:score] }.to_f / count
    rate = count.zero? ? 0 : (pass_count.to_f / count * 100).round(1)

    { passed: pass_count, total: count, pass_rate: rate, avg_score: mean.round(2) }
  end
end

#scores_by_scenario ⇒ Object

Detailed breakdown by scenario + variant



338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/qualspec/suite/runner.rb', line 338

# Breaks the evaluations down by scenario and then by candidate.
#
# Within each scenario/candidate cell the score is the mean of all matching
# evaluations (rounded to two decimals) and :pass is true only when every one
# of them passed. Reasoning, variant and temperature are taken from the first
# evaluation recorded for that cell.
#
# @return [Hash{Object => Hash{Object => Hash}}] scenario => candidate =>
#   {score:, pass:, reasoning:, variant:, temperature:}
def scores_by_scenario
  per_scenario = @evaluations.group_by { |entry| entry[:scenario] }

  per_scenario.transform_values do |scenario_entries|
    per_candidate = scenario_entries.group_by { |entry| entry[:candidate] }

    per_candidate.transform_values do |entries|
      score_sum = entries.sum { |entry| entry[:score] }
      representative = entries.first

      {
        score: (score_sum.to_f / entries.size).round(2),
        pass: entries.all? { |entry| entry[:pass] },
        reasoning: representative[:reasoning],
        variant: representative[:variant],
        temperature: representative[:temperature]
      }
    end
  end
end

#scores_by_scenario_variant ⇒ Object

Cross-tabulation: scenario × variant



356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/qualspec/suite/runner.rb', line 356

# Cross-tabulates evaluations by [scenario, variant] and then by candidate.
#
# A cell holding a single evaluation reports that evaluation's score and pass
# flag unchanged. A cell holding several evaluations (for example the same
# scenario/variant run at several temperatures) is aggregated the same way
# #scores_by_scenario does it: mean score rounded to two decimals, and :pass
# only when every evaluation passed. Previously only the first evaluation was
# reported and the rest were silently dropped.
#
# Reasoning and temperature always come from the first evaluation recorded
# for the cell.
#
# @return [Hash{Array => Hash{Object => Hash}}] [scenario, variant] =>
#   candidate => {score:, pass:, reasoning:, temperature:}
def scores_by_scenario_variant
  @evaluations.group_by { |e| [e[:scenario], e[:variant]] }.transform_values do |evals|
    evals.group_by { |e| e[:candidate] }.transform_values do |candidate_evals|
      first = candidate_evals.first
      several = candidate_evals.size > 1

      # Single-evaluation cells keep the stored values verbatim so existing output is unchanged.
      score = several ? (candidate_evals.sum { |e| e[:score] }.to_f / candidate_evals.size).round(2) : first[:score]
      pass = several ? candidate_evals.all? { |e| e[:pass] } : first[:pass]

      {
        score: score,
        pass: pass,
        reasoning: first[:reasoning],
        temperature: first[:temperature]
      }
    end
  end
end

#scores_by_temperature ⇒ Object

Temperature sensitivity analysis



315
316
317
318
319
320
321
322
323
# File 'lib/qualspec/suite/runner.rb', line 315

# Summarises evaluations per temperature value (nil is a group of its own).
#
# @return [Hash{Object => Hash}] temperature => {avg_score:, pass_rate:}
#   with avg_score rounded to two decimals and pass_rate a percentage
#   rounded to one decimal
def scores_by_temperature
  @evaluations.group_by { |entry| entry[:temperature] }.transform_values do |entries|
    count = entries.size
    score_sum = entries.sum { |entry| entry[:score] }
    pass_count = entries.count { |entry| entry[:pass] }

    {
      avg_score: (score_sum.to_f / count).round(2),
      pass_rate: (pass_count.to_f / count * 100).round(1)
    }
  end
end

#scores_by_variant ⇒ Object

Group scores by variant



299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/qualspec/suite/runner.rb', line 299

# Summarises all recorded evaluations per variant.
#
# @return [Hash{Object => Hash}] variant => {passed:, total:, pass_rate:, avg_score:}
#   where pass_rate is a percentage rounded to one decimal and avg_score is
#   the mean score rounded to two decimals
def scores_by_variant
  per_variant = @evaluations.group_by { |entry| entry[:variant] }

  per_variant.transform_values do |entries|
    pass_count = entries.count { |entry| entry[:pass] }
    count = entries.size

    if count.positive?
      mean = entries.sum { |entry| entry[:score] }.to_f / count
      rate = (pass_count.to_f / count * 100).round(1)
    else
      mean = 0
      rate = 0
    end

    { passed: pass_count, total: count, pass_rate: rate, avg_score: mean.round(2) }
  end
end

#timing_by_candidate ⇒ Object



325
326
327
328
329
330
331
332
333
334
335
# File 'lib/qualspec/suite/runner.rb', line 325

# Summarises the recorded durations per candidate.
#
# The average is computed in floating point and then rounded to the nearest
# integer. Dividing two Integers would truncate first (durations 1, 2, 2 would
# average to 1 instead of 2), which made the subsequent rounding a no-op.
#
# @return [Hash{Object => Hash}] candidate => {total_ms:, avg_ms:, count:}
#   where total_ms is the sum of the stored durations, avg_ms the rounded
#   mean (0 when nothing is stored) and count the number of stored durations
def timing_by_candidate
  @timing.transform_values do |scenarios|
    count = scenarios.size
    total_ms = scenarios.values.sum
    avg_ms = count.zero? ? 0 : (total_ms.to_f / count).round

    {
      total_ms: total_ms,
      avg_ms: avg_ms,
      count: count
    }
  end
end

#to_h ⇒ Object



370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# File 'lib/qualspec/suite/runner.rb', line 370

# Assembles the full results as a single Hash.
#
# Timestamps are rendered with #iso8601 (finished_at stays nil while the run
# has not been finished). The aggregate sections are computed on each call by
# the scores_by_* and timing_by_candidate methods, whereas :costs,
# :evaluations and :responses hand out the internal collections themselves
# rather than copies.
#
# @return [Hash] keys, in order: :suite_name, :started_at, :finished_at,
#   :summary, :timing, :costs, :by_scenario, :by_scenario_variant,
#   :evaluations, :responses
def to_h
  summary = {
    by_candidate: scores_by_candidate,
    by_variant: scores_by_variant,
    by_temperature: scores_by_temperature
  }

  {
    suite_name: @suite_name,
    started_at: @started_at.iso8601,
    finished_at: @finished_at&.iso8601,
    summary: summary,
    timing: timing_by_candidate,
    costs: @costs,
    by_scenario: scores_by_scenario,
    by_scenario_variant: scores_by_scenario_variant,
    evaluations: @evaluations,
    responses: @responses
  }
end