Class: Qualspec::Suite::Results

Inherits:

Object

Object
Qualspec::Suite::Results

show all

Defined in: lib/qualspec/suite/runner.rb

Overview

Results container with multi-dimensional support

Instance Attribute Summary collapse

#candidate_models ⇒ Object readonly

Returns the value of attribute candidate_models.
#costs ⇒ Object readonly

Returns the value of attribute costs.
#evaluations ⇒ Object readonly

Returns the value of attribute evaluations.
#finished_at ⇒ Object readonly

Returns the value of attribute finished_at.
#prompts ⇒ Object readonly

Returns the value of attribute prompts.
#responses ⇒ Object readonly

Returns the value of attribute responses.
#started_at ⇒ Object readonly

Returns the value of attribute started_at.
#suite_name ⇒ Object readonly

Returns the value of attribute suite_name.
#timing ⇒ Object readonly

Returns the value of attribute timing.

Instance Method Summary collapse

#finish! ⇒ Object
#initialize(suite_name) ⇒ Results constructor

A new instance of Results.
#record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ Object
#record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ Object
#scores_by_candidate ⇒ Object

Group scores by candidate, aggregating across all variants.
#scores_by_scenario ⇒ Object

Detailed breakdown by scenario, then by candidate (scores averaged across variants and temperatures).
#scores_by_scenario_variant ⇒ Object

Cross-tabulation: scenario × variant.
#scores_by_temperature ⇒ Object

Temperature sensitivity analysis.
#scores_by_variant ⇒ Object

Group scores by variant.
#timing_by_candidate ⇒ Object
#to_h ⇒ Object

Constructor Details

#initialize(suite_name) ⇒ `Results`

Returns a new instance of Results.

# File 'lib/qualspec/suite/runner.rb', line 229

# Builds an empty results container for one suite run.
#
# Every collection starts empty. @started_at is stamped with the current
# wall-clock time; @finished_at stays nil until #finish! is called.
#
# @param suite_name [Object] stored unchanged in @suite_name
def initialize(suite_name)
  @suite_name = suite_name
  @evaluations = []      # Flat list of Hashes appended by #record_evaluation
  @responses = {} # Nested: {candidate => {scenario => {variant => {temp => response}}}}
  @timing = {}           # {candidate => {"scenario/variant" => duration_ms}}, filled by #record_response
  @costs = {}            # {candidate => running cost total}, filled by #record_response
  @candidate_models = {} # {candidate_name => model_string}
  @prompts = {}          # {scenario_name => prompt_string}
  @started_at = Time.now
  @finished_at = nil
end

Instance Attribute Details

#candidate_models ⇒ `Object` (readonly)

Returns the value of attribute candidate_models.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @candidate_models, which #initialize creates as an empty Hash
# (annotated there as {candidate_name => model_string}).
def candidate_models
  @candidate_models
end

#costs ⇒ `Object` (readonly)

Returns the value of attribute costs.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @costs: a Hash keyed by candidate whose values are the running
# totals that #record_response accumulates from positive `cost` arguments.
def costs
  @costs
end

#evaluations ⇒ `Object` (readonly)

Returns the value of attribute evaluations.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @evaluations: the Array of per-evaluation Hashes that
# #record_evaluation appends to, in insertion order.
def evaluations
  @evaluations
end

#finished_at ⇒ `Object` (readonly)

Returns the value of attribute finished_at.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @finished_at: nil after construction, set to Time.now by #finish!.
def finished_at
  @finished_at
end

#prompts ⇒ `Object` (readonly)

Returns the value of attribute prompts.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @prompts, which #initialize creates as an empty Hash
# (annotated there as {scenario_name => prompt_string}).
def prompts
  @prompts
end

#responses ⇒ `Object` (readonly)

Returns the value of attribute responses.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @responses: the nested Hash built by #record_response, shaped
# {candidate => {scenario => {variant => {temperature => {content:, variant_data:}}}}}.
def responses
  @responses
end

#started_at ⇒ `Object` (readonly)

Returns the value of attribute started_at.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @started_at: the Time captured by #initialize.
def started_at
  @started_at
end

#suite_name ⇒ `Object` (readonly)

Returns the value of attribute suite_name.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @suite_name: the value passed to #initialize, unchanged.
def suite_name
  @suite_name
end

#timing ⇒ `Object` (readonly)

Returns the value of attribute timing.



226
227
228

# File 'lib/qualspec/suite/runner.rb', line 226

# Reader for @timing: {candidate => {"scenario/variant" => duration_ms}},
# populated by #record_response whenever a duration is supplied.
def timing
  @timing
end

Instance Method Details

#finish! ⇒ `Object`



278
279
280

# File 'lib/qualspec/suite/runner.rb', line 278

# Marks the run as finished by stamping @finished_at with the current time.
# Calling it again overwrites the previous stamp.
#
# @return [Time] the timestamp just stored
def finish!
  @finished_at = Time.now
end

#record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ `Object`

# File 'lib/qualspec/suite/runner.rb', line 262

# Appends one evaluation outcome to @evaluations as a flat Hash.
#
# The score, pass flag, reasoning and error are copied out of `evaluation`
# at call time, so later changes to that object are not reflected.
#
# @param candidate [Object] stored under :candidate
# @param scenario [Object] stored under :scenario
# @param criteria [Object] stored under :criteria; Array(criteria).size goes under :criteria_count
# @param evaluation [#score, #pass?, #reasoning, #error] source of the outcome fields
# @param variant [Object] stored under :variant (defaults to 'default')
# @param temperature [Object, nil] stored under :temperature
# @param winner [Object, nil] stored under :winner
# @return [Array<Hash>] @evaluations, including the new entry
def record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil)
  entry = { candidate: candidate, scenario: scenario, variant: variant, temperature: temperature }
  entry[:criteria] = criteria
  entry[:criteria_count] = Array(criteria).size
  entry[:score] = evaluation.score
  entry[:pass] = evaluation.pass?
  entry[:reasoning] = evaluation.reasoning
  entry[:error] = evaluation.error
  entry[:winner] = winner

  @evaluations.push(entry)
end

#record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ `Object`

# File 'lib/qualspec/suite/runner.rb', line 241

# Stores one candidate response and, when supplied, its duration and cost.
#
# The response lands in @responses at
# [candidate][scenario][variant][temperature] as {content:, variant_data:},
# replacing any earlier entry at the same coordinates.
#
# NOTE(review): the timing key is "scenario/variant" and does not include the
# temperature, so a second run of the same scenario/variant at another
# temperature overwrites the earlier duration. Confirm this is intended.
#
# @param candidate [Object] first-level key
# @param scenario [Object] second-level key
# @param response [Object] stored under :content
# @param variant [Object] third-level key (defaults to 'default')
# @param temperature [Object, nil] fourth-level key
# @param duration_ms [Numeric, nil] recorded in @timing only when truthy
# @param cost [Numeric, nil] added to @costs[candidate] only when positive
# @param variant_data [Object, nil] stored under :variant_data
# @return [Numeric, nil] the candidate's new cost total, or nil when no cost was added
def record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil)
  per_scenario = (@responses[candidate] ||= {})
  per_variant = (per_scenario[scenario] ||= {})
  per_temperature = (per_variant[variant] ||= {})
  per_temperature[temperature] = { content: response, variant_data: variant_data }

  (@timing[candidate] ||= {})["#{scenario}/#{variant}"] = duration_ms if duration_ms

  if cost&.positive?
    # Totals always start from a Float zero so the running sum is a Float.
    @costs[candidate] = (@costs[candidate] || 0.0) + cost
  end
end

#scores_by_candidate ⇒ `Object`

Group scores by candidate, aggregating across all variants

# File 'lib/qualspec/suite/runner.rb', line 283

# Summarises recorded evaluations per candidate, across every scenario,
# variant and temperature.
#
# @return [Hash{Object => Hash}] candidate => {passed:, total:, pass_rate:, avg_score:},
#   where pass_rate is a percentage rounded to 1 decimal and avg_score is
#   rounded to 2 decimals
def scores_by_candidate
  grouped = @evaluations.group_by { |entry| entry[:candidate] }

  grouped.transform_values do |rows|
    count = rows.size
    wins = rows.count { |entry| entry[:pass] }
    mean = count.positive? ? rows.sum { |entry| entry[:score] }.to_f / count : 0
    rate = count.positive? ? (wins.to_f / count * 100).round(1) : 0

    { passed: wins, total: count, pass_rate: rate, avg_score: mean.round(2) }
  end
end

#scores_by_scenario ⇒ `Object`

Detailed breakdown by scenario, then by candidate (scores averaged across variants and temperatures)

# File 'lib/qualspec/suite/runner.rb', line 338

# Breaks evaluations down by scenario and then by candidate.
#
# For each scenario/candidate pair the score is the mean of all matching
# evaluations (rounded to 2 decimals) and :pass is true only if every one
# passed. :reasoning, :variant and :temperature are taken from the first
# recorded evaluation of the pair.
#
# @return [Hash{Object => Hash{Object => Hash}}] scenario => candidate => summary
def scores_by_scenario
  by_scenario = @evaluations.group_by { |entry| entry[:scenario] }

  by_scenario.transform_values do |scenario_rows|
    scenario_rows.group_by { |entry| entry[:candidate] }.transform_values do |rows|
      head = rows.first
      score_sum = rows.sum { |entry| entry[:score] }

      {
        score: (score_sum.to_f / rows.size).round(2),
        pass: rows.none? { |entry| !entry[:pass] },
        reasoning: head[:reasoning],
        variant: head[:variant],
        temperature: head[:temperature]
      }
    end
  end
end

#scores_by_scenario_variant ⇒ `Object`

Cross-tabulation: scenario × variant

# File 'lib/qualspec/suite/runner.rb', line 356

# Cross-tabulates evaluations by [scenario, variant] and then by candidate.
#
# Each cell reports the FIRST recorded evaluation for that candidate only;
# later evaluations in the same cell (for example other temperatures) are
# not aggregated.
#
# @return [Hash{Array => Hash{Object => Hash}}]
#   [scenario, variant] => candidate => {score:, pass:, reasoning:, temperature:}
def scores_by_scenario_variant
  cells = @evaluations.group_by { |entry| entry.values_at(:scenario, :variant) }

  cells.transform_values do |cell_rows|
    cell_rows.group_by { |entry| entry[:candidate] }.transform_values do |rows|
      head = rows.first
      %i[score pass reasoning temperature].each_with_object({}) do |field, picked|
        picked[field] = head[field]
      end
    end
  end
end

#scores_by_temperature ⇒ `Object`

Temperature sensitivity analysis

# File 'lib/qualspec/suite/runner.rb', line 315

# Summarises evaluations per temperature setting.
#
# Evaluations recorded without a temperature are grouped under the nil key.
#
# @return [Hash{Object => Hash}] temperature => {avg_score:, pass_rate:},
#   with avg_score rounded to 2 decimals and pass_rate a percentage rounded
#   to 1 decimal
def scores_by_temperature
  @evaluations.group_by { |entry| entry[:temperature] }.transform_values do |rows|
    count = rows.size
    score_total = rows.sum { |entry| entry[:score] }
    pass_total = rows.count { |entry| entry[:pass] }

    {
      avg_score: (score_total.to_f / count).round(2),
      pass_rate: (pass_total.to_f / count * 100).round(1)
    }
  end
end

#scores_by_variant ⇒ `Object`

Group scores by variant

# File 'lib/qualspec/suite/runner.rb', line 299

# Summarises recorded evaluations per variant, across every candidate,
# scenario and temperature.
#
# @return [Hash{Object => Hash}] variant => {passed:, total:, pass_rate:, avg_score:},
#   where pass_rate is a percentage rounded to 1 decimal and avg_score is
#   rounded to 2 decimals
def scores_by_variant
  @evaluations.group_by { |entry| entry[:variant] }.each_with_object({}) do |(variant, rows), summary|
    count = rows.size
    wins = rows.count { |entry| entry[:pass] }

    if count.positive?
      mean = rows.sum { |entry| entry[:score] }.to_f / count
      rate = (wins.to_f / count * 100).round(1)
    else
      mean = 0
      rate = 0
    end

    summary[variant] = { passed: wins, total: count, pass_rate: rate, avg_score: mean.round(2) }
  end
end

#timing_by_candidate ⇒ `Object`

# File 'lib/qualspec/suite/runner.rb', line 325

# Aggregates recorded durations per candidate.
#
# The average is computed in floating point and then rounded to the nearest
# integer. Dividing two Integers directly would truncate first (e.g. 2 ms and
# 3 ms would average to 2 instead of 3), making the rounding a no-op.
#
# @return [Hash{Object => Hash}] candidate => {total_ms:, avg_ms:, count:},
#   where count is the number of timed "scenario/variant" entries and avg_ms
#   is 0 when a candidate has none
def timing_by_candidate
  @timing.transform_values do |scenarios|
    count = scenarios.size
    total_ms = scenarios.values.sum
    avg_ms = count.zero? ? 0 : (total_ms.to_f / count).round

    { total_ms: total_ms, avg_ms: avg_ms, count: count }
  end
end

#to_h ⇒ `Object`

# File 'lib/qualspec/suite/runner.rb', line 370

# Serialises the whole run into one nested Hash.
#
# Timestamps are rendered with #iso8601 (:finished_at is nil while the run is
# unfinished). Aggregates come from the scores_by_* and timing_by_candidate
# methods; :costs, :evaluations and :responses expose the live internal
# collections, not copies.
#
# @return [Hash] keys, in order: :suite_name, :started_at, :finished_at,
#   :summary, :timing, :costs, :by_scenario, :by_scenario_variant,
#   :evaluations, :responses
def to_h
  started = @started_at.iso8601
  finished = @finished_at.nil? ? nil : @finished_at.iso8601

  summary = {
    by_candidate: scores_by_candidate,
    by_variant: scores_by_variant,
    by_temperature: scores_by_temperature
  }

  report = { suite_name: @suite_name, started_at: started, finished_at: finished, summary: summary }
  report[:timing] = timing_by_candidate
  report[:costs] = @costs
  report[:by_scenario] = scores_by_scenario
  report[:by_scenario_variant] = scores_by_scenario_variant
  report[:evaluations] = @evaluations
  report[:responses] = @responses
  report
end

Class: Qualspec::Suite::Results

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(suite_name) ⇒ Results

Instance Attribute Details

#candidate_models ⇒ Object (readonly)

#costs ⇒ Object (readonly)

#evaluations ⇒ Object (readonly)

#finished_at ⇒ Object (readonly)

#prompts ⇒ Object (readonly)

#responses ⇒ Object (readonly)

#started_at ⇒ Object (readonly)

#suite_name ⇒ Object (readonly)

#timing ⇒ Object (readonly)

Instance Method Details

#finish! ⇒ Object

#record_evaluation(candidate:, scenario:, criteria:, evaluation:, variant: 'default', temperature: nil, winner: nil) ⇒ Object

#record_response(candidate:, scenario:, response:, variant: 'default', temperature: nil, duration_ms: nil, cost: nil, variant_data: nil) ⇒ Object

#scores_by_candidate ⇒ Object

#scores_by_scenario ⇒ Object

#scores_by_scenario_variant ⇒ Object

#scores_by_temperature ⇒ Object

#scores_by_variant ⇒ Object

#timing_by_candidate ⇒ Object

#to_h ⇒ Object