Module: Qualspec::RSpec::Helpers

Defined in:
lib/qualspec/rspec/helpers.rb

Overview

Helper methods for RSpec tests. Include this module in your spec_helper.rb:

RSpec.configure do |config|
  config.include Qualspec::RSpec::Helpers
end

Instance Method Summary

Instance Method Details

#qualspec_compare(responses, criterion, context: nil, threshold: nil) ⇒ ComparisonResult

Compare multiple responses against criteria

Examples:

responses = {
  gpt4: gpt4_response,
  claude: claude_response
}
result = qualspec_compare(responses, "explains clearly")
expect(result.winner).to eq(:claude)
expect(result[:gpt4].score).to be >= 7

Parameters:

  • responses (Hash)

    Hash of name => response

  • criterion (String, Array<String>)

    Evaluation criteria

  • context (String, nil) (defaults to: nil)

    Additional context

  • threshold (Integer, nil) (defaults to: nil)

    Pass threshold (when nil, falls back to Qualspec::RSpec.configuration.default_threshold; documented default: 7)

Returns:

  • (ComparisonResult)



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/qualspec/rspec/helpers.rb', line 75

# Judges several named responses against the same criteria in a single
# comparison call and wraps the outcome in a ComparisonResult.
#
# @param responses [Hash] name => response; every value is converted with
#   #to_s before being handed to the judge
# @param criterion [String, Array<String>] one criterion or a list of them;
#   multiple criteria are joined with '; ' for display
# @param context [String, nil] optional context forwarded to the judge
# @param threshold [Integer, nil] pass threshold; when nil, falls back to
#   Qualspec::RSpec.configuration.default_threshold
# @return [ComparisonResult] built from the output of wrap_comparison_results
def qualspec_compare(responses, criterion, context: nil, threshold: nil)
  pass_mark = threshold || Qualspec::RSpec.configuration.default_threshold
  criteria = Array(criterion)
  label = criteria.join('; ')

  # The judge only ever sees stringified responses; the originals are kept
  # for the wrapping step below.
  judged = Qualspec.judge.evaluate_comparison(
    responses: responses.transform_values(&:to_s), criteria: criteria,
    context: context, pass_threshold: pass_mark
  )

  wrapped = wrap_comparison_results(judged, responses, label, pass_mark)
  ComparisonResult.new(wrapped, criterion: label)
end

#qualspec_evaluate(response, criterion = nil, rubric: nil, context: nil, threshold: nil) ⇒ EvaluationResult

Evaluate a response against a criterion or rubric

Examples:

With inline criterion

result = qualspec_evaluate(response, "responds in a friendly manner")
expect(result).to be_passing

With rubric

result = qualspec_evaluate(response, rubric: :tool_calling)
expect(result.score).to be >= 8

With context

result = qualspec_evaluate(response, "summarizes accurately",
                           context: "User provided a news article")
expect(result).to be_passing

Parameters:

  • response (String)

    The response to evaluate

  • criterion (String, nil) (defaults to: nil)

    The evaluation criterion (optional if using rubric)

  • rubric (Symbol, nil) (defaults to: nil)

    A pre-defined rubric name

  • context (String, nil) (defaults to: nil)

    Additional context for the judge

  • threshold (Integer, nil) (defaults to: nil)

    Pass threshold (when nil, falls back to Qualspec::RSpec.configuration.default_threshold; documented default: 7)

Returns:

  • (EvaluationResult)

Raises:

  • (ArgumentError)

    If neither criterion nor rubric is given


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/qualspec/rspec/helpers.rb', line 37

# Asks the judge to evaluate a single response against either an inline
# criterion or a named rubric, and wraps the verdict in an EvaluationResult.
#
# @param response [#to_s] the response under test; the judge receives
#   response.to_s while the result keeps the original object
# @param criterion [String, nil] inline criterion (optional when rubric: is given)
# @param rubric [Symbol, nil] rubric name, passed to resolve_criterion
# @param context [String, nil] optional context forwarded to the judge
# @param threshold [Integer, nil] pass threshold; when nil, falls back to
#   Qualspec::RSpec.configuration.default_threshold
# @return [EvaluationResult]
# @raise [ArgumentError] when both criterion and rubric are nil
def qualspec_evaluate(response, criterion = nil, rubric: nil, context: nil, threshold: nil)
  # Only nil counts as "not provided" here, hence compact rather than a truthiness check.
  raise ArgumentError, 'Must provide either criterion or rubric:' if [criterion, rubric].compact.empty?

  pass_mark = threshold || Qualspec::RSpec.configuration.default_threshold
  judge_text, label = resolve_criterion(criterion, rubric)

  verdict = Qualspec.judge.evaluate(
    response: response.to_s, criterion: judge_text,
    context: context, pass_threshold: pass_mark
  )

  EvaluationResult.new(verdict, criterion: label, response: response, threshold: pass_mark)
end

#skip_without_qualspec_api ⇒ Object

Helper to skip the current test if the Qualspec API is unavailable



120
121
122
123
124
125
126
127
128
# File 'lib/qualspec/rspec/helpers.rb', line 120

# Probes the Qualspec API by sending a one-message chat request ('test') to
# the configured judge model. If the client raises
# Qualspec::Client::RequestError, the current example is skipped with the
# error message; any other exception propagates.
#
# @return [Object] whatever the client's chat call returns on success
def skip_without_qualspec_api
  client = Qualspec.client
  ping = [{ role: 'user', content: 'test' }]
  client.chat(model: Qualspec.configuration.judge_model, messages: ping, json_mode: false)
rescue Qualspec::Client::RequestError => error
  skip "Qualspec API unavailable: #{error.message}"
end

#with_qualspec_cassette(name, record: nil) { ... } ⇒ Object

Wrap a block with VCR cassette recording/playback

Examples:

with_qualspec_cassette("my_test") do
  result = qualspec_evaluate(response, "is helpful")
  expect(result).to be_passing
end

Parameters:

  • name (String)

    Cassette name

  • record (Symbol, nil) (defaults to: nil)

    Recording mode (:new_episodes, :none, :all); when nil, Qualspec::RSpec.configuration.record_mode is used

Yields:

  • Block to execute with cassette



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/qualspec/rspec/helpers.rb', line 103

# Runs the given block inside a cassette named +name+.
#
# The recorder is first set up with the cassette directory from the RSpec
# configuration. A record mode of :none goes through
# Qualspec::Recorder.playback; every other mode is passed to
# VCR.use_cassette as its record: option.
#
# @param name [String] cassette name
# @param record [Symbol, nil] recording mode; when nil, falls back to
#   Qualspec::RSpec.configuration.record_mode
# @yield the block forwarded to the recorder / VCR
# @return [Object] whatever the playback or use_cassette call returns
def with_qualspec_cassette(name, record: nil, &block)
  mode = record || Qualspec::RSpec.configuration.record_mode

  # Point the recorder at the RSpec-specific cassette directory before use.
  Qualspec::Recorder.setup(cassette_dir: Qualspec::RSpec.configuration.vcr_cassette_dir)

  return Qualspec::Recorder.playback(name, &block) if mode == :none

  VCR.use_cassette(name, record: mode, &block)
end