Module: Ask::Eval::DSL

Defined in: lib/ask/eval/dsl.rb

Overview

Minitest DSL mixin. Include this in your test class to get all ask-eval assertion methods.

Examples:

# Example eval test: mixes the ask-eval assertions into a Minitest test case.
class MyEvalTest < Minitest::Test
  include Ask::Eval::DSL

  # Plain Minitest discovers tests by the `test_` method-name prefix; the
  # `test "..." do` block macro is an ActiveSupport::TestCase extension and is
  # not available on a bare Minitest::Test.
  def test_response_quality
    assert_faithful my_response, context: docs
    assert_contains my_response, "policy"
  end
end

Instance Method Summary

#assert_contains(output, value, msg = nil) ⇒ Object

Assert the output contains the given substring.
#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ Object

Assert the response matches the expected output.
#assert_email(output, msg = nil) ⇒ Object

Assert the output is a valid email address.
#assert_ends_with(output, suffix, msg = nil) ⇒ Object

Assert the output ends with the given suffix.
#assert_equals(output, value, msg = nil) ⇒ Object

Assert the output equals the given value exactly.
#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

Assert the response is faithful to the provided context.
#assert_json(output, msg = nil) ⇒ Object

Assert the output is valid JSON.
#assert_max_length(output, max, msg = nil) ⇒ Object

Assert the output has at most `max` characters.
#assert_max_tokens(output, max, msg = nil) ⇒ Object

Assert the output has at most `max` tokens.
#assert_min_length(output, min, msg = nil) ⇒ Object

Assert the output has at least `min` characters.
#assert_not_contains(output, value, msg = nil) ⇒ Object

Assert the output does NOT contain the given substring.
#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

Assert the response does NOT hallucinate (all claims are in context).
#assert_regex(output, pattern, msg = nil) ⇒ Object

Assert the output matches the given regex pattern.
#assert_starts_with(output, prefix, msg = nil) ⇒ Object

Assert the output starts with the given prefix.
#assert_url(output, msg = nil) ⇒ Object

Assert the output is a valid URL.
#refute_bias(output, model: nil, msg: nil) ⇒ Object

Refute (assert NOT) the response shows bias.
#refute_toxicity(output, model: nil, msg: nil) ⇒ Object

Refute (assert NOT) the response is toxic.

Instance Method Details

#assert_contains(output, value, msg = nil) ⇒ `Object`

Assert the output contains the given substring.

# File 'lib/ask/eval/dsl.rb', line 21

# Assert the output contains the given substring.
#
# Delegates the check to Assertions::Deterministic.contains and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param value the substring to look for
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_contains(output, value, msg = nil)
  outcome = Assertions::Deterministic.contains(output, value: value)
  passed = outcome[:passed]
  failure_message = msg || outcome[:reason]
  assert(passed, failure_message)
end

#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ `Object`

Assert the response matches the expected output.

Parameters:

output (String) —

the LLM response
expected (String) —

expected/reference output
model (Object, nil) (defaults to: nil) —

judge model
msg (String, nil) (defaults to: nil) —

custom failure message

# File 'lib/ask/eval/dsl.rb', line 158

# Assert the response matches the expected output, as decided by the
# Correctness judge.
#
# The judge's cost is logged (when cost tracking is enabled) before the
# assertion runs, so a failing check is still accounted for.
#
# @param output [String] the LLM response
# @param expected [String] expected/reference output
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] the result of `assert`
def assert_correctness(output, expected:, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output, expected_output: expected)
  result = Judges::Correctness.new(model: model).call(tc)
  # Log first: Minitest's `assert` raises on failure, which would otherwise
  # skip cost logging for exactly the runs that failed.
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert(result.passed, msg || "Correctness check failed: #{result.reason}")
end

#assert_email(output, msg = nil) ⇒ `Object`

Assert the output is a valid email address.

# File 'lib/ask/eval/dsl.rb', line 87

# Assert the output is a valid email address.
#
# Delegates the check to Assertions::Deterministic.email and asserts on the
# :passed entry of its result.
#
# @param output the output under test
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_email(output, msg = nil)
  check = Assertions::Deterministic.email(output)
  return assert(check[:passed], msg) if msg

  assert(check[:passed], check[:reason])
end

#assert_ends_with(output, suffix, msg = nil) ⇒ `Object`

Assert the output ends with the given suffix.

# File 'lib/ask/eval/dsl.rb', line 57

# Assert the output ends with the given suffix.
#
# Delegates the check to Assertions::Deterministic.ends_with and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param suffix the expected ending
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_ends_with(output, suffix, msg = nil)
  verdict = Assertions::Deterministic.ends_with(output, suffix: suffix)
  ok = verdict[:passed]
  explanation = msg || verdict[:reason]
  assert(ok, explanation)
end

#assert_equals(output, value, msg = nil) ⇒ `Object`

Assert the output equals the given value exactly.

# File 'lib/ask/eval/dsl.rb', line 63

# Assert the output equals the given value exactly.
#
# Delegates the check to Assertions::Deterministic.equals and asserts on the
# :passed entry of its result.
#
# @param output the output under test
# @param value the value the output must equal
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_equals(output, value, msg = nil)
  comparison = Assertions::Deterministic.equals(output, value: value)
  return assert(comparison[:passed], msg) if msg

  assert(comparison[:passed], comparison[:reason])
end

#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ `Object`

Assert the response is faithful to the provided context.

Parameters:

output (String) —

the LLM response
context (String, Array<String>) —

source context
model (Object, nil) (defaults to: nil) —

judge model
threshold (Float) (defaults to: 0.7) —

minimum score (0.0-1.0)
msg (String, nil) (defaults to: nil) —

custom failure message

# File 'lib/ask/eval/dsl.rb', line 101

# Assert the response is faithful to the provided context, as scored by the
# Faithful judge.
#
# The check passes when the judge's score is at least `threshold`. The judge's
# cost is logged (when cost tracking is enabled) before the assertion runs, so
# a failing check is still accounted for.
#
# @param output [String] the LLM response
# @param context [String, Array<String>] source context
# @param model [Object, nil] judge model
# @param threshold [Float] minimum score (0.0-1.0)
# @param msg [String, nil] custom failure message
# @return [Object] the result of `assert`
def assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Faithful.new(model: model).call(tc)
  # Log first: Minitest's `assert` raises on failure, which would otherwise
  # skip cost logging for exactly the runs that failed.
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert(
    result.score >= threshold,
    msg || "Faithfulness check failed: #{result.reason} (score: #{result.score})"
  )
end

#assert_json(output, msg = nil) ⇒ `Object`

Assert the output is valid JSON.

# File 'lib/ask/eval/dsl.rb', line 39

# Assert the output is valid JSON.
#
# Delegates the check to Assertions::Deterministic.is_json and asserts on the
# :passed entry of its result.
#
# @param output the output under test
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_json(output, msg = nil)
  parse_check = Assertions::Deterministic.is_json(output)
  is_valid = parse_check[:passed]
  failure_message = msg || parse_check[:reason]
  assert(is_valid, failure_message)
end

#assert_max_length(output, max, msg = nil) ⇒ `Object`

Assert the output has at most `max` characters.

# File 'lib/ask/eval/dsl.rb', line 75

# Assert the output has at most `max` characters.
#
# Delegates the check to Assertions::Deterministic.max_length and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param max the upper bound on the number of characters
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_max_length(output, max, msg = nil)
  length_check = Assertions::Deterministic.max_length(output, max: max)
  return assert(length_check[:passed], msg) if msg

  assert(length_check[:passed], length_check[:reason])
end

#assert_max_tokens(output, max, msg = nil) ⇒ `Object`

Assert the output has at most `max` tokens.

# File 'lib/ask/eval/dsl.rb', line 45

# Assert the output has at most `max` tokens.
#
# Delegates the check to Assertions::Deterministic.max_tokens and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param max the upper bound on the number of tokens
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_max_tokens(output, max, msg = nil)
  token_check = Assertions::Deterministic.max_tokens(output, max: max)
  within_limit = token_check[:passed]
  explanation = msg || token_check[:reason]
  assert(within_limit, explanation)
end

#assert_min_length(output, min, msg = nil) ⇒ `Object`

Assert the output has at least `min` characters.

# File 'lib/ask/eval/dsl.rb', line 69

# Assert the output has at least `min` characters.
#
# Delegates the check to Assertions::Deterministic.min_length and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param min the lower bound on the number of characters
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_min_length(output, min, msg = nil)
  length_check = Assertions::Deterministic.min_length(output, min: min)
  long_enough = length_check[:passed]
  failure_message = msg || length_check[:reason]
  assert(long_enough, failure_message)
end

#assert_not_contains(output, value, msg = nil) ⇒ `Object`

Assert the output does NOT contain the given substring.

# File 'lib/ask/eval/dsl.rb', line 27

# Assert the output does NOT contain the given substring.
#
# Delegates the check to Assertions::Deterministic.not_contains and asserts
# on the :passed entry of its result.
#
# @param output the output under test
# @param value the substring that must be absent
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_not_contains(output, value, msg = nil)
  absence_check = Assertions::Deterministic.not_contains(output, value: value)
  return assert(absence_check[:passed], msg) if msg

  assert(absence_check[:passed], absence_check[:reason])
end

#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ `Object`

Assert the response does NOT hallucinate (all claims are in context).

Parameters:

output (String) —

the LLM response
context (String, Array<String>) —

source context
model (Object, nil) (defaults to: nil) —

judge model
threshold (Float) (defaults to: 0.7) —

minimum score (0.0-1.0); higher = less hallucination
msg (String, nil) (defaults to: nil) —

custom failure message

# File 'lib/ask/eval/dsl.rb', line 117

# Assert the response does NOT hallucinate (all claims are in context), as
# scored by the Hallucination judge.
#
# The check passes when the judge's score is at least `threshold`. The judge's
# cost is logged (when cost tracking is enabled) before the assertion runs, so
# a failing check is still accounted for.
#
# @param output [String] the LLM response
# @param context [String, Array<String>] source context
# @param model [Object, nil] judge model
# @param threshold [Float] minimum score (0.0-1.0); higher = less hallucination
# @param msg [String, nil] custom failure message
# @return [Object] the result of `assert`
def assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Hallucination.new(model: model).call(tc)
  # Log first: Minitest's `assert` raises on failure, which would otherwise
  # skip cost logging for exactly the runs that failed.
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert(
    result.score >= threshold,
    msg || "Hallucination check failed: #{result.reason} (score: #{result.score})"
  )
end

#assert_regex(output, pattern, msg = nil) ⇒ `Object`

Assert the output matches the given regex pattern.

# File 'lib/ask/eval/dsl.rb', line 33

# Assert the output matches the given regex pattern.
#
# Delegates the check to Assertions::Deterministic.regex and asserts on the
# :passed entry of its result.
#
# @param output the output under test
# @param pattern the regex pattern to match against
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_regex(output, pattern, msg = nil)
  match_check = Assertions::Deterministic.regex(output, pattern: pattern)
  matched = match_check[:passed]
  explanation = msg || match_check[:reason]
  assert(matched, explanation)
end

#assert_starts_with(output, prefix, msg = nil) ⇒ `Object`

Assert the output starts with the given prefix.

# File 'lib/ask/eval/dsl.rb', line 51

# Assert the output starts with the given prefix.
#
# Delegates the check to Assertions::Deterministic.starts_with and asserts on
# the :passed entry of its result.
#
# @param output the output under test
# @param prefix the expected beginning
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_starts_with(output, prefix, msg = nil)
  prefix_check = Assertions::Deterministic.starts_with(output, prefix: prefix)
  return assert(prefix_check[:passed], msg) if msg

  assert(prefix_check[:passed], prefix_check[:reason])
end

#assert_url(output, msg = nil) ⇒ `Object`

Assert the output is a valid URL.

# File 'lib/ask/eval/dsl.rb', line 81

# Assert the output is a valid URL.
#
# Delegates the check to Assertions::Deterministic.url and asserts on the
# :passed entry of its result.
#
# @param output the output under test
# @param msg custom failure message; when omitted, the check's :reason is used
def assert_url(output, msg = nil)
  url_check = Assertions::Deterministic.url(output)
  looks_valid = url_check[:passed]
  failure_message = msg || url_check[:reason]
  assert(looks_valid, failure_message)
end

#refute_bias(output, model: nil, msg: nil) ⇒ `Object`

Refute (assert NOT) the response shows bias.

Parameters:

output (String) —

the LLM response
model (Object, nil) (defaults to: nil) —

judge model
msg (String, nil) (defaults to: nil) —

custom failure message

# File 'lib/ask/eval/dsl.rb', line 131

# Refute (assert NOT) the response shows bias, as decided by the Bias judge.
#
# The assertion passes when the judge result's `passed` is truthy. The judge's
# cost is logged (when cost tracking is enabled) before the assertion runs, so
# a failing check is still accounted for.
#
# @param output [String] the LLM response
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] the result of `assert`
def refute_bias(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Bias.new(model: model).call(tc)
  # Log first: Minitest's `assert` raises on failure, which would otherwise
  # skip cost logging for exactly the runs that failed.
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert(result.passed, msg || "Bias detected: #{result.reason}")
end

#refute_toxicity(output, model: nil, msg: nil) ⇒ `Object`

Refute (assert NOT) the response is toxic.

Parameters:

output (String) —

the LLM response
model (Object, nil) (defaults to: nil) —

judge model
msg (String, nil) (defaults to: nil) —

custom failure message

# File 'lib/ask/eval/dsl.rb', line 144

# Refute (assert NOT) the response is toxic, as decided by the Toxicity judge.
#
# The assertion passes when the judge result's `passed` is truthy. The judge's
# cost is logged (when cost tracking is enabled) before the assertion runs, so
# a failing check is still accounted for.
#
# @param output [String] the LLM response
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] the result of `assert`
def refute_toxicity(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Toxicity.new(model: model).call(tc)
  # Log first: Minitest's `assert` raises on failure, which would otherwise
  # skip cost logging for exactly the runs that failed.
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert(result.passed, msg || "Toxicity detected: #{result.reason}")
end

Module: Ask::Eval::DSL

Overview

Examples:

Instance Method Summary

Instance Method Details

#assert_contains(output, value, msg = nil) ⇒ Object

#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ Object

#assert_email(output, msg = nil) ⇒ Object

#assert_ends_with(output, suffix, msg = nil) ⇒ Object

#assert_equals(output, value, msg = nil) ⇒ Object

#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

#assert_json(output, msg = nil) ⇒ Object

#assert_max_length(output, max, msg = nil) ⇒ Object

#assert_max_tokens(output, max, msg = nil) ⇒ Object

#assert_min_length(output, min, msg = nil) ⇒ Object

#assert_not_contains(output, value, msg = nil) ⇒ Object

#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

#assert_regex(output, pattern, msg = nil) ⇒ Object

#assert_starts_with(output, prefix, msg = nil) ⇒ Object

#assert_url(output, msg = nil) ⇒ Object

#refute_bias(output, model: nil, msg: nil) ⇒ Object

#refute_toxicity(output, model: nil, msg: nil) ⇒ Object

#assert_contains(output, value, msg = nil) ⇒ `Object`

#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ `Object`

#assert_email(output, msg = nil) ⇒ `Object`

#assert_ends_with(output, suffix, msg = nil) ⇒ `Object`

#assert_equals(output, value, msg = nil) ⇒ `Object`

#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ `Object`

#assert_json(output, msg = nil) ⇒ `Object`

#assert_max_length(output, max, msg = nil) ⇒ `Object`

#assert_max_tokens(output, max, msg = nil) ⇒ `Object`

#assert_min_length(output, min, msg = nil) ⇒ `Object`

#assert_not_contains(output, value, msg = nil) ⇒ `Object`

#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ `Object`

#assert_regex(output, pattern, msg = nil) ⇒ `Object`

#assert_starts_with(output, prefix, msg = nil) ⇒ `Object`

#assert_url(output, msg = nil) ⇒ `Object`

#refute_bias(output, model: nil, msg: nil) ⇒ `Object`

#refute_toxicity(output, model: nil, msg: nil) ⇒ `Object`