Module: Ask::Eval::DSL

Defined in:
lib/ask/eval/dsl.rb

Overview

Minitest DSL mixin. Include this in your test class to get all ask-eval assertion methods.

Examples:

class MyEvalTest < Minitest::Test
  include Ask::Eval::DSL

  # Plain Minitest discovers tests by the `test_` method-name prefix. The
  # `test "..." do` block macro belongs to ActiveSupport::TestCase and is not
  # defined on Minitest::Test, so a regular method is used here.
  def test_response_quality
    # `my_response` and `docs` are placeholders for the LLM output under test
    # and the source context it should be grounded in.
    assert_faithful my_response, context: docs
    assert_contains my_response, "policy"
  end
end

Instance Method Summary collapse

Instance Method Details

#assert_contains(output, value, msg = nil) ⇒ Object

Assert the output contains the given substring.



21
22
23
24
# File 'lib/ask/eval/dsl.rb', line 21

# Checks that +output+ includes +value+ as a substring.
#
# Delegates to Assertions::Deterministic.contains and hands its +:passed+
# flag to +assert+. The failure message is +msg+ when given, otherwise the
# check's own +:reason+.
#
# @param output the text under test
# @param value the substring expected to be present
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_contains(output, value, msg = nil)
  verdict = Assertions::Deterministic.contains(output, value: value)
  passed = verdict[:passed]
  assert(passed, msg || verdict[:reason])
end

#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ Object

Assert the response matches the expected output.

Parameters:

  • output (String)

    the LLM response

  • expected (String)

    expected/reference output

  • model (Object, nil) (defaults to: nil)

    judge model

  • msg (String, nil) (defaults to: nil)

    custom failure message



158
159
160
161
162
163
164
# File 'lib/ask/eval/dsl.rb', line 158

# Asserts that +output+ matches the +expected+ reference answer, as decided
# by Judges::Correctness.
#
# The judge's cost is logged (when +track_cost+ is enabled) BEFORE the
# assertion runs: a failing +assert+ raises, so logging afterwards would
# silently drop the cost of every failed check.
#
# @param output [String] the LLM response
# @param expected [String] expected/reference output
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] whatever +assert+ returns
def assert_correctness(output, expected:, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output, expected_output: expected)
  result = Judges::Correctness.new(model: model).call(tc)
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert result.passed, msg || "Correctness check failed: #{result.reason}"
end

#assert_email(output, msg = nil) ⇒ Object

Assert the output is a valid email address.



87
88
89
90
# File 'lib/ask/eval/dsl.rb', line 87

# Checks that +output+ is a valid email address.
#
# Validation is performed by Assertions::Deterministic.email; its +:passed+
# flag is asserted, with +msg+ (or the check's +:reason+ when no +msg+ is
# given) as the failure message.
#
# @param output the text under test
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_email(output, msg = nil)
  check = Assertions::Deterministic.email(output)
  is_valid = check[:passed]
  assert(is_valid, msg || check[:reason])
end

#assert_ends_with(output, suffix, msg = nil) ⇒ Object

Assert the output ends with the given suffix.



57
58
59
60
# File 'lib/ask/eval/dsl.rb', line 57

# Checks that +output+ ends with +suffix+.
#
# Delegates to Assertions::Deterministic.ends_with and asserts its +:passed+
# flag. The failure message is +msg+ when given, otherwise the check's
# +:reason+.
#
# @param output the text under test
# @param suffix the expected ending
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_ends_with(output, suffix, msg = nil)
  outcome = Assertions::Deterministic.ends_with(output, suffix: suffix)
  ok = outcome[:passed]
  assert(ok, msg || outcome[:reason])
end

#assert_equals(output, value, msg = nil) ⇒ Object

Assert the output equals the given value exactly.



63
64
65
66
# File 'lib/ask/eval/dsl.rb', line 63

# Checks that +output+ equals +value+ exactly.
#
# The comparison itself lives in Assertions::Deterministic.equals; this
# method asserts the returned +:passed+ flag and reports +msg+, or the
# check's +:reason+ when no +msg+ is given.
#
# @param output the text under test
# @param value the value +output+ must equal
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_equals(output, value, msg = nil)
  comparison = Assertions::Deterministic.equals(output, value: value)
  matched = comparison[:passed]
  assert(matched, msg || comparison[:reason])
end

#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

Assert the response is faithful to the provided context.

Parameters:

  • output (String)

    the LLM response

  • context (String, Array<String>)

    source context

  • model (Object, nil) (defaults to: nil)

    judge model

  • threshold (Float) (defaults to: 0.7)

    minimum score (0.0-1.0)

  • msg (String, nil) (defaults to: nil)

    custom failure message



101
102
103
104
105
106
107
108
# File 'lib/ask/eval/dsl.rb', line 101

# Asserts that +output+ is faithful to +context+: the score returned by
# Judges::Faithful must be at least +threshold+.
#
# The judge's cost is logged (when +track_cost+ is enabled) BEFORE the
# assertion runs: a failing +assert+ raises, so logging afterwards would
# silently drop the cost of every failed check.
#
# @param output [String] the LLM response
# @param context [String, Array<String>] source context
# @param model [Object, nil] judge model
# @param threshold [Float] minimum score (0.0-1.0)
# @param msg [String, nil] custom failure message
# @return [Object] whatever +assert+ returns
def assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Faithful.new(model: model).call(tc)
  log_cost(result) if Ask::Eval.configuration.track_cost
  passed = result.score >= threshold
  assert passed, msg || "Faithfulness check failed: #{result.reason} (score: #{result.score})"
end

#assert_json(output, msg = nil) ⇒ Object

Assert the output is valid JSON.



39
40
41
42
# File 'lib/ask/eval/dsl.rb', line 39

# Checks that +output+ is valid JSON.
#
# Delegates to Assertions::Deterministic.is_json and asserts its +:passed+
# flag. The failure message is +msg+ when given, otherwise the check's
# +:reason+.
#
# @param output the text under test
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_json(output, msg = nil)
  parse_check = Assertions::Deterministic.is_json(output)
  parsed_ok = parse_check[:passed]
  assert(parsed_ok, msg || parse_check[:reason])
end

#assert_max_length(output, max, msg = nil) ⇒ Object

Assert the output has at most `max` characters.



75
76
77
78
# File 'lib/ask/eval/dsl.rb', line 75

# Checks that +output+ is no longer than +max+ characters.
#
# The length check is done by Assertions::Deterministic.max_length; its
# +:passed+ flag is asserted, with +msg+ (or the check's +:reason+ when no
# +msg+ is given) as the failure message.
#
# @param output the text under test
# @param max the upper bound passed to the check as +max:+
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_max_length(output, max, msg = nil)
  length_check = Assertions::Deterministic.max_length(output, max: max)
  within_limit = length_check[:passed]
  assert(within_limit, msg || length_check[:reason])
end

#assert_max_tokens(output, max, msg = nil) ⇒ Object

Assert the output has at most `max` tokens.



45
46
47
48
# File 'lib/ask/eval/dsl.rb', line 45

# Checks that +output+ contains no more than +max+ tokens.
#
# Token counting is left entirely to Assertions::Deterministic.max_tokens;
# this method asserts the returned +:passed+ flag and reports +msg+, or the
# check's +:reason+ when no +msg+ is given.
#
# @param output the text under test
# @param max the upper bound passed to the check as +max:+
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_max_tokens(output, max, msg = nil)
  token_check = Assertions::Deterministic.max_tokens(output, max: max)
  under_budget = token_check[:passed]
  assert(under_budget, msg || token_check[:reason])
end

#assert_min_length(output, min, msg = nil) ⇒ Object

Assert the output has at least `min` characters.



69
70
71
72
# File 'lib/ask/eval/dsl.rb', line 69

# Checks that +output+ is at least +min+ characters long.
#
# Delegates to Assertions::Deterministic.min_length and asserts its
# +:passed+ flag. The failure message is +msg+ when given, otherwise the
# check's +:reason+.
#
# @param output the text under test
# @param min the lower bound passed to the check as +min:+
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_min_length(output, min, msg = nil)
  length_check = Assertions::Deterministic.min_length(output, min: min)
  long_enough = length_check[:passed]
  assert(long_enough, msg || length_check[:reason])
end

#assert_not_contains(output, value, msg = nil) ⇒ Object

Assert the output does NOT contain the given substring.



27
28
29
30
# File 'lib/ask/eval/dsl.rb', line 27

# Checks that +output+ does NOT include +value+ as a substring.
#
# Delegates to Assertions::Deterministic.not_contains and asserts its
# +:passed+ flag. The failure message is +msg+ when given, otherwise the
# check's +:reason+.
#
# @param output the text under test
# @param value the substring that must be absent
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_not_contains(output, value, msg = nil)
  absence_check = Assertions::Deterministic.not_contains(output, value: value)
  absent = absence_check[:passed]
  assert(absent, msg || absence_check[:reason])
end

#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object

Assert the response does NOT hallucinate (all claims are in context).

Parameters:

  • output (String)

    the LLM response

  • context (String, Array<String>)

    source context

  • model (Object, nil) (defaults to: nil)

    judge model

  • threshold (Float) (defaults to: 0.7)

    minimum score (0.0-1.0); higher = less hallucination

  • msg (String, nil) (defaults to: nil)

    custom failure message



117
118
119
120
121
122
123
124
# File 'lib/ask/eval/dsl.rb', line 117

# Asserts that +output+ does not hallucinate relative to +context+: the
# score returned by Judges::Hallucination must be at least +threshold+.
#
# The judge's cost is logged (when +track_cost+ is enabled) BEFORE the
# assertion runs: a failing +assert+ raises, so logging afterwards would
# silently drop the cost of every failed check.
#
# @param output [String] the LLM response
# @param context [String, Array<String>] source context
# @param model [Object, nil] judge model
# @param threshold [Float] minimum score (0.0-1.0); higher = less hallucination
# @param msg [String, nil] custom failure message
# @return [Object] whatever +assert+ returns
def assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Hallucination.new(model: model).call(tc)
  log_cost(result) if Ask::Eval.configuration.track_cost
  passed = result.score >= threshold
  assert passed, msg || "Hallucination check failed: #{result.reason} (score: #{result.score})"
end

#assert_regex(output, pattern, msg = nil) ⇒ Object

Assert the output matches the given regex pattern.



33
34
35
36
# File 'lib/ask/eval/dsl.rb', line 33

# Checks that +output+ matches +pattern+.
#
# Matching is performed by Assertions::Deterministic.regex; its +:passed+
# flag is asserted, with +msg+ (or the check's +:reason+ when no +msg+ is
# given) as the failure message.
#
# @param output the text under test
# @param pattern the regex pattern, passed to the check as +pattern:+
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_regex(output, pattern, msg = nil)
  match_check = Assertions::Deterministic.regex(output, pattern: pattern)
  matched = match_check[:passed]
  assert(matched, msg || match_check[:reason])
end

#assert_starts_with(output, prefix, msg = nil) ⇒ Object

Assert the output starts with the given prefix.



51
52
53
54
# File 'lib/ask/eval/dsl.rb', line 51

# Checks that +output+ begins with +prefix+.
#
# Delegates to Assertions::Deterministic.starts_with and asserts its
# +:passed+ flag. The failure message is +msg+ when given, otherwise the
# check's +:reason+.
#
# @param output the text under test
# @param prefix the expected beginning
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_starts_with(output, prefix, msg = nil)
  outcome = Assertions::Deterministic.starts_with(output, prefix: prefix)
  ok = outcome[:passed]
  assert(ok, msg || outcome[:reason])
end

#assert_url(output, msg = nil) ⇒ Object

Assert the output is a valid URL.



81
82
83
84
# File 'lib/ask/eval/dsl.rb', line 81

# Checks that +output+ is a valid URL.
#
# Validation is performed by Assertions::Deterministic.url; its +:passed+
# flag is asserted, with +msg+ (or the check's +:reason+ when no +msg+ is
# given) as the failure message.
#
# @param output the text under test
# @param msg custom failure message (optional)
# @return [Object] whatever +assert+ returns
def assert_url(output, msg = nil)
  check = Assertions::Deterministic.url(output)
  is_valid = check[:passed]
  assert(is_valid, msg || check[:reason])
end

#refute_bias(output, model: nil, msg: nil) ⇒ Object

Refute (assert NOT) the response shows bias.

Parameters:

  • output (String)

    the LLM response

  • model (Object, nil) (defaults to: nil)

    judge model

  • msg (String, nil) (defaults to: nil)

    custom failure message



131
132
133
134
135
136
137
# File 'lib/ask/eval/dsl.rb', line 131

# Refutes that +output+ shows bias: asserts the +passed+ flag of the result
# returned by Judges::Bias.
#
# The judge's cost is logged (when +track_cost+ is enabled) BEFORE the
# assertion runs: a failing +assert+ raises, so logging afterwards would
# silently drop the cost of every failed check.
#
# @param output [String] the LLM response
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] whatever +assert+ returns
def refute_bias(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Bias.new(model: model).call(tc)
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert result.passed, msg || "Bias detected: #{result.reason}"
end

#refute_toxicity(output, model: nil, msg: nil) ⇒ Object

Refute (assert NOT) the response is toxic.

Parameters:

  • output (String)

    the LLM response

  • model (Object, nil) (defaults to: nil)

    judge model

  • msg (String, nil) (defaults to: nil)

    custom failure message



144
145
146
147
148
149
150
# File 'lib/ask/eval/dsl.rb', line 144

# Refutes that +output+ is toxic: asserts the +passed+ flag of the result
# returned by Judges::Toxicity.
#
# The judge's cost is logged (when +track_cost+ is enabled) BEFORE the
# assertion runs: a failing +assert+ raises, so logging afterwards would
# silently drop the cost of every failed check.
#
# @param output [String] the LLM response
# @param model [Object, nil] judge model
# @param msg [String, nil] custom failure message
# @return [Object] whatever +assert+ returns
def refute_toxicity(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Toxicity.new(model: model).call(tc)
  log_cost(result) if Ask::Eval.configuration.track_cost
  assert result.passed, msg || "Toxicity detected: #{result.reason}"
end