Module: Ask::Eval::DSL
- Defined in:
- lib/ask/eval/dsl.rb
Overview
Minitest DSL mixin. Include this in your test class to get all ask-eval assertion methods.
Instance Method Summary
-
#assert_contains(output, value, msg = nil) ⇒ Object
Assert the output contains the given substring.
-
#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ Object
Assert the response matches the expected output.
-
#assert_email(output, msg = nil) ⇒ Object
Assert the output is a valid email address.
-
#assert_ends_with(output, suffix, msg = nil) ⇒ Object
Assert the output ends with the given suffix.
-
#assert_equals(output, value, msg = nil) ⇒ Object
Assert the output equals the given value exactly.
-
#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object
Assert the response is faithful to the provided context.
-
#assert_json(output, msg = nil) ⇒ Object
Assert the output is valid JSON.
-
#assert_max_length(output, max, msg = nil) ⇒ Object
Assert the output has at most `max` characters.
-
#assert_max_tokens(output, max, msg = nil) ⇒ Object
Assert the output has at most `max` tokens.
-
#assert_min_length(output, min, msg = nil) ⇒ Object
Assert the output has at least `min` characters.
-
#assert_not_contains(output, value, msg = nil) ⇒ Object
Assert the output does NOT contain the given substring.
-
#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object
Assert the response does NOT hallucinate (all claims are in context).
-
#assert_regex(output, pattern, msg = nil) ⇒ Object
Assert the output matches the given regex pattern.
-
#assert_starts_with(output, prefix, msg = nil) ⇒ Object
Assert the output starts with the given prefix.
-
#assert_url(output, msg = nil) ⇒ Object
Assert the output is a valid URL.
-
#refute_bias(output, model: nil, msg: nil) ⇒ Object
Refute (assert NOT) the response shows bias.
-
#refute_toxicity(output, model: nil, msg: nil) ⇒ Object
Refute (assert NOT) the response is toxic.
Instance Method Details
#assert_contains(output, value, msg = nil) ⇒ Object
Assert the output contains the given substring.
21 22 23 24 |
# File 'lib/ask/eval/dsl.rb', line 21

# Assert the output contains the given substring.
#
# Delegates to Assertions::Deterministic.contains and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_contains(output, value, msg = nil)
  outcome = Assertions::Deterministic.contains(output, value: value)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_correctness(output, expected:, model: nil, msg: nil) ⇒ Object
Assert the response matches the expected output.
158 159 160 161 162 163 164 |
# File 'lib/ask/eval/dsl.rb', line 158

# Assert the response matches the expected output.
#
# Wraps +output+ and +expected+ in a TestCase, runs it through
# Judges::Correctness and asserts on the verdict's +passed+ flag.
#
# @param output the response under test (stored as +actual_output+)
# @param expected the expected output (stored as +expected_output+)
# @param model forwarded unchanged to Judges::Correctness.new
# @param msg optional failure message; defaults to
#   "Correctness check failed: <judge reason>"
def assert_correctness(output, expected:, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output, expected_output: expected)
  result = Judges::Correctness.new(model: model).call(tc)

  # Log the cost BEFORE asserting: a failing assertion raises, which would
  # otherwise skip cost tracking for a judge call that has already happened.
  log_cost(result) if Ask::Eval.configuration.track_cost

  assert result.passed, msg || "Correctness check failed: #{result.reason}"
end
#assert_email(output, msg = nil) ⇒ Object
Assert the output is a valid email address.
87 88 89 90 |
# File 'lib/ask/eval/dsl.rb', line 87

# Assert the output is a valid email address.
#
# Delegates to Assertions::Deterministic.email and asserts on the :passed
# entry of what it returns. When no custom message is given, the :reason
# entry is used as the failure message.
def assert_email(output, msg = nil)
  outcome = Assertions::Deterministic.email(output)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_ends_with(output, suffix, msg = nil) ⇒ Object
Assert the output ends with the given suffix.
57 58 59 60 |
# File 'lib/ask/eval/dsl.rb', line 57

# Assert the output ends with the given suffix.
#
# Delegates to Assertions::Deterministic.ends_with and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_ends_with(output, suffix, msg = nil)
  outcome = Assertions::Deterministic.ends_with(output, suffix: suffix)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_equals(output, value, msg = nil) ⇒ Object
Assert the output equals the given value exactly.
63 64 65 66 |
# File 'lib/ask/eval/dsl.rb', line 63

# Assert the output equals the given value exactly.
#
# Delegates to Assertions::Deterministic.equals and asserts on the :passed
# entry of what it returns. When no custom message is given, the :reason
# entry is used as the failure message.
def assert_equals(output, value, msg = nil)
  outcome = Assertions::Deterministic.equals(output, value: value)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object
Assert the response is faithful to the provided context.
101 102 103 104 105 106 107 108 |
# File 'lib/ask/eval/dsl.rb', line 101

# Assert the response is faithful to the provided context.
#
# Wraps +output+ and +context+ in a TestCase, runs it through
# Judges::Faithful and passes when the judge's score is at least
# +threshold+.
#
# @param output the response under test (stored as +actual_output+)
# @param context the context handed to the judge
# @param model forwarded unchanged to Judges::Faithful.new
# @param threshold minimum score (inclusive) required to pass; default 0.7
# @param msg optional failure message; defaults to
#   "Faithfulness check failed: <reason> (score: <score>)"
def assert_faithful(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Faithful.new(model: model).call(tc)

  # Log the cost BEFORE asserting: a failing assertion raises, which would
  # otherwise skip cost tracking for a judge call that has already happened.
  log_cost(result) if Ask::Eval.configuration.track_cost

  passed = result.score >= threshold
  assert passed, msg || "Faithfulness check failed: #{result.reason} (score: #{result.score})"
end
#assert_json(output, msg = nil) ⇒ Object
Assert the output is valid JSON.
39 40 41 42 |
# File 'lib/ask/eval/dsl.rb', line 39

# Assert the output is valid JSON.
#
# Delegates to Assertions::Deterministic.is_json and asserts on the :passed
# entry of what it returns. When no custom message is given, the :reason
# entry is used as the failure message.
def assert_json(output, msg = nil)
  outcome = Assertions::Deterministic.is_json(output)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_max_length(output, max, msg = nil) ⇒ Object
Assert the output has at most `max` characters.
75 76 77 78 |
# File 'lib/ask/eval/dsl.rb', line 75

# Assert the output has at most `max` characters.
#
# Delegates to Assertions::Deterministic.max_length and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_max_length(output, max, msg = nil)
  outcome = Assertions::Deterministic.max_length(output, max: max)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_max_tokens(output, max, msg = nil) ⇒ Object
Assert the output has at most `max` tokens.
45 46 47 48 |
# File 'lib/ask/eval/dsl.rb', line 45

# Assert the output has at most `max` tokens.
#
# Delegates to Assertions::Deterministic.max_tokens and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_max_tokens(output, max, msg = nil)
  outcome = Assertions::Deterministic.max_tokens(output, max: max)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_min_length(output, min, msg = nil) ⇒ Object
Assert the output has at least `min` characters.
69 70 71 72 |
# File 'lib/ask/eval/dsl.rb', line 69

# Assert the output has at least `min` characters.
#
# Delegates to Assertions::Deterministic.min_length and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_min_length(output, min, msg = nil)
  outcome = Assertions::Deterministic.min_length(output, min: min)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_not_contains(output, value, msg = nil) ⇒ Object
Assert the output does NOT contain the given substring.
27 28 29 30 |
# File 'lib/ask/eval/dsl.rb', line 27

# Assert the output does NOT contain the given substring.
#
# Delegates to Assertions::Deterministic.not_contains and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_not_contains(output, value, msg = nil)
  outcome = Assertions::Deterministic.not_contains(output, value: value)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil) ⇒ Object
Assert the response does NOT hallucinate (all claims are in context).
117 118 119 120 121 122 123 124 |
# File 'lib/ask/eval/dsl.rb', line 117

# Assert the response does NOT hallucinate (all claims are in context).
#
# Wraps +output+ and +context+ in a TestCase, runs it through
# Judges::Hallucination and passes when the judge's score is at least
# +threshold+.
#
# @param output the response under test (stored as +actual_output+)
# @param context the context handed to the judge
# @param model forwarded unchanged to Judges::Hallucination.new
# @param threshold minimum score (inclusive) required to pass; default 0.7
# @param msg optional failure message; defaults to
#   "Hallucination check failed: <reason> (score: <score>)"
def assert_not_hallucinating(output, context:, model: nil, threshold: 0.7, msg: nil)
  tc = TestCase.new(actual_output: output, context: context)
  result = Judges::Hallucination.new(model: model).call(tc)

  # Log the cost BEFORE asserting: a failing assertion raises, which would
  # otherwise skip cost tracking for a judge call that has already happened.
  log_cost(result) if Ask::Eval.configuration.track_cost

  passed = result.score >= threshold
  assert passed, msg || "Hallucination check failed: #{result.reason} (score: #{result.score})"
end
#assert_regex(output, pattern, msg = nil) ⇒ Object
Assert the output matches the given regex pattern.
33 34 35 36 |
# File 'lib/ask/eval/dsl.rb', line 33

# Assert the output matches the given regex pattern.
#
# Delegates to Assertions::Deterministic.regex and asserts on the :passed
# entry of what it returns. When no custom message is given, the :reason
# entry is used as the failure message.
def assert_regex(output, pattern, msg = nil)
  outcome = Assertions::Deterministic.regex(output, pattern: pattern)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_starts_with(output, prefix, msg = nil) ⇒ Object
Assert the output starts with the given prefix.
51 52 53 54 |
# File 'lib/ask/eval/dsl.rb', line 51

# Assert the output starts with the given prefix.
#
# Delegates to Assertions::Deterministic.starts_with and asserts on the
# :passed entry of what it returns. When no custom message is given,
# the :reason entry is used as the failure message.
def assert_starts_with(output, prefix, msg = nil)
  outcome = Assertions::Deterministic.starts_with(output, prefix: prefix)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#assert_url(output, msg = nil) ⇒ Object
Assert the output is a valid URL.
81 82 83 84 |
# File 'lib/ask/eval/dsl.rb', line 81

# Assert the output is a valid URL.
#
# Delegates to Assertions::Deterministic.url and asserts on the :passed
# entry of what it returns. When no custom message is given, the :reason
# entry is used as the failure message.
def assert_url(output, msg = nil)
  outcome = Assertions::Deterministic.url(output)
  failure_message = msg || outcome[:reason]
  assert(outcome[:passed], failure_message)
end
#refute_bias(output, model: nil, msg: nil) ⇒ Object
Refute (assert NOT) the response shows bias.
131 132 133 134 135 136 137 |
# File 'lib/ask/eval/dsl.rb', line 131

# Refute (assert NOT) the response shows bias.
#
# Wraps +output+ in a TestCase, runs it through Judges::Bias and asserts
# on the verdict's +passed+ flag.
#
# @param output the response under test (stored as +actual_output+)
# @param model forwarded unchanged to Judges::Bias.new
# @param msg optional failure message; defaults to
#   "Bias detected: <judge reason>"
def refute_bias(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Bias.new(model: model).call(tc)

  # Log the cost BEFORE asserting: a failing assertion raises, which would
  # otherwise skip cost tracking for a judge call that has already happened.
  log_cost(result) if Ask::Eval.configuration.track_cost

  assert result.passed, msg || "Bias detected: #{result.reason}"
end
#refute_toxicity(output, model: nil, msg: nil) ⇒ Object
Refute (assert NOT) the response is toxic.
144 145 146 147 148 149 150 |
# File 'lib/ask/eval/dsl.rb', line 144

# Refute (assert NOT) the response is toxic.
#
# Wraps +output+ in a TestCase, runs it through Judges::Toxicity and
# asserts on the verdict's +passed+ flag.
#
# @param output the response under test (stored as +actual_output+)
# @param model forwarded unchanged to Judges::Toxicity.new
# @param msg optional failure message; defaults to
#   "Toxicity detected: <judge reason>"
def refute_toxicity(output, model: nil, msg: nil)
  tc = TestCase.new(actual_output: output)
  result = Judges::Toxicity.new(model: model).call(tc)

  # Log the cost BEFORE asserting: a failing assertion raises, which would
  # otherwise skip cost tracking for a judge call that has already happened.
  log_cost(result) if Ask::Eval.configuration.track_cost

  assert result.passed, msg || "Toxicity detected: #{result.reason}"
end