Module: Ask::Eval

Defined in:
lib/ask/eval.rb,
lib/ask/eval/dsl.rb,
lib/ask/eval/judge.rb,
lib/ask/eval/runner.rb,
lib/ask/eval/version.rb,
lib/ask/eval/test_case.rb,
lib/ask/eval/assertions.rb,
lib/ask/eval/judges/bias.rb,
lib/ask/eval/cost_tracker.rb,
lib/ask/eval/configuration.rb,
lib/ask/eval/judges/faithful.rb,
lib/ask/eval/judges/toxicity.rb,
lib/ask/eval/reporters/junit.rb,
lib/ask/eval/assertions/judge.rb,
lib/ask/eval/reporters/github.rb,
lib/ask/eval/reporters/console.rb,
lib/ask/eval/judges/correctness.rb,
lib/ask/eval/judges/hallucination.rb,
lib/ask/eval/assertions/deterministic.rb

Defined Under Namespace

Modules: Assertions, DSL, Judges, Reporters

Classes: AssertionError, Configuration, CostTracker, Error, Judge, Runner, TestCase

Constant Summary

VERSION =
"0.1.0"

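Class Method Summary

  • .configuration ⇒ Configuration — Returns the global configuration.
  • .configure {|Configuration| ... } ⇒ Object — Configure ask-eval globally.
  • .cost_report ⇒ Hash — Returns cost report from all evaluations.
  • .evaluate(output, assertions, context: nil, input: nil, expected: nil) ⇒ Array<Hash> — Run a batch evaluation.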

Class Method Details

.configuration ⇒ Configuration

Returns the global configuration.

Returns:

  • (Configuration)

    the global configuration



55
56
57
# File 'lib/ask/eval.rb', line 55

def configuration
  @configuration ||= Configuration.new # built on the first call; later calls reuse the stored object
end

.configure {|Configuration| ... } ⇒ Object

Configure ask-eval globally.

Yields:

  • (Configuration)

    the global configuration



50
51
52
# File 'lib/ask/eval.rb', line 50

def configure
  yield configuration # hands the `configuration` object to the caller's block and returns the block's value
end

.cost_report ⇒ Hash

Returns cost report from all evaluations.

Returns:

  • (Hash)

    cost report from all evaluations



60
61
62
# File 'lib/ask/eval.rb', line 60

def cost_report
  configuration.cost_report # delegates to the `configuration` object's own cost_report
end

.evaluate(output, assertions, context: nil, input: nil, expected: nil) ⇒ Array<Hash>

Run a batch evaluation.

Parameters:

  • output (String)

    the LLM output

  • assertions (Array<Hash>)

    array of assertion configs

  • context (String, Array<String>, nil) (defaults to: nil)

    source context

  • input (String, nil) (defaults to: nil)

    input/prompt

  • expected (String, nil) (defaults to: nil)

    expected output

Returns:

  • (Array<Hash>)

    array of results



38
39
40
41
42
43
44
45
46
# File 'lib/ask/eval.rb', line 38

def evaluate(output, assertions, context: nil, input: nil, expected: nil)
  fields = { # keyword arguments for the TestCase that wraps this one output
    actual_output: output,
    input: input,
    context: context,
    expected_output: expected
  }
  Assertions.evaluate_all(TestCase.new(**fields), assertions) # result is returned unchanged
end