Class: OmniAgent::Eval

Inherits:
Object
  • Object
show all
Defined in:
lib/omni_agent/eval.rb,
lib/omni_agent/eval/cli.rb,
lib/omni_agent/eval/run.rb,
lib/omni_agent/eval/case.rb,
lib/omni_agent/eval/cache.rb,
lib/omni_agent/eval/judge.rb,
lib/omni_agent/eval/report.rb,
lib/omni_agent/eval/runner.rb,
lib/omni_agent/eval/outcome.rb,
lib/omni_agent/eval/golden_set.rb,
lib/omni_agent/eval/case_result.rb,
lib/omni_agent/eval/judge_assertion.rb,
lib/omni_agent/eval/output_assertion.rb,
lib/omni_agent/eval/tool_call_assertion.rb

Defined Under Namespace

Modules: CLI, Cache, GoldenSet, Runner
Classes: Case, CaseResult, Judge, JudgeAssertion, Outcome, OutputAssertion, Report, Run, ToolCallAssertion

Class Method Summary collapse

Class Method Details

.agent(klass = nil) ⇒ Object



4
5
6
7
# File 'lib/omni_agent/eval.rb', line 4

# Combined reader/writer for the agent class used by this eval.
#
# Called with a truthy argument it stores that value; called with no
# argument (or a falsy one) it leaves the stored value untouched.
#
# @param klass [Object, nil] value to store as the agent class
#   (presumably a Class; nothing here checks its type)
# @return [Object, nil] the stored agent class, or nil when none was set
def agent(klass = nil)
  return @agent_class unless klass

  @agent_class = klass
end

.configured_agent ⇒ Object



22
23
24
# File 'lib/omni_agent/eval.rb', line 22

# Reads the stored agent class without modifying it.
#
# @return [Object, nil] the value held in @agent_class, or nil when it
#   has never been assigned
def configured_agent
  # Spell out the "never assigned" case instead of relying on the
  # implicit nil of an unset instance variable.
  defined?(@agent_class) ? @agent_class : nil
end

.configured_cases ⇒ Object



26
27
28
# File 'lib/omni_agent/eval.rb', line 26

# Returns the cases registered so far.
#
# @return [Array] the stored case list, or a fresh empty array when no
#   case has been registered yet
def configured_cases
  return @configured_cases if @configured_cases

  []
end

.eval_case(name, &block) ⇒ Object



9
10
11
# File 'lib/omni_agent/eval.rb', line 9

# Registers a single eval case.
#
# Builds a Case from +name+ and the given block and appends it to the
# registered cases. A new array is assigned each time, so a list
# returned earlier by configured_cases is not mutated.
#
# @param name [Object] name passed through to Case.new
# @yield block forwarded unchanged to Case.new
# @return [Array] the updated list of registered cases
def eval_case(name, &block)
  new_case = Case.new(name, &block)
  @configured_cases = [*configured_cases, new_case]
end

.golden_set(path, &block) ⇒ Object



13
14
15
16
17
18
19
20
# File 'lib/omni_agent/eval.rb', line 13

# Registers one eval case per row of a golden set.
#
# Rows come from GoldenSet.load(path). Each row is read with the symbol
# keys :name, :input, :context and :run_alias; the whole row is also
# handed to the case as +row+. The same block is forwarded to every case.
#
# @param path [Object] passed straight to GoldenSet.load
# @yield block forwarded unchanged to each Case.new
# @return [Array] the updated list of registered cases
def golden_set(path, &block)
  loaded_cases = GoldenSet.load(path).each_with_index.map do |row, position|
    Case.new(
      # Unnamed rows are labelled by their zero-based position.
      row[:name] || "row #{position}",
      input: row[:input],
      context: row[:context] || {},
      run_alias: row[:run_alias],
      row: row,
      &block
    )
  end

  @configured_cases = [*configured_cases, *loaded_cases]
end

.run_all ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/omni_agent/eval.rb', line 30

# Runs every registered case against the configured agent.
#
# Each case is passed to Runner.run together with the configured agent,
# and the collected results are wrapped in a Report.
#
# @raise [OmniAgent::Error] when no agent has been configured
# @return [Report] report built from the per-case results
def run_all
  raise OmniAgent::Error, "#{name} must declare `agent SomeAgentClass` before running evals" unless configured_agent

  # The block parameter is named so it does not shadow the eval_case method.
  results = configured_cases.map do |registered_case|
    Runner.run(registered_case, configured_agent)
  end

  Report.new(results)
end