Class: RubyLLM::Agents::Eval::EvalSuite

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_llm/agents/eval/eval_suite.rb

Overview

Defines test cases for an agent, runs them, scores results.

Examples:

# Declare a suite: `agent` records the agent class under evaluation and
# `test_case` registers one named case with its input and expected output.
class SupportRouter::Eval < RubyLLM::Agents::EvalSuite
  agent SupportRouter
  test_case "billing", input: { message: "charged twice" }, expected: { route: :billing }
end

# Execute every registered case; run! returns an EvalRun.
run = SupportRouter::Eval.run!
# NOTE(review): EvalRun#summary is defined outside this page — presumably a printable report.
puts run.summary

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.agent_class ⇒ Object (readonly)

Returns the value of attribute agent_class.



40
41
42
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 40

# Reader for the agent class this suite evaluates.
#
# @return [Object, nil] whatever was last stored in @agent_class (written by
#   `agent`), or nil when nothing has been assigned yet
def agent_class
  instance_variable_get(:@agent_class)
end

.eval_options ⇒ Object (readonly)

Returns the value of attribute eval_options.



40
41
42
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 40

# Reader for the suite-level evaluation options.
#
# @return [Hash, nil] the object held in @eval_options (the hash that
#   `eval_model` / `eval_temperature` write into), or nil when unset
def eval_options
  instance_variable_get(:@eval_options)
end

.test_cases ⇒ Object (readonly)

Returns the value of attribute test_cases.



40
41
42
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 40

# Reader for the registered test cases.
#
# @return [Array, nil] the object held in @test_cases (the array `test_case`
#   appends to), or nil when unset
def test_cases
  instance_variable_get(:@test_cases)
end

Class Method Details

.agent(klass) ⇒ Object

— DSL —



50
51
52
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 50

# DSL macro: records which agent class this suite evaluates.
#
# @param klass [Object] the agent class; stored as-is in @agent_class
# @return [Object] the same +klass+ that was passed in
def agent(klass)
  instance_variable_set(:@agent_class, klass)
end

.dataset(path) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 64

# Loads test cases from a YAML file and registers each entry via +test_case+.
#
# The YAML root must be a list of mappings. For each mapping, :name, :input,
# :expected and :score are passed to the matching +test_case+ arguments
# (:score is converted to a Symbol when present); every other key is
# forwarded as an extra keyword option.
#
# @param path [String, Pathname] a path starting with "/" is used as-is,
#   anything else is resolved against Rails.root
# @return [Array<Hash>] the parsed entries (empty when the file has no content)
# @raise [ArgumentError] if the YAML root is not a list
def dataset(path)
  # Pathname (e.g. Rails.root.join(...)) has no #start_with?, so normalise first.
  path = path.to_s
  full_path = path.start_with?("/") ? path : Rails.root.join(path).to_s

  cases = YAML.safe_load_file(full_path, permitted_classes: [Symbol], symbolize_names: true)
  # An empty YAML document parses to nil; treat it as "no cases" instead of crashing.
  cases = [] if cases.nil?
  unless cases.is_a?(Array)
    raise ArgumentError,
      "Dataset #{full_path} must contain a YAML list of test cases, got #{cases.class}"
  end

  cases.each do |tc|
    test_case(
      tc[:name],
      input: tc[:input],
      expected: tc[:expected],
      score: tc[:score]&.to_sym,
      **tc.except(:name, :input, :expected, :score)
    )
  end
end

.eval_model(value) ⇒ Object



78
79
80
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 78

# DSL macro: sets the suite-level model under the :model key of @eval_options.
#
# @param value [Object] model identifier, stored without conversion
# @return [Object] the stored +value+
def eval_model(value)
  @eval_options.store(:model, value)
end

.eval_temperature(value) ⇒ Object



82
83
84
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 82

# DSL macro: sets the suite-level temperature under the :temperature key of
# @eval_options.
#
# @param value [Object] temperature, stored without conversion
# @return [Object] the stored +value+
def eval_temperature(value)
  @eval_options.store(:temperature, value)
end

.for(agent_klass, &block) ⇒ Object



128
129
130
131
132
133
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 128

# Builds an anonymous suite subclass for +agent_klass+.
#
# A new subclass of the receiver is created, +agent+ is called on it with
# +agent_klass+, and the optional block is then instance_eval'd against the
# new subclass so DSL calls inside it target that subclass.
#
# @param agent_klass [Object] the agent class handed to +agent+
# @yield optional DSL block evaluated with the new suite as self
# @return [Class] the newly created subclass
def for(agent_klass, &block)
  Class.new(self).tap do |suite|
    suite.agent(agent_klass)
    suite.instance_eval(&block) if block
  end
end

.inherited(subclass) ⇒ Object



42
43
44
45
46
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 42

# Subclass hook: gives each new subclass its own empty @test_cases array and
# @eval_options hash, so the state is held per subclass rather than shared.
#
# @param subclass [Class] the class that was just created
def inherited(subclass)
  super
  subclass.instance_eval do
    @test_cases = []
    @eval_options = {}
  end
end

.run!(model: nil, only: nil, pass_threshold: 0.5, overrides: {}, **options) ⇒ Object

— Running —



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 88

# Validates the suite, runs the selected test cases and wraps the outcome in
# an EvalRun.
#
# @param model [Object, nil] model override; falls back to @eval_options[:model]
# @param only [Object, Array, nil] a case name or list of names to run; when
#   nil every registered case runs
# @param pass_threshold [Numeric] passed through to the EvalRun unchanged
# @param overrides [Hash] passed through to run_single for every case
# @param options [Hash] extra keywords are accepted but not used in this method
# @return [EvalRun] results plus the model and start/completion timestamps
# @raise [ConfigurationError] propagated from validate!
def run!(model: nil, only: nil, pass_threshold: 0.5, overrides: {}, **options)
  validate!

  selected =
    if only
      @test_cases.select { |candidate| Array(only).include?(candidate.name) }
    else
      @test_cases
    end

  resolved_model = model || @eval_options[:model]
  temperature = @eval_options[:temperature]
  started_at = Time.current

  results = selected.map do |candidate|
    run_single(candidate, model: resolved_model, temperature: temperature, overrides: overrides)
  end

  # Report the explicit/suite model when there is one; otherwise ask the agent
  # class for its model if it exposes one, else nil.
  reported_model =
    if resolved_model
      resolved_model
    elsif agent_class.respond_to?(:model)
      agent_class.model
    end

  EvalRun.new(
    suite: self,
    results: results,
    model: reported_model,
    pass_threshold: pass_threshold,
    started_at: started_at,
    completed_at: Time.current
  )
end

.test_case(name, input:, expected: nil, score: nil, **options) ⇒ Object



54
55
56
57
58
59
60
61
62
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 54

# DSL macro: builds a TestCase and appends it to @test_cases.
#
# @param name [Object] identifier for the case (matched by run!'s +only+ filter)
# @param input [Object] input for the case; validate! treats a Proc specially
# @param expected [Object, nil] expected output
# @param score [Object, nil] handed to TestCase as +scorer+
# @param options [Hash] any remaining keywords, handed to TestCase as +options+
# @return [Array] the @test_cases array after the append
def test_case(name, input:, expected: nil, score: nil, **options)
  definition = TestCase.new(
    name: name,
    input: input,
    expected: expected,
    scorer: score,
    options: options
  )
  @test_cases.push(definition)
end

.validate! ⇒ Object

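Raises:

  • (ConfigurationError) — if no agent class is set, no test cases are defined, or a test case is missing one of the agent's required params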



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/ruby_llm/agents/eval/eval_suite.rb', line 109

# Checks that the suite is runnable before any test case executes.
#
# Requires an agent class and at least one test case. When the agent class
# responds to +params+, every test case whose input is not a Proc must supply
# all params flagged +:required+. Proc inputs are skipped.
#
# @return [true] when the suite is valid
# @raise [ConfigurationError] if no agent class is set, no test cases are
#   defined, a non-Proc input is not hash-like, or required params are missing
def validate!
  raise ConfigurationError, "No agent class set" unless @agent_class
  raise ConfigurationError, "No test cases defined" if @test_cases.empty?

  static_cases = @test_cases.reject { |tc| tc.input.is_a?(Proc) }
  return true if static_cases.empty? || !@agent_class.respond_to?(:params)

  # The required-key list does not depend on the test case, so compute it once.
  required = @agent_class.params.select { |_, v| v[:required] }.keys

  static_cases.each do |tc|
    # Surface a configuration problem (e.g. a dataset row with no input)
    # instead of a bare NoMethodError on #keys.
    unless tc.input.respond_to?(:keys)
      raise ConfigurationError,
        "Test case '#{tc.name}' input must be a Hash or Proc, got #{tc.input.class}"
    end

    missing = required - tc.input.keys
    next if missing.empty?

    raise ConfigurationError,
      "Test case '#{tc.name}' missing required params: #{missing.join(", ")}"
  end
  true
end