Class: DSPy::Evals

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Includes:
Callbacks
Defined in:
lib/dspy/evals.rb,
lib/dspy/evals/version.rb

Overview

Core evaluation framework for DSPy programs. Supports single evaluations, batch evaluations, and optimization workflows.

Defined Under Namespace

Classes: BatchEvaluationResult, EvaluationResult

Constant Summary collapse

VERSION =
'1.0.2'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Callbacks

included

Constructor Details

#initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true, export_scores: false, score_name: 'evaluation') ⇒ Evals

Returns a new instance of Evals.



241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/dspy/evals.rb', line 241

# Builds an evaluator around +program+.
#
# @param program the object to evaluate; stored as given
# @param metric optional metric; stored as given and may be nil
# @param num_threads thread count; nil/false falls back to 1
# @param max_errors error budget; nil/false falls back to 5
# @param failure_score converted with +to_f+; nil/false becomes 0.0
# @param provide_traceback stored as given
# @param export_scores stored as given
# @param score_name stored as given
def initialize(program, metric: nil, num_threads: 1, max_errors: 5, failure_score: 0.0, provide_traceback: true, export_scores: false, score_name: 'evaluation')
  # Values kept exactly as the caller supplied them.
  @program = program
  @metric = metric
  @provide_traceback = provide_traceback
  @export_scores = export_scores
  @score_name = score_name

  # An explicit nil (or false) is replaced by the default instead of being stored.
  @num_threads = num_threads || 1
  @max_errors = max_errors || 5
  @failure_score = (failure_score || 0.0).to_f

  # Nothing has been evaluated yet.
  @last_example_result = nil
  @last_batch_result = nil
end

Instance Attribute Details

#export_scores ⇒ Object (readonly)

Returns the value of attribute export_scores.



195
196
197
# File 'lib/dspy/evals.rb', line 195

# Reader for the +export_scores+ setting stored by the constructor (defaults to false).
def export_scores = @export_scores

#failure_score ⇒ Object (readonly)

Returns the value of attribute failure_score.



186
187
188
# File 'lib/dspy/evals.rb', line 186

# Reader for the failure score; the constructor stores it as a Float (0.0 when none is given).
def failure_score = @failure_score

#last_batch_result ⇒ Object (readonly)

Returns the value of attribute last_batch_result.



192
193
194
# File 'lib/dspy/evals.rb', line 192

# Reader for the batch result recorded by the latest #evaluate run; nil after construction.
def last_batch_result = @last_batch_result

#last_example_result ⇒ Object (readonly)

Returns the value of attribute last_example_result.



189
190
191
# File 'lib/dspy/evals.rb', line 189

# Reader for @last_example_result, which the constructor initializes to nil.
# NOTE(review): the code that assigns it afterwards is outside this listing — confirm where it is updated.
def last_example_result = @last_example_result

#max_errors ⇒ Object (readonly)

Returns the value of attribute max_errors.



180
181
182
# File 'lib/dspy/evals.rb', line 180

# Reader for the +max_errors+ setting; the constructor substitutes 5 when given nil.
def max_errors = @max_errors

#metric ⇒ Object (readonly)

Returns the value of attribute metric.



174
175
176
# File 'lib/dspy/evals.rb', line 174

# Reader for the metric handed to the constructor; nil when none was supplied.
def metric = @metric

#num_threads ⇒ Object (readonly)

Returns the value of attribute num_threads.



177
178
179
# File 'lib/dspy/evals.rb', line 177

# Reader for the +num_threads+ setting; the constructor substitutes 1 when given nil.
def num_threads = @num_threads

#program ⇒ Object (readonly)

Returns the value of attribute program.



171
172
173
# File 'lib/dspy/evals.rb', line 171

# Reader for the program object handed to the constructor.
def program = @program

#provide_traceback ⇒ Object (readonly)

Returns the value of attribute provide_traceback.



183
184
185
# File 'lib/dspy/evals.rb', line 183

# Reader for the +provide_traceback+ setting stored by the constructor (defaults to true).
def provide_traceback = @provide_traceback

#score_name ⇒ Object (readonly)

Returns the value of attribute score_name.



198
199
200
# File 'lib/dspy/evals.rb', line 198

# Reader for the +score_name+ setting stored by the constructor (defaults to 'evaluation').
def score_name = @score_name

Class Method Details

.after_batch(callback = nil, &block) ⇒ Object



220
221
222
# File 'lib/dspy/evals.rb', line 220

# Registers an after-hook for batch runs by delegating to +after+ with target :evaluate.
# The hook may be given as +callback+ or as a block; both are forwarded unchanged.
def after_batch(callback = nil, &block) = after(callback, target: :evaluate, &block)

.after_example(callback = nil, &block) ⇒ Object



212
213
214
# File 'lib/dspy/evals.rb', line 212

# Registers an after-hook for single-example runs by delegating to +after+ with target :call.
# The hook may be given as +callback+ or as a block; both are forwarded unchanged.
def after_example(callback = nil, &block) = after(callback, target: :call, &block)

.before_batch(callback = nil, &block) ⇒ Object



216
217
218
# File 'lib/dspy/evals.rb', line 216

# Registers a before-hook for batch runs by delegating to +before+ with target :evaluate.
# The hook may be given as +callback+ or as a block; both are forwarded unchanged.
def before_batch(callback = nil, &block) = before(callback, target: :evaluate, &block)

.before_example(callback = nil, &block) ⇒ Object



208
209
210
# File 'lib/dspy/evals.rb', line 208

# Registers a before-hook for single-example runs by delegating to +before+ with target :call.
# The hook may be given as +callback+ or as a block; both are forwarded unchanged.
def before_example(callback = nil, &block) = before(callback, target: :call, &block)

.reset_callbacks! ⇒ Object



224
225
226
# File 'lib/dspy/evals.rb', line 224

# Replaces @callbacks with a fresh empty Hash and returns that Hash.
def reset_callbacks! = (@callbacks = {})

Instance Method Details

#call(example, trace: nil) ⇒ Object



256
257
258
# File 'lib/dspy/evals.rb', line 256

# Evaluates a single +example+ against the configured program.
# Delegates to call_with_program with state tracking switched on and returns its result.
#
# @param example the example to evaluate; forwarded unchanged
# @param trace optional trace value; forwarded unchanged (nil by default)
def call(example, trace: nil) = call_with_program(@program, example, trace: trace, track_state: true)

#evaluate(devset, display_progress: true, display_table: false, return_outputs: true) ⇒ Object



269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/dspy/evals.rb', line 269

# Evaluates every example in +devset+ and returns a BatchEvaluationResult.
#
# Sequence: the :before callbacks for :evaluate run first; the batch then executes inside an
# 'evaluation.batch' span (in parallel or sequentially, as decided by parallel_execution?);
# a completion event is logged; finally the result is stored in @last_batch_result, passed to
# emit_batch_observation, and handed to the :after callbacks.
#
# @param devset collection of examples; must respond to +length+
# @param display_progress when truthy, prints a start line and a summary line to stdout
# @param display_table when truthy, renders the result through display_results_table
# @param return_outputs accepted by the signature but not referenced in this method body
# @return [BatchEvaluationResult] the result built for this run
def evaluate(devset, display_progress: true, display_table: false, return_outputs: true)
  run_callbacks(:before, :evaluate, devset: devset)

  batch_result = DSPy::Context.with_span(
    operation: 'evaluation.batch',
    'dspy.module' => 'Evaluator',
    'evaluation.program' => @program.class.name,
    'evaluation.num_examples' => devset.length,
    'evaluation.has_metric' => !@metric.nil?,
    'evaluation.num_threads' => @num_threads
  ) do
    puts "Evaluating #{devset.length} examples..." if display_progress

    # Both execution strategies receive the same progress option.
    opts = { display_progress: display_progress }
    results = parallel_execution? ? evaluate_in_parallel(devset, **opts) : evaluate_sequential(devset, **opts)

    aggregated_metrics = aggregate_metrics(results)
    summary = BatchEvaluationResult.new(results: results, aggregated_metrics: aggregated_metrics)

    display_results_table(summary) if display_table

    # Batch completion event.
    completion_event = {
      'evaluation.program_class' => @program.class.name,
      'evaluation.total_examples' => summary.total_examples,
      'evaluation.passed_examples' => summary.passed_examples,
      'evaluation.pass_rate' => summary.pass_rate,
      'evaluation.aggregated_metrics' => aggregated_metrics
    }
    DSPy.log('evaluation.batch_complete', **completion_event)

    if display_progress
      passed = summary.passed_examples
      total = summary.total_examples
      percent = (summary.pass_rate * 100).round(1)
      puts "Evaluation complete: #{passed}/#{total} passed (#{percent}%)"
    end

    summary
  end

  # Post-processing happens only after the span block has returned.
  @last_batch_result = batch_result
  emit_batch_observation(devset, batch_result)
  run_callbacks(:after, :evaluate, devset: devset, result: batch_result)
  batch_result
end