Module: DSPy::Teleprompt::Utils

Extended by:
T::Sig
Defined in:
lib/dspy/teleprompt/utils.rb

Overview

Bootstrap utilities for MIPROv2 optimization. Handles few-shot example generation and candidate program evaluation.

Defined Under Namespace

Classes: BootstrapConfig, BootstrapResult

Class Method Summary collapse

Class Method Details

.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric) ⇒ Object



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/dspy/teleprompt/utils.rb', line 334

# Runs the student over the trainset and collects demonstrations from the
# examples it gets right, optionally prefixed with labeled demonstrations.
#
# @param student [#call] invoked with each example's input values as keyword arguments
# @param trainset [Array] examples responding to input_values, expected_values,
#   signature_class and matches_prediction?
# @param max_bootstrapped [Integer] stop bootstrapping once this many demos were collected
# @param max_labeled [Integer] number of leading trainset examples to prepend as labeled demos
# @param metric [#call, nil] called as metric.call(example, prediction_hash); when nil,
#   example.matches_prediction? decides success
# @return [Array<DSPy::FewShotExample>] labeled demos (if any) followed by bootstrapped demos
def self.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric)
  bootstrapped = []

  trainset.each do |example|
    break unless bootstrapped.size < max_bootstrapped

    begin
      raw_prediction = student.call(**example.input_values)
      prediction_hash = raw_prediction.respond_to?(:to_h) ? raw_prediction.to_h : raw_prediction

      passed = metric ? metric.call(example, prediction_hash) : example.matches_prediction?(prediction_hash)
      next unless passed

      # Keep only the signature's output fields in the stored demonstration.
      demo_output = extract_output_fields_for_demo(prediction_hash, example.signature_class)
      bootstrapped << DSPy::FewShotExample.new(input: example.input_values, output: demo_output)
    rescue StandardError => e
      # Best effort: a failing example is logged (when a logger exists) and skipped.
      if DSPy.logger
        DSPy.logger.warn("Bootstrap error: #{e.message}")
      end
    end
  end

  return bootstrapped unless max_labeled > 0

  # Labeled demos come straight from the head of the trainset and go first.
  labeled_prefix = trainset.take(max_labeled).map do |labeled_example|
    DSPy::FewShotExample.new(
      input: labeled_example.input_values,
      output: labeled_example.expected_values
    )
  end

  labeled_prefix + bootstrapped
end

.create_candidate_sets(successful_examples, config) ⇒ Object



542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
# File 'lib/dspy/teleprompt/utils.rb', line 542

# Builds candidate demonstration sets from already-successful examples.
#
# @param successful_examples [Array] pool to draw from
# @param config [#max_bootstrapped_examples, #num_candidate_sets] bootstrap settings
# @return [Array] [] for an empty pool, otherwise whatever
#   DataHandler#create_candidate_sets returns
def self.create_candidate_sets(successful_examples, config)
  if successful_examples.empty?
    []
  else
    handler = DataHandler.new(successful_examples)
    # A set can never hold more examples than the pool contains.
    examples_per_set = [config.max_bootstrapped_examples, successful_examples.size].min

    # Fixed random_state so repeated runs draw the same sets.
    handler.create_candidate_sets(config.num_candidate_sets, examples_per_set, random_state: 42)
  end
end

.create_labeled_demos(trainset, max_labeled, labeled_sample, rng) ⇒ Object



309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/dspy/teleprompt/utils.rb', line 309

# Turns trainset examples into labeled few-shot demonstrations.
#
# @param trainset [Array] examples responding to input_values and expected_values
# @param max_labeled [Integer] maximum number of demonstrations to produce
# @param labeled_sample [Boolean] when truthy, draw a random sample; otherwise
#   take the first max_labeled examples in order
# @param rng [Random, nil] random source for sampling; when nil the default
#   generator is used (same convention as create_minibatch)
# @return [Array<DSPy::FewShotExample>] one demo per selected example
def self.create_labeled_demos(trainset, max_labeled, labeled_sample, rng)
  selected =
    if labeled_sample
      # Never ask for more samples than the trainset holds.
      sample_size = [max_labeled, trainset.size].min
      # Passing `random: nil` to Array#sample fails, so only forward a real RNG.
      rng ? trainset.sample(sample_size, random: rng) : trainset.sample(sample_size)
    else
      trainset.take(max_labeled)
    end

  selected.map do |ex|
    DSPy::FewShotExample.new(
      input: ex.input_values,
      output: ex.expected_values
    )
  end
end

.create_minibatch(trainset, batch_size = 50, rng = nil) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/dspy/teleprompt/utils.rb', line 30

# Draws a random minibatch from the trainset.
#
# @param trainset [Array] dataset to sample from
# @param batch_size [Integer] requested batch size (defaults to 50)
# @param rng [Random, nil] optional random source for reproducible sampling
# @return [Array] sampled elements; at most trainset.size of them
def self.create_minibatch(trainset, batch_size = 50, rng = nil)
  # Cap the request at the dataset size.
  sample_count = batch_size > trainset.size ? trainset.size : batch_size

  # Without an RNG, fall back to Array#sample's default generator.
  return trainset.sample(sample_count) unless rng

  trainset.sample(sample_count, random: rng)
end

.create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_bootstrapped_demos: 3, max_labeled_demos: 3, min_num_samples: 1, metric: nil, teacher_settings: {}, seed: nil, include_non_bootstrapped: true, labeled_sample: true) ⇒ Object



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/dspy/teleprompt/utils.rb', line 233

# Builds candidate few-shot demo sets for every predictor of the student.
#
# Strategy seeds are iterated from -3 up to (num_candidate_sets - 3), exclusive:
#   -3  zero-shot (empty demo set)
#   -2  labeled-only demos
#   -1  bootstrapped demos from the trainset in its given order
#   >=0 bootstrapped demos from a trainset shuffled with that seed
#
# @param student [Object] program to bootstrap; if it responds to predictors,
#   one candidate list is kept per predictor (at least one)
# @param num_candidate_sets [Integer] total number of strategy seeds to run
# @param trainset [Array] training examples
# @param max_bootstrapped_demos [Integer] upper bound of bootstrapped demos per set
# @param max_labeled_demos [Integer] labeled demos per set
# @param min_num_samples [Integer] lower bound for the random demo count of shuffled sets
# @param metric [#call, nil] forwarded to create_bootstrapped_demos
# @param teacher_settings [Hash] accepted for interface compatibility; not
#   referenced in this method body
# @param seed [Integer, nil] seeds the RNG handed to create_labeled_demos
# @param include_non_bootstrapped [Boolean] enables the -3 and -2 strategies
# @param labeled_sample [Boolean] forwarded to create_labeled_demos
# @return [Hash{Integer => Array<Array>}] predictor index => list of demo sets
#   (the hash auto-creates an empty list for unknown keys)
def self.create_n_fewshot_demo_sets(
  student,
  num_candidate_sets,
  trainset,
  max_bootstrapped_demos: 3,
  max_labeled_demos: 3,
  min_num_samples: 1,
  metric: nil,
  teacher_settings: {},
  seed: nil,
  include_non_bootstrapped: true,
  labeled_sample: true
)
  demo_candidates = Hash.new { |hash, key| hash[key] = [] }
  rng = seed ? Random.new(seed) : Random.new

  # A student without (or with empty) predictors counts as one predictor.
  predictor_count = 1
  if student.respond_to?(:predictors)
    exposed = Array(student.predictors)
    predictor_count = exposed.size unless exposed.empty?
  end
  predictor_slots = 0...predictor_count

  # Three of the requested sets are the special seeds -3, -2 and -1.
  seed_limit = num_candidate_sets - 3

  (-3...seed_limit).each do |strategy_seed|
    if strategy_seed == -3
      # Zero-shot: every predictor receives its own empty demo set.
      next unless include_non_bootstrapped

      predictor_slots.each { |slot| demo_candidates[slot] << [] }
    elsif strategy_seed == -2
      # Labeled-only: sampled or leading labeled examples.
      next if !include_non_bootstrapped || max_labeled_demos <= 0

      labeled_only = create_labeled_demos(trainset, max_labeled_demos, labeled_sample, rng)
      predictor_slots.each { |slot| demo_candidates[slot] << labeled_only }
    else
      source_examples, demo_budget =
        if strategy_seed == -1
          # Unshuffled: original order, full bootstrapped budget.
          [trainset, max_bootstrapped_demos]
        else
          # Shuffled: the strategy seed drives both the shuffle and the demo count.
          per_seed_rng = Random.new(strategy_seed)
          [
            trainset.shuffle(random: per_seed_rng),
            per_seed_rng.rand(min_num_samples..max_bootstrapped_demos)
          ]
        end

      bootstrapped = create_bootstrapped_demos(
        student, source_examples, demo_budget, max_labeled_demos, metric
      )
      predictor_slots.each { |slot| demo_candidates[slot] << bootstrapped }
    end
  end

  demo_candidates
end

.create_successful_bootstrap_example(original_example, prediction) ⇒ Object



566
567
568
569
570
571
572
573
574
575
576
577
578
579
# File 'lib/dspy/teleprompt/utils.rb', line 566

# Wraps a successful prediction in a new DSPy::Example derived from the
# example that produced it.
#
# @param original_example [Object] responds to signature_class, input_values,
#   expected_values and id
# @param prediction [Object] stored as the new example's expected value
# @return [DSPy::Example] example whose id is "bootstrap_<original id>" (a fresh
#   UUID replaces a missing id) and whose metadata records the source, the
#   original expected values and an ISO 8601 timestamp
def self.create_successful_bootstrap_example(original_example, prediction)
  example_attributes = {
    signature_class: original_example.signature_class,
    input: original_example.input_values,
    expected: prediction,
    id: "bootstrap_#{original_example.id || SecureRandom.uuid}",
    metadata: {
      source: "bootstrap",
      original_expected: original_example.expected_values,
      bootstrap_timestamp: Time.now.iso8601
    }
  }

  DSPy::Example.new(**example_attributes)
end

.default_metric_for_examples(examples) ⇒ Object



607
608
609
610
611
612
613
# File 'lib/dspy/teleprompt/utils.rb', line 607

# Picks a default metric based on the type of the first example.
#
# @param examples [Array] examples to be evaluated
# @return [Proc, nil] a proc delegating to example.matches_prediction? when the
#   first example is a DSPy::Example; nil otherwise (including an empty list)
def self.default_metric_for_examples(examples)
  return nil unless examples.first.is_a?(DSPy::Example)

  proc { |example, prediction| example.matches_prediction?(prediction) }
end

.emit_bootstrap_complete_event(statistics) ⇒ Object



617
618
619
620
621
622
623
624
625
# File 'lib/dspy/teleprompt/utils.rb', line 617

# Logs an 'optimization.bootstrap_complete' event with the bootstrap statistics.
#
# @param statistics [Hash] read via the keys :successful_count, :failed_count,
#   :success_rate, :candidate_sets_created and :average_set_size
# @return [Object] whatever DSPy.log returns
def self.emit_bootstrap_complete_event(statistics)
  # Event attribute name => key looked up in statistics.
  statistic_for_attribute = {
    'bootstrap.successful_count' => :successful_count,
    'bootstrap.failed_count' => :failed_count,
    'bootstrap.success_rate' => :success_rate,
    'bootstrap.candidate_sets_created' => :candidate_sets_created,
    'bootstrap.average_set_size' => :average_set_size
  }
  event_attributes = statistic_for_attribute.transform_values { |stat_key| statistics[stat_key] }

  DSPy.log('optimization.bootstrap_complete', **event_attributes)
end

.emit_bootstrap_example_event(index, success, error) ⇒ Object



629
630
631
632
633
634
635
# File 'lib/dspy/teleprompt/utils.rb', line 629

# Logs an 'optimization.bootstrap_example' event for a single example.
#
# @param index [Object] reported as 'bootstrap.example_index'
# @param success [Object] reported as 'bootstrap.success'
# @param error [Object] reported as 'bootstrap.error'
# @return [Object] whatever DSPy.log returns
def self.emit_bootstrap_example_event(index, success, error)
  event_attributes = {
    'bootstrap.example_index' => index,
    'bootstrap.success' => success,
    'bootstrap.error' => error
  }

  DSPy.log('optimization.bootstrap_example', **event_attributes)
end

.ensure_typed_examples(examples) ⇒ Object

Raises:

  • (ArgumentError)


464
465
466
467
468
# File 'lib/dspy/teleprompt/utils.rb', line 464

# Validates that every example is a DSPy::Example.
#
# @param examples [Array] examples to check
# @return [Array] the same examples object, unchanged
# @raise [ArgumentError] if any element is not a DSPy::Example
def self.ensure_typed_examples(examples)
  has_untyped = examples.any? { |candidate| !candidate.is_a?(DSPy::Example) }
  if has_untyped
    raise ArgumentError, "All examples must be DSPy::Example instances. Legacy format support has been removed. Please convert your examples to use the structured format with :input and :expected keys."
  end

  examples
end

.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil) ⇒ Object



404
405
406
407
408
409
410
411
# File 'lib/dspy/teleprompt/utils.rb', line 404

# Evaluates a candidate program, switching to minibatch evaluation when the
# example list is larger than the configured minibatch size.
#
# @param program [Object] program under evaluation
# @param examples [Array] evaluation examples
# @param config [#minibatch_size] bootstrap settings (defaults to a fresh BootstrapConfig)
# @param metric [#call, nil] optional metric forwarded to the evaluator
# @return [Object] result of the full or minibatch evaluation
def self.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil)
  exceeds_minibatch = examples.size > config.minibatch_size
  return eval_candidate_program_full(program, examples, config, metric) unless exceeds_minibatch

  eval_candidate_program_minibatch(program, examples, config, metric)
end

.eval_candidate_program_full(program, examples, config, metric) ⇒ Object



447
448
449
450
451
452
453
454
455
456
457
458
# File 'lib/dspy/teleprompt/utils.rb', line 447

# Evaluates the program on every given example through DSPy::Evals.
#
# @param program [Object] program under evaluation
# @param examples [Array] examples to evaluate on
# @param config [#num_threads, #max_errors] evaluator settings
# @param metric [#call, nil] metric; when nil, default_metric_for_examples chooses one
# @return [Object] result of DSPy::Evals#evaluate (progress display disabled)
def self.eval_candidate_program_full(program, examples, config, metric)
  scoring_metric = metric || default_metric_for_examples(examples)

  DSPy::Evals
    .new(program, metric: scoring_metric, num_threads: config.num_threads, max_errors: config.max_errors)
    .evaluate(examples, display_progress: false)
end

.eval_candidate_program_minibatch(program, examples, config, metric) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/dspy/teleprompt/utils.rb', line 422

# Evaluates the program on one randomly sampled minibatch, inside a tracing span.
#
# The span records the total example count, the minibatch size and the number
# of batches that would cover all examples; only a single minibatch is
# evaluated here.
#
# @param program [Object] program under evaluation
# @param examples [Array] full example list to sample from
# @param config [#minibatch_size] supplies the minibatch size (and is forwarded)
# @param metric [#call, nil] forwarded to eval_candidate_program_full
# @return [Object] whatever the DSPy::Context.with_span block yields back,
#   i.e. the full evaluation of the sampled minibatch
def self.eval_candidate_program_minibatch(program, examples, config, metric)
  total_examples = examples.size
  minibatch_size = config.minibatch_size
  batches_needed = (total_examples.to_f / minibatch_size).ceil

  DSPy::Context.with_span(
    operation: 'optimization.minibatch_evaluation',
    'dspy.module' => 'Bootstrap',
    'minibatch.total_examples' => total_examples,
    'minibatch.size' => minibatch_size,
    'minibatch.num_batches' => batches_needed
  ) do
    # Unseeded sample, capped at the number of available examples.
    minibatch = examples.sample([config.minibatch_size, examples.size].min)

    eval_candidate_program_full(program, minibatch, config, metric)
  end
end

.extract_output_fields_for_demo(prediction_hash, signature_class) ⇒ Object



390
391
392
393
# File 'lib/dspy/teleprompt/utils.rb', line 390

# Restricts a prediction hash to the output fields declared by a signature.
#
# @param prediction_hash [Hash] prediction keyed by field name
# @param signature_class [#output_field_descriptors] its descriptor keys name the output fields
# @return [Hash] entries of prediction_hash whose keys are output fields
def self.extract_output_fields_for_demo(prediction_hash, signature_class)
  prediction_hash.slice(*signature_class.output_field_descriptors.keys)
end

.extract_output_fields_from_prediction(prediction, signature_class) ⇒ Object



588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
# File 'lib/dspy/teleprompt/utils.rb', line 588

# Converts a prediction to a hash and keeps only the signature's output fields.
#
# @param prediction [#to_h] prediction object
# @param signature_class [#output_field_descriptors] its descriptor keys name the output fields
# @return [Hash] output-field entries present in the prediction, ordered like
#   the signature's output fields
def self.extract_output_fields_from_prediction(prediction, signature_class)
  # Hash#slice skips keys the prediction lacks and follows the order of the requested keys.
  prediction.to_h.slice(*signature_class.output_field_descriptors.keys)
end

.generate_successful_examples(program, examples, config, metric) ⇒ Object



479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
# File 'lib/dspy/teleprompt/utils.rb', line 479

# Runs the program over a reproducibly shuffled copy of the examples and
# separates those it solves from those it does not.
#
# Iteration stops once config.max_labeled_examples successes were collected or
# config.max_errors exceptions were rescued. An event is emitted per processed
# example.
#
# @param program [#call] invoked with each example's input values as keyword arguments
# @param examples [Array] examples responding to input_values, signature_class
#   and matches_prediction?
# @param config [#max_labeled_examples, #max_errors] bootstrap limits
# @param metric [#call, nil] called as metric.call(example, prediction_hash); when nil,
#   example.matches_prediction? decides success
# @return [Array(Array, Array)] [successful bootstrap examples, failed original examples]
def self.generate_successful_examples(program, examples, config, metric)
  successful = []
  failed = []
  error_count = 0

  # Fixed random_state keeps the processing order reproducible.
  shuffled_examples = DataHandler.new(examples).shuffle(random_state: 42)

  shuffled_examples.each_with_index do |example, index|
    break if successful.size >= config.max_labeled_examples
    break if error_count >= config.max_errors

    begin
      prediction = program.call(**example.input_values)

      # Compare only the signature's output fields.
      prediction_hash = extract_output_fields_from_prediction(prediction, example.signature_class)

      success = metric ? metric.call(example, prediction_hash) : example.matches_prediction?(prediction_hash)

      if success
        successful << create_successful_bootstrap_example(example, prediction_hash)
        emit_bootstrap_example_event(index, true, nil)
      else
        failed << example
        emit_bootstrap_example_event(index, false, "Prediction did not match expected output")
      end
    rescue StandardError => error
      error_count += 1
      failed << example
      emit_bootstrap_example_event(index, false, error.message)

      # The logger may be nil (create_bootstrapped_demos guards for that too);
      # safe navigation keeps a missing logger from raising out of this handler.
      DSPy.logger&.warn("Bootstrap error on example #{index}: #{error.message}")

      if error_count >= config.max_errors
        DSPy.logger&.error("Too many bootstrap errors (#{error_count}), stopping early")
        break
      end
    end
  end

  [successful, failed]
end

.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/dspy/teleprompt/utils.rb', line 55

# Selects the parameter combination with the highest mean score that has not
# been fully evaluated yet.
#
# Ties between equal means are broken by insertion order of param_score_dict
# (Ruby's sort_by alone is not stable, so the position is part of the sort key).
#
# @param param_score_dict [Hash{Object => Array<Array>}] combination key =>
#   list of [score, program, params] trials; program and params are taken from
#   the first trial of each key
# @param fully_evaled_param_combos [#include?] keys to skip
# @return [Array] [program, mean, key, params]; when every key is fully
#   evaluated, the lowest-ranked entry is returned; all nil for an empty dict
def self.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos)
  # One row per combination: [key, mean score, program, params].
  rows = param_score_dict.map do |key, trials|
    scores = trials.map { |trial| trial[0] }
    [key, scores.sum.to_f / scores.size, trials[0][1], trials[0][2]]
  end

  # Descending by mean; the original position makes the ordering deterministic.
  ranked = rows
    .each_with_index
    .sort_by { |(_key, mean, _program, _params), position| [-mean, position] }
    .map(&:first)

  # Best combination still open for evaluation, else fall back to the last ranked row.
  chosen = ranked.find { |row| !fully_evaled_param_combos.include?(row.first) } || ranked.last

  key, mean, program, params = chosen
  [program, mean, key, params]
end

.infer_signature_class(examples) ⇒ Object



639
640
641
642
643
644
645
646
647
648
649
650
651
# File 'lib/dspy/teleprompt/utils.rb', line 639

# Infers the signature class from the first example.
#
# @param examples [Array] DSPy::Example instances or hashes
# @return [Object, nil] the first example's signature_class (for a DSPy::Example)
#   or its truthy :signature_class entry (for a Hash); nil for an empty list
#   or anything else
def self.infer_signature_class(examples)
  return nil if examples.empty?

  head = examples.first
  case head
  when DSPy::Example
    head.signature_class
  when Hash
    # A missing or falsy entry yields nil.
    head[:signature_class] || nil
  end
end

.save_candidate_program(program, log_dir, trial_num, note: nil) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/dspy/teleprompt/utils.rb', line 96

# Saves a candidate program under <log_dir>/evaluated_programs.
#
# @param program [#save] called with the destination path
# @param log_dir [String, nil] base directory; nil disables saving
# @param trial_num [Object] interpolated into the file name
# @param note [Object, nil] optional suffix appended to the file name
# @return [String, nil] the path handed to program.save, or nil when log_dir is nil
def self.save_candidate_program(program, log_dir, trial_num, note: nil)
  return nil if log_dir.nil?

  # mkdir_p is a no-op for an existing directory, so no existence check is needed.
  target_dir = File.join(log_dir, "evaluated_programs")
  FileUtils.mkdir_p(target_dir)

  basename = note ? "program_#{trial_num}_#{note}.json" : "program_#{trial_num}.json"

  File.join(target_dir, basename).tap { |destination| program.save(destination) }
end