Module: DSPy::Teleprompt::Utils

Extended by:: T::Sig

Defined in:: lib/dspy/teleprompt/utils.rb

Overview

Bootstrap utilities for MIPROv2 optimization. Handles few-shot example generation and candidate program evaluation.

Defined Under Namespace

Classes: BootstrapConfig, BootstrapResult

Class Method Summary

.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric) ⇒ Object
.create_candidate_sets(successful_examples, config) ⇒ Object
.create_labeled_demos(trainset, max_labeled, labeled_sample, rng) ⇒ Object
.create_minibatch(trainset, batch_size = 50, rng = nil) ⇒ Object
.create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_bootstrapped_demos: 3, max_labeled_demos: 3, min_num_samples: 1, metric: nil, teacher_settings: {}, seed: nil, include_non_bootstrapped: true, labeled_sample: true) ⇒ Object
.create_successful_bootstrap_example(original_example, prediction) ⇒ Object
.default_metric_for_examples(examples) ⇒ Object
.emit_bootstrap_complete_event(statistics) ⇒ Object
.emit_bootstrap_example_event(index, success, error) ⇒ Object
.ensure_typed_examples(examples) ⇒ Object
.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil) ⇒ Object
.eval_candidate_program_full(program, examples, config, metric) ⇒ Object
.eval_candidate_program_minibatch(program, examples, config, metric) ⇒ Object
.extract_output_fields_for_demo(prediction_hash, signature_class) ⇒ Object
.extract_output_fields_from_prediction(prediction, signature_class) ⇒ Object
.generate_successful_examples(program, examples, config, metric) ⇒ Object
.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos) ⇒ Object
.infer_signature_class(examples) ⇒ Object
.save_candidate_program(program, log_dir, trial_num, note: nil) ⇒ Object

Class Method Details

.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 334

# Runs +student+ over +trainset+ and turns successful predictions into demos.
#
# Iteration stops as soon as +max_bootstrapped+ demos have been collected. A
# prediction is successful when +metric+ (if given) returns a truthy value for
# (example, prediction_hash); without a metric, example.matches_prediction?
# decides. A StandardError raised while handling an example is logged through
# DSPy.logger (when one is set) and that example is skipped.
#
# When +max_labeled+ is positive, demos built from the expected values of the
# first +max_labeled+ trainset entries are placed in front of the bootstrapped
# demos.
#
# @param student [#call] called with the example's input values as keywords
# @param trainset [Array] examples responding to input_values, expected_values
#   and signature_class
# @param max_bootstrapped [Integer] upper bound on bootstrapped demos
# @param max_labeled [Integer] number of labeled demos to prepend
# @param metric [#call, nil] optional success check
# @return [Array<DSPy::FewShotExample>] labeled demos followed by bootstrapped ones
def self.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric)
  bootstrapped = []

  trainset.each do |example|
    break if bootstrapped.size >= max_bootstrapped

    begin
      raw_prediction = student.call(**example.input_values)
      prediction_hash = raw_prediction.respond_to?(:to_h) ? raw_prediction.to_h : raw_prediction

      passed = metric ? metric.call(example, prediction_hash) : example.matches_prediction?(prediction_hash)
      next unless passed

      # Keep only the signature's output fields in the stored demo.
      demo_output = extract_output_fields_for_demo(prediction_hash, example.signature_class)
      bootstrapped << DSPy::FewShotExample.new(input: example.input_values, output: demo_output)
    rescue StandardError => e
      # Best effort: a failing example must not abort the whole bootstrap pass.
      DSPy.logger.warn("Bootstrap error: #{e.message}") if DSPy.logger
    end
  end

  return bootstrapped unless max_labeled > 0

  labeled = trainset.take(max_labeled).map do |ex|
    DSPy::FewShotExample.new(input: ex.input_values, output: ex.expected_values)
  end
  labeled + bootstrapped
end

.create_candidate_sets(successful_examples, config) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 542

# Builds candidate demo sets from the successful bootstrap examples.
#
# Sampling is delegated to DataHandler#create_candidate_sets with a fixed
# random_state of 42. Each set holds at most config.max_bootstrapped_examples
# entries, capped at the number of available examples.
#
# @param successful_examples [Array] examples to draw from
# @param config [#max_bootstrapped_examples, #num_candidate_sets]
# @return [Array, Object] [] when there are no examples, otherwise whatever
#   DataHandler#create_candidate_sets returns
def self.create_candidate_sets(successful_examples, config)
  return [] if successful_examples.empty?

  handler = DataHandler.new(successful_examples)
  examples_per_set = [config.max_bootstrapped_examples, successful_examples.size].min

  handler.create_candidate_sets(
    config.num_candidate_sets,
    examples_per_set,
    random_state: 42 # fixed so repeated runs pick the same sets
  )
end

.create_labeled_demos(trainset, max_labeled, labeled_sample, rng) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 309

# Builds few-shot demos directly from labeled training examples.
#
# With +labeled_sample+ truthy, up to +max_labeled+ examples are drawn at random
# (never more than the trainset holds); otherwise the first +max_labeled+
# examples are taken in order.
#
# @param trainset [Array] examples responding to input_values and expected_values
# @param max_labeled [Integer] maximum number of demos to build
# @param labeled_sample [Boolean] sample randomly instead of taking the head
# @param rng [Random, nil] generator used for sampling; when nil, Array#sample's
#   default generator is used
# @return [Array<DSPy::FewShotExample>]
def self.create_labeled_demos(trainset, max_labeled, labeled_sample, rng)
  chosen = if labeled_sample
    sample_size = [max_labeled, trainset.size].min
    # Array#sample calls #rand on whatever is passed as random:, so a nil rng
    # must not be forwarded to it.
    rng ? trainset.sample(sample_size, random: rng) : trainset.sample(sample_size)
  else
    trainset.take(max_labeled)
  end

  chosen.map do |ex|
    DSPy::FewShotExample.new(input: ex.input_values, output: ex.expected_values)
  end
end

.create_minibatch(trainset, batch_size = 50, rng = nil) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 30

# Draws a random minibatch from +trainset+.
#
# The batch never exceeds the trainset size. Passing +rng+ makes the draw
# reproducible; without it Array#sample's default generator is used.
#
# @param trainset [Array] pool of examples
# @param batch_size [Integer] requested batch size (default 50)
# @param rng [Random, nil] optional random number generator
# @return [Array] sampled examples
def self.create_minibatch(trainset, batch_size = 50, rng = nil)
  capped_size = [batch_size, trainset.size].min
  return trainset.sample(capped_size) unless rng

  trainset.sample(capped_size, random: rng)
end

.create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_bootstrapped_demos: 3, max_labeled_demos: 3, min_num_samples: 1, metric: nil, teacher_settings: {}, seed: nil, include_non_bootstrapped: true, labeled_sample: true) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 233

# Builds candidate few-shot demo sets for every predictor of +student+.
#
# Strategy seeds run from -3 up to (but excluding) num_candidate_sets - 3:
# * -3 adds an empty (zero-shot) set, only when +include_non_bootstrapped+
# * -2 adds a labeled-only set, only when +include_non_bootstrapped+ and
#   +max_labeled_demos+ is positive
# * -1 bootstraps on the trainset in its given order
# * 0 and above shuffle the trainset with Random.new(seed) and bootstrap a
#   random number of demos between +min_num_samples+ and +max_bootstrapped_demos+
#
# Every generated set is appended to each predictor's list. The predictor count
# comes from student.predictors when available and non-empty, otherwise 1.
#
# @param student [Object] program to bootstrap; may respond to #predictors
# @param num_candidate_sets [Integer] total number of strategy seeds to run
# @param trainset [Array] training examples
# @param max_bootstrapped_demos [Integer]
# @param max_labeled_demos [Integer]
# @param min_num_samples [Integer]
# @param metric [#call, nil] forwarded to create_bootstrapped_demos
# @param teacher_settings [Hash] accepted but not read by this method
# @param seed [Integer, nil] seeds the generator used for labeled sampling
# @param include_non_bootstrapped [Boolean]
# @param labeled_sample [Boolean] forwarded to create_labeled_demos
# @return [Hash{Integer => Array}] predictor index => list of demo sets
def self.create_n_fewshot_demo_sets(
  student,
  num_candidate_sets,
  trainset,
  max_bootstrapped_demos: 3,
  max_labeled_demos: 3,
  min_num_samples: 1,
  metric: nil,
  teacher_settings: {},
  seed: nil,
  include_non_bootstrapped: true,
  labeled_sample: true
)
  demo_candidates = Hash.new { |hash, key| hash[key] = [] }
  rng = seed ? Random.new(seed) : Random.new

  predictor_count = 1
  if student.respond_to?(:predictors)
    listed = Array(student.predictors)
    predictor_count = listed.size unless listed.empty?
  end

  # Appends the same demo set to every predictor's candidate list.
  add_for_all = ->(demos) { predictor_count.times { |idx| demo_candidates[idx] << demos } }

  # Three slots of the budget are reserved for the special seeds -3, -2 and -1.
  (-3...(num_candidate_sets - 3)).each do |strategy_seed|
    if strategy_seed == -3
      # Zero-shot: a fresh empty set per predictor.
      next unless include_non_bootstrapped

      predictor_count.times { |idx| demo_candidates[idx] << [] }
    elsif strategy_seed == -2
      # Labeled-only.
      next unless include_non_bootstrapped && max_labeled_demos > 0

      add_for_all.call(create_labeled_demos(trainset, max_labeled_demos, labeled_sample, rng))
    elsif strategy_seed == -1
      # Bootstrap on the trainset in its original order.
      add_for_all.call(
        create_bootstrapped_demos(student, trainset, max_bootstrapped_demos, max_labeled_demos, metric)
      )
    else
      # Shuffled bootstrap: the seed fixes both the order and the demo count.
      seeded = Random.new(strategy_seed)
      reordered = trainset.shuffle(random: seeded)
      demo_count = seeded.rand(min_num_samples..max_bootstrapped_demos)

      add_for_all.call(
        create_bootstrapped_demos(student, reordered, demo_count, max_labeled_demos, metric)
      )
    end
  end

  demo_candidates
end

.create_successful_bootstrap_example(original_example, prediction) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 566

# Wraps a successful prediction in a new DSPy::Example.
#
# The new example keeps the original signature class and inputs, uses
# +prediction+ as its expected value, and records where it came from in
# metadata (source, the original expected values, and a creation timestamp).
# Its id is "bootstrap_" followed by the original id, or by a random UUID when
# the original has no id.
#
# @param original_example [#signature_class, #input_values, #expected_values, #id]
# @param prediction [Object] value stored as the new example's expected output
# @return [DSPy::Example]
def self.create_successful_bootstrap_example(original_example, prediction)
  bootstrap_id = "bootstrap_#{original_example.id || SecureRandom.uuid}"
  provenance = {
    source: "bootstrap",
    original_expected: original_example.expected_values,
    bootstrap_timestamp: Time.now.iso8601
  }

  DSPy::Example.new(
    signature_class: original_example.signature_class,
    input: original_example.input_values,
    expected: prediction,
    id: bootstrap_id,
    metadata: provenance
  )
end

.default_metric_for_examples(examples) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 607

# Picks a default metric based on the type of the first example.
#
# @param examples [Array] examples about to be evaluated
# @return [Proc, nil] a proc delegating to example.matches_prediction? when the
#   first example is a DSPy::Example; nil otherwise (including an empty list)
def self.default_metric_for_examples(examples)
  return nil unless examples.first.is_a?(DSPy::Example)

  proc { |example, prediction| example.matches_prediction?(prediction) }
end

.emit_bootstrap_complete_event(statistics) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 617

# Logs the 'optimization.bootstrap_complete' event through DSPy.log.
#
# @param statistics [Hash] read with the symbol keys :successful_count,
#   :failed_count, :success_rate, :candidate_sets_created and :average_set_size
# @return [Object] whatever DSPy.log returns
def self.emit_bootstrap_complete_event(statistics)
  attributes = {
    'bootstrap.successful_count' => statistics[:successful_count],
    'bootstrap.failed_count' => statistics[:failed_count],
    'bootstrap.success_rate' => statistics[:success_rate],
    'bootstrap.candidate_sets_created' => statistics[:candidate_sets_created],
    'bootstrap.average_set_size' => statistics[:average_set_size]
  }

  DSPy.log('optimization.bootstrap_complete', **attributes)
end

.emit_bootstrap_example_event(index, success, error) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 629

# Logs the 'optimization.bootstrap_example' event for one processed example.
#
# @param index [Integer] position of the example in the processed sequence
# @param success [Boolean] whether the example was accepted
# @param error [String, nil] failure description, if any
# @return [Object] whatever DSPy.log returns
def self.emit_bootstrap_example_event(index, success, error)
  attributes = {
    'bootstrap.example_index' => index,
    'bootstrap.success' => success,
    'bootstrap.error' => error
  }

  DSPy.log('optimization.bootstrap_example', **attributes)
end

.ensure_typed_examples(examples) ⇒ `Object`

Raises:

(ArgumentError)

# File 'lib/dspy/teleprompt/utils.rb', line 464

# Verifies that every example is a DSPy::Example.
#
# @param examples [Enumerable] examples to check
# @return [Enumerable] the same +examples+ object when all entries are typed
# @raise [ArgumentError] when any entry is not a DSPy::Example
def self.ensure_typed_examples(examples)
  has_untyped = examples.any? { |candidate| !candidate.is_a?(DSPy::Example) }
  return examples unless has_untyped

  raise ArgumentError, "All examples must be DSPy::Example instances. Legacy format support has been removed. Please convert your examples to use the structured format with :input and :expected keys."
end

.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 404

# Evaluates +program+, choosing between full and minibatch evaluation.
#
# Minibatch evaluation is used only when there are more examples than
# config.minibatch_size; otherwise every example is evaluated.
#
# @param program [Object] candidate program
# @param examples [Array] evaluation examples
# @param config [#minibatch_size] defaults to a fresh BootstrapConfig
# @param metric [#call, nil] optional metric, forwarded unchanged
# @return [Object] result of the selected evaluation method
def self.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil)
  unless examples.size > config.minibatch_size
    return eval_candidate_program_full(program, examples, config, metric)
  end

  eval_candidate_program_minibatch(program, examples, config, metric)
end

.eval_candidate_program_full(program, examples, config, metric) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 447

# Evaluates +program+ on all +examples+ with DSPy::Evals.
#
# When no metric is supplied, default_metric_for_examples picks one. Progress
# display is turned off.
#
# @param program [Object] candidate program
# @param examples [Array] evaluation examples
# @param config [#num_threads, #max_errors] evaluator settings
# @param metric [#call, nil] optional metric
# @return [Object] whatever DSPy::Evals#evaluate returns
def self.eval_candidate_program_full(program, examples, config, metric)
  chosen_metric = metric || default_metric_for_examples(examples)

  DSPy::Evals
    .new(program, metric: chosen_metric, num_threads: config.num_threads, max_errors: config.max_errors)
    .evaluate(examples, display_progress: false)
end

.eval_candidate_program_minibatch(program, examples, config, metric) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 422

# Evaluates +program+ on one random minibatch inside a tracing span.
#
# A single sample of at most config.minibatch_size examples is drawn without a
# seeded generator and passed to eval_candidate_program_full. The span's
# 'minibatch.num_batches' attribute is ceil(total / minibatch_size); only one
# sample is evaluated by this method.
#
# @param program [Object] candidate program
# @param examples [Array] full example pool
# @param config [#minibatch_size] evaluation settings, forwarded as well
# @param metric [#call, nil] optional metric, forwarded unchanged
# @return [Object] result of the block, as returned by DSPy::Context.with_span
def self.eval_candidate_program_minibatch(program, examples, config, metric)
  span_attributes = {
    operation: 'optimization.minibatch_evaluation',
    'dspy.module' => 'Bootstrap',
    'minibatch.total_examples' => examples.size,
    'minibatch.size' => config.minibatch_size,
    'minibatch.num_batches' => (examples.size.to_f / config.minibatch_size).ceil
  }

  DSPy::Context.with_span(**span_attributes) do
    draw_count = [config.minibatch_size, examples.size].min
    minibatch = examples.sample(draw_count)

    eval_candidate_program_full(program, minibatch, config, metric)
  end
end

.extract_output_fields_for_demo(prediction_hash, signature_class) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 390

# Restricts a prediction hash to the signature's output fields.
#
# @param prediction_hash [Hash] prediction data keyed by field name
# @param signature_class [#output_field_descriptors] its descriptor keys name
#   the output fields
# @return [Hash] entries of +prediction_hash+ whose keys are output fields
def self.extract_output_fields_for_demo(prediction_hash, signature_class)
  prediction_hash.slice(*signature_class.output_field_descriptors.keys)
end

.extract_output_fields_from_prediction(prediction, signature_class) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 588

# Converts a prediction to a hash and keeps only the signature's output fields.
#
# Fields absent from the prediction are left out rather than filled with nil.
#
# @param prediction [#to_h] prediction object
# @param signature_class [#output_field_descriptors] its descriptor keys name
#   the output fields
# @return [Hash] output field name => predicted value
def self.extract_output_fields_from_prediction(prediction, signature_class)
  prediction.to_h.slice(*signature_class.output_field_descriptors.keys)
end

.generate_successful_examples(program, examples, config, metric) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 479

# Runs +program+ over the examples and separates successes from failures.
#
# Examples are visited in the order returned by DataHandler#shuffle with
# random_state 42. Processing stops once config.max_labeled_examples successes
# have been collected or config.max_errors exceptions have occurred. A
# prediction succeeds when +metric+ (if given) returns a truthy value, or
# otherwise when example.matches_prediction? does. One bootstrap example event
# is emitted per processed example.
#
# @param program [#call] called with the example's input values as keywords
# @param examples [Array] examples responding to input_values, signature_class
#   and matches_prediction?
# @param config [#max_labeled_examples, #max_errors]
# @param metric [#call, nil] optional success check
# @return [Array(Array, Array)] [successful bootstrap examples, failed originals]
def self.generate_successful_examples(program, examples, config, metric)
  accepted = []
  rejected = []
  errors_seen = 0

  ordered = DataHandler.new(examples).shuffle(random_state: 42)

  ordered.each_with_index do |example, position|
    break if accepted.size >= config.max_labeled_examples || errors_seen >= config.max_errors

    begin
      raw_prediction = program.call(**example.input_values)
      output_hash = extract_output_fields_from_prediction(raw_prediction, example.signature_class)

      passed = metric ? metric.call(example, output_hash) : example.matches_prediction?(output_hash)

      if passed
        # The accepted example carries the prediction as its expected output.
        accepted << create_successful_bootstrap_example(example, output_hash)
        emit_bootstrap_example_event(position, true, nil)
      else
        rejected << example
        emit_bootstrap_example_event(position, false, "Prediction did not match expected output")
      end
    rescue StandardError => e
      # An exception counts as a failure and towards the error budget.
      errors_seen += 1
      rejected << example
      emit_bootstrap_example_event(position, false, e.message)

      DSPy.logger.warn("Bootstrap error on example #{position}: #{e.message}")

      if errors_seen >= config.max_errors
        DSPy.logger.error("Too many bootstrap errors (#{errors_seen}), stopping early")
        break
      end
    end
  end

  [accepted, rejected]
end

.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 55

# Selects the parameter combination with the best mean score that has not been
# fully evaluated yet.
#
# Each value in +param_score_dict+ is a list of trials shaped
# [score, program, params]; the program and params of the first trial represent
# the combination. If every combination is already fully evaluated, the
# lowest-ranked entry is returned instead.
#
# @param param_score_dict [Hash{Object => Array<Array>}] combination key => trials
# @param fully_evaled_param_combos [#include?] keys to skip
# @return [Array] [program, mean_score, key, params]; all nil when
#   +param_score_dict+ is empty
def self.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos)
  ranked = param_score_dict.map do |combo_key, trials|
    average = trials.map { |trial| trial[0] }.sum.to_f / trials.size
    representative = trials[0]
    [combo_key, average, representative[1], representative[2]]
  end
  # Highest mean first.
  ranked = ranked.sort_by { |entry| -entry[1] }

  chosen = ranked.find { |combo_key, *_rest| !fully_evaled_param_combos.include?(combo_key) }
  chosen ||= ranked.last

  combo_key, average, program, params = chosen
  [program, average, combo_key, params]
end

.infer_signature_class(examples) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 639

# Infers the signature class from the first example.
#
# @param examples [Array] DSPy::Example instances or hashes with a
#   :signature_class entry
# @return [Object, nil] the first example's signature class; nil when the list
#   is empty, the first entry is of another type, or the hash entry is missing
#   or falsy
def self.infer_signature_class(examples)
  return nil if examples.empty?

  head = examples.first
  case head
  when DSPy::Example then head.signature_class
  when Hash then head[:signature_class] || nil
  end
end

.save_candidate_program(program, log_dir, trial_num, note: nil) ⇒ `Object`

# File 'lib/dspy/teleprompt/utils.rb', line 96

# Saves +program+ under <log_dir>/evaluated_programs and returns the file path.
#
# The directory is created when missing. The file is named
# program_<trial_num>.json, or program_<trial_num>_<note>.json when +note+ is
# given. Writing is delegated to program.save.
#
# @param program [#save] object that writes itself to the given path
# @param log_dir [String, nil] base directory; nothing is saved when nil
# @param trial_num [Object] interpolated into the file name
# @param note [Object, nil] optional file name suffix
# @return [String, nil] path passed to program.save, or nil when +log_dir+ is nil
def self.save_candidate_program(program, log_dir, trial_num, note: nil)
  return nil if log_dir.nil?

  target_dir = File.join(log_dir, "evaluated_programs")
  FileUtils.mkdir_p(target_dir) unless Dir.exist?(target_dir)

  file_name = note ? "program_#{trial_num}_#{note}.json" : "program_#{trial_num}.json"

  File.join(target_dir, file_name).tap { |destination| program.save(destination) }
end

Module: DSPy::Teleprompt::Utils

Overview

Defined Under Namespace

Class Method Summary collapse

Class Method Details

.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric) ⇒ Object

.create_candidate_sets(successful_examples, config) ⇒ Object

.create_labeled_demos(trainset, max_labeled, labeled_sample, rng) ⇒ Object

.create_minibatch(trainset, batch_size = 50, rng = nil) ⇒ Object

.create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_bootstrapped_demos: 3, max_labeled_demos: 3, min_num_samples: 1, metric: nil, teacher_settings: {}, seed: nil, include_non_bootstrapped: true, labeled_sample: true) ⇒ Object

.create_successful_bootstrap_example(original_example, prediction) ⇒ Object

.default_metric_for_examples(examples) ⇒ Object

.emit_bootstrap_complete_event(statistics) ⇒ Object

.emit_bootstrap_example_event(index, success, error) ⇒ Object

.ensure_typed_examples(examples) ⇒ Object

.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil) ⇒ Object

.eval_candidate_program_full(program, examples, config, metric) ⇒ Object

.eval_candidate_program_minibatch(program, examples, config, metric) ⇒ Object

.extract_output_fields_for_demo(prediction_hash, signature_class) ⇒ Object

.extract_output_fields_from_prediction(prediction, signature_class) ⇒ Object

.generate_successful_examples(program, examples, config, metric) ⇒ Object

.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos) ⇒ Object

.infer_signature_class(examples) ⇒ Object

.save_candidate_program(program, log_dir, trial_num, note: nil) ⇒ Object

.create_bootstrapped_demos(student, trainset, max_bootstrapped, max_labeled, metric) ⇒ `Object`

.create_candidate_sets(successful_examples, config) ⇒ `Object`

.create_labeled_demos(trainset, max_labeled, labeled_sample, rng) ⇒ `Object`

.create_minibatch(trainset, batch_size = 50, rng = nil) ⇒ `Object`

.create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_bootstrapped_demos: 3, max_labeled_demos: 3, min_num_samples: 1, metric: nil, teacher_settings: {}, seed: nil, include_non_bootstrapped: true, labeled_sample: true) ⇒ `Object`

.create_successful_bootstrap_example(original_example, prediction) ⇒ `Object`

.default_metric_for_examples(examples) ⇒ `Object`

.emit_bootstrap_complete_event(statistics) ⇒ `Object`

.emit_bootstrap_example_event(index, success, error) ⇒ `Object`

.ensure_typed_examples(examples) ⇒ `Object`

.eval_candidate_program(program, examples, config: BootstrapConfig.new, metric: nil) ⇒ `Object`

.eval_candidate_program_full(program, examples, config, metric) ⇒ `Object`

.eval_candidate_program_minibatch(program, examples, config, metric) ⇒ `Object`

.extract_output_fields_for_demo(prediction_hash, signature_class) ⇒ `Object`

.extract_output_fields_from_prediction(prediction, signature_class) ⇒ `Object`

.generate_successful_examples(program, examples, config, metric) ⇒ `Object`

.get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos) ⇒ `Object`

.infer_signature_class(examples) ⇒ `Object`

.save_candidate_program(program, log_dir, trial_num, note: nil) ⇒ `Object`