Module: RubyLLM::Contract::Concerns::EvalHost

Includes:
ContextHelpers, ProductionModeContext
Included in:
Pipeline::Base, Step::Base
Defined in:
lib/ruby_llm/contract/concerns/eval_host.rb

Constant Summary collapse

SAMPLE_RESPONSE_COMPARE_WARNING =
"[ruby_llm-contract] compare_with ignores sample_response. " \
"Without model: or context: { adapter: ... }, both sides will be skipped " \
"and the A/B comparison is not meaningful.".freeze

Instance Method Summary collapse

Instance Method Details

#clear_file_sourced_evals! ⇒ Object



30
31
32
33
34
35
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 30

# Removes every eval definition that was registered from a source file,
# then empties the bookkeeping set itself. Used on code reload so that
# file-sourced evals do not accumulate stale duplicates.
#
# No-op (returns nil) when the host has never tracked either collection.
def clear_file_sourced_evals!
  return unless defined?(@eval_definitions) && defined?(@file_sourced_evals)

  @file_sourced_evals.each do |eval_name|
    @eval_definitions.delete(eval_name)
  end
  @file_sourced_evals.clear
end

#compare_models(eval_name, models: [], candidates: [], context: {}, runs: 1, production_mode: nil) ⇒ Object

Raises:

  • (ArgumentError)


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 74

# Runs the named eval once (or `runs:` times) per candidate model config and
# bundles the per-candidate reports into an Eval::ModelComparison.
#
# Exactly one of models:/candidates: may be non-empty; passing both raises.
# production_mode: supplies a fallback config and is rejected on pipelines.
def compare_models(eval_name, models: [], candidates: [], context: {}, runs: 1, production_mode: nil)
  raise ArgumentError, "Pass either models: or candidates:, not both" if models.any? && candidates.any?

  runs = coerce_runs(runs)

  context = safe_context(context)
  candidate_configs = normalize_candidates(models, candidates)
  reject_production_mode_on_pipeline!(production_mode)
  fallback_config = normalize_production_mode(production_mode)

  reports = {}
  configs = {}
  candidate_configs.each do |candidate|
    label = Eval::ModelComparison.candidate_label(candidate)
    candidate_ctx = build_candidate_context(context, candidate, fallback_config)
    run_reports = (1..runs).map { run_single_eval(eval_name, candidate_ctx) }
    # A single run is reported directly; multiple runs are aggregated.
    reports[label] =
      if runs == 1
        run_reports.first
      else
        Eval::AggregatedReport.new(run_reports)
      end
    configs[label] = candidate
  end

  Eval::ModelComparison.new(
    eval_name: eval_name, reports: reports, configs: configs, fallback: fallback_config
  )
end

#compare_with(other_step, eval:, model: nil, context: {}) ⇒ Object

Compare this step (candidate) with another step (baseline) using the baseline’s eval definition as single source of truth.

Requires a real adapter or model in context. sample_response is intentionally NOT used, because A/B testing with canned data gives identical results for both sides rather than a real comparison.

Raises:

  • (ArgumentError)


60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 60

# A/B compares this step (candidate) against other_step (baseline), using
# the baseline's eval definition as the single dataset for both sides.
#
# A real adapter or model must be present in context — sample_response is
# deliberately ignored here (canned data would make both sides identical).
#
# Raises ArgumentError when the baseline has no eval with the given name.
# Returns an Eval::PromptDiff of candidate vs baseline reports.
def compare_with(other_step, eval:, model: nil, context: {})
  ctx = comparison_context(context, model)
  defn = baseline_eval_definition(other_step, eval)
  raise ArgumentError, "No eval '#{eval}' on baseline step #{other_step}" unless defn

  dataset = defn.build_dataset
  warn_sample_response_compare(ctx, defn)

  # Each side runs against an isolated copy of the context so neither
  # run can leak state into the other.
  candidate_report = Eval::Runner.run(step: self, dataset: dataset, context: isolate_context(ctx))
  baseline_report = Eval::Runner.run(step: other_step, dataset: dataset, context: isolate_context(ctx))

  Eval::PromptDiff.new(candidate: candidate_report, baseline: baseline_report)
end

#define_eval(name) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 14

# Registers (or replaces) an eval definition under the given name on this
# host class. Warns on redefinition unless a code reload is in progress;
# reload-time definitions are additionally tracked as file-sourced so
# clear_file_sourced_evals! can remove them later.
def define_eval(name, &)
  @eval_definitions ||= {}
  @file_sourced_evals ||= Set.new
  key = name.to_s
  reloading = Thread.current[:ruby_llm_contract_reloading]

  if !reloading && @eval_definitions.key?(key)
    warn "[ruby_llm-contract] Redefining eval '#{key}' on #{self}. " \
         "This replaces the previous definition."
  end

  @eval_definitions[key] = Eval::EvalDefinition.new(key, step_class: self, &)
  @file_sourced_evals.add(key) if reloading
  Contract.register_eval_host(self)
  register_subclasses(self)
end

#eval_defined? ⇒ Boolean

Returns:

  • (Boolean)


41
42
43
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 41

# Whether this host (including inherited definitions) has at least one eval.
def eval_defined?
  all_eval_definitions.any?
end

#eval_names ⇒ Object



37
38
39
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 37

# Names of every eval definition visible on this host.
def eval_names
  all_eval_definitions.each_key.to_a
end

#run_eval(name = nil, context: {}, concurrency: nil) ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 45

# Runs a single named eval, or every eval owned by this host when no name
# is given. The context is sanitized before being handed to the runner.
def run_eval(name = nil, context: {}, concurrency: nil)
  ctx = safe_context(context)
  return run_single_eval(name, ctx, concurrency: concurrency) if name

  run_all_own_evals(ctx, concurrency: concurrency)
end