Module: RubyLLM::Contract::Concerns::EvalHost
- Includes:
- ContextHelpers
- Included in:
- Pipeline::Base, Step::Base
- Defined in:
- lib/ruby_llm/contract/concerns/eval_host.rb
Constant Summary collapse
- SAMPLE_RESPONSE_COMPARE_WARNING =
"[ruby_llm-contract] compare_with ignores sample_response. " \
"Without model: or context: { adapter: ... }, both sides will be skipped " \
"and the A/B comparison is not meaningful.".freeze
Instance Method Summary collapse
- #clear_file_sourced_evals! ⇒ Object
- #compare_models(eval_name, models: [], candidates: [], context: {}) ⇒ Object
-
#compare_with(other_step, eval:, model: nil, context: {}) ⇒ Object
Compare this step (the candidate) with another step (the baseline), using the baseline’s eval definition as the single source of truth.
- #define_eval(name) ⇒ Object
- #eval_defined? ⇒ Boolean
- #eval_names ⇒ Object
- #run_eval(name = nil, context: {}, concurrency: nil) ⇒ Object
Instance Method Details
#clear_file_sourced_evals! ⇒ Object
29 30 31 32 33 34 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 29
#
# Removes every eval definition whose key is recorded in
# +@file_sourced_evals+ from +@eval_definitions+, then empties the
# recorded key set.
#
# Does nothing (and returns +nil+) when either instance variable has not
# been assigned yet.
#
# @return [Object, nil] the result of clearing +@file_sourced_evals+, or
#   +nil+ when there was nothing to clear
def clear_file_sourced_evals!
  return if !defined?(@file_sourced_evals) || !defined?(@eval_definitions)

  @file_sourced_evals.each do |eval_key|
    @eval_definitions.delete(eval_key)
  end
  @file_sourced_evals.clear
end
#compare_models(eval_name, models: [], candidates: [], context: {}) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 73
#
# Runs one eval once per candidate configuration and wraps the per-candidate
# reports in an Eval::ModelComparison.
#
# @param eval_name [Object] name of the eval, forwarded to +run_single_eval+
# @param models [Array] passed to +normalize_candidates+
# @param candidates [Array] passed to +normalize_candidates+; each
#   normalized config is read via +[:model]+ and +[:reasoning_effort]+
# @param context [Hash] base context; passed through +safe_context+ and then
#   +isolate_context+ for every candidate
# @raise [ArgumentError] when both +models:+ and +candidates:+ are non-empty
# @return [Eval::ModelComparison]
def compare_models(eval_name, models: [], candidates: [], context: {})
  if models.any? && candidates.any?
    raise ArgumentError, "Pass either models: or candidates:, not both"
  end

  base_context = safe_context(context)
  reports_by_label = {}
  configs_by_label = {}

  normalize_candidates(models, candidates).each do |candidate|
    label = Eval::ModelComparison.candidate_label(candidate)

    # Each candidate gets its own isolated context with its model applied;
    # reasoning_effort is only set when the candidate specifies one.
    overrides = { model: candidate[:model] }
    overrides[:reasoning_effort] = candidate[:reasoning_effort] if candidate[:reasoning_effort]
    candidate_context = isolate_context(base_context).merge(overrides)

    reports_by_label[label] = run_single_eval(eval_name, candidate_context)
    configs_by_label[label] = candidate
  end

  Eval::ModelComparison.new(eval_name: eval_name, reports: reports_by_label, configs: configs_by_label)
end
#compare_with(other_step, eval:, model: nil, context: {}) ⇒ Object
Compare this step (the candidate) with another step (the baseline), using the baseline’s eval definition as the single source of truth.
Requires a real adapter or model in context. sample_response is intentionally NOT used, because A/B testing with canned data gives identical results for both sides rather than a real comparison.
59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 59
#
# Compares this step (the candidate) with +other_step+ (the baseline).
# The dataset is built from the baseline's eval definition and the same
# dataset is run against both steps, each with its own isolated context.
#
# @param other_step [Object] the baseline step
# @param eval [Object] name of the eval to look up on the baseline
# @param model [Object, nil] forwarded to +comparison_context+
# @param context [Hash] forwarded to +comparison_context+
# @raise [ArgumentError] when the baseline has no eval with that name
# @return [Eval::PromptDiff] diff of the candidate report against the
#   baseline report
def compare_with(other_step, eval:, model: nil, context: {})
  shared_context = comparison_context(context, model)
  definition = baseline_eval_definition(other_step, eval)
  unless definition
    raise ArgumentError, "No eval '#{eval}' on baseline step #{other_step}"
  end

  shared_dataset = definition.build_dataset
  warn_sample_response_compare(shared_context, definition)

  # Candidate (self) runs first, baseline second.
  candidate_report, baseline_report = [self, other_step].map do |step|
    Eval::Runner.run(step: step, dataset: shared_dataset, context: isolate_context(shared_context))
  end

  Eval::PromptDiff.new(candidate: candidate_report, baseline: baseline_report)
end
#define_eval(name) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 13
#
# Defines (or replaces) the eval stored under +name.to_s+ on this host.
#
# A warning is printed when a definition already exists under that key,
# unless the thread-local +:ruby_llm_contract_reloading+ flag is set. When
# the flag is set, the key is also recorded in +@file_sourced_evals+.
# Afterwards the host is registered via +Contract.register_eval_host+ and
# +register_subclasses+.
#
# @param name [#to_s] eval name; stored under its string form
# @yield block forwarded to Eval::EvalDefinition.new
# @return [Object] whatever +register_subclasses+ returns
def define_eval(name, &)
  @eval_definitions ||= {}
  @file_sourced_evals ||= Set.new
  key = name.to_s

  already_defined = @eval_definitions.key?(key)
  if already_defined && !Thread.current[:ruby_llm_contract_reloading]
    warn(
      "[ruby_llm-contract] Redefining eval '#{key}' on #{self}. " \
      "This replaces the previous definition."
    )
  end

  definition = Eval::EvalDefinition.new(key, step_class: self, &)
  @eval_definitions[key] = definition

  # The flag is read again after the definition has been built.
  reloading = Thread.current[:ruby_llm_contract_reloading]
  @file_sourced_evals.add(key) if reloading

  Contract.register_eval_host(self)
  register_subclasses(self)
end
#eval_defined? ⇒ Boolean
40 41 42 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 40
#
# Reports whether +all_eval_definitions+ contains at least one entry.
#
# @return [Boolean] +false+ when +all_eval_definitions+ is empty,
#   +true+ otherwise
def eval_defined?
  return false if all_eval_definitions.empty?

  true
end
#eval_names ⇒ Object
36 37 38 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 36
#
# Lists the keys of +all_eval_definitions+.
#
# @return [Object] the value of +all_eval_definitions.keys+
def eval_names
  definitions = all_eval_definitions
  definitions.keys
end
#run_eval(name = nil, context: {}, concurrency: nil) ⇒ Object
44 45 46 47 48 49 50 51 |
# File 'lib/ruby_llm/contract/concerns/eval_host.rb', line 44
#
# Runs a single named eval, or all of this host's own evals when no name
# is given.
#
# @param name [Object, nil] eval to run; when +nil+ (or +false+),
#   +run_all_own_evals+ is used instead
# @param context [Hash] passed through +safe_context+ before use
# @param concurrency [Object, nil] forwarded unchanged to the runner helper
# @return [Object] the result of +run_single_eval+ or +run_all_own_evals+
def run_eval(name = nil, context: {}, concurrency: nil)
  prepared_context = safe_context(context)
  return run_all_own_evals(prepared_context, concurrency: concurrency) unless name

  run_single_eval(name, prepared_context, concurrency: concurrency)
end