Class: RubyLLM::Contract::Eval::PromptDiffComparator
- Inherits: Object
  - Object
    - RubyLLM::Contract::Eval::PromptDiffComparator
- Defined in: lib/ruby_llm/contract/eval/prompt_diff_comparator.rb
Overview
Encapsulates the safety and mismatch rules for prompt A/B comparison.
Instance Method Summary
- #baseline_empty? ⇒ Boolean
- #baseline_score ⇒ Object
- #candidate_empty? ⇒ Boolean
- #candidate_score ⇒ Object
- #case_names_match? ⇒ Boolean
- #cases_comparable? ⇒ Boolean
- #empty_comparison? ⇒ Boolean
- #expected_mismatches ⇒ Object
- #initialize(candidate_cases:, baseline_cases:, diff:) ⇒ PromptDiffComparator (constructor): A new instance of PromptDiffComparator.
- #input_mismatches ⇒ Object
- #mismatched_cases ⇒ Object
- #safe_to_switch? ⇒ Boolean
- #score_regressions ⇒ Object
Constructor Details
#initialize(candidate_cases:, baseline_cases:, diff:) ⇒ PromptDiffComparator
Returns a new instance of PromptDiffComparator.
8 9 10 11 12 13 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 8
#
# Stores both sets of evaluated cases plus the diff object, and builds a
# name-keyed lookup of the baseline cases.
#
# @param candidate_cases [#empty?, #filter_map] case records for the candidate
#   prompt; each record is read with `[:name]` and `[:score]` elsewhere in
#   this class (presumably an Array of Hashes; confirm against the caller)
# @param baseline_cases [#empty?, #to_h] case records for the baseline prompt;
#   each record must answer `[:name]`
# @param diff [#baseline_score, #current_score, #regressed?] aggregate diff
#   object that the score and safety methods delegate to
def initialize(candidate_cases:, baseline_cases:, diff:)
  @candidate_cases = candidate_cases
  @baseline_cases = baseline_cases
  @diff = diff
  # Index baseline cases by name so per-case lookups avoid rescanning the
  # collection. When two baseline cases share a name, the later one wins.
  @baseline_case_index = baseline_cases.to_h { |case_data| [case_data[:name], case_data] }
end
Instance Method Details
#baseline_empty? ⇒ Boolean
79 80 81 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 79
#
# Reports whether the baseline side has no cases at all.
#
# @return [Boolean] the result of `@baseline_cases.empty?`
def baseline_empty?
  @baseline_cases.empty?
end
#baseline_score ⇒ Object
71 72 73 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 71
#
# Aggregate score of the baseline run, as reported by the diff object.
#
# @return [Object] whatever `@diff.baseline_score` returns
#   (presumably Numeric; confirm against the diff class)
def baseline_score
  @diff.baseline_score
end
#candidate_empty? ⇒ Boolean
75 76 77 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 75
#
# Reports whether the candidate side has no cases at all.
#
# @return [Boolean] the result of `@candidate_cases.empty?`
def candidate_empty?
  @candidate_cases.empty?
end
#candidate_score ⇒ Object
67 68 69 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 67
#
# Aggregate score of the candidate run, as reported by the diff object.
# Note the naming: the diff exposes this value as `current_score`.
#
# @return [Object] whatever `@diff.current_score` returns
#   (presumably Numeric; confirm against the diff class)
def candidate_score
  @diff.current_score
end
#case_names_match? ⇒ Boolean
23 24 25 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 23
#
# Checks whether both sides yield equal results from `case_names`.
# NOTE(review): `case_names` is defined outside this listing; whether the
# comparison is order-sensitive depends on what it returns. Confirm there.
#
# @return [Boolean] true when `case_names` of baseline and candidate are `==`
def case_names_match?
  case_names(@baseline_cases) == case_names(@candidate_cases)
end
#cases_comparable? ⇒ Boolean
27 28 29 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 27
#
# Checks whether both sides yield equal results from `case_signatures`.
# NOTE(review): `case_signatures` is defined outside this listing; which
# case attributes make up a signature should be confirmed there.
#
# @return [Boolean] true when the signatures of baseline and candidate are `==`
def cases_comparable?
  case_signatures(@baseline_cases) == case_signatures(@candidate_cases)
end
#empty_comparison? ⇒ Boolean
83 84 85 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 83
#
# Reports whether at least one side of the comparison has no cases.
#
# @return [Boolean] true when `baseline_empty?` or `candidate_empty?` is true
def empty_comparison?
  baseline_empty? || candidate_empty?
end
#expected_mismatches ⇒ Object
45 46 47 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 45
#
# Delegates to `attribute_mismatches` for the `:expected` attribute.
# NOTE(review): the helper is defined outside this listing; the second and
# third symbols are presumably the keys used for the baseline and candidate
# values in each reported mismatch. Confirm against the helper.
#
# @return [Object] whatever `attribute_mismatches` returns
def expected_mismatches
  attribute_mismatches(:expected, :baseline_expected, :candidate_expected)
end
#input_mismatches ⇒ Object
41 42 43 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 41
#
# Delegates to `attribute_mismatches` for the `:input` attribute.
# NOTE(review): the helper is defined outside this listing; the second and
# third symbols are presumably the keys used for the baseline and candidate
# values in each reported mismatch. Confirm against the helper.
#
# @return [Object] whatever `attribute_mismatches` returns
def input_mismatches
  attribute_mismatches(:input, :baseline_input, :candidate_input)
end
#mismatched_cases ⇒ Object
31 32 33 34 35 36 37 38 39 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 31
#
# Lists the case names that appear on only one side of the comparison.
# Both name lists come from `case_names`, which is defined outside this
# listing; the `-` operator is applied to its results in each direction.
#
# @return [Hash] with keys `:only_in_baseline` and `:only_in_candidate`,
#   each holding the names missing from the other side
def mismatched_cases
  baseline_names = case_names(@baseline_cases)
  candidate_names = case_names(@candidate_cases)

  {
    only_in_baseline: baseline_names - candidate_names,
    only_in_candidate: candidate_names - baseline_names
  }
end
#safe_to_switch? ⇒ Boolean
15 16 17 18 19 20 21 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 15
#
# Decides whether the candidate prompt may replace the baseline.
#
# The checks run in order and the first failing one returns false:
# 1. either side has no cases (`empty_comparison?`),
# 2. the two sides are not comparable (`cases_comparable?`),
# 3. at least one per-case score regression exists (`score_regressions`).
# If all three pass, the answer is the negation of `@diff.regressed?`.
#
# @return [Boolean] true only when every check above passes and the diff
#   does not report a regression
def safe_to_switch?
  return false if empty_comparison?
  return false unless cases_comparable?
  return false if score_regressions.any?

  !@diff.regressed?
end
#score_regressions ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/ruby_llm/contract/eval/prompt_diff_comparator.rb', line 49
#
# Collects every candidate case whose score is strictly lower than the score
# of the baseline case with the same name.
#
# Candidate cases with no same-named baseline case are skipped, as are cases
# whose score is equal or higher.
#
# @return [Array<Hash>] one entry per regressed case with keys `:case`,
#   `:baseline_score`, `:candidate_score` and `:delta` (candidate minus
#   baseline, rounded to 4 decimal places, so always negative here)
def score_regressions
  @candidate_cases.filter_map do |candidate_case|
    baseline_case = @baseline_case_index[candidate_case[:name]]
    next unless baseline_case

    # Locals deliberately do not reuse the names `baseline_score` /
    # `candidate_score`, which are public methods on this class.
    baseline_value = baseline_case[:score]
    candidate_value = candidate_case[:score]
    next unless candidate_value < baseline_value

    {
      case: candidate_case[:name],
      baseline_score: baseline_value,
      candidate_score: candidate_value,
      delta: (candidate_value - baseline_value).round(4)
    }
  end
end