Module: CompletionKit::MetricCalibrationExamples

Defined in:
app/services/completion_kit/metric_calibration_examples.rb

Constant Summary

# Default value of the +limit:+ keyword accepted by judge_examples_for.
DEFAULT_JUDGE_EXAMPLE_LIMIT = 5

Class Method Summary

Class Method Details

.borderlines_for(metric, limit: 6) ⇒ Object



15
16
17
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 15

# Fetches calibration examples for +metric+ whose verdict is "borderline".
#
# Delegates to calibrations_for with the verdict fixed.
#
# @param metric [#id] metric whose calibrations are wanted
# @param limit [Integer] maximum number of calibrations requested (default 6)
# @return [Object] whatever calibrations_for returns
def borderlines_for(metric, limit: 6)
  verdict = "borderline"
  calibrations_for(metric, limit: limit, verdict: verdict)
end

.calibrations_for(metric, verdict:, limit:) ⇒ Object



31
32
33
34
35
36
37
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 31

# Builds example hashes for the calibrations of +metric+ that carry +verdict+.
#
# Calibrations recorded against the metric's current version are preferred.
# When there is no current version, or the current version has no matching
# calibrations, calibrations from every version of the metric are used instead.
#
# @param metric [#id] metric whose calibrations are wanted
# @param verdict [String] verdict value to filter on
# @param limit [Integer] maximum number of calibrations loaded
# @return [Array<Hash>] result of map_examples, built from the newest
#   calibrations first (ordered by created_at descending)
def calibrations_for(metric, verdict:, limit:)
  base = Calibration.where(metric_id: metric.id, verdict: verdict)
  current_version = MetricVersion.current.find_by(metric_id: metric.id)

  effective = base
  if current_version
    scoped = base.where(metric_version_id: current_version.id)
    # Only narrow to the current version when that leaves something to show.
    # Without a current version there is nothing to narrow, so the existence
    # query is skipped entirely.
    effective = scoped if scoped.exists?
  end

  # Responses and their reviews are preloaded because map_examples reads both.
  records = effective.includes(response: :reviews).order(created_at: :desc).limit(limit)
  map_examples(records, metric)
end

.disagreements_for(metric, limit: 8) ⇒ Object



11
12
13
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 11

# Fetches calibration examples for +metric+ whose verdict is "disagree".
#
# Delegates to calibrations_for with the verdict fixed.
#
# @param metric [#id] metric whose calibrations are wanted
# @param limit [Integer] maximum number of calibrations requested (default 8)
# @return [Object] whatever calibrations_for returns
def disagreements_for(metric, limit: 8)
  verdict = "disagree"
  calibrations_for(metric, limit: limit, verdict: verdict)
end

.for(metric, limit: 8) ⇒ Object



7
8
9
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 7

# Shorthand entry point: returns the same result as disagreements_for.
#
# @param metric [#id] metric whose calibrations are wanted
# @param limit [Integer] maximum number of calibrations requested (default 8)
# @return [Object] whatever disagreements_for returns
def for(metric, limit: 8)
  forwarded = { limit: limit }
  disagreements_for(metric, **forwarded)
end

.judge_examples_for(metric, exclude_response_id: nil, limit: DEFAULT_JUDGE_EXAMPLE_LIMIT) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 19

# Collects human-corrected calibrations of +metric+'s current version as
# example hashes, keeping only those that also carry a judge score.
#
# Only calibrations with a corrected score that are not flagged
# excluded_from_examples are considered. Note that +limit+ is applied to the
# query before examples lacking a judge score are dropped, so fewer than
# +limit+ examples may be returned.
#
# @param metric [#id] metric whose calibrations are wanted
# @param exclude_response_id [Object, nil] when truthy, calibrations for this
#   response id are left out
# @param limit [Integer] maximum number of calibrations loaded
# @return [Array<Hash>] example hashes from map_examples; empty when the
#   metric has no current version
def judge_examples_for(metric, exclude_response_id: nil, limit: DEFAULT_JUDGE_EXAMPLE_LIMIT)
  version = MetricVersion.current.find_by(metric_id: metric.id)
  return [] unless version

  filters = {
    metric_id: metric.id,
    metric_version_id: version.id,
    excluded_from_examples: false
  }
  scope = Calibration.where(filters).where.not(corrected_score: nil)
  scope = scope.where.not(response_id: exclude_response_id) if exclude_response_id

  newest = scope.includes(response: :reviews).order(created_at: :desc).limit(limit)
  examples = map_examples(newest, metric)
  examples.reject { |entry| entry[:judge_score].nil? }
end

.map_examples(relation, metric) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'app/services/completion_kit/metric_calibration_examples.rb', line 39

# Converts each calibration in +relation+ into a plain example hash.
#
# For every calibration, the first review on its response whose metric_id
# equals +metric.id+ supplies the judge fields; when no such review exists
# those fields are nil.
#
# @param relation [#map] collection of calibrations; each must respond to
#   id, run_id, response_id, response, corrected_score and note
# @param metric [#id] metric used to pick the matching review
# @return [Array<Hash>] one hash per calibration with the keys :id, :run_id,
#   :response_id, :input, :output, :judge_score, :judge_feedback,
#   :human_score and :human_note
def map_examples(relation, metric)
  relation.map do |calibration|
    response = calibration.response
    judged = response.reviews.find { |candidate| candidate.metric_id == metric.id }

    {
      id: calibration.id,
      run_id: calibration.run_id,
      response_id: calibration.response_id,
      input: response.input_data,
      output: response.response_text,
      judge_score: judged&.ai_score,
      judge_feedback: judged&.ai_feedback,
      human_score: calibration.corrected_score,
      human_note: calibration.note
    }
  end
end