Module: DSPy::Scores::Evaluators

Extended by:
T::Sig
Defined in:
lib/dspy/scores/evaluators.rb

Overview

Built-in evaluators for common evaluation patterns. Each evaluator returns a ScoreEvent that can be exported to Langfuse.

Class Method Summary collapse

Class Method Details

.contains(output:, expected:, name: 'contains', ignore_case: false, comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/dspy/scores/evaluators.rb', line 68

# Scores whether +output+ includes +expected+ as a substring.
#
# The score value is 1.0 when the substring is found and 0.0 otherwise.
#
# @param output the text to search; must respond to +include?+ (and
#   +downcase+ when +ignore_case+ is set)
# @param expected the substring to look for; must respond to +downcase+
#   when +ignore_case+ is set
# @param name score name forwarded to DSPy::Scores.create
# @param ignore_case when truthy, both sides are downcased before the check
# @param comment optional comment; when nil/false a default describing the
#   outcome is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.contains(
  output:,
  expected:,
  name: 'contains',
  ignore_case: false,
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  # Normalise both operands up front so a single include? call decides the outcome.
  haystack, needle = ignore_case ? [output.downcase, expected.downcase] : [output, expected]
  found = haystack.include?(needle)
  default_comment = found ? 'Contains expected' : 'Does not contain expected'

  DSPy::Scores.create(
    name: name,
    value: found ? 1.0 : 0.0,
    data_type: DataType::Numeric,
    comment: comment || default_comment,
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end

.exact_match(output:, expected:, name: 'exact_match', ignore_case: false, comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/dspy/scores/evaluators.rb', line 27

# Scores whether +output+ equals +expected+.
#
# The score value is 1.0 on equality and 0.0 otherwise.
#
# @param output the value under evaluation; must respond to +downcase+
#   when +ignore_case+ is set
# @param expected the reference value; must respond to +downcase+ when
#   +ignore_case+ is set
# @param name score name forwarded to DSPy::Scores.create
# @param ignore_case when truthy, both sides are downcased before comparing
# @param comment optional comment; when nil/false a default describing the
#   outcome is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.exact_match(
  output:,
  expected:,
  name: 'exact_match',
  ignore_case: false,
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  identical = ignore_case ? (output.downcase == expected.downcase) : (output == expected)
  score_value = identical ? 1.0 : 0.0
  default_comment = identical ? 'Exact match' : 'No match'

  DSPy::Scores.create(
    name: name,
    value: score_value,
    data_type: DataType::Numeric,
    comment: comment || default_comment,
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end

.json_valid(output:, name: 'json_valid', comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/dspy/scores/evaluators.rb', line 220

# Scores whether +output+ can be parsed by JSON.parse.
#
# The score value is 1.0 when parsing succeeds and 0.0 when it fails.
# Input that JSON.parse cannot even convert to a String (for example +nil+)
# is scored as invalid rather than raising.
#
# @param output the candidate JSON text
# @param name score name forwarded to DSPy::Scores.create
# @param comment optional comment; when nil/false a default describing the
#   outcome is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.json_valid(
  output:,
  name: 'json_valid',
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  valid =
    begin
      JSON.parse(output)
      true
    rescue JSON::ParserError, TypeError
      # TypeError covers nil / non-String input, which JSON.parse rejects
      # before parsing; treat it the same as malformed JSON.
      false
    end

  DSPy::Scores.create(
    name: name,
    value: valid ? 1.0 : 0.0,
    data_type: DataType::Numeric,
    comment: comment || (valid ? 'Valid JSON' : 'Invalid JSON'),
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end

.length_check(output:, min_length: nil, max_length: nil, name: 'length_check', comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/dspy/scores/evaluators.rb', line 145

# Scores whether the length of +output+ lies within optional bounds.
#
# The score value is 1.0 when +output.length+ is not below +min_length+
# and not above +max_length+ (each bound is skipped when nil/false), and
# 0.0 otherwise.
#
# @param output the value to measure; must respond to +length+
# @param min_length inclusive lower bound, or nil for no lower bound
# @param max_length inclusive upper bound, or nil for no upper bound
# @param name score name forwarded to DSPy::Scores.create
# @param comment optional comment; when nil/false a default reporting the
#   measured length and both bounds is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.length_check(
  output:,
  min_length: nil,
  max_length: nil,
  name: 'length_check',
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  size = output.length
  too_short = min_length && size < min_length
  too_long = max_length && size > max_length
  within_bounds = !(too_short || too_long)

  DSPy::Scores.create(
    name: name,
    value: within_bounds ? 1.0 : 0.0,
    data_type: DataType::Numeric,
    comment: comment || "Length: #{size} (min: #{min_length || 'none'}, max: #{max_length || 'none'})",
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end

.levenshtein_distance(str1, str2) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/dspy/scores/evaluators.rb', line 248

# Computes the Levenshtein (edit) distance between two strings: the minimum
# number of single-character insertions, deletions and substitutions needed
# to turn +str1+ into +str2+.
#
# Only two rows of the dynamic-programming table are kept at a time, and
# each string is split into characters once up front instead of being
# indexed on every cell.
#
# @param str1 [String] source string
# @param str2 [String] target string
# @return [Integer] the edit distance (0 when the strings are equal)
def self.levenshtein_distance(str1, str2)
  source = str1.chars
  target = str2.chars

  return target.length if source.empty?
  return source.length if target.empty?

  # previous[j] is the distance between the already-processed prefix of
  # source and the first j characters of target. It starts as the distance
  # from the empty prefix, i.e. j insertions.
  previous = (0..target.length).to_a

  source.each_with_index do |source_char, i|
    # Column 0: turning the first i + 1 source characters into the empty string.
    current = [i + 1]

    target.each_with_index do |target_char, j|
      cost = source_char == target_char ? 0 : 1
      current << [
        previous[j + 1] + 1, # deletion
        current[j] + 1,      # insertion
        previous[j] + cost   # substitution
      ].min
    end

    previous = current
  end

  previous.last
end

.regex_match(output:, pattern:, name: 'regex_match', comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/dspy/scores/evaluators.rb', line 108

# Scores whether +output+ matches a regular expression.
#
# The score value is 1.0 when the pattern matches anywhere in +output+
# and 0.0 otherwise.
#
# @param output the text to test against the pattern
# @param pattern a Regexp, or a value passed to Regexp.new to build one
# @param name score name forwarded to DSPy::Scores.create
# @param comment optional comment; when nil/false a default describing the
#   outcome is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.regex_match(
  output:,
  pattern:,
  name: 'regex_match',
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  # Accept a ready-made Regexp as-is; compile anything else.
  compiled = pattern
  compiled = Regexp.new(pattern) unless pattern.is_a?(Regexp)

  matched = compiled.match?(output)
  default_comment = matched ? 'Regex matched' : 'Regex did not match'

  DSPy::Scores.create(
    name: name,
    value: matched ? 1.0 : 0.0,
    data_type: DataType::Numeric,
    comment: comment || default_comment,
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end

.similarity(output:, expected:, name: 'similarity', comment: nil, trace_id: nil, observation_id: nil, emit: true) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/dspy/scores/evaluators.rb', line 184

# Scores how similar +output+ is to +expected+ using normalised
# Levenshtein distance.
#
# The score is 1.0 - distance / max(output.length, expected.length),
# rounded to 4 decimal places; when both lengths are zero the score is 1.0.
#
# @param output the text under evaluation; must respond to +length+
# @param expected the reference text; must respond to +length+
# @param name score name forwarded to DSPy::Scores.create
# @param comment optional comment; when nil/false a default reporting the
#   raw Levenshtein distance is used
# @param trace_id forwarded unchanged to DSPy::Scores.create
# @param observation_id forwarded unchanged to DSPy::Scores.create
# @param emit forwarded unchanged to DSPy::Scores.create
# @return the result of DSPy::Scores.create
def self.similarity(
  output:,
  expected:,
  name: 'similarity',
  comment: nil,
  trace_id: nil,
  observation_id: nil,
  emit: true
)
  edits = levenshtein_distance(output, expected)
  longest = [output.length, expected.length].max

  ratio =
    if longest.zero?
      # Two empty inputs are treated as a perfect match (avoids dividing by zero).
      1.0
    else
      1.0 - (edits.to_f / longest)
    end

  DSPy::Scores.create(
    name: name,
    value: ratio.round(4),
    data_type: DataType::Numeric,
    comment: comment || "Levenshtein distance: #{edits}",
    trace_id: trace_id,
    observation_id: observation_id,
    emit: emit
  )
end