Module: Legion::Apollo::Helpers::Similarity

Extended by:
Logging::Helper
Defined in:
lib/legion/apollo/helpers/similarity.rb

Overview

Pure cosine similarity math and match classification for Apollo vectors.

Constant Summary collapse

EXACT_MATCH_THRESHOLD =
0.95
HIGH_SIMILARITY_THRESHOLD =
0.85
CORROBORATION_THRESHOLD =
0.75
0.5

Class Method Summary collapse

Class Method Details

.classify_match(similarity) ⇒ Object

rubocop:disable Metrics/MethodLength



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/legion/apollo/helpers/similarity.rb', line 48

def classify_match(similarity) # rubocop:disable Metrics/MethodLength
  case similarity
  when EXACT_MATCH_THRESHOLD..1.0                          then :exact
  when HIGH_SIMILARITY_THRESHOLD...EXACT_MATCH_THRESHOLD   then :high
  when CORROBORATION_THRESHOLD...HIGH_SIMILARITY_THRESHOLD  then :corroboration
  when RELATED_THRESHOLD...CORROBORATION_THRESHOLD          then :related
  else :unrelated
  end
rescue StandardError => e
  handle_exception(
    e,
    level:      :debug,
    operation:  'apollo.helpers.similarity.classify_match',
    similarity: similarity
  )
  :unrelated
end

.cosine_similarity(vec_a, vec_b) ⇒ Object

rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/legion/apollo/helpers/similarity.rb', line 19

def cosine_similarity(vec_a, vec_b) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity
  return 0.0 if vec_a.nil? || vec_b.nil? || vec_a.empty? || vec_b.empty?
  return 0.0 unless vec_a.size == vec_b.size

  dot = 0.0
  mag_a = 0.0
  mag_b = 0.0

  vec_a.size.times do |i|
    a = vec_a[i].to_f
    b = vec_b[i].to_f
    dot   += a * b
    mag_a += a * a
    mag_b += b * b
  end

  denom = Math.sqrt(mag_a) * Math.sqrt(mag_b)
  denom.zero? ? 0.0 : (dot / denom)
rescue StandardError => e
  handle_exception(
    e,
    level:      :debug,
    operation:  'apollo.helpers.similarity.cosine_similarity',
    vec_a_size: Array(vec_a).size,
    vec_b_size: Array(vec_b).size
  )
  0.0
end