Class: LlmOptimizer::SemanticCache

Inherits:
Object
  • Object
show all
Defined in:
lib/llm_optimizer/semantic_cache.rb

Constant Summary collapse

# Redis key prefix for cache entries; #lookup enumerates candidates with the
# glob pattern "#{KEY_NAMESPACE}*".
KEY_NAMESPACE =
"llm_optimizer:cache:"

Instance Method Summary collapse

Constructor Details

#initialize(redis_client, threshold:, ttl:) ⇒ SemanticCache

Returns a new instance of SemanticCache.



10
11
12
13
14
# File 'lib/llm_optimizer/semantic_cache.rb', line 10

# Wires up the cache with its backing store and tuning knobs.
#
# @param redis_client [Object] client used for all reads and writes
# @param threshold [Numeric] minimum cosine similarity #lookup requires
#   before it returns a cached response
# @param ttl [Object] expiry handed to Redis as the `ex:` option in #store
def initialize(redis_client, threshold:, ttl:)
  @redis, @threshold, @ttl = redis_client, threshold, ttl
end

Instance Method Details

#cosine_similarity(vec_a, vec_b) ⇒ Object



58
59
60
61
62
63
64
65
# File 'lib/llm_optimizer/semantic_cache.rb', line 58

# Computes the cosine similarity of two numeric vectors.
#
# @param vec_a [Array<Numeric>] first vector
# @param vec_b [Array<Numeric>] second vector
# @return [Float] dot(a, b) / (|a| * |b|); 0.0 when the vectors differ in
#   length or when either vector has zero magnitude
def cosine_similarity(vec_a, vec_b)
  # Vectors of different dimensionality are not comparable. Without this
  # guard, zip pads a shorter vec_b with nil (raising TypeError) and drops
  # the tail of a longer vec_b from the dot product while mag_b still
  # counts it, which yields a meaningless score.
  return 0.0 unless vec_a.size == vec_b.size

  mag_a = Math.sqrt(vec_a.sum { |x| x * x })
  mag_b = Math.sqrt(vec_b.sum { |x| x * x })
  # A zero-magnitude vector has no direction; also avoids dividing by zero.
  return 0.0 if mag_a.zero? || mag_b.zero?

  dot = vec_a.zip(vec_b).sum { |a, b| a * b }
  dot / (mag_a * mag_b)
end

#lookup(embedding) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/llm_optimizer/semantic_cache.rb', line 30

# Finds the cached response whose stored embedding is most similar to
# +embedding+.
#
# Every entry under KEY_NAMESPACE is fetched, decoded and scored with
# #cosine_similarity; the best-scoring response is returned only if its
# score reaches @threshold.
#
# @param embedding [Array<Float>] query vector
# @return [Object, nil] the best matching cached response, or nil when the
#   cache is empty, nothing reaches the threshold, or Redis raises
def lookup(embedding)
  pattern = "#{KEY_NAMESPACE}*"
  # Prefer SCAN (incremental cursor iteration) over KEYS, which walks the
  # whole keyspace in one blocking command. Clients that do not expose
  # scan_each keep using keys so existing callers are unaffected.
  cache_keys =
    if @redis.respond_to?(:scan_each)
      @redis.scan_each(match: pattern)
    else
      @redis.keys(pattern)
    end

  best_score    = -Float::INFINITY
  best_response = nil

  cache_keys.each do |key|
    raw = @redis.get(key)
    # The key may have expired between enumeration and this GET.
    next unless raw

    entry = MessagePack.unpack(raw)
    # Unpack the binary string back to 64-bit doubles
    stored_embedding = entry["embedding"].unpack("G*")
    # Entries with a different dimension cannot be compared to the query;
    # skip them instead of letting one such entry break the whole lookup.
    next unless stored_embedding.size == embedding.size

    score = cosine_similarity(embedding, stored_embedding)
    next unless score > best_score

    best_score    = score
    best_response = entry["response"]
  end

  # With no scored entry best_score stays -Infinity and best_response nil.
  best_score >= @threshold ? best_response : nil
rescue ::Redis::BaseError => e
  warn "[llm_optimizer] SemanticCache lookup failed: #{e.message}"
  nil
end

#store(embedding, response) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/llm_optimizer/semantic_cache.rb', line 16

# Writes +response+ to Redis under the key derived from +embedding+,
# expiring after @ttl.
#
# @param embedding [Array<Float>] vector the response was generated for
# @param response [Object] value to cache alongside the embedding
# @return [Object, nil] whatever the Redis client returns from SET, or nil
#   when a Redis error was rescued and logged
def store(embedding, response)
  redis_key = cache_key(embedding)
  # The vector is stored as a binary string of big-endian 64-bit doubles
  # ("G*") rather than as a MessagePack float array; #lookup reverses this
  # with unpack("G*"). The original author's note says MessagePack float
  # packing lost precision and skewed cosine similarity.
  # NOTE(review): confirm against the msgpack version in use.
  record = {
    "embedding" => embedding.pack("G*"),
    "response" => response
  }
  @redis.set(redis_key, MessagePack.pack(record), ex: @ttl)
rescue ::Redis::BaseError => e
  warn "[llm_optimizer] SemanticCache store failed: #{e.message}"
end