Module: Glancer::Retriever

Defined in:
lib/glancer/retriever.rb

Class Method Summary

Class Method Details

.cosine_similarity(vec1, vec2) ⇒ Object



109
110
111
112
113
114
115
116
# File 'lib/glancer/retriever.rb', line 109

# Computes the cosine similarity of two numeric vectors.
#
# Components are paired positionally with +zip+, so the vectors are expected
# to have the same number of elements.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector
# @return [Float] the dot product divided by the product of both magnitudes,
#   or 0.0 when either vector has zero magnitude
def cosine_similarity(vec1, vec2)
  dot_product = vec1.zip(vec2).sum { |left, right| left * right }
  norm1, norm2 = [vec1, vec2].map do |vector|
    Math.sqrt(vector.sum { |component| component**2 })
  end

  # A zero-length vector has no direction; report "no similarity" instead of dividing by zero.
  return 0.0 if [norm1, norm2].any?(&:zero?)

  dot_product / (norm1 * norm2)
end

.perform_ruby_search(query_embedding) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/glancer/retriever.rb', line 65

# Ranks every stored embedding against the query embedding in plain Ruby.
#
# Each record's cosine similarity is multiplied by the weight of its source
# type, the results are ordered by descending score, and up to
# +Glancer.configuration.k+ records scoring at least
# +Glancer.configuration.min_score+ are returned. When nothing reaches the
# threshold, the best +k+ records are returned anyway and a warning is logged.
#
# @param query_embedding [Array<Numeric>] embedding vector of the query
# @return [Array] the selected records, each given a singleton +score+ method
#   that returns its weighted score
def perform_ruby_search(query_embedding)
  settings = Glancer.configuration

  scored = Glancer::Embedding.all.map do |record|
    similarity = cosine_similarity(query_embedding, record.embedding)
    { record: record, score: similarity * weight_for(record.source_type) }
  end

  # Highest weighted score first.
  ranked = scored.sort_by { |entry| -entry[:score] }

  selected = ranked.select { |entry| entry[:score] >= settings.min_score }
  selected = selected.first(settings.k)

  # Nothing cleared the threshold: fall back to the best available results so
  # the LLM always has some schema context rather than generating blind code.
  if selected.empty? && !ranked.empty?
    selected = ranked.first(settings.k)
    fallback_notice = "No results above min_score (#{settings.min_score}); " \
                      "using top #{selected.size} result(s) as fallback"
    Glancer::Utils::Logger.warn("Retriever", fallback_notice)
  end

  # Expose the weighted score on each returned record.
  documents = selected.map do |entry|
    weighted = entry[:score]
    matched = entry[:record]
    matched.define_singleton_method(:score) { weighted }
    matched
  end

  Glancer::Utils::Logger.info("Retriever", "Found #{documents.size} relevant document(s)")
  documents
end

.search(query) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/glancer/retriever.rb', line 49

# Embeds the query text and returns the most relevant stored documents.
#
# The embedding request runs inside +RateLimitRetry.with_retry+ and uses the
# embedding model and provider resolved from +Glancer.configuration+.
#
# @param query [String] text to embed and search for
# @return [Object] whatever +perform_ruby_search+ returns for the query embedding
def search(query)
  settings = Glancer.configuration
  Glancer::Utils::Logger.info("Retriever", "Searching for top #{settings.k} results...")

  vectors = Glancer::Utils::RateLimitRetry.with_retry(context: "Retriever") do
    response = RubyLLM.embed(
      query,
      model: settings.resolved_embedding_model,
      provider: settings.resolved_embedding_provider,
      assume_model_exists: true
    )
    response.vectors
  end

  # @TODO Postgres with native search?
  perform_ruby_search(vectors)
end

.store_documents(chunks_with_metadata) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/glancer/retriever.rb', line 9

# Embeds each document chunk and persists it as a +Glancer::Embedding+ record.
#
# Chunks are processed one at a time, in order: the content is embedded via
# +RubyLLM.embed+ (wrapped in +RateLimitRetry.with_retry+) and then saved with
# +Glancer::Embedding.create!+. Records created before a failure are not
# rolled back by this method.
#
# @param chunks_with_metadata [Array<Hash>] one hash per chunk with the keys
#   +:content+, +:source_type+ and +:source_path+
# @return [Object] the result of the final logger call
# @raise [Glancer::Error] when embedding or persisting any chunk raises a StandardError
def store_documents(chunks_with_metadata)
  Glancer::Utils::Logger.info("Retriever", "Storing #{chunks_with_metadata.size} document chunk(s)...")

  chunks_with_metadata.each_with_index do |data, idx|
    chunk = data[:content]
    # Short single-line excerpt (first 51 characters, whitespace collapsed) for the debug log.
    preview = chunk[0..50].gsub(/\s+/, " ").strip

    Glancer::Utils::Logger.debug("Retriever",
                                 "Embedding chunk ##{idx + 1} (#{data[:source_type]} - #{data[:source_path]}): " \
                                 "'#{preview}...'")

    vector = Glancer::Utils::RateLimitRetry.with_retry(context: "Retriever") do
      RubyLLM.embed(
        chunk,
        model: Glancer.configuration.resolved_embedding_model,
        provider: Glancer.configuration.resolved_embedding_provider,
        assume_model_exists: true
      ).vectors
    end

    Glancer::Utils::Logger.debug("Retriever",
                                 "Vector size: #{vector.size}, example values: #{vector.first(5).inspect}")

    Glancer::Embedding.create!(
      content: chunk,
      embedding: vector,
      source_type: data[:source_type],
      source_path: data[:source_path]
    )

    Glancer::Utils::Logger.info("Retriever",
                                "Stored chunk ##{idx + 1} from #{data[:source_type]}: #{data[:source_path]}")
  end

  Glancer::Utils::Logger.info("Retriever", "All chunks stored successfully.")
rescue StandardError => e
  # Log the original failure in full, then surface it to callers as the library's own error type.
  Glancer::Utils::Logger.error("Retriever", "Failed to store document chunks: #{e.class} - #{e.message}")
  Glancer::Utils::Logger.debug("Retriever", "Backtrace:\n#{e.backtrace.join("\n")}")
  raise Glancer::Error, "Document storage failed: #{e.message}"
end

.weight_for(source_type) ⇒ Object



100
101
102
103
104
105
106
107
# File 'lib/glancer/retriever.rb', line 100

# Looks up the score multiplier for a document source type.
#
# @param source_type [String] source type of a stored document
# @return [Object] the matching weight from +Glancer.configuration+ for
#   "schema", "context" or "models"; 1.0 for any other value
def weight_for(source_type)
  weight_reader =
    case source_type
    when "schema"  then :schema_documents_weight
    when "context" then :context_documents_weight
    when "models"  then :models_documents_weight
    end

  # Unrecognised source types are left unweighted; configuration is only consulted for known ones.
  return 1.0 unless weight_reader

  Glancer.configuration.public_send(weight_reader)
end