Module: Glancer::Retriever

Defined in:
lib/glancer/retriever.rb

Class Method Summary collapse

Class Method Details

.cosine_similarity(vec1, vec2) ⇒ Object



105
106
107
108
109
110
111
112
# File 'lib/glancer/retriever.rb', line 105

# Computes the cosine similarity of two numeric vectors: their dot product
# divided by the product of their Euclidean norms.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector, paired element-wise with vec1 via zip
# @return [Numeric] the similarity, or 0.0 when either vector has zero magnitude
def cosine_similarity(vec1, vec2)
  dot_product = vec1.zip(vec2).sum { |left, right| left * right }
  norm_of = ->(vector) { Math.sqrt(vector.sum { |component| component**2 }) }
  norm1 = norm_of.call(vec1)
  norm2 = norm_of.call(vec2)

  # A zero-length vector has no direction; report "no similarity" rather than
  # dividing by zero.
  if norm1.zero? || norm2.zero?
    0.0
  else
    dot_product / (norm1 * norm2)
  end
end

.perform_ruby_search(query_embedding) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/glancer/retriever.rb', line 61

# Ranks every stored embedding against the query vector in plain Ruby.
#
# Each Glancer::Embedding record is scored with cosine_similarity, and the
# score is multiplied by weight_for(record.source_type). Records whose weighted
# score is at least Glancer.configuration.min_score are kept, best first, and
# capped at Glancer.configuration.k. When nothing reaches the threshold, the
# top k records by score are returned instead and a warning is logged.
#
# @param query_embedding [Array<Numeric>] vector compared against each record's embedding
# @return [Array] the selected records; each gets a singleton #score method
#   returning its weighted score
def perform_ruby_search(query_embedding)
  scored = Glancer::Embedding.all.map do |record|
    similarity = cosine_similarity(query_embedding, record.embedding)
    [record, similarity * weight_for(record.source_type)]
  end

  # Negating the score yields a descending order.
  ranked = scored.sort_by { |_record, weighted| -weighted }

  config = Glancer.configuration
  above_threshold = ranked.select { |_record, weighted| weighted >= config.min_score }
  chosen = above_threshold.first(config.k)

  # Fallback: prefer weak context over none, so the LLM always has some schema
  # context rather than generating blind code.
  if chosen.empty? && !ranked.empty?
    chosen = ranked.first(config.k)
    fallback_notice = "No results above min_score (#{config.min_score}); " \
                      "using top #{chosen.size} result(s) as fallback"
    Glancer::Utils::Logger.warn("Retriever", fallback_notice)
  end

  # Expose the weighted score on each returned record.
  documents = chosen.map do |record, weighted|
    record.define_singleton_method(:score) { weighted }
    record
  end

  Glancer::Utils::Logger.info("Retriever", "Found #{documents.size} relevant document(s)")
  documents
end

.search(query) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/glancer/retriever.rb', line 47

# Embeds the query via RubyLLM and returns the best-matching stored documents.
#
# The embedding model and provider come from Glancer.configuration; ranking is
# delegated to perform_ruby_search.
#
# @param query [Object] the text handed to RubyLLM.embed (presumably a String — confirm against callers)
# @return [Object] whatever perform_ruby_search returns for the query's vectors
def search(query)
  config = Glancer.configuration
  Glancer::Utils::Logger.info("Retriever", "Searching for top #{config.k} results...")

  embed_options = {
    model: config.resolved_embedding_model,
    provider: config.resolved_embedding_provider,
    assume_model_exists: true
  }
  embedding = RubyLLM.embed(query, **embed_options)

  # TODO: Postgres with native search?
  perform_ruby_search(embedding.vectors)
end

.store_documents(chunks_with_metadata) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/glancer/retriever.rb', line 9

# Embeds each document chunk via RubyLLM and persists it as a
# Glancer::Embedding record, logging progress along the way.
#
# @param chunks_with_metadata [Array<Hash>] one entry per chunk; each entry is
#   read via the keys :content, :source_type and :source_path
# @return [Object] the result of the final log call; not meaningful to callers
# @raise [Glancer::Error] when any StandardError is raised while embedding or
#   saving a chunk (the original error is logged first)
#
# NOTE(review): no transaction is opened in this method, so records created
# before a failing chunk are presumably left in place — confirm.
def store_documents(chunks_with_metadata)
  Glancer::Utils::Logger.info("Retriever", "Storing #{chunks_with_metadata.size} document chunk(s)...")

  chunks_with_metadata.each_with_index do |data, idx|
    chunk = data[:content]
    # Short single-line excerpt of the chunk, used only for the debug log.
    preview = chunk[0..50].gsub(/\s+/, " ").strip

    Glancer::Utils::Logger.debug("Retriever",
                                 "Embedding chunk ##{idx + 1} (#{data[:source_type]} - #{data[:source_path]}): '#{preview}...'")

    vector = RubyLLM.embed(
      chunk,
      model: Glancer.configuration.resolved_embedding_model,
      provider: Glancer.configuration.resolved_embedding_provider,
      assume_model_exists: true
    ).vectors

    Glancer::Utils::Logger.debug("Retriever",
                                 "Vector size: #{vector.size}, example values: #{vector.first(5).inspect}")

    Glancer::Embedding.create!(
      content: chunk,
      embedding: vector,
      source_type: data[:source_type],
      source_path: data[:source_path]
    )

    Glancer::Utils::Logger.info("Retriever",
                                "Stored chunk ##{idx + 1} from #{data[:source_type]}: #{data[:source_path]}")
  end

  Glancer::Utils::Logger.info("Retriever", "All chunks stored successfully.")
rescue StandardError => e
  # Log the details, then re-raise as the library's own error type.
  Glancer::Utils::Logger.error("Retriever", "Failed to store document chunks: #{e.class} - #{e.message}")
  Glancer::Utils::Logger.debug("Retriever", "Backtrace:\n#{e.backtrace.join("\n")}")
  raise Glancer::Error, "Document storage failed: #{e.message}"
end

.weight_for(source_type) ⇒ Object



96
97
98
99
100
101
102
103
# File 'lib/glancer/retriever.rb', line 96

# Looks up the score multiplier configured for a document source type.
#
# @param source_type [String] matched against "schema", "context" and "models"
# @return [Object] the matching *_documents_weight value from
#   Glancer.configuration, or 1.0 for any other source type
def weight_for(source_type)
  weight_reader =
    case source_type
    when "schema"  then :schema_documents_weight
    when "context" then :context_documents_weight
    when "models"  then :models_documents_weight
    end

  # Unrecognised source types are neither boosted nor penalised.
  return 1.0 unless weight_reader

  Glancer.configuration.public_send(weight_reader)
end