Module: Glancer::Retriever
- Defined in:
- lib/glancer/retriever.rb
Class Method Summary
- .cosine_similarity(vec1, vec2) ⇒ Object
- .perform_ruby_search(query_embedding) ⇒ Object
- .search(query) ⇒ Object
- .store_documents(chunks_with_metadata) ⇒ Object
- .weight_for(source_type) ⇒ Object
Class Method Details
.cosine_similarity(vec1, vec2) ⇒ Object
109 110 111 112 113 114 115 116 |
# File 'lib/glancer/retriever.rb', line 109

# Cosine similarity between two numeric vectors of equal length.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector
# @return [Float] dot(vec1, vec2) / (|vec1| * |vec2|), or 0.0 when either
#   vector has zero magnitude (which includes empty vectors)
# @raise [ArgumentError] when the vectors differ in length
def cosine_similarity(vec1, vec2)
  # Array#zip pads a shorter argument with nil and ignores a longer one's tail,
  # so mismatched inputs would either fail with a nil-coercion error or yield a
  # score whose dot product and magnitudes cover different spans.
  unless vec1.size == vec2.size
    raise ArgumentError, "vector size mismatch: #{vec1.size} vs #{vec2.size}"
  end

  mag1 = Math.sqrt(vec1.sum { |x| x**2 })
  mag2 = Math.sqrt(vec2.sum { |x| x**2 })
  # Guard the division: a zero vector has no direction to compare.
  return 0.0 if mag1.zero? || mag2.zero?

  dot = vec1.zip(vec2).sum { |a, b| a * b }
  dot / (mag1 * mag2)
end
.perform_ruby_search(query_embedding) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/glancer/retriever.rb', line 65

# Ranks every stored embedding against the query vector in plain Ruby.
#
# Each record's cosine similarity is multiplied by the weight returned by
# weight_for for its source type. Records whose weighted score is at least
# the configured `min_score` are kept, best first, capped at the configured
# `k`. When none qualify but records exist, the `k` best are returned anyway
# and a warning is logged.
#
# @param query_embedding [Array<Numeric>] vector to compare against each
#   record's stored embedding
# @return [Array] matching records, best first; each one responds to `#score`
#   with its weighted similarity
def perform_ruby_search(query_embedding)
  config = Glancer.configuration

  scored = Glancer::Embedding.all.map do |record|
    # Calculate similarity between query and stored document.
    # NOTE(review): `record.embedding` is the attribute store_documents writes
    # via `Glancer::Embedding.create!(embedding: vector, ...)`.
    similarity = cosine_similarity(query_embedding, record.embedding)
    { record: record, score: similarity * weight_for(record.source_type) }
  end

  ranked = scored.sort_by { |entry| -entry[:score] }

  # Filter by min_score threshold
  top_matches = ranked
                .select { |entry| entry[:score] >= config.min_score }
                .first(config.k)

  # Fallback: if nothing passes the threshold, use best available results so the
  # LLM always has some schema context rather than generating blind code.
  if top_matches.empty? && ranked.any?
    top_matches = ranked.first(config.k)
    Glancer::Utils::Logger.warn("Retriever",
                                "No results above min_score (#{config.min_score}); " \
                                "using top #{top_matches.size} result(s) as fallback")
  end

  # Expose the weighted score on each returned record without changing its class.
  records = top_matches.map do |entry|
    entry[:record].tap do |record|
      record.define_singleton_method(:score) { entry[:score] }
    end
  end

  Glancer::Utils::Logger.info("Retriever", "Found #{records.size} relevant document(s)")
  records
end
.search(query) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# Logs the configured result count `k`, obtains `.vectors` for `query` from a
# RubyLLM call wrapped in Glancer::Utils::RateLimitRetry.with_retry
# (context: "Retriever"), then delegates to perform_ruby_search and returns
# whatever it returns.
#
# NOTE(review): this listing lost identifiers during extraction: the local
# assigned from with_retry, the RubyLLM method name, both configuration
# readers passed as model:/provider:, and the argument handed to
# perform_ruby_search. Presumably that argument is the vector obtained above
# (the method summary names the parameter query_embedding); restore the
# missing names from lib/glancer/retriever.rb before relying on this listing.
# File 'lib/glancer/retriever.rb', line 49 def search(query) Glancer::Utils::Logger.info("Retriever", "Searching for top #{Glancer.configuration.k} results...") = Glancer::Utils::RateLimitRetry.with_retry(context: "Retriever") do RubyLLM.( query, model: Glancer.configuration., provider: Glancer.configuration., assume_model_exists: true ).vectors end # @TODO Postgres with native search? perform_ruby_search() end |
.store_documents(chunks_with_metadata) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# Embeds each chunk and persists it with one Glancer::Embedding.create! call
# per chunk.
#
# For every element the code reads :content, :source_type and :source_path,
# logs a preview (the first 51 characters of the content with whitespace runs
# collapsed to single spaces), obtains `.vectors` from a RubyLLM call wrapped
# in Glancer::Utils::RateLimitRetry.with_retry, and stores content, embedding,
# source_type and source_path.
#
# Any StandardError is logged (error line, then backtrace at debug level) and
# re-raised as Glancer::Error. No transaction is visible in this listing, so
# chunks created before a failure presumably stay stored; confirm against the
# caller.
#
# NOTE(review): this listing lost identifiers during extraction: the parameter
# name (the method summary gives chunks_with_metadata), the RubyLLM method
# name, both configuration readers passed as model:/provider:, and the
# exception reader interpolated into the two failure messages (presumably
# `e.message`; confirm). Restore them from lib/glancer/retriever.rb.
# File 'lib/glancer/retriever.rb', line 9 def store_documents() Glancer::Utils::Logger.info("Retriever", "Storing #{.size} document chunk(s)...") .each_with_index do |data, idx| chunk = data[:content] preview = chunk[0..50].gsub(/\s+/, " ").strip Glancer::Utils::Logger.debug("Retriever", "Embedding chunk ##{idx + 1} (#{data[:source_type]} - #{data[:source_path]}): '#{preview}...'") vector = Glancer::Utils::RateLimitRetry.with_retry(context: "Retriever") do RubyLLM.( chunk, model: Glancer.configuration., provider: Glancer.configuration., assume_model_exists: true ).vectors end Glancer::Utils::Logger.debug("Retriever", "Vector size: #{vector.size}, example values: #{vector.first(5).inspect}") Glancer::Embedding.create!( content: chunk, embedding: vector, source_type: data[:source_type], source_path: data[:source_path] ) Glancer::Utils::Logger.info("Retriever", "Stored chunk ##{idx + 1} from #{data[:source_type]}: #{data[:source_path]}") end Glancer::Utils::Logger.info("Retriever", "All chunks stored successfully.") rescue StandardError => e Glancer::Utils::Logger.error("Retriever", "Failed to store document chunks: #{e.class} - #{e.}") Glancer::Utils::Logger.debug("Retriever", "Backtrace:\n#{e.backtrace.join("\n")}") raise Glancer::Error, "Document storage failed: #{e.}" end |
.weight_for(source_type) ⇒ Object
100 101 102 103 104 105 106 107 |
# File 'lib/glancer/retriever.rb', line 100

# Looks up the score multiplier configured for a document source type.
#
# @param source_type [String] "schema", "context" or "models"; any other
#   value gets the neutral weight
# @return [Object] the matching `*_documents_weight` value from
#   Glancer.configuration, or 1.0 for an unrecognised source type
def weight_for(source_type)
  reader =
    case source_type
    when "schema"  then :schema_documents_weight
    when "context" then :context_documents_weight
    when "models"  then :models_documents_weight
    end

  # Unknown source types never touch the configuration object.
  return 1.0 if reader.nil?

  Glancer.configuration.public_send(reader)
end