Module: Glancer::Retriever
- Defined in:
- lib/glancer/retriever.rb
Class Method Summary
- .cosine_similarity(vec1, vec2) ⇒ Object
- .perform_ruby_search(query_embedding) ⇒ Object
- .search(query) ⇒ Object
- .store_documents(chunks_with_metadata) ⇒ Object
- .weight_for(source_type) ⇒ Object
Class Method Details
.cosine_similarity(vec1, vec2) ⇒ Object
105 106 107 108 109 110 111 112 |
# File 'lib/glancer/retriever.rb', line 105

# Computes the cosine similarity of two numeric vectors: their dot product
# divided by the product of their magnitudes.
#
# The measure is only defined for two non-zero vectors of equal dimension.
# Every undefined case (a missing vector, a dimension mismatch, or a
# zero-magnitude vector) yields 0.0, so callers can rank results without
# special-casing malformed rows.
#
# @param vec1 [Array<Numeric>, nil] first vector
# @param vec2 [Array<Numeric>, nil] second vector
# @return [Float] the similarity, or 0.0 when it is undefined
def cosine_similarity(vec1, vec2)
  # zip pads a shorter vec2 with nil (TypeError on multiply) and silently
  # ignores the tail of a longer one, so reject mismatched dimensions up front.
  return 0.0 unless vec1 && vec2 && vec1.size == vec2.size

  dot = vec1.zip(vec2).sum { |a, b| a * b }
  mag1 = Math.sqrt(vec1.sum { |x| x * x })
  mag2 = Math.sqrt(vec2.sum { |x| x * x })
  # Avoid dividing by zero for all-zero (or empty) vectors.
  return 0.0 if mag1.zero? || mag2.zero?

  dot / (mag1 * mag2)
end
.perform_ruby_search(query_embedding) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/glancer/retriever.rb', line 61

# Ranks every stored embedding against the query vector in plain Ruby.
#
# Each record's cosine similarity is multiplied by the weight configured for
# its source type. Records scoring at least `min_score` are kept, capped at
# `k`. If none pass the threshold, the best `k` records are returned anyway.
#
# NOTE(review): the extracted listing had lost the `query_embedding` parameter
# and the `record.embedding` argument; both are restored from the method
# signature and the `embedding:` column written by `store_documents`.
#
# @param query_embedding [Array<Numeric>] vector for the search query
# @return [Array] matching records, best first, each given a singleton
#   `score` method returning its weighted score
def perform_ruby_search(query_embedding)
  config = Glancer.configuration
  min_score = config.min_score
  limit = config.k

  scored = Glancer::Embedding.all.map do |record|
    # Calculate similarity between query and stored document
    similarity = cosine_similarity(query_embedding, record.embedding)
    { record: record, score: similarity * weight_for(record.source_type) }
  end

  ranked = scored.sort_by { |entry| -entry[:score] }

  # Filter by min_score threshold
  top_matches = ranked.select { |entry| entry[:score] >= min_score }.first(limit)

  # Fallback: if nothing passes the threshold, use best available results so the
  # LLM always has some schema context rather than generating blind code.
  if top_matches.empty? && ranked.any?
    top_matches = ranked.first(limit)
    Glancer::Utils::Logger.warn("Retriever",
                                "No results above min_score (#{min_score}); " \
                                "using top #{top_matches.size} result(s) as fallback")
  end

  records = top_matches.map do |entry|
    weighted = entry[:score]
    # Expose the weighted score on the record itself for downstream consumers.
    entry[:record].tap { |record| record.define_singleton_method(:score) { weighted } }
  end

  Glancer::Utils::Logger.info("Retriever", "Found #{records.size} relevant document(s)")
  records
end
.search(query) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/glancer/retriever.rb', line 47

# Embeds the query text and returns the stored documents that best match it.
#
# NOTE(review): the extracted listing had lost several identifiers. The local
# `query_embedding` and the `RubyLLM.embed` call are reconstructed; the
# configuration attribute names `embedding_model` and `embedding_provider`
# are presumed — confirm against lib/glancer/retriever.rb before relying on
# this listing.
#
# @param query [String] text to search for
# @return [Array] whatever `perform_ruby_search` returns for the query vector
def search(query)
  Glancer::Utils::Logger.info("Retriever", "Searching for top #{Glancer.configuration.k} results...")

  query_embedding = RubyLLM.embed(
    query,
    model: Glancer.configuration.embedding_model,
    provider: Glancer.configuration.embedding_provider,
    assume_model_exists: true
  ).vectors

  # @TODO Postgres with native search?
  perform_ruby_search(query_embedding)
end
.store_documents(chunks_with_metadata) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/glancer/retriever.rb', line 9

# Embeds each document chunk and persists it as a Glancer::Embedding row.
#
# Chunks are processed in order. Any StandardError raised along the way is
# logged and re-raised as Glancer::Error; rows created before the failure are
# not rolled back by this method.
#
# NOTE(review): the extracted listing had lost several identifiers. The
# `chunks_with_metadata` parameter, `RubyLLM.embed` and `e.message` are
# reconstructed; the configuration attribute names `embedding_model` and
# `embedding_provider` are presumed — confirm against
# lib/glancer/retriever.rb before relying on this listing.
#
# @param chunks_with_metadata [Array<Hash>] hashes read via the keys
#   :content, :source_type and :source_path
# @raise [Glancer::Error] when embedding or persisting any chunk fails
def store_documents(chunks_with_metadata)
  Glancer::Utils::Logger.info("Retriever", "Storing #{chunks_with_metadata.size} document chunk(s)...")

  chunks_with_metadata.each_with_index do |data, idx|
    chunk = data[:content]
    # Short, whitespace-collapsed excerpt used only for the debug log line.
    preview = chunk[0..50].gsub(/\s+/, " ").strip

    Glancer::Utils::Logger.debug("Retriever",
                                 "Embedding chunk ##{idx + 1} (#{data[:source_type]} - #{data[:source_path]}): '#{preview}...'")

    vector = RubyLLM.embed(
      chunk,
      model: Glancer.configuration.embedding_model,
      provider: Glancer.configuration.embedding_provider,
      assume_model_exists: true
    ).vectors

    Glancer::Utils::Logger.debug("Retriever",
                                 "Vector size: #{vector.size}, example values: #{vector.first(5).inspect}")

    Glancer::Embedding.create!(
      content: chunk,
      embedding: vector,
      source_type: data[:source_type],
      source_path: data[:source_path]
    )

    Glancer::Utils::Logger.info("Retriever",
                                "Stored chunk ##{idx + 1} from #{data[:source_type]}: #{data[:source_path]}")
  end

  Glancer::Utils::Logger.info("Retriever", "All chunks stored successfully.")
rescue StandardError => e
  Glancer::Utils::Logger.error("Retriever", "Failed to store document chunks: #{e.class} - #{e.message}")
  # Array() guards against an exception whose backtrace was never populated.
  Glancer::Utils::Logger.debug("Retriever", "Backtrace:\n#{Array(e.backtrace).join("\n")}")
  raise Glancer::Error, "Document storage failed: #{e.message}"
end
.weight_for(source_type) ⇒ Object
96 97 98 99 100 101 102 103 |
# File 'lib/glancer/retriever.rb', line 96

# Looks up the ranking multiplier configured for a document source type.
#
# The source type is compared by its string form, so "schema" and :schema
# select the same weight. Unrecognised or nil source types get a neutral
# weight of 1.0.
#
# @param source_type [String, Symbol, nil] origin category of a stored chunk
# @return [Numeric] the configured weight, or 1.0 for unknown source types
def weight_for(source_type)
  case source_type.to_s
  when "schema" then Glancer.configuration.schema_documents_weight
  when "context" then Glancer.configuration.context_documents_weight
  when "models" then Glancer.configuration.models_documents_weight
  else 1.0
  end
end