Module: Noiseless::Adapters::ExecutionModules::PgvectorSupport
- Included in:
- PostgresqlExecution
- Defined in:
- lib/noiseless/adapters/execution_modules/pgvector_support.rb
Overview
pgvector support for semantic/vector search in PostgreSQL. Provides similarity search using embeddings.
Required:
CREATE EXTENSION IF NOT EXISTS vector;
Table setup:
ALTER TABLE your_table ADD COLUMN embedding vector(1536);
CREATE INDEX ON your_table USING ivfflat (embedding vector_cosine_ops);
Instance Method Summary collapse
-
#batch_store_embeddings(model, embeddings, column: :embedding) ⇒ Object
Batch store embeddings.
-
#find_similar(record, limit: 10, column: :embedding, exclude_self: true) ⇒ ActiveRecord::Relation
Find similar records to a given record.
-
#hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding, text_weight: 0.5, vector_weight: 0.5, limit: 20) ⇒ ActiveRecord::Relation
Hybrid search combining text and vector search.
-
#knn_search(model, embedding, k: 10, column: :embedding, filters: {}) ⇒ Array<Hash>
Execute a KNN (K-Nearest Neighbors) search.
-
#pgvector_available? ⇒ Boolean
Check if pgvector is available.
-
#store_embedding(record, embedding, column: :embedding) ⇒ Object
Store an embedding for a record.
-
#vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil, distance_metric: :cosine) ⇒ ActiveRecord::Relation
Perform semantic search using vector similarity.
Instance Method Details
#batch_store_embeddings(model, embeddings, column: :embedding) ⇒ Object
Batch store embeddings
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 137

# Batch store embeddings with a single UPDATE ... FROM (VALUES ...) statement.
#
# @param model [Class] model whose +table_name+ names the table to update
# @param embeddings [Hash, Array] id => vector pairs; each vector is a list of
#   numeric components
# @param column [Symbol, String] vector column to write
# @return [Integer] +embeddings.size+ (the number of pairs submitted, not the
#   number of rows the database matched); 0 when pgvector is unavailable, the
#   batch is empty, or the statement fails (failures are logged, not raised)
def batch_store_embeddings(model, embeddings, column: :embedding)
  return 0 unless pgvector_available?
  # An empty batch would render "FROM (VALUES )", which is a SQL syntax error.
  return 0 if embeddings.empty?

  connection = ActiveRecord::Base.connection

  # Float() rejects anything non-numeric, so only numbers reach the SQL text.
  rows = embeddings.map do |id, vector|
    components = vector.map { |value| Float(value) }.join(",")
    "(#{connection.quote(id)}, '[#{components}]'::vector)"
  end

  # NOTE: the join is on a column named "id" cast from uuid, so this only
  # works for tables with a uuid "id" key.
  sql = <<~SQL.squish
    UPDATE #{model.table_name}
    SET #{connection.quote_column_name(column)} = v.embedding
    FROM (VALUES #{rows.join(',')}) AS v(id, embedding)
    WHERE #{model.table_name}.id = v.id::uuid
  SQL

  connection.execute(sql)
  embeddings.size
rescue StandardError => e
  Rails.logger.error("Failed to batch store embeddings: #{e.message}")
  0
end
#find_similar(record, limit: 10, column: :embedding, exclude_self: true) ⇒ ActiveRecord::Relation
Find similar records to a given record
167 168 169 170 171 172 173 174 175 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 167

# Find similar records to a given record, using the record's own stored
# embedding as the query vector.
#
# @param record [Object] record exposing +column+ as a reader, +id+, and whose
#   class responds to +where+ / +none+
# @param limit [Integer] maximum number of neighbours, forwarded to #vector_search
# @param column [Symbol] vector column holding the embedding
# @param exclude_self [Boolean] when true, the record itself is filtered out by id
# @return [ActiveRecord::Relation] +record.class.none+ when the record has no
#   embedding or pgvector is unavailable; otherwise the #vector_search result
def find_similar(record, limit: 10, column: :embedding, exclude_self: true)
  # public_send: the column name is caller-supplied, so never reach private methods.
  embedding = record.public_send(column)
  return record.class.none unless embedding && pgvector_available?

  # Rows without an embedding have no distance and cannot be neighbours.
  candidates = record.class.where.not(column => nil)
  candidates = candidates.where.not(id: record.id) if exclude_self

  vector_search(candidates, embedding, column: column, limit: limit)
end
#hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding, text_weight: 0.5, vector_weight: 0.5, limit: 20) ⇒ ActiveRecord::Relation
Hybrid search combining text and vector search
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 64

# Hybrid search combining text and vector search.
#
# Selects three computed columns on top of the scope's own columns:
# +text_score+ (average of similarity() over +text_fields+, times +text_weight+),
# +vector_score+ ((1 - cosine distance) times +vector_weight+) and
# +combined_score+ (their sum), ordered by +combined_score+ descending.
#
# NOTE(review): similarity() is presumably the pg_trgm function, so that
# extension would need to be installed as well — confirm.
#
# @param scope [ActiveRecord::Relation] base relation to search within
# @param text_query [String] text compared against each of +text_fields+
# @param embedding [Array<Numeric>] query vector
# @param text_fields [Array<Symbol, String>] columns passed to similarity()
# @param vector_column [Symbol] vector column compared with +embedding+
# @param text_weight [Numeric] multiplier applied to the averaged text similarity
# @param vector_weight [Numeric] multiplier applied to the vector similarity
# @param limit [Integer] maximum number of rows
# @return [ActiveRecord::Relation] +scope+ untouched when pgvector is unavailable
# @raise [ArgumentError, TypeError] when a vector component or weight is not numeric
def hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding,
                  text_weight: 0.5, vector_weight: 0.5, limit: 20)
  return scope unless pgvector_available?

  # Float() guarantees only numbers are interpolated into the SQL text.
  vector_literal = "[#{embedding.map { |value| Float(value) }.join(',')}]"
  text_factor = Float(text_weight)
  vector_factor = Float(vector_weight)

  # The query text is quoted inline: `select` does not bind "?" placeholders,
  # so placeholders in the projection would reach the database unbound.
  quoted_query = ActiveRecord::Base.connection.quote(text_query)
  similarities = text_fields.map { |field| "similarity(#{quoted_column(field)}, #{quoted_query})" }

  # With no text fields the text part contributes 0 instead of rendering "() / 0".
  text_sum = similarities.empty? ? "0" : similarities.join(" + ")
  field_count = [similarities.size, 1].max

  vector_col = quoted_column(vector_column)
  text_score = "((#{text_sum}) / #{field_count}) * #{text_factor}"
  # Convert cosine distance to similarity: 1 - distance.
  vector_score = "(1 - (#{vector_col} <=> '#{vector_literal}')) * #{vector_factor}"

  scope.select(
    "#{scope.table_name}.*",
    Arel.sql("#{text_score} AS text_score"),
    Arel.sql("#{vector_score} AS vector_score"),
    Arel.sql("(#{text_score} + #{vector_score}) AS combined_score")
  ).where("(#{text_sum}) > 0 OR #{vector_col} IS NOT NULL")
       .order(Arel.sql("combined_score DESC"))
       .limit(limit)
end
#knn_search(model, embedding, k: 10, column: :embedding, filters: {}) ⇒ Array<Hash>
Execute a KNN (K-Nearest Neighbors) search
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 101

# Execute a KNN (K-Nearest Neighbors) search ordered by the pgvector +<=>+
# distance between +column+ and +embedding+.
#
# @param model [Class] model providing +all+ and +table_name+
# @param embedding [Array<Numeric>] query vector
# @param k [Integer] maximum number of neighbours to return
# @param column [Symbol] vector column to compare against
# @param filters [Hash, nil] conditions passed to +where+ when non-empty
# @return [Array<Hash>] result of +format_knn_response+; +[]+ when pgvector is
#   unavailable
# @raise [ArgumentError, TypeError] when a vector component is not numeric
def knn_search(model, embedding, k: 10, column: :embedding, filters: {})
  return [] unless pgvector_available?

  # Float() guarantees only numbers are interpolated into the SQL text.
  vector_literal = "[#{embedding.map { |value| Float(value) }.join(',')}]"
  distance_sql = "#{quoted_column(column)} <=> '#{vector_literal}'"

  scope = model.all
  scope = scope.where(filters) if filters&.any?

  # The same expression is projected as "distance" and used for ordering.
  results = scope.select("#{model.table_name}.*", "#{distance_sql} AS distance")
                 .order(Arel.sql(distance_sql))
                 .limit(k)

  format_knn_response(results, model)
end
#pgvector_available? ⇒ Boolean
Check if pgvector is available
178 179 180 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 178

# Check if pgvector is available, i.e. whether +available_extensions+ lists
# "vector". The answer is computed once and cached on the instance.
#
# @return [Boolean]
def pgvector_available?
  # `||=` would never cache a false answer and would call
  # available_extensions again on every invocation when pgvector is missing.
  return @pgvector_available if defined?(@pgvector_available)

  @pgvector_available = available_extensions.include?("vector")
end
#store_embedding(record, embedding, column: :embedding) ⇒ Object
Store an embedding for a record
124 125 126 127 128 129 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 124

# Store an embedding for a record by writing its "[v1,v2,...]" text form to
# +column+ via +update_column+.
#
# @param record [Object] record responding to +update_column+
# @param embedding [Array<Numeric>] vector components
# @param column [Symbol] vector column to write
# @return [Object] +false+ when pgvector is unavailable, otherwise whatever
#   +record.update_column+ returns
def store_embedding(record, embedding, column: :embedding)
  return false unless pgvector_available?

  vector_string = "[#{embedding.join(',')}]"
  record.update_column(column, vector_string)
end
#vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil, distance_metric: :cosine) ⇒ ActiveRecord::Relation
Perform semantic search using vector similarity
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 27

# Perform semantic search using vector similarity.
#
# Adds a +vector_distance+ column to the scope's own columns, optionally keeps
# only rows whose distance is below +distance_threshold+, and orders by
# ascending distance (smaller distance = more similar).
#
# @param scope [ActiveRecord::Relation] base relation to search within
# @param embedding [Array<Numeric>] query vector
# @param column [Symbol] vector column to compare against
# @param limit [Integer] maximum number of rows
# @param distance_threshold [Numeric, nil] when given, only rows with
#   distance < threshold are kept
# @param distance_metric [Symbol] translated to a SQL operator by +distance_operator+
# @return [ActiveRecord::Relation] +scope+ untouched when pgvector is unavailable
# @raise [ArgumentError, TypeError] when a vector component is not numeric
def vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil,
                  distance_metric: :cosine)
  return scope unless pgvector_available?

  # Float() guarantees only numbers are interpolated into the SQL text.
  vector_literal = "[#{embedding.map { |value| Float(value) }.join(',')}]"

  # One distance expression shared by the projection, the filter and the ordering.
  distance_sql = "#{quoted_column(column)} #{distance_operator(distance_metric)} '#{vector_literal}'"

  relation = scope.select("#{scope.table_name}.*", "#{distance_sql} AS vector_distance")
  relation = relation.where("#{distance_sql} < ?", distance_threshold) if distance_threshold

  relation.order(Arel.sql(distance_sql)).limit(limit)
end