Module: Noiseless::Adapters::ExecutionModules::PgvectorSupport

Included in:
PostgresqlExecution
Defined in:
lib/noiseless/adapters/execution_modules/pgvector_support.rb

Overview

pgvector support for semantic/vector search in PostgreSQL. Provides similarity search using embeddings.

Required:

CREATE EXTENSION IF NOT EXISTS vector;

Table setup:

ALTER TABLE your_table ADD COLUMN embedding vector(1536);
CREATE INDEX ON your_table USING ivfflat (embedding vector_cosine_ops);

Instance Method Summary collapse

Instance Method Details

#batch_store_embeddings(model, embeddings, column: :embedding) ⇒ Object

Batch store embeddings

Parameters:

  • model (Class)

    The ActiveRecord model

  • embeddings (Hash<String, Array<Float>>)

    Map of ID -> embedding

  • column (Symbol) (defaults to: :embedding)

    The column to store embeddings



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 137

# Store many embeddings with a single UPDATE ... FROM (VALUES ...) statement.
#
# @param model [Class] ActiveRecord model whose table is updated
# @param embeddings [Hash<String, Array<Numeric>>] map of record ID -> embedding
# @param column [Symbol] column that receives the embeddings
# @return [Integer] number of embeddings submitted (not the number of rows matched);
#   0 when pgvector is unavailable, the map is empty, or the statement fails
def batch_store_embeddings(model, embeddings, column: :embedding)
  return 0 unless pgvector_available?
  # An empty VALUES list is a SQL syntax error, so there is nothing to send.
  return 0 if embeddings.empty?

  connection = model.connection
  table = connection.quote_table_name(model.table_name)

  # Float() rejects anything non-numeric, so no element can smuggle SQL into the literal.
  values = embeddings.map do |id, emb|
    vector_literal = "[#{emb.map { |component| Float(component) }.join(',')}]"
    "(#{connection.quote(id)}, '#{vector_literal}'::vector)"
  end.join(",")

  # NOTE: IDs are cast to uuid, so this statement only fits tables keyed by a uuid `id`.
  sql = <<~SQL.squish
    UPDATE #{table}
    SET #{connection.quote_column_name(column)} = v.embedding
    FROM (VALUES #{values}) AS v(id, embedding)
    WHERE #{table}.id = v.id::uuid
  SQL

  connection.execute(sql)
  embeddings.size
rescue StandardError => e
  # Best effort: failures are logged and reported to the caller as "0 stored".
  Rails.logger.error("Failed to batch store embeddings: #{e.message}")
  0
end

#find_similar(record, limit: 10, column: :embedding, exclude_self: true) ⇒ ActiveRecord::Relation

Find similar records to a given record

Parameters:

  • record (ActiveRecord::Base)

    The reference record

  • limit (Integer) (defaults to: 10)

    Number of similar records

  • column (Symbol) (defaults to: :embedding)

    Embedding column

  • exclude_self (Boolean) (defaults to: true)

    Exclude the reference record

Returns:

  • (ActiveRecord::Relation)


167
168
169
170
171
172
173
174
175
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 167

# Find the records whose embeddings are closest to the given record's embedding.
#
# @param record [ActiveRecord::Base] the reference record
# @param limit [Integer] number of similar records
# @param column [Symbol] embedding column, read through its public accessor
# @param exclude_self [Boolean] leave the reference record out of the results
# @return [ActiveRecord::Relation] an empty relation when the record has no embedding
#   or pgvector is unavailable
def find_similar(record, limit: 10, column: :embedding, exclude_self: true)
  # public_send keeps a caller-supplied column name from reaching private methods.
  embedding = record.public_send(column)
  return record.class.none unless embedding && pgvector_available?

  # A textual vector ("[0.1,0.2]") is turned back into numbers, because vector_search is
  # handed an array. NOTE(review): presumably this is what the accessor returns when no
  # vector type is registered with ActiveRecord - confirm against the adapter setup.
  embedding = embedding.delete("[]").split(",").map { |component| Float(component) } if embedding.is_a?(String)

  scope = record.class.where.not(column => nil)
  scope = scope.where.not(id: record.id) if exclude_self

  vector_search(scope, embedding, column: column, limit: limit)
end

#hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding, text_weight: 0.5, vector_weight: 0.5, limit: 20) ⇒ ActiveRecord::Relation

Hybrid search combining text and vector search

Parameters:

  • scope (ActiveRecord::Relation)

    Base scope

  • text_query (String)

    Text query for pg_trgm search

  • embedding (Array<Float>)

    Query embedding for vector search

  • text_fields (Array<Symbol>)

    Fields to search with text

  • vector_column (Symbol) (defaults to: :embedding)

    Column containing embeddings

  • text_weight (Float) (defaults to: 0.5)

    Weight for text similarity (0.0-1.0)

  • vector_weight (Float) (defaults to: 0.5)

    Weight for vector similarity (0.0-1.0)

  • limit (Integer) (defaults to: 20)

    Maximum number of results to return

Returns:

  • (ActiveRecord::Relation)


64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 64

# Hybrid search that ranks rows by a weighted sum of pg_trgm text similarity and
# vector (cosine) similarity.
#
# Selected columns: every column of the scope's table plus `text_score`, `vector_score`
# and `combined_score`. `vector_score` is NULL for rows without an embedding; in
# `combined_score` such rows contribute 0 for the vector part, so they are ranked by
# their text score instead of floating to the top (PostgreSQL sorts NULLs first under
# ORDER BY ... DESC).
#
# @param scope [ActiveRecord::Relation] base scope
# @param text_query [String] text compared against each field with similarity()
# @param embedding [Array<Numeric>] query embedding
# @param text_fields [Array<Symbol>] fields searched with text; may be empty, in which
#   case the text score is 0 and only rows with an embedding are returned
# @param vector_column [Symbol] column containing embeddings
# @param text_weight [Numeric] weight for text similarity (0.0-1.0)
# @param vector_weight [Numeric] weight for vector similarity (0.0-1.0)
# @param limit [Integer] maximum number of results
# @return [ActiveRecord::Relation] the scope unchanged when pgvector is unavailable
# @raise [ArgumentError, TypeError] when the embedding or a weight is not numeric
def hybrid_search(scope, text_query:, embedding:, text_fields:, vector_column: :embedding,
                  text_weight: 0.5, vector_weight: 0.5, limit: 20)
  return scope unless pgvector_available?

  # Float() rejects non-numeric input, so these values are safe to inline into SQL.
  vector_literal = "[#{embedding.map { |component| Float(component) }.join(',')}]"
  text_factor = Float(text_weight)
  vector_factor = Float(vector_weight)

  # The query text is quoted and inlined: SELECT fragments receive no bind values, so a
  # "?" placeholder there would reach PostgreSQL verbatim.
  quoted_query = scope.connection.quote(text_query)
  vector_col = quoted_column(vector_column)

  text_sum = text_fields.map { |field| "similarity(#{quoted_column(field)}, #{quoted_query})" }.join(" + ")
  # Per-field similarity is 0-1; averaging keeps the text part in the same range.
  text_average = text_fields.empty? ? "0" : "((#{text_sum}) / #{text_fields.size})"
  # Cosine distance converted to similarity.
  vector_similarity = "(1 - (#{vector_col} <=> '#{vector_literal}'))"

  text_match = "(#{text_sum}) > 0" unless text_fields.empty?
  row_filter = [text_match, "#{vector_col} IS NOT NULL"].compact.join(" OR ")

  scope.select(
    "#{scope.table_name}.*",
    "#{text_average} * #{text_factor} AS text_score",
    "#{vector_similarity} * #{vector_factor} AS vector_score",
    "(#{text_average} * #{text_factor} + " \
    "COALESCE(#{vector_similarity}, 0) * #{vector_factor}) AS combined_score"
  ).where(row_filter)
       .order(Arel.sql("combined_score DESC"))
       .limit(limit)
end

#knn_search(model, embedding, k: 10, column: :embedding, filters: {}) ⇒ Array<Hash>

Execute a KNN (K-Nearest Neighbors) search

Parameters:

  • model (Class)

    The ActiveRecord model

  • embedding (Array<Float>)

    Query embedding

  • k (Integer) (defaults to: 10)

    Number of nearest neighbors

  • column (Symbol) (defaults to: :embedding)

    Embedding column

  • filters (Hash) (defaults to: {})

    Additional WHERE conditions

Returns:

  • (Array<Hash>)

    Results with distance scores



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 101

# Execute a KNN (K-Nearest Neighbors) search ordered by the `<=>` distance operator.
#
# @param model [Class] the ActiveRecord model
# @param embedding [Array<Numeric>] query embedding
# @param k [Integer] number of nearest neighbors
# @param column [Symbol] embedding column
# @param filters [Hash, nil] additional WHERE conditions; nil or empty means none
# @return [Array<Hash>] whatever format_knn_response builds from the matching rows
#   (each row is selected with an extra `distance` column); [] when pgvector is unavailable
# @raise [ArgumentError, TypeError] when the embedding contains a non-numeric element
def knn_search(model, embedding, k: 10, column: :embedding, filters: {})
  return [] unless pgvector_available?

  # Float() rejects non-numeric input, so the literal is safe to inline into SQL.
  vector_literal = "[#{embedding.map { |component| Float(component) }.join(',')}]"
  distance_sql = "#{quoted_column(column)} <=> '#{vector_literal}'"

  scope = model.all
  scope = scope.where(filters) if filters&.any?

  results = scope
            .select("#{model.table_name}.*", "#{distance_sql} AS distance")
            .order(Arel.sql(distance_sql))
            .limit(k)

  format_knn_response(results, model)
end

#pgvector_available? ⇒ Boolean

Check if pgvector is available

Returns:

  • (Boolean)


178
179
180
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 178

# Check if pgvector is available, i.e. "vector" is among the available extensions.
#
# The answer is looked up once and then cached, including a negative answer.
# Assigning nil to @pgvector_available forces a fresh lookup on the next call.
#
# @return [Boolean]
def pgvector_available?
  # `||=` would repeat the lookup on every call whenever the cached answer is false.
  return @pgvector_available unless @pgvector_available.nil?

  @pgvector_available = available_extensions.include?("vector")
end

#store_embedding(record, embedding, column: :embedding) ⇒ Object

Store an embedding for a record

Parameters:

  • record (ActiveRecord::Base)

    The record to update

  • embedding (Array<Float>)

    The embedding vector

  • column (Symbol) (defaults to: :embedding)

    The column to store the embedding



124
125
126
127
128
129
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 124

# Write an embedding to a record as a bracketed, comma-separated vector string.
#
# @param record [ActiveRecord::Base] the record to update
# @param embedding [Array<Float>] the embedding vector
# @param column [Symbol] the column to store the embedding
# @return [Object] the result of record.update_column, or false when pgvector is unavailable
def store_embedding(record, embedding, column: :embedding)
  if pgvector_available?
    serialized = "[" + embedding.join(",") + "]"
    record.update_column(column, serialized)
  else
    false
  end
end

#vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil, distance_metric: :cosine) ⇒ ActiveRecord::Relation

Perform semantic search using vector similarity

Parameters:

  • scope (ActiveRecord::Relation)

    The base scope to search

  • embedding (Array<Float>)

    The query embedding vector

  • column (Symbol) (defaults to: :embedding)

    The column containing embeddings (default: :embedding)

  • limit (Integer) (defaults to: 20)

    Maximum results to return

  • distance_threshold (Float) (defaults to: nil)

    Maximum distance threshold (optional)

  • distance_metric (Symbol) (defaults to: :cosine)

    :cosine, :l2, or :inner_product

Returns:

  • (ActiveRecord::Relation)

    Scope with vector similarity ordering



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/noiseless/adapters/execution_modules/pgvector_support.rb', line 27

# Perform semantic search using vector similarity.
#
# Adds a `vector_distance` column to the selection, optionally keeps only rows whose
# distance is below the threshold, and orders by ascending distance.
#
# @param scope [ActiveRecord::Relation] the base scope to search
# @param embedding [Array<Numeric>] the query embedding vector
# @param column [Symbol] the column containing embeddings
# @param limit [Integer] maximum results to return
# @param distance_threshold [Float, nil] maximum distance (exclusive); nil disables the filter
# @param distance_metric [Symbol] :cosine, :l2, or :inner_product (resolved by distance_operator)
# @return [ActiveRecord::Relation] scope with vector similarity ordering; the scope
#   unchanged when pgvector is unavailable
# @raise [ArgumentError, TypeError] when the embedding contains a non-numeric element
def vector_search(scope, embedding, column: :embedding, limit: 20, distance_threshold: nil,
                  distance_metric: :cosine)
  return scope unless pgvector_available?

  # Float() rejects non-numeric input, so the literal is safe to inline into SQL.
  vector_literal = "[#{embedding.map { |component| Float(component) }.join(',')}]"
  # Built once and reused for the selected column, the threshold filter and the ordering.
  distance_sql = "#{quoted_column(column)} #{distance_operator(distance_metric)} '#{vector_literal}'"

  scope = scope.select("#{scope.table_name}.*", "#{distance_sql} AS vector_distance")
  scope = scope.where("#{distance_sql} < ?", distance_threshold) if distance_threshold

  # Ascending distance = more similar first.
  scope.order(Arel.sql(distance_sql)).limit(limit)
end