Class: Woods::Storage::VectorStore::Pgvector

Inherits:
Object
  • Object
show all
Includes:
Interface
Defined in:
lib/woods/storage/pgvector.rb

Overview

PostgreSQL + pgvector adapter for vector storage and similarity search.

Uses the pgvector extension for efficient approximate nearest neighbor search with HNSW indexing. Stores metadata as JSONB for flexible filtering.

Examples:

store = Pgvector.new(connection: ActiveRecord::Base.connection, dimensions: 768)
store.ensure_schema!
store.store("User", [0.1, 0.2, ...], { type: "model" })
results = store.search([0.1, 0.2, ...], limit: 5, filters: { type: "model" })

Constant Summary collapse

TABLE =
'woods_vectors'

Instance Method Summary collapse

Methods included from Interface

#bulk_load, #each_entry

Constructor Details

#initialize(connection:, dimensions:) ⇒ Pgvector

Returns a new instance of Pgvector.

Parameters:

  • connection (Object)

    ActiveRecord database connection

  • dimensions (Integer)

    Size of the embedding vectors



27
28
29
30
# File 'lib/woods/storage/pgvector.rb', line 27

# Build an adapter around an existing database connection.
#
# Only stores the two arguments; no SQL is issued here.
#
# @param connection [Object] ActiveRecord database connection
# @param dimensions [Integer] size of the embedding vectors
def initialize(connection:, dimensions:)
  @connection = connection
  @dimensions = dimensions
end

Instance Method Details

#count ⇒ Object

See Also:



137
138
139
140
# File 'lib/woods/storage/pgvector.rb', line 137

# Number of rows currently held in the vectors table.
#
# Runs a COUNT(*) query and converts the "count" column of the first
# result row to an Integer.
#
# @return [Integer]
def count
  @connection
    .execute("SELECT COUNT(*) AS count FROM #{TABLE}")
    .first['count']
    .to_i
end

#delete(id) ⇒ Object

See Also:



125
126
127
128
# File 'lib/woods/storage/pgvector.rb', line 125

# Delete the row whose primary key equals +id+.
#
# The id is passed through the connection's +quote+ before being
# interpolated into the statement.
#
# @param id [String] identifier of the stored vector
# @return [Object] whatever the connection's +execute+ returns
def delete(id)
  @connection.execute("DELETE FROM #{TABLE} WHERE id = #{@connection.quote(id)}")
end

#delete_by_filter(filters) ⇒ Object



131
132
133
134
# File 'lib/woods/storage/pgvector.rb', line 131

# Delete every row matched by the given metadata filters.
#
# The WHERE fragment comes from +build_where+ and is appended verbatim
# after the table name.
# NOTE(review): if +build_where+ yields an empty string for empty filters,
# this statement removes all rows — confirm that is intended.
#
# @param filters [Hash] metadata filters handed to +build_where+
# @return [Object] whatever the connection's +execute+ returns
def delete_by_filter(filters)
  @connection.execute("DELETE FROM #{TABLE} #{build_where(filters)}")
end

#ensure_schema! ⇒ Object

Create the pgvector extension, vectors table, and HNSW index.

Safe to call multiple times (uses IF NOT EXISTS).



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/woods/storage/pgvector.rb', line 35

# Create the pgvector extension, the vectors table and its HNSW index.
#
# Each statement uses IF NOT EXISTS, and the three are executed in the
# order extension, table, index.
#
# @return [Object] result of the final (index) +execute+ call
def ensure_schema!
  create_table = <<~SQL
    CREATE TABLE IF NOT EXISTS #{TABLE} (
      id TEXT PRIMARY KEY,
      embedding vector(#{@dimensions}),
      metadata JSONB DEFAULT '{}',
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
  SQL
  create_index = <<~SQL
    CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
    ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
  SQL

  statements = ['CREATE EXTENSION IF NOT EXISTS vector', create_table, create_index]
  # map + last keeps the return value equal to the last execute result.
  statements.map { |statement| @connection.execute(statement) }.last
end

#search(query_vector, limit: 10, filters: {}) ⇒ Array<SearchResult>

Search for similar vectors using cosine distance.

Parameters:

  • query_vector (Array<Float>)

    The query embedding

  • limit (Integer) (defaults to: 10)

    Maximum results to return

  • filters (Hash) (defaults to: {})

    Metadata key-value filters

Returns:

  • (Array<SearchResult>)

    Results sorted by descending similarity

See Also:



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/woods/storage/pgvector.rb', line 107

# Find the stored vectors closest to +query_vector+ by cosine distance.
#
# The query vector is validated first, then turned into a literal by
# +build_vector_literal+. Rows are ordered by ascending distance and each
# one is converted with +row_to_result+.
#
# @param query_vector [Array<Float>] the query embedding
# @param limit [Integer] maximum results to return (coerced with +to_i+)
# @param filters [Hash] metadata key-value filters passed to +build_where+
# @return [Array<SearchResult>] results sorted by descending similarity
def search(query_vector, limit: 10, filters: {})
  validate_vector!(query_vector)
  literal = build_vector_literal(query_vector)
  conditions = build_where(filters)

  query = <<~SQL
    SELECT id, embedding <=> '#{literal}' AS distance, metadata
    FROM #{TABLE}
    #{conditions}
    ORDER BY distance ASC
    LIMIT #{limit.to_i}
  SQL

  @connection.execute(query).map { |record| row_to_result(record) }
end

#store(id, vector, metadata = {}) ⇒ Object

Store or update a vector with metadata.

Parameters:

  • id (String)

    Unique identifier

  • vector (Array<Float>)

    The embedding vector

  • metadata (Hash) (defaults to: {})

    Optional metadata

See Also:



57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/woods/storage/pgvector.rb', line 57

# Store or update a vector with metadata.
#
# The vector is validated (and dimension-checked when @dimensions is set)
# before any SQL runs. The row is written as an upsert: a conflicting id
# has its embedding, metadata and created_at overwritten.
#
# @param id [String] unique identifier
# @param vector [Array<Float>] the embedding vector
# @param metadata [Hash] optional metadata (defaults to {})
# @return [Object] whatever the connection's +execute+ returns
def store(id, vector, metadata = {})
  validate_vector!(vector)
  validate_dimensions!(vector) if @dimensions
  entry = format_entry(id, vector, metadata)

  @connection.execute(<<~SQL)
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{entry}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL
end

#store_batch(entries) ⇒ Object

Store multiple vectors in a single multi-row INSERT.

Parameters:

  • entries (Array<Hash>)

    Each entry has :id, :vector, :metadata keys

Raises:

  • (ArgumentError)

    if any entry has a non-numeric or wrong-dimension vector. Validation runs BEFORE any INSERT so partial-batch writes can’t occur.



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/woods/storage/pgvector.rb', line 77

# Upsert many vectors with one multi-row INSERT.
#
# All vectors are validated up front (with a dimension check when
# @dimensions is set), so a bad entry raises before any SQL is sent and
# no partial batch is written. An empty batch returns nil without
# touching the connection.
#
# @param entries [Array<Hash>] each entry has :id, :vector, :metadata keys;
#   a missing or nil :metadata is stored as {}
# @return [Object, nil] result of the connection's +execute+, or nil when
#   +entries+ is empty
def store_batch(entries)
  return if entries.empty?

  # Validation pass first: nothing is formatted or executed until every
  # entry has been checked.
  entries.each_with_index do |item, position|
    embedding = item[:vector]
    validate_vector!(embedding)
    validate_dimensions!(embedding, index: position) if @dimensions
  end

  rows = entries.map do |item|
    format_entry(item[:id], item[:vector], item[:metadata] || {})
  end
  value_list = rows.join(",\n")

  @connection.execute(<<~SQL)
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{value_list}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL
end