Class: Woods::Storage::VectorStore::Pgvector
- Inherits:
-
Object
- Object
- Woods::Storage::VectorStore::Pgvector
- Includes:
- Interface
- Defined in:
- lib/woods/storage/pgvector.rb
Overview
PostgreSQL + pgvector adapter for vector storage and similarity search.
Uses the pgvector extension for efficient approximate nearest neighbor search with HNSW indexing. Stores metadata as JSONB for flexible filtering.
Constant Summary collapse
- TABLE =
'woods_vectors'
Instance Method Summary collapse
- #count ⇒ Object
- #delete(id) ⇒ Object
- #delete_by_filter(filters) ⇒ Object
-
#ensure_schema! ⇒ Object
Create the pgvector extension, vectors table, and HNSW index.
-
#initialize(connection:, dimensions:) ⇒ Pgvector
constructor
A new instance of Pgvector.
-
#search(query_vector, limit: 10, filters: {}) ⇒ Array<SearchResult>
Search for similar vectors using cosine distance.
-
#store(id, vector, metadata = {}) ⇒ Object
Store or update a vector with metadata.
-
#store_batch(entries) ⇒ Object
Store multiple vectors in a single multi-row INSERT.
Methods included from Interface
Constructor Details
#initialize(connection:, dimensions:) ⇒ Pgvector
Returns a new instance of Pgvector.
27 28 29 30 |
# File 'lib/woods/storage/pgvector.rb', line 27

# Build an adapter around an existing database connection.
#
# @param connection [Object] handle the other methods call +execute+ on
#   (and +quote+ in #delete)
# @param dimensions [Integer, nil] embedding width; interpolated into the
#   +vector(n)+ column type by #ensure_schema!. Dimension validation in
#   #store and #store_batch is skipped when this is falsy.
def initialize(connection:, dimensions:)
  @connection = connection
  @dimensions = dimensions
end
Instance Method Details
#count ⇒ Object
137 138 139 140 |
# File 'lib/woods/storage/pgvector.rb', line 137

# Count the rows currently stored in the vectors table.
#
# @return [Integer] value of the +count+ column of the first result row,
#   coerced with +to_i+
def count
  result = @connection.execute("SELECT COUNT(*) AS count FROM #{TABLE}")
  result.first['count'].to_i
end
#delete(id) ⇒ Object
125 126 127 128 |
# File 'lib/woods/storage/pgvector.rb', line 125

# Delete the row whose primary key equals +id+.
#
# @param id [Object] identifier; passed through the connection's +quote+
#   before being interpolated into the SQL text
# @return [Object] whatever the connection's +execute+ returns
def delete(id)
  quoted_id = @connection.quote(id)
  @connection.execute("DELETE FROM #{TABLE} WHERE id = #{quoted_id}")
end
#delete_by_filter(filters) ⇒ Object
131 132 133 134 |
# File 'lib/woods/storage/pgvector.rb', line 131

# Delete every row matched by the WHERE clause built from +filters+.
#
# NOTE(review): if +build_where+ yields an empty string for empty filters,
# this statement removes every row in the table — confirm that is intended.
#
# @param filters [Object] filter description handed unchanged to +build_where+
# @return [Object] whatever the connection's +execute+ returns
def delete_by_filter(filters)
  where_clause = build_where(filters)
  @connection.execute("DELETE FROM #{TABLE} #{where_clause}")
end
#ensure_schema! ⇒ Object
Create the pgvector extension, vectors table, and HNSW index.
Safe to call multiple times (uses IF NOT EXISTS).
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/woods/storage/pgvector.rb', line 35

# Create the pgvector extension, the vectors table, and its HNSW index.
#
# Issues three statements in order, each guarded by IF NOT EXISTS so the
# method can be called repeatedly. The embedding column is declared as
# +vector(@dimensions)+ and the index uses +vector_cosine_ops+.
#
# @return [Object] result of the final +execute+ call (the index statement)
def ensure_schema!
  @connection.execute('CREATE EXTENSION IF NOT EXISTS vector')
  @connection.execute(<<~SQL)
    CREATE TABLE IF NOT EXISTS #{TABLE} (
      id TEXT PRIMARY KEY,
      embedding vector(#{@dimensions}),
      metadata JSONB DEFAULT '{}',
      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
  SQL
  @connection.execute(<<~SQL)
    CREATE INDEX IF NOT EXISTS idx_#{TABLE}_embedding_hnsw
    ON #{TABLE} USING hnsw (embedding vector_cosine_ops)
  SQL
end
#search(query_vector, limit: 10, filters: {}) ⇒ Array<SearchResult>
Search for similar vectors using cosine distance.
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/woods/storage/pgvector.rb', line 107

# Search for the stored vectors closest to +query_vector+, ordered by the
# ascending result of the +<=>+ distance operator.
#
# @param query_vector [Object] vector checked by +validate_vector!+ and
#   rendered into SQL by +build_vector_literal+
# @param limit [#to_i] maximum number of rows; coerced with +to_i+ before
#   being interpolated into the LIMIT clause
# @param filters [Object] filter description handed to +build_where+
# @return [Array] one element per returned row, each produced by
#   +row_to_result+
def search(query_vector, limit: 10, filters: {})
  validate_vector!(query_vector)
  vector_literal = build_vector_literal(query_vector)
  where_clause = build_where(filters)

  sql = <<~SQL
    SELECT id, embedding <=> '#{vector_literal}' AS distance, metadata
    FROM #{TABLE}
    #{where_clause}
    ORDER BY distance ASC
    LIMIT #{limit.to_i}
  SQL

  rows = @connection.execute(sql)
  rows.map { |row| row_to_result(row) }
end
#store(id, vector, metadata = {}) ⇒ Object
Store or update a vector with metadata.
57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/woods/storage/pgvector.rb', line 57

# Store or update a vector with metadata.
#
# Inserts one row; when a row with the same +id+ already exists, its
# embedding and metadata are overwritten and +created_at+ is reset to the
# current timestamp (ON CONFLICT ... DO UPDATE).
#
# @param id [Object] primary-key value, rendered by +format_entry+
# @param vector [Object] embedding; checked by +validate_vector!+ and, when
#   +@dimensions+ is set, by +validate_dimensions!+
# @param metadata [Hash] metadata passed to +format_entry+ (defaults to +{}+)
# @return [Object] whatever the connection's +execute+ returns
def store(id, vector, metadata = {})
  validate_vector!(vector)
  validate_dimensions!(vector) if @dimensions

  entry = format_entry(id, vector, metadata)
  @connection.execute(<<~SQL)
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{entry}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL
end
#store_batch(entries) ⇒ Object
Store multiple vectors in a single multi-row INSERT.
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/woods/storage/pgvector.rb', line 77

# Store multiple vectors in a single multi-row INSERT.
#
# Rows whose +id+ already exists are updated in place (ON CONFLICT ...
# DO UPDATE), with +created_at+ reset to the current timestamp.
#
# @param entries [Array<Hash>] each entry is read via +:id+, +:vector+ and
#   +:metadata+ (a missing or nil +:metadata+ becomes +{}+)
# @return [nil, Object] +nil+ when +entries+ is empty, otherwise whatever
#   the connection's +execute+ returns
def store_batch(entries)
  return if entries.empty?

  # Pre-validate every vector before any SQL — prevents partial-batch
  # state when a later entry's dimension doesn't match.
  entries.each_with_index do |entry, idx|
    vector = entry[:vector]
    validate_vector!(vector)
    validate_dimensions!(vector, index: idx) if @dimensions
  end

  values = entries.map { |entry| format_entry(entry[:id], entry[:vector], entry[:metadata] || {}) }

  @connection.execute(<<~SQL)
    INSERT INTO #{TABLE} (id, embedding, metadata, created_at)
    VALUES #{values.join(",\n")}
    ON CONFLICT (id) DO UPDATE SET
      embedding = EXCLUDED.embedding,
      metadata = EXCLUDED.metadata,
      created_at = CURRENT_TIMESTAMP
  SQL
end