Class: KairosMcp::VectorSearch::SemanticSearch
- Defined in:
- lib/kairos_mcp/vector_search/semantic_search.rb
Overview
Semantic search implementation using hnswlib and informers
Requires optional gems:
- hnswlib (~> 0.9) - HNSW approximate nearest neighbor search
- informers (~> 1.0) - ONNX-based sentence embeddings
This class is only loaded when gems are available.
Constant Summary collapse
- DEFAULT_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
- DEFAULT_DIMENSION = 384
- DEFAULT_SPACE = 'cosine'
Instance Attribute Summary collapse
-
#dimension ⇒ Object
readonly
Returns the value of attribute dimension.
-
#index_path ⇒ Object
readonly
Returns the value of attribute index_path.
-
#model_name ⇒ Object
readonly
Returns the value of attribute model_name.
Instance Method Summary collapse
- #add(id, text, metadata: {}) ⇒ Object
- #count ⇒ Object
-
#initialize(index_path:, dimension: DEFAULT_DIMENSION, model: DEFAULT_MODEL) ⇒ SemanticSearch
constructor
A new instance of SemanticSearch.
- #load ⇒ Object
- #ready? ⇒ Boolean
- #rebuild(documents) ⇒ Object
- #remove(id) ⇒ Object
- #save ⇒ Object
- #search(query, k: 5) ⇒ Object
- #semantic? ⇒ Boolean
Constructor Details
#initialize(index_path:, dimension: DEFAULT_DIMENSION, model: DEFAULT_MODEL) ⇒ SemanticSearch
Returns a new instance of SemanticSearch.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 22
#
# Sets up an empty, not-yet-ready search instance and makes sure the index
# directory exists (via ensure_index_directory).
#
# @param index_path [Object] location of the index; stored as-is
# @param dimension [Object] embedding dimension (defaults to DEFAULT_DIMENSION)
# @param model [Object] embedding model name (defaults to DEFAULT_MODEL)
def initialize(index_path:, dimension: DEFAULT_DIMENSION, model: DEFAULT_MODEL)
  @index_path = index_path
  @dimension = dimension
  @model_name = model

  @id_map = {}          # Maps internal index -> document id
  @reverse_map = {}     # Maps document id -> internal index
  @metadata_store = {}  # Stores metadata by document id
  @next_index = 0       # Next internal index to hand out

  # Nothing is loaded at construction time; embedder and index start as nil.
  @ready = false
  @embedder = nil
  @index = nil

  ensure_index_directory
end
Instance Attribute Details
#dimension ⇒ Object (readonly)
Returns the value of attribute dimension.
20 21 22 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 20
#
# @return [Object] the value of the @dimension instance variable
def dimension
  @dimension
end
#index_path ⇒ Object (readonly)
Returns the value of attribute index_path.
20 21 22 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 20
#
# @return [Object] the value of the @index_path instance variable
def index_path
  @index_path
end
#model_name ⇒ Object (readonly)
Returns the value of attribute model_name.
20 21 22 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 20
#
# @return [Object] the value of the @model_name instance variable
def model_name
  @model_name
end
Instance Method Details
#add(id, text, metadata: {}) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# add(id, text, metadata: {}) -- adds or overwrites one document in the index.
#
# Visible behavior: calls ensure_initialized and stringifies the id. If the id
# is already in @reverse_map, the existing internal index is reused for
# @index.add_point. Otherwise @next_index is taken and incremented, the index
# is grown through resize_index(@index.max_elements * 2) when the new slot is
# >= @index.max_elements, the point is added, and both id maps are updated.
# The metadata entry for the id is then stored merged with `text:`.
# Returns true on success; any StandardError is reported with `warn` and the
# method returns false.
#
# NOTE(review): several identifiers are blank in this listing (`= (text)`,
# `add_point(, internal_idx)`, `.merge(text: text)`, `#{e.}`) -- they appear to
# have been stripped during extraction. Restore them from the source file
# before relying on this listing.
# NOTE(review): @next_index is incremented before add_point runs, so a failed
# add appears to leave an unused internal index -- confirm this is acceptable.
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 37 def add(id, text, metadata: {}) ensure_initialized id_str = id.to_s = (text) # If document already exists, we need to handle update if @reverse_map.key?(id_str) internal_idx = @reverse_map[id_str] # hnswlib doesn't support true updates, so we mark as deleted and add new # For simplicity, we just overwrite the point @index.add_point(, internal_idx) else internal_idx = @next_index @next_index += 1 # Resize index if needed if internal_idx >= @index.max_elements # Create new larger index and copy data resize_index(@index.max_elements * 2) end @index.add_point(, internal_idx) @id_map[internal_idx] = id_str @reverse_map[id_str] = internal_idx end @metadata_store[id_str] = .merge(text: text) true rescue StandardError => e warn "[SemanticSearch] Failed to add document #{id}: #{e.}" false end |
#count ⇒ Object
207 208 209 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 207
#
# @return [Integer] number of entries currently held in @id_map
def count
  @id_map.size
end
#load ⇒ Object
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# load -- restores the id maps, metadata store and HNSW index from disk.
#
# Visible behavior: returns false unless index_file_path and a second file
# both exist. Parses that second file as JSON with symbolized names, then warns
# and returns false when the stored :dimension differs from @dimension.
# Otherwise it restores @id_map (integer keys), @reverse_map and
# @metadata_store (string keys) and @next_index, creates an index sized
# [@next_index * 2, 100].max via create_index, calls load_index on
# index_file_path, sets @ready to true and returns true. Any StandardError is
# reported with `warn` and the method returns false.
#
# NOTE(review): the second file's path expression, the parsed-hash local and
# `#{e.}` are blank in this listing -- apparently stripped during extraction.
# Restore them from the source file.
# NOTE(review): the inline comment says "dimension and model", but only the
# dimension is compared in the visible code -- confirm whether a model check
# is intended.
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 172 def load return false unless File.exist?(index_file_path) && File.exist?() # Load metadata first = JSON.parse(File.read(), symbolize_names: true) # Validate dimension and model match if [:dimension] != @dimension warn "[SemanticSearch] Dimension mismatch: expected #{@dimension}, got #{[:dimension]}" return false end @id_map = [:id_map].transform_keys(&:to_i) @reverse_map = [:reverse_map].transform_keys(&:to_s) @metadata_store = [:metadata_store].transform_keys(&:to_s) @next_index = [:next_index] # Load HNSW index max_elements = [@next_index * 2, 100].max @index = create_index(max_elements) @index.load_index(index_file_path) @ready = true true rescue StandardError => e warn "[SemanticSearch] Load failed: #{e.}" false end |
#ready? ⇒ Boolean
203 204 205 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 203
#
# @return [Boolean] the current value of the @ready flag
def ready?
  @ready
end
#rebuild(documents) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# rebuild(documents) -- replaces the whole index with the given documents.
#
# Visible behavior: creates a fresh index sized [documents.size * 2, 100].max
# via create_index, clears @id_map, @reverse_map and @metadata_store, resets
# @next_index to 0 and sets @ready to true. It collects doc[:text].to_s for
# every document, then for each document adds a point under a sequential
# internal index, records both id mappings and stores
# (doc[:metadata] || {}) merged with `text:`. Finally it calls save and returns
# true. Any StandardError is reported with `warn` and the method returns false.
#
# NOTE(review): the batch-embedding call and its local (`= (texts)`,
# `add_point([i], internal_idx)`) and `#{e.}` are blank in this listing --
# apparently stripped during extraction. Restore them from the source file.
# NOTE(review): existing state is cleared and @ready is set before embeddings
# are generated, and the return value of save is not checked -- confirm both
# are intentional.
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 115 def rebuild(documents) # Initialize fresh index max_elements = [documents.size * 2, 100].max @index = create_index(max_elements) @id_map.clear @reverse_map.clear @metadata_store.clear @next_index = 0 @ready = true # Batch generate embeddings for efficiency texts = documents.map { |doc| doc[:text].to_s } = (texts) documents.each_with_index do |doc, i| id_str = doc[:id].to_s internal_idx = @next_index @next_index += 1 @index.add_point([i], internal_idx) @id_map[internal_idx] = id_str @reverse_map[id_str] = internal_idx @metadata_store[id_str] = (doc[:metadata] || {}).merge(text: doc[:text]) end save true rescue StandardError => e warn "[SemanticSearch] Rebuild failed: #{e.}" false end |
#remove(id) ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 71
#
# Removes a document from the id maps and the metadata store, and marks its
# point as deleted in the index when the index supports it.
#
# @param id [#to_s] document id; compared in its string form
# @return [Boolean] true when the document was removed or was not present,
#   false when a StandardError was raised (the error is reported via `warn`)
def remove(id)
  id_str = id.to_s
  return true unless @reverse_map.key?(id_str)

  internal_idx = @reverse_map[id_str]

  # hnswlib supports marking elements as deleted; skipped when the index
  # object (including a nil index) does not respond to mark_deleted.
  @index.mark_deleted(internal_idx) if @index.respond_to?(:mark_deleted)

  @id_map.delete(internal_idx)
  @reverse_map.delete(id_str)
  @metadata_store.delete(id_str)
  true
rescue StandardError => e
  warn "[SemanticSearch] Failed to remove document #{id}: #{e.message}"
  false
end
#save ⇒ Object
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# save -- persists the HNSW index and the bookkeeping maps to disk.
#
# Visible behavior: returns true immediately unless @ready and @index are both
# set. Otherwise it calls @index.save_index(index_file_path), builds a hash
# with id_map, reverse_map, metadata_store, next_index, dimension and model,
# writes it as pretty-printed JSON with File.write, and returns true. Any
# StandardError is reported with `warn` and the method returns false.
#
# NOTE(review): the metadata hash local, the File.write target path and
# `#{e.}` are blank in this listing -- apparently stripped during extraction.
# Restore them from the source file.
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 149 def save return true unless @ready && @index # Save HNSW index @index.save_index(index_file_path) # Save metadata = { id_map: @id_map, reverse_map: @reverse_map, metadata_store: @metadata_store, next_index: @next_index, dimension: @dimension, model: @model_name } File.write(, JSON.pretty_generate()) true rescue StandardError => e warn "[SemanticSearch] Save failed: #{e.}" false end |
#search(query, k: 5) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# search(query, k: 5) -- returns up to k nearest documents for a query.
#
# Visible behavior: calls ensure_initialized and returns [] when @id_map is
# empty. It caps k at @id_map.size, calls @index.search_knn, and destructures
# the result into indices and distances. Each (index, distance) pair whose
# index is still present in @id_map becomes a hash with :id, :score
# (1.0 - distance) and :metadata (the stored entry or {}); other pairs are
# skipped. Any StandardError is reported with `warn` and [] is returned.
#
# NOTE(review): the query-embedding call and its local (`= (query)`,
# `search_knn(, actual_k)`) and `#{e.}` are blank in this listing --
# apparently stripped during extraction. Restore them from the source file.
# NOTE(review): the inline comment describes the search_knn result shape as
# "[[indices], [distances]] or similar" -- confirm against the hnswlib gem.
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 88 def search(query, k: 5) ensure_initialized return [] if @id_map.empty? = (query) actual_k = [k, @id_map.size].min results = @index.search_knn(, actual_k) # results format: [[indices], [distances]] or similar indices, distances = results indices.zip(distances).filter_map do |idx, dist| next unless @id_map.key?(idx) id = @id_map[idx] { id: id, score: 1.0 - dist, # Convert distance to similarity score metadata: @metadata_store[id] || {} } end rescue StandardError => e warn "[SemanticSearch] Search failed: #{e.}" [] end |
#semantic? ⇒ Boolean
211 212 213 |
# File 'lib/kairos_mcp/vector_search/semantic_search.rb', line 211
#
# @return [Boolean] always true for this implementation
def semantic?
  true
end