Class: Leann::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/leann/index.rb

Overview

Represents a Leann index on disk

Examples:

Open and search

index = Leann::Index.open("my_index")
results = index.search("query")

Get info

index = Leann::Index.open("my_index")
puts index.document_count
puts index.embedding_model

Constant Summary collapse

# Extension stripped from the path's basename to obtain the index name.
INDEX_EXTENSION = ".leann"
# Suffixes appended to the index path to name its component files.
META_SUFFIX = ".meta.json"
PASSAGES_SUFFIX = ".passages.jsonl"
OFFSETS_SUFFIX = ".passages.offsets"
VECTORS_SUFFIX = ".vectors"
IDS_SUFFIX = ".ids"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Index

Returns a new instance of Index.

Parameters:

  • path (String)

    Full path to index



119
120
121
122
123
124
# File 'lib/leann/index.rb', line 119

# Builds an index handle for the files rooted at +path+.
#
# NOTE(review): the right-hand side of the metadata assignment was missing in
# this listing, which made the line collapse into `@metadata = @searcher = nil`.
# It is reconstructed here by parsing the META_SUFFIX file next to the index
# path -- confirm against the library's own metadata loader.
#
# @param path [String] full path to the index
def initialize(path)
  @path = path
  @name = File.basename(path, INDEX_EXTENSION)
  # Parsed without symbolize_names, so keys stay strings ("backend", "dimensions", ...).
  @metadata = JSON.parse(File.read("#{path}#{META_SUFFIX}"))
  # Starts out unset; nothing in this method builds a searcher.
  @searcher = nil
end

Instance Attribute Details

#metadata ⇒ Hash (readonly)

Returns Index metadata.

Returns:

  • (Hash)

    Index metadata



26
27
28
# File 'lib/leann/index.rb', line 26

# Reader for the metadata hash held by this index.
#
# @return [Hash] index metadata
def metadata
  @metadata
end

#name ⇒ String (readonly)

Returns Index name.

Returns:

  • (String)

    Index name



20
21
22
# File 'lib/leann/index.rb', line 20

# @return [String] index name
def name = @name

#path ⇒ String (readonly)

Returns Index path.

Returns:

  • (String)

    Index path



23
24
25
# File 'lib/leann/index.rb', line 23

# @return [String] index path
def path = @path

Class Method Details

.delete(name) ⇒ Boolean

Delete an index

Parameters:

  • name (String)

    Index name or path

Returns:

  • (Boolean)


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/leann/index.rb', line 69

# Removes every file that belongs to an index.
#
# @param name [String] index name or path
# @return [Boolean] false when no index exists at the resolved path, true
#   once the removal pass has run
def delete(name)
  index_path = resolve_path(name)
  return false unless exists_at?(index_path)

  # Suffixes of all files that make up an index on disk.
  suffixes = [
    META_SUFFIX,
    PASSAGES_SUFFIX,
    OFFSETS_SUFFIX,
    VECTORS_SUFFIX,
    IDS_SUFFIX,
    ".graph.bin",      # LEANN graph file
    ".graph.meta.json" # LEANN graph metadata
  ]
  targets = suffixes.map { |suffix| "#{index_path}#{suffix}" }

  # rm_f ignores files that are already absent.
  targets.each { |target| FileUtils.rm_f(target) }

  true
end

.exists?(name) ⇒ Boolean

Check if an index exists

Parameters:

  • name (String)

    Index name or path

Returns:

  • (Boolean)


50
51
52
53
# File 'lib/leann/index.rb', line 50

# Checks whether an index exists for the given name.
#
# @param name [String] index name or path
# @return [Boolean] result of the existence check on the resolved path
def exists?(name)
  exists_at?(resolve_path(name))
end

.list(directory = ".") ⇒ Array<String>

List all indexes in a directory

Parameters:

  • directory (String) (defaults to: ".")

Returns:

  • (Array<String>)


58
59
60
61
62
63
64
# File 'lib/leann/index.rb', line 58

# Lists the names of all indexes found under a directory (recursively).
#
# An index is recognised by its metadata file (any file ending in
# META_SUFFIX). The name is that file's basename with META_SUFFIX and a
# trailing INDEX_EXTENSION removed.
#
# @param directory [String] directory to scan (defaults to the current one)
# @return [Array<String>] unique index names, sorted
def list(directory = ".")
  pattern = File.join(directory, "**", "*#{META_SUFFIX}")
  Dir.glob(pattern)
     .map do |meta_file|
       # delete_suffix strips the literal extension only. The previous
       # unescaped regex let "." match any character, so a name such as
       # "myleann" was truncated to "m".
       File.basename(meta_file, META_SUFFIX).delete_suffix(INDEX_EXTENSION)
     end
     .uniq
     .sort
end

.open(name) ⇒ Index

Open an existing index

Parameters:

  • name (String)

    Index name or path

Returns:

  • (Index)

Raises:

  • (IndexNotFoundError)



40
41
42
43
44
45
# File 'lib/leann/index.rb', line 40

# Opens an existing index.
#
# @param name [String] index name or path
# @return [Index] a new instance built from the resolved path
# @raise [IndexNotFoundError] when nothing exists at the resolved path
def open(name)
  resolved = resolve_path(name)
  return new(resolved) if exists_at?(resolved)

  raise IndexNotFoundError, name
end

Instance Method Details

#backend ⇒ Symbol

Get backend type

Returns:

  • (Symbol)


170
171
172
# File 'lib/leann/index.rb', line 170

# Backend type recorded in the metadata.
#
# @return [Symbol] the "backend" metadata entry as a symbol, or :leann when
#   that entry is missing or nil
def backend
  # The lookup must go through the metadata hash; a bare ["backend"] is an
  # Array literal and has no #to_sym.
  (metadata["backend"] || "leann").to_sym
end

#created_at ⇒ Time?

Get creation timestamp

Returns:

  • (Time, nil)


162
163
164
165
166
# File 'lib/leann/index.rb', line 162

# Creation timestamp recorded in the metadata.
#
# @return [Time, nil] the parsed "created_at" entry, or nil when it is absent
def created_at
  # Read from the metadata hash; a bare ["created_at"] is an Array literal
  # that Time.parse cannot handle.
  raw = metadata["created_at"]
  return nil unless raw

  Time.parse(raw)
end

#dimensions ⇒ Integer

Get embedding dimensions

Returns:

  • (Integer)


156
157
158
# File 'lib/leann/index.rb', line 156

# Embedding dimensions recorded in the metadata.
#
# @return [Integer] the "dimensions" metadata entry (nil when it is absent)
def dimensions
  # Must index the metadata hash; a bare ["dimensions"] returns an Array.
  metadata["dimensions"]
end

#document_count ⇒ Integer

Get number of documents in the index

Returns:

  • (Integer)


138
139
140
# File 'lib/leann/index.rb', line 138

# Number of documents in the index.
#
# @return [Integer] the "document_count" metadata entry when present,
#   otherwise the result of count_documents
def document_count
  # Must index the metadata hash; a bare ["document_count"] is an always-truthy
  # Array, so the count_documents fallback could never run.
  metadata["document_count"] || count_documents
end

#each_passage ⇒ Enumerator

Get all passages (lazy loaded)

Returns:

  • (Enumerator)


196
197
198
199
200
201
202
203
204
205
# File 'lib/leann/index.rb', line 196

# Iterates over the passages stored in the index's passages file, reading it
# one line at a time.
#
# Each non-blank line is parsed as JSON with symbolized keys. Nothing is
# yielded when the passages file does not exist.
#
# @yieldparam passage [Hash] one parsed passage
# @return [Enumerator] when called without a block
# @return [nil] when called with a block
def each_passage
  return enum_for(:each_passage) unless block_given?

  passages_file = "#{path}#{PASSAGES_SUFFIX}"
  return unless File.exist?(passages_file)

  File.foreach(passages_file) do |line|
    record = line.strip
    # Skip blank lines rather than letting JSON.parse raise on an empty string.
    next if record.empty?

    yield JSON.parse(record, symbolize_names: true)
  end
end

#embedding_model ⇒ String

Get embedding model used

Returns:

  • (String)


144
145
146
# File 'lib/leann/index.rb', line 144

# Embedding model recorded in the metadata.
#
# @return [String] the "embedding_model" metadata entry (nil when it is absent)
def embedding_model
  # Must index the metadata hash; a bare ["embedding_model"] returns an Array.
  metadata["embedding_model"]
end

#embedding_provider ⇒ Symbol

Get embedding provider

Returns:

  • (Symbol)


150
151
152
# File 'lib/leann/index.rb', line 150

# Embedding provider recorded in the metadata.
#
# @return [Symbol] the "embedding_provider" metadata entry as a symbol, or
#   :openai when that entry is missing or nil
def embedding_provider
  # The lookup must go through the metadata hash; a bare
  # ["embedding_provider"] is an Array literal and has no #to_sym.
  (metadata["embedding_provider"] || "openai").to_sym
end

#get_passage(id) ⇒ Hash?

Get passage by ID

Parameters:

  • id (String)

Returns:

  • (Hash, nil)


210
211
212
# File 'lib/leann/index.rb', line 210

# Looks up a single passage by scanning the passages in order.
#
# @param id [String] passage ID to match against each passage's :id key
# @return [Hash, nil] the first matching passage, or nil when none matches
def get_passage(id)
  each_passage.find do |passage|
    passage[:id] == id
  end
end

#inspect ⇒ String

Detailed inspection

Returns:

  • (String)


190
191
192
# File 'lib/leann/index.rb', line 190

# Compact single-line description showing name, document count and model.
#
# @return [String]
def inspect
  name_part = "name=#{name.inspect}"
  count_part = "documents=#{document_count}"
  model_part = "model=#{embedding_model.inspect}"
  "#<Leann::Index #{name_part} #{count_part} #{model_part}>"
end

#search(query, limit: 5, threshold: nil, filters: nil) ⇒ SearchResults

Search the index

Parameters:

  • query (String)

    Search query

  • limit (Integer) (defaults to: 5)

    Maximum results

  • threshold (Float, nil) (defaults to: nil)

    Minimum score threshold

  • filters (Hash, nil) (defaults to: nil)

    Metadata filters

Returns:

  • (SearchResults)



132
133
134
# File 'lib/leann/index.rb', line 132

# Searches the index by delegating to the searcher.
#
# @param query [String] search query
# @param limit [Integer] maximum results
# @param threshold [Float, nil] minimum score threshold
# @param filters [Hash, nil] metadata filters
# @return whatever the searcher's #search returns
def search(query, limit: 5, threshold: nil, filters: nil)
  options = { limit: limit, threshold: threshold, filters: filters }
  searcher.search(query, **options)
end

#to_s ⇒ String

Index info as string

Returns:

  • (String)


176
177
178
179
180
181
182
183
184
185
186
# File 'lib/leann/index.rb', line 176

# Multi-line, human-readable summary of the index.
#
# @return [String] one line per field, joined with newlines; the creation
#   time is shown as "unknown" when created_at is nil
def to_s
  report = +"Index: #{name}\n"
  report << "  Documents: #{document_count}\n"
  report << "  Embedding: #{embedding_provider}/#{embedding_model}\n"
  report << "  Dimensions: #{dimensions}\n"
  report << "  Backend: #{backend}\n"
  report << "  Created: #{created_at&.strftime("%Y-%m-%d %H:%M:%S") || "unknown"}"
  report
end