Class: Leann::Builder

Inherits:
Object
  • Object
show all
Defined in:
lib/leann/builder.rb

Overview

Builds a new Leann index

Examples:

DSL style

Leann.build("my_index") do
  add "First document"
  add "Second document", source: "manual"
  add_file "README.md"
end

Programmatic style

builder = Leann::Builder.new("my_index")
builder.add("First document")
builder.add("Second document")
builder.save

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, embedding: nil, model: nil, path: nil, force: false, **_options) ⇒ Builder

Returns a new instance of Builder.

Parameters:

  • name (String)

    Index name

  • embedding (Symbol) (defaults to: nil)

    Embedding provider (:ruby_llm, :openai, :ollama, :fastembed)

  • model (String, nil) (defaults to: nil)

    Embedding model name

  • path (String, nil) (defaults to: nil)

    Custom path for index

  • force (Boolean) (defaults to: false)

    Overwrite existing index



39
40
41
42
43
44
45
46
47
48
# File 'lib/leann/builder.rb', line 39

# Set up a builder for the index called +name+.
#
# @param name [String] index name
# @param embedding [Symbol, nil] embedding provider; when nil the provider
#   from +Leann.configuration+ is used
# @param model [String, nil] embedding model name; when nil the configured
#   model for the chosen provider is used
# @param path [String, nil] custom path for the index
# @param force [Boolean] overwrite an existing index
def initialize(name, embedding: nil, model: nil, path: nil, force: false, **_options)
  @name = name
  @path = resolve_path(name, path)

  provider = embedding || Leann.configuration.embedding_provider
  @embedding_provider = provider
  @embedding_model = model || Leann.configuration.embedding_model_for(provider)

  @force = force
  @documents = []

  # The existing-index check is skipped entirely in force mode.
  return if force

  check_existing_index
end

Instance Attribute Details

#documents ⇒ Array<Hash> (readonly)

Returns Documents to be indexed.

Returns:

  • (Array<Hash>)

    Documents to be indexed



32
33
34
# File 'lib/leann/builder.rb', line 32

# @return [Array<Hash>] documents to be indexed
def documents = @documents

#name ⇒ String (readonly)

Returns Index name.

Returns:

  • (String)

    Index name



26
27
28
# File 'lib/leann/builder.rb', line 26

# @return [String] index name
def name = @name

#path ⇒ String (readonly)

Returns Index path.

Returns:

  • (String)

    Index path



29
30
31
# File 'lib/leann/builder.rb', line 29

# @return [String] index path
def path = @path

Instance Method Details

#add(text, **metadata) ⇒ self Also known as: <<

Add a text document

Examples:

builder.add("Hello world")
builder.add("Document with metadata", source: "file.txt", chapter: 1)

Parameters:

  • text (String)

    Document text

  • metadata (Hash)

    Additional metadata (passed as keyword arguments)

Returns:

  • (self)

Raises:

  • (ArgumentError)


59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/leann/builder.rb', line 59

# Add a text document to the set of documents to be indexed.
#
# @example
#   builder.add("Hello world")
#   builder.add("Document with metadata", source: "file.txt", chapter: 1)
#
# @param text [String] document text; surrounding whitespace is stripped
# @param metadata [Hash] additional metadata (passed as keyword arguments);
#   an +:id+ entry, when present, becomes the document id
# @return [self]
# @raise [ArgumentError] if +text+ is nil or blank after stripping
def add(text, **metadata)
  raise ArgumentError, "Text cannot be nil" if text.nil?

  content = text.to_s.strip
  raise ArgumentError, "Text cannot be empty" if content.empty?

  # :id is lifted out of the metadata so it is not stored in both places.
  doc_id = metadata.delete(:id) || generate_id

  @documents << { id: doc_id, text: content, metadata: metadata }
  self
end

#add_all(docs) ⇒ self

Add multiple documents at once

Examples:

builder.add_all(["Doc 1", "Doc 2", "Doc 3"])
builder.add_all([
  { text: "Doc 1", source: "a" },
  { text: "Doc 2", source: "b" }
])

Parameters:

  • docs (Array<String>, Array<Hash>)

    Documents to add

Returns:

  • (self)


134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/leann/builder.rb', line 134

# Add multiple documents at once.
#
# @example
#   builder.add_all(["Doc 1", "Doc 2", "Doc 3"])
#   builder.add_all([
#     { text: "Doc 1", source: "a" },
#     { text: "Doc 2", source: "b" }
#   ])
#
# @param docs [Array<String>, Array<Hash>] documents to add; a Hash supplies
#   its text under +:text+ or +"text"+ and every other entry is passed to
#   {#add} as metadata with symbolized keys
# @return [self]
# @raise [ArgumentError] if an element is neither a String nor a Hash
def add_all(docs)
  docs.each do |doc|
    case doc
    when String
      add(doc)
    when Hash
      # Work on a shallow copy so the caller's hash keeps its text entry
      # and frozen hashes are accepted.
      attrs = doc.dup
      text = attrs.delete(:text) || attrs.delete("text")
      add(text, **attrs.transform_keys(&:to_sym))
    else
      raise ArgumentError, "Invalid document type: #{doc.class}"
    end
  end

  self
end

#add_directory(directory, pattern: "**/*", extensions: nil, **metadata) ⇒ self

Add all files from a directory

Examples:

builder.add_directory("docs/")
builder.add_directory("src/", extensions: [".rb", ".py"])

Parameters:

  • directory (String)

    Directory path

  • pattern (String) (defaults to: "**/*")

    Glob pattern (default: "**/*")

  • extensions (Array<String>, nil) (defaults to: nil)

    Filter by extensions (e.g., [“.md”, “.txt”])

  • metadata (Hash)

    Additional metadata for all files

Returns:

  • (self)

Raises:

  • (ArgumentError)


109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/leann/builder.rb', line 109

# Add every regular file under a directory that matches the glob pattern.
#
# @example
#   builder.add_directory("docs/")
#   builder.add_directory("src/", extensions: [".rb", ".py"])
#
# @param directory [String] directory path
# @param pattern [String] glob pattern, joined onto +directory+
# @param extensions [Array<String>, nil] when given, only files whose
#   +File.extname+ (leading dot included) is in the list are added
# @param metadata [Hash] additional metadata forwarded to {#add_file} for
#   every file
# @return [self]
# @raise [ArgumentError] if +directory+ does not exist
def add_directory(directory, pattern: "**/*", extensions: nil, **metadata)
  raise ArgumentError, "Directory not found: #{directory}" unless Dir.exist?(directory)

  Dir.glob(File.join(directory, pattern)).each do |file_path|
    # The glob also yields directories; only regular files are indexed.
    next unless File.file?(file_path)
    next if extensions && !extensions.include?(File.extname(file_path))

    add_file(file_path, **metadata)
  end

  self
end

#add_file(file_path, **metadata) ⇒ self

Add content from a file

Examples:

builder.add_file("README.md")
builder.add_file("docs/guide.txt", category: "documentation")

Parameters:

  • file_path (String)

    Path to file

  • metadata (Hash)

    Additional metadata

Returns:

  • (self)

Raises:

  • (ArgumentError)


85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/leann/builder.rb', line 85

# Add the content of a file as a document.
#
# The document's metadata gets +:source+, +:filename+ and +:extension+
# entries derived from +file_path+; caller-supplied metadata is merged on
# top and therefore wins on key collisions.
#
# @example
#   builder.add_file("README.md")
#   builder.add_file("docs/guide.txt", category: "documentation")
#
# @param file_path [String] path to file
# @param metadata [Hash] additional metadata
# @return [self]
# @raise [ArgumentError] if +file_path+ does not exist
def add_file(file_path, **metadata)
  raise ArgumentError, "File not found: #{file_path}" unless File.exist?(file_path)

  content = File.read(file_path)
  file_metadata = {
    source: file_path,
    filename: File.basename(file_path),
    extension: File.extname(file_path)
  }.merge(metadata)

  add(content, **file_metadata)
end

#count ⇒ Integer Also known as: size

Get number of documents added

Returns:

  • (Integer)


152
153
154
# File 'lib/leann/builder.rb', line 152

# @return [Integer] number of documents added so far
def count = @documents.size

#empty? ⇒ Boolean

Check if any documents have been added

Returns:

  • (Boolean)


159
160
161
# File 'lib/leann/builder.rb', line 159

# @return [Boolean] true when no documents have been added yet
def empty? = @documents.empty?

#save ⇒ Index Also known as: build

Build and save the index

Returns:

  • (Index)

    The built index

Raises:

  • (EmptyIndexError)

    If no documents have been added



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/leann/builder.rb', line 165

# Build the index from the added documents and save it under +path+.
#
# Progress messages are written to standard output before and after the
# build.
#
# @return [Index] the result of +Index.open(path)+ once everything is written
# @raise [EmptyIndexError] if no documents have been added
def save
  raise EmptyIndexError if empty?

  puts "Building index '#{name}' with #{count} documents..."

  # Create directory if needed
  FileUtils.mkdir_p(File.dirname(path))

  # Delete existing if force mode
  Index.delete(path) if @force && Index.exists?(path)

  # Compute embeddings
  embeddings = compute_embeddings

  # Save passages
  save_passages

  # Build and save graph
  save_graph(embeddings)

  # Save metadata
  # NOTE(review): the call was reduced to a bare `(embeddings)` no-op;
  # `save_metadata` is assumed to be the matching private helper — confirm.
  save_metadata(embeddings)

  puts "Index '#{name}' created successfully!"

  Index.open(path)
end