Class: Ragnar::Indexer

Inherits:
Object
  • Object
show all
Defined in:
lib/ragnar/indexer.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db_path: Ragnar::DEFAULT_DB_PATH, chunk_size: Ragnar::DEFAULT_CHUNK_SIZE, chunk_overlap: Ragnar::DEFAULT_CHUNK_OVERLAP, embedding_model: Ragnar::DEFAULT_EMBEDDING_MODEL, show_progress: true) ⇒ Indexer

Returns a new instance of Indexer.



7
8
9
10
11
12
13
14
15
16
# File 'lib/ragnar/indexer.rb', line 7

# Builds an indexer together with the three collaborators it relies on:
# a Database opened at +db_path+, a Chunker configured with the chunk
# size/overlap, and an Embedder for the named model.
#
# All options are keyword arguments and fall back to the Ragnar::DEFAULT_*
# constants; +show_progress+ defaults to true and is only stored here.
def initialize(db_path: Ragnar::DEFAULT_DB_PATH,
               chunk_size: Ragnar::DEFAULT_CHUNK_SIZE,
               chunk_overlap: Ragnar::DEFAULT_CHUNK_OVERLAP,
               embedding_model: Ragnar::DEFAULT_EMBEDDING_MODEL,
               show_progress: true)
  chunker_options = { chunk_size: chunk_size, chunk_overlap: chunk_overlap }

  @show_progress = show_progress
  @database = Database.new(db_path)
  @chunker = Chunker.new(**chunker_options)
  @embedder = Embedder.new(model_name: embedding_model)
end

Instance Attribute Details

#chunker ⇒ Object (readonly)

Returns the value of attribute chunker.



5
6
7
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the chunker; #initialize sets it to the Chunker
# built from the chunk_size and chunk_overlap options.
def chunker
  @chunker
end

#database ⇒ Object (readonly)

Returns the value of attribute database.



5
6
7
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the database; #initialize sets it to the Database
# created for the db_path option.
def database
  @database
end

#embedder ⇒ Object (readonly)

Returns the value of attribute embedder.



5
6
7
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the embedder; #initialize sets it to the Embedder
# created for the embedding_model option.
def embedder
  @embedder
end

Class Method Details

.is_text_file?(file_path) ⇒ Boolean

Returns:

  • (Boolean)


212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/ragnar/indexer.rb', line 212

# Decides whether +file_path+ should be treated as a text document.
#
# A file whose (case-insensitive) extension is listed in
# supported_extensions is accepted without being opened. Any other file is
# sniffed: up to the first 8KB is read in binary mode and the file is
# accepted when it has no NUL bytes and more than 90% of it is printable.
#
# "Printable" is evaluated per character when the sample is valid UTF-8, so
# non-English text (CJK, Cyrillic, accented Latin, ...) is recognised as
# text instead of being rejected for having too few ASCII bytes. Samples
# that are not valid UTF-8 fall back to the byte-wise printable-ASCII ratio.
# For pure-ASCII content both checks give the same answer.
#
# @param file_path [String] path of the file to inspect
# @return [Boolean] true when the file looks like text; false for empty,
#   unreadable, or binary-looking files
def self.is_text_file?(file_path)
  # Check by extension
  ext = File.extname(file_path).downcase
  return true if supported_extensions.include?(ext)

  sample_limit = 8192

  # Check if file appears to be text
  begin
    # File.read with a length returns nil when the file is empty.
    sample = File.read(file_path, sample_limit, mode: 'rb')
    return false if sample.nil? || sample.empty?

    # A NUL byte is treated as proof of binary content.
    return false if sample.include?("\x00")

    text = sample.dup.force_encoding(Encoding::UTF_8)

    # A full-sized sample may have been cut inside a multi-byte character.
    # An incomplete UTF-8 sequence is at most 3 bytes long, so dropping up
    # to 3 trailing bytes is enough to recover an otherwise valid sample.
    if sample.bytesize == sample_limit
      3.times do
        break if text.valid_encoding?

        text = text.byteslice(0, text.bytesize - 1)
      end
    end

    if text.valid_encoding?
      # Count control characters other than tab, LF and CR; every other
      # character (including non-ASCII ones) counts as printable.
      control = text.count("\x01-\x08\x0B\x0C\x0E-\x1F\x7F")
      (text.length - control).to_f / text.length > 0.9
    else
      # Not UTF-8: check if mostly printable ASCII bytes.
      printable = sample.count("\t\n\r\x20-\x7E")
      printable.to_f / sample.size > 0.9
    end
  rescue StandardError
    # Best effort: anything that cannot be read is reported as "not text".
    false
  end
end

.supported_extensions ⇒ Object



205
206
207
208
209
210
# File 'lib/ragnar/indexer.rb', line 205

# Lists the file extensions (lower-case, with leading dot) that are accepted
# as indexable without inspecting the file contents. The original comment
# attributes the extended format support to parser-core.
#
# A new array is built on every call.
def self.supported_extensions
  text_and_markup = %w[.txt .md .markdown .text .log .csv .json .xml .html .htm]
  office_documents = %w[.pdf .docx .doc .xlsx .xls .pptx .ppt]
  source_code = %w[.rb .py .js .rs .go .java .cpp .c .h]

  text_and_markup + office_documents + source_code
end

Instance Method Details

#index_directory(dir_path) ⇒ Object



96
97
98
# File 'lib/ragnar/indexer.rb', line 96

# Convenience alias: hands +dir_path+ straight to #index_path and returns
# whatever #index_path returns (its stats hash).
def index_directory(dir_path)
  index_path(dir_path)
end

#index_files(files) ⇒ Object

Convenience methods for compatibility



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/ragnar/indexer.rb', line 80

# Indexes an explicit list of files and reports what happened.
#
# Paths that do not exist are skipped silently. Each remaining file is
# handed to process_file; a failure in one file is counted in
# stats[:errors] (and reported when progress output is enabled) instead of
# aborting the rest of the batch, mirroring the behaviour of #index_path.
#
# @param files [Array<String>, String, nil] paths to index; a single path
#   or nil is accepted and treated as a one-element or empty list
# @return [Hash] counters :files_processed, :chunks_created and :errors
def index_files(files)
  stats = {
    files_processed: 0,
    chunks_created: 0,
    errors: 0
  }

  Array(files).each do |file|
    next unless File.exist?(file)

    begin
      process_file(file, stats)
      stats[:files_processed] += 1
    rescue => e
      puts "Error processing #{File.basename(file)}: #{e.message}" if @show_progress
      stats[:errors] += 1
    end
  end

  stats
end

#index_path(path) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/ragnar/indexer.rb', line 18

# Indexes every file that collect_files returns for +path+.
#
# Each file is passed to process_file. An exception raised for one file is
# reported, counted in stats[:errors], and does not stop the remaining
# files. When progress output is enabled a TTY::ProgressBar is used if one
# was created, otherwise one "Processing (i/n)" line is printed per file.
#
# Note that the "No text files found" message is printed even when
# progress output is disabled.
#
# @param path [String] file or directory path handed to collect_files
# @return [Hash] counters :files_processed, :chunks_created and :errors
def index_path(path)
  stats = {
    files_processed: 0,
    chunks_created: 0,
    errors: 0
  }

  files = collect_files(path)

  if files.empty?
    puts "No text files found at path: #{path}"
    return stats
  end

  puts "Found #{files.size} file(s) to process" if @show_progress

  # nil when progress output is off, which selects the plain-text branch
  # (or silence) below.
  # NOTE(review): IO objects normally respond to :ioctl, so this guard may
  # not actually detect a terminal - confirm the intent.
  file_progress = if @show_progress && $stdout.respond_to?(:ioctl)
    TTY::ProgressBar.new(
      "Processing [:bar] :percent :current/:total - :filename",
      total: files.size,
      bar_format: :block,
      width: 30,
      clear: true
    )
  end

  files.each_with_index do |file_path, idx|
    begin
      if file_progress
        # Show the current filename without moving the bar (advance by 0).
        # Names longer than 30 characters are shortened to 27 characters
        # plus "..." so the displayed token never exceeds 30 characters.
        filename = File.basename(file_path)
        filename = "#{filename[0, 27]}..." if filename.length > 30
        file_progress.advance(0, filename: filename)
      elsif @show_progress
        puts "Processing (#{idx + 1}/#{files.size}): #{File.basename(file_path)}"
      end

      process_file(file_path, stats, file_progress)
      stats[:files_processed] += 1
    rescue => e
      if file_progress
        file_progress.log "Error: #{File.basename(file_path)} - #{e.message}"
      else
        puts "Error processing #{File.basename(file_path)}: #{e.message}" if @show_progress
      end
      stats[:errors] += 1
    ensure
      # The bar moves one step per file whether it succeeded or failed.
      file_progress&.advance
    end
  end

  stats
end

#index_text(text, metadata = {}) ⇒ Object



74
75
76
77
# File 'lib/ragnar/indexer.rb', line 74

# Indexes a raw string instead of a file.
#
# The text is split by the chunker (which also receives +metadata+), and
# the resulting chunks are passed to process_chunks together with a source
# label: metadata[:file_path] when present, otherwise "inline_text".
#
# @param text [String] content to index
# @param metadata [Hash] optional metadata forwarded to the chunker
# @return [Object] whatever process_chunks returns
def index_text(text, metadata = {})
  chunks = @chunker.chunk_text(text, metadata)
  process_chunks(chunks, metadata[:file_path] || "inline_text")
end