Class: Ragnar::Indexer
- Inherits:
-
Object
- Object
- Ragnar::Indexer
- Defined in:
- lib/ragnar/indexer.rb
Instance Attribute Summary collapse
-
#chunker ⇒ Object
readonly
Returns the value of attribute chunker.
-
#database ⇒ Object
readonly
Returns the value of attribute database.
-
#embedder ⇒ Object
readonly
Returns the value of attribute embedder.
Class Method Summary collapse
- .is_text_file?(file_path) ⇒ Boolean
- .supported_extensions ⇒ Object
Instance Method Summary collapse
- #index_directory(dir_path) ⇒ Object
-
#index_files(files) ⇒ Object
Convenience methods for compatibility.
- #index_path(path) ⇒ Object
- #index_text(text, metadata = {}) ⇒ Object
-
#initialize(db_path: Ragnar::DEFAULT_DB_PATH, chunk_size: Ragnar::DEFAULT_CHUNK_SIZE, chunk_overlap: Ragnar::DEFAULT_CHUNK_OVERLAP, embedding_model: Ragnar::DEFAULT_EMBEDDING_MODEL, show_progress: true) ⇒ Indexer
constructor
A new instance of Indexer.
Constructor Details
#initialize(db_path: Ragnar::DEFAULT_DB_PATH, chunk_size: Ragnar::DEFAULT_CHUNK_SIZE, chunk_overlap: Ragnar::DEFAULT_CHUNK_OVERLAP, embedding_model: Ragnar::DEFAULT_EMBEDDING_MODEL, show_progress: true) ⇒ Indexer
Returns a new instance of Indexer.
7 8 9 10 11 12 13 14 15 16 |
# File 'lib/ragnar/indexer.rb', line 7

# Builds an indexer by constructing its three collaborators and remembering
# whether progress output is wanted.
#
# @param db_path [Object] passed unchanged to Database.new
# @param chunk_size [Object] forwarded to Chunker.new as chunk_size:
# @param chunk_overlap [Object] forwarded to Chunker.new as chunk_overlap:
# @param embedding_model [Object] forwarded to Embedder.new as model_name:
# @param show_progress [Boolean] stored in @show_progress
# @return [Indexer] a new instance of Indexer
def initialize(db_path: Ragnar::DEFAULT_DB_PATH,
               chunk_size: Ragnar::DEFAULT_CHUNK_SIZE,
               chunk_overlap: Ragnar::DEFAULT_CHUNK_OVERLAP,
               embedding_model: Ragnar::DEFAULT_EMBEDDING_MODEL,
               show_progress: true)
  @database = Database.new(db_path)
  @chunker = Chunker.new(chunk_size: chunk_size, chunk_overlap: chunk_overlap)
  # embedding_model is the constructor keyword that selects the embedder's model.
  @embedder = Embedder.new(model_name: embedding_model)
  @show_progress = show_progress
end
Instance Attribute Details
#chunker ⇒ Object (readonly)
Returns the value of attribute chunker.
5 6 7 |
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the chunker assigned in the constructor.
#
# @return [Object] the current value of @chunker
def chunker
  @chunker
end
#database ⇒ Object (readonly)
Returns the value of attribute database.
5 6 7 |
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the database assigned in the constructor.
#
# @return [Object] the current value of @database
def database
  @database
end
#embedder ⇒ Object (readonly)
Returns the value of attribute embedder.
5 6 7 |
# File 'lib/ragnar/indexer.rb', line 5

# Read-only accessor for the embedder assigned in the constructor.
#
# @return [Object] the current value of @embedder
def embedder
  @embedder
end
Class Method Details
.is_text_file?(file_path) ⇒ Boolean
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/ragnar/indexer.rb', line 212

# Decides whether a path should be treated as a text file.
#
# A path whose lower-cased extension is listed in supported_extensions is
# accepted without touching the file. Otherwise up to 8192 bytes are read in
# binary mode and the file is accepted only when that sample contains no NUL
# byte and more than 90% of it is tab, LF, CR or printable ASCII (0x20-0x7E).
#
# @param file_path [String] path to inspect
# @return [Boolean] false as well when nothing could be read or when reading
#   raised a StandardError
def self.is_text_file?(file_path)
  extension = File.extname(file_path).downcase
  return true if supported_extensions.include?(extension)

  # Only the content sniffing is guarded; errors from the extension check
  # above still propagate to the caller.
  begin
    head = File.read(file_path, 8192, mode: 'rb')

    if head.nil? || head.count("\x00") > 0
      # Nothing was read, or a NUL byte marks the content as binary.
      false
    else
      printable_bytes = head.count("\t\n\r\x20-\x7E")
      (printable_bytes.to_f / head.size) > 0.9
    end
  rescue
    false
  end
end
.supported_extensions ⇒ Object
205 206 207 208 209 210 |
# File 'lib/ragnar/indexer.rb', line 205

# Lists the file extensions that are accepted by extension alone.
# This is the extended list of formats supported through parser-core.
#
# @return [Array<String>] a newly built array of lower-case extensions,
#   each including the leading dot
def self.supported_extensions
  document_formats = %w[.txt .md .markdown .text .log .csv .json .xml .html .htm
                        .pdf .docx .doc .xlsx .xls .pptx .ppt]
  source_formats = %w[.rb .py .js .rs .go .java .cpp .c .h]

  document_formats + source_formats
end
Instance Method Details
#index_directory(dir_path) ⇒ Object
96 97 98 |
# File 'lib/ragnar/indexer.rb', line 96

# Indexes a directory by delegating straight to index_path.
#
# @param dir_path [Object] forwarded unchanged to index_path
# @return [Object] whatever index_path returns
def index_directory(dir_path)
  index_path(dir_path)
end
#index_files(files) ⇒ Object
Convenience methods for compatibility
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/ragnar/indexer.rb', line 80

# Convenience method for compatibility: processes an explicit list of files.
#
# Entries for which File.exist? is false are skipped silently. Every other
# entry is handed to process_file together with the shared stats hash and
# then counted in :files_processed.
#
# NOTE(review): unlike index_path, nothing is rescued here, so an error
# raised by process_file propagates and :errors is never incremented by this
# method itself - confirm that is intended.
#
# @param files [Enumerable] file paths to process
# @return [Hash] counters under :files_processed, :chunks_created and :errors
def index_files(files)
  stats = { files_processed: 0, chunks_created: 0, errors: 0 }

  files.each do |file|
    if File.exist?(file)
      process_file(file, stats)
      stats[:files_processed] += 1
    end
  end

  stats
end
#index_path(path) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/ragnar/indexer.rb', line 18

# Indexes every file that collect_files returns for the given path.
#
# Each file is passed to process_file. A StandardError raised for one file is
# reported (when progress output is enabled), counted in :errors, and does not
# stop the remaining files from being processed.
#
# @param path [Object] forwarded to collect_files
# @return [Hash] counters under :files_processed, :chunks_created and :errors
def index_path(path)
  stats = {
    files_processed: 0,
    chunks_created: 0,
    errors: 0
  }

  files = collect_files(path)

  if files.empty?
    puts "No text files found at path: #{path}"
    return stats
  end

  puts "Found #{files.size} file(s) to process" if @show_progress

  # A progress bar is only created when progress output is enabled and
  # $stdout responds to ioctl; otherwise file_progress stays nil and plain
  # puts lines are used instead.
  file_progress = if @show_progress && $stdout.respond_to?(:ioctl)
    TTY::ProgressBar.new(
      "Processing [:bar] :percent :current/:total - :filename",
      total: files.size,
      bar_format: :block,
      width: 30,
      clear: true
    )
  end

  files.each_with_index do |file_path, idx|
    begin
      if file_progress
        # Advance by 0 so only the :filename token changes here; the actual
        # step is taken in the ensure clause below.
        filename = File.basename(file_path)
        filename = filename[0..27] + "..." if filename.length > 30
        file_progress.advance(0, filename: filename)
      elsif @show_progress
        puts "Processing (#{idx + 1}/#{files.size}): #{File.basename(file_path)}"
      end

      process_file(file_path, stats, file_progress)
      stats[:files_processed] += 1
    rescue => e
      if file_progress
        file_progress.log "Error: #{File.basename(file_path)} - #{e.message}"
      else
        puts "Error processing #{File.basename(file_path)}: #{e.message}" if @show_progress
      end
      stats[:errors] += 1
    ensure
      # Runs for successes and failures alike so the bar always moves on.
      file_progress&.advance
    end
  end

  stats
end
#index_text(text, metadata = {}) ⇒ Object
74 75 76 77 |
# File 'lib/ragnar/indexer.rb', line 74

# Chunks a piece of text and hands the chunks to process_chunks.
#
# @param text [Object] passed to @chunker.chunk_text
# @param metadata [Hash] passed to @chunker.chunk_text alongside the text;
#   its :file_path entry, when truthy, is used as the source label
# @return [Object] whatever process_chunks returns
def index_text(text, metadata = {})
  chunks = @chunker.chunk_text(text, metadata)
  # Text indexed without a :file_path is labelled "inline_text".
  process_chunks(chunks, metadata[:file_path] || "inline_text")
end