Module: Legion::Extensions::Rfp::Ingest::Runners::Corpus
- Extended by:
- Helpers::Client
- Includes:
- Helpers::Lex
- Included in:
- Client
- Defined in:
- lib/legion/extensions/rfp/ingest/runners/corpus.rb
Instance Method Summary
- #ingest_directory(directory:, tags: [], recursive: true) ⇒ Object
- #ingest_document(file_path:, tags: [], metadata: {}) ⇒ Object
- #ingest_to_apollo(chunks:, scope: :global) ⇒ Object
Methods included from Helpers::Client
Instance Method Details
#ingest_directory(directory:, tags: [], recursive: true) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/legion/extensions/rfp/ingest/runners/corpus.rb', line 31
#
# Ingests every supported regular file found under a directory.
#
# Each candidate file is first checked with +supported?+; files whose
# check reports a falsy +:result+ are skipped, the rest are handed to
# +ingest_document+ together with the given tags.
#
# @param directory [String] directory to scan
# @param tags [Array] tags forwarded unchanged to every +ingest_document+ call
# @param recursive [Boolean] when true the glob pattern includes +**+ so
#   nested directories are scanned; otherwise only direct children are
# @return [Hash] +:result+ (array of per-file +ingest_document+ results),
#   +:files_processed+ (number of those results) and +:total_chunks+
#   (sum of each result's +:count+)
def ingest_directory(directory:, tags: [], recursive: true, **)
  pattern = recursive ? ::File.join(directory, '**', '*') : ::File.join(directory, '*')
  # The glob also yields directories; keep regular files only.
  files = Dir.glob(pattern).select { |f| ::File.file?(f) }

  results = files.filter_map do |file_path|
    next unless supported?(file_path: file_path)[:result]

    ingest_document(file_path: file_path, tags: tags)
  end

  { result: results, files_processed: results.length, total_chunks: results.sum { |r| r[:count] } }
end
#ingest_document(file_path:, tags: [], metadata: {}) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/legion/extensions/rfp/ingest/runners/corpus.rb', line 11
#
# Turns one document into a list of chunk records.
#
# The file is checked with +supported?+, its text is obtained through
# +extract_text+, and that text is split by +chunk_text+. Every chunk
# becomes a hash carrying the chunk text, the source path, its index in
# the chunk list, the caller's tags, and the caller's metadata extended
# with the extracted format and the chunk offset.
#
# @param file_path [String] path of the document to ingest
# @param tags [Array] tags attached as-is to every chunk record
# @param metadata [Hash] base metadata; merged into a new hash per chunk,
#   so the hash passed in is not modified
# @return [Hash] on success +:result+ (array of chunk records), +:count+
#   and +:source+; when +supported?+ reports a falsy +:result+, a hash
#   with +result: nil+ and an +:error+ message
def ingest_document(file_path:, tags: [], metadata: {}, **)
  supported = supported?(file_path: file_path)
  return { result: nil, error: "Unsupported format: #{file_path}" } unless supported[:result]

  extracted = extract_text(file_path: file_path)
  chunked = chunk_text(text: extracted[:result])

  ingested = chunked[:result].map.with_index do |chunk, idx|
    {
      content: chunk[:text],
      source: file_path,
      chunk_id: idx,
      tags: tags,
      # Hash#merge returns a new hash, keeping the caller's metadata intact.
      metadata: metadata.merge(format: extracted[:format], offset: chunk[:offset])
    }
  end

  { result: ingested, count: ingested.length, source: file_path }
end
#ingest_to_apollo(chunks:, scope: :global) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/legion/extensions/rfp/ingest/runners/corpus.rb', line 44
#
# Sends chunk records to Legion::Apollo, one +ingest+ call per chunk.
#
# @param chunks [Array<Hash>] chunk records; +:content+ is passed through,
#   while a missing +:tags+ or +:metadata+ falls back to +[]+ / +{}+
# @param scope [Symbol] scope value forwarded to every +ingest+ call
# @return [Hash] +:result+ (the return values of the +ingest+ calls) and
#   +:count+; when the Legion::Apollo constant is not defined, a hash with
#   +result: nil+ and an +:error+ message
def ingest_to_apollo(chunks:, scope: :global, **)
  # Apollo is looked up at call time, so its absence yields an error hash
  # instead of a NameError.
  return { result: nil, error: 'Apollo not available' } unless defined?(Legion::Apollo)

  ingested = chunks.map do |chunk|
    Legion::Apollo.ingest(
      content: chunk[:content],
      tags: chunk[:tags] || [],
      metadata: chunk[:metadata] || {},
      scope: scope
    )
  end

  { result: ingested, count: ingested.length }
end