Module: Legion::Extensions::Knowledge::Runners::Ingest
- Extended by:
- Logging::Helper, Settings::Helper
- Included in:
- Client
- Defined in:
- lib/legion/extensions/knowledge/runners/ingest.rb
Overview
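Runner methods for the Knowledge extension's ingest operations: `ingest_content`, `ingest_corpus`, `ingest_file`, and `scan_corpus`. (The module has no written description; the only comment captured from the source is the linter directive `rubocop:disable Legion/Extension/RunnerIncludeHelpers`.)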
Constant Summary collapse
- FILTER_SCHEMA =
{ type: 'object', properties: { relevant: { type: 'boolean' }, confidence: { type: 'number' }, reason: { type: 'string' } }, required: %w[relevant confidence] }.freeze
Class Method Summary collapse
- .ingest_content(content:, source_type: :text, metadata: {}) ⇒ Object
- .ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true) ⇒ Object
- .ingest_file(file_path:, force: false, filter: true) ⇒ Object
- .scan_corpus(path:, extensions: nil) ⇒ Object
Class Method Details
.ingest_content(content:, source_type: :text, metadata: {}) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
# Ingests a single in-memory piece of content. Per the listing below, it builds a
# synthetic source path of the form "content://<source_type>/<uuid>", wraps the
# content in one section hash, chunks it with Helpers::Chunker.chunk, passes the
# chunks through filter_chunks(filter: true), and then processes each paired
# chunk/embedding entry with force: false.
#
# Returns { status: :ingested, chunks:, source_type:, metadata: } on success.
# Any StandardError is reported through handle_exception at level :warn and
# turned into a { status: :failed, error:, source_type:, metadata: } hash.
#
# NOTE(review): this listing is incomplete. Two callee names were lost in
# extraction: the call in `paired = (chunks, force: false)` and the call inside
# the `paired.each` block. SOURCE does not show what they are, so recover them
# from lib/legion/extensions/knowledge/runners/ingest.rb, not by guessing.
# NOTE(review): `error: e.` is presumably `error: e.message` - confirm.
# NOTE(review): `metadata: }` may be the Ruby 3.1 shorthand for
# `metadata: metadata`, or may have lost its value in extraction - confirm.
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 119 def ingest_content(content:, source_type: :text, metadata: {}) source_path = "content://#{source_type}/#{SecureRandom.uuid}" section = { content: content, heading: source_type.to_s, section_path: [source_type.to_s], source_file: source_path } chunks = filter_chunks(Helpers::Chunker.chunk(sections: [section]), filter: true) paired = (chunks, force: false) paired.each { |p| (p[:chunk], p[:embedding], force: false, exists: p[:exists] || false) } { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: } rescue StandardError => e handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_content', source_type: source_type) { status: :failed, error: e., source_type: source_type, metadata: } end |
.ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true) ⇒ Object
42 43 44 45 46 47 48 49 50 |
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 42
#
# Entry point for corpus ingestion. Dispatches to monitor-based ingestion when a
# non-empty `monitors` collection is given, otherwise to path-based ingestion.
#
# @param path [Object, nil] corpus location; required when `monitors` is nil or empty
# @param monitors [#any?, nil] monitor definitions; takes precedence over `path`
# @param dry_run [Boolean] forwarded unchanged to the delegate
# @param force [Boolean] forwarded unchanged to the delegate
# @param filter [Boolean] forwarded unchanged to the delegate
# @return [Object] whatever `ingest_monitors` / `ingest_corpus_path` returns, or
#   `{ success: false, error: <message> }` when an ArgumentError is raised
def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true)
  return ingest_monitors(monitors: monitors, dry_run: dry_run, force: force, filter: filter) if monitors&.any?
  raise ArgumentError, 'path is required when monitors is not provided' if path.nil?

  ingest_corpus_path(path: path, dry_run: dry_run, force: force, filter: filter)
rescue ArgumentError => e
  # Only ArgumentError is converted to a failure hash (including one raised by a
  # delegate above); every other exception propagates to the caller.
  handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_corpus')
  { success: false, error: e.message }
end
.ingest_file(file_path:, force: false, filter: true) ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 136
#
# Ingests one file via `process_file` (always with `dry_run: false`) and
# reports the per-file chunk counters.
#
# @param file_path [Object] file to ingest; passed positionally to `process_file`
# @param force [Boolean] forwarded unchanged to `process_file`
# @param filter [Boolean] forwarded unchanged to `process_file`
# @return [Hash] on success `{ success: true, file:, chunks_created:,
#   chunks_skipped:, chunks_updated: }`, read from the `:created`, `:skipped`
#   and `:updated` keys of the `process_file` result; on any StandardError
#   `{ success: false, error: <message> }`
def ingest_file(file_path:, force: false, filter: true)
  result = process_file(file_path, dry_run: false, force: force, filter: filter)

  {
    success: true,
    file: file_path,
    chunks_created: result[:created],
    chunks_skipped: result[:skipped],
    chunks_updated: result[:updated]
  }
rescue StandardError => e
  # Failures are reported at :warn and returned as data rather than raised.
  handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_file', file_path: file_path)
  { success: false, error: e.message }
end
.scan_corpus(path:, extensions: nil) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 17
#
# Scans a corpus through `Helpers::Manifest.scan` and summarises the entries it
# returns. No exception handling is done here; errors from the scan propagate.
#
# @param path [Object] corpus location, passed to the scan as `path:`
# @param extensions [Object, nil] passed to the scan as `extensions:` only when
#   truthy; otherwise the keyword is omitted entirely
# @return [Hash] `{ success: true, path:, file_count:, total_bytes:, files: }`
#   where `total_bytes` sums each entry's `:size` and `files` lists each
#   entry's `:path`
def scan_corpus(path:, extensions: nil)
  scan_args = extensions ? { path: path, extensions: extensions } : { path: path }
  manifest = Helpers::Manifest.scan(**scan_args)

  byte_total = manifest.sum { |entry| entry[:size] }
  file_paths = manifest.map { |entry| entry[:path] }

  { success: true, path: path, file_count: manifest.size, total_bytes: byte_total, files: file_paths }
end