Module: Legion::Extensions::Knowledge::Runners::Ingest

Included in:
Client
Defined in:
lib/legion/extensions/knowledge/runners/ingest.rb

Overview

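Runner methods for scanning a corpus (`scan_corpus`) and for ingesting a corpus, a single file, or raw content (`ingest_corpus`, `ingest_file`, `ingest_content`). Note: the source comment captured at this position is the linter directive `rubocop:disable Legion/Extension/RunnerIncludeHelpers`, not a module description.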

Class Method Summary collapse

Class Method Details

.ingest_content(content:, source_type: :text, metadata: {}) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 109

# Ingests an in-memory piece of content rather than a file on disk.
#
# The content is wrapped in a single section whose heading and section path
# are both the stringified +source_type+, and whose source file is a
# synthetic "content://<source_type>/<uuid>" identifier. That section is
# handed to Helpers::Chunker, the resulting chunks go through
# +batch_embed_chunks+, and every returned pair is written with
# +upsert_chunk_with_embedding+ (always with +force: false+).
#
# @param content [Object] value stored under the section's +:content+ key
#   (presumably text; confirm against Helpers::Chunker)
# @param source_type [#to_s] label used for the heading, the section path
#   and the synthetic source identifier
# @param metadata [Hash] echoed back unchanged in the result; this method
#   does not otherwise read it
# @return [Hash] on success
#   +{ status: :ingested, chunks:, source_type:, metadata: }+; when any
#   StandardError is raised, the message is logged as a warning and
#   +{ status: :failed, error:, source_type:, metadata: }+ is returned
def ingest_content(content:, source_type: :text, metadata: {})
  origin = "content://#{source_type}/#{SecureRandom.uuid}"
  sections = [
    { content: content, heading: source_type.to_s, section_path: [source_type.to_s], source_file: origin }
  ]
  chunks = Helpers::Chunker.chunk(sections: sections)

  batch_embed_chunks(chunks, force: false).each do |pair|
    # A missing/nil :exists flag is normalised to false before upserting.
    already_stored = pair[:exists] || false
    upsert_chunk_with_embedding(pair[:chunk], pair[:embedding], force: false, exists: already_stored)
  end

  { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
rescue StandardError => error
  log.warn(error.message)
  { status: :failed, error: error.message, source_type: source_type, metadata: metadata }
end

.ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false) ⇒ Object



32
33
34
35
36
37
38
39
40
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 32

# Entry point for corpus ingestion; dispatches to monitor-based or
# path-based ingestion.
#
# When +monitors+ is non-nil and +any?+ is truthy, the work is delegated to
# +ingest_monitors+ and +path+ is ignored. Otherwise +path+ must be given
# and the work is delegated to +ingest_corpus_path+.
#
# @param path [Object, nil] corpus location, required when no monitors are
#   supplied
# @param monitors [#any?, nil] monitor collection; takes precedence over
#   +path+ when it has any entries
# @param dry_run [Boolean] forwarded unchanged to the delegate
# @param force [Boolean] forwarded unchanged to the delegate
# @return [Object] whatever the chosen delegate returns, or
#   +{ success: false, error: }+ when an ArgumentError is raised (including
#   the missing-path case); the message is also logged as a warning. Other
#   exception classes are not rescued here.
def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false)
  if monitors&.any?
    ingest_monitors(monitors: monitors, dry_run: dry_run, force: force)
  elsif path.nil?
    raise ArgumentError, 'path is required when monitors is not provided'
  else
    ingest_corpus_path(path: path, dry_run: dry_run, force: force)
  end
rescue ArgumentError => error
  log.warn(error.message)
  { success: false, error: error.message }
end

.ingest_file(file_path:, force: false) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 126

# Ingests a single file by delegating to +process_file+ with
# +dry_run: false+, then reshapes the counters it returns.
#
# @param file_path [Object] passed positionally to +process_file+ and echoed
#   back under +:file+
# @param force [Boolean] forwarded unchanged to +process_file+
# @return [Hash] on success
#   +{ success: true, file:, chunks_created:, chunks_skipped:, chunks_updated: }+
#   where the three counters are read from the +:created+, +:skipped+ and
#   +:updated+ keys of the +process_file+ result; when any StandardError is
#   raised, the message is logged as a warning and
#   +{ success: false, error: }+ is returned
def ingest_file(file_path:, force: false)
  counts = process_file(file_path, dry_run: false, force: force)

  summary = { success: true, file: file_path }
  summary[:chunks_created] = counts[:created]
  summary[:chunks_skipped] = counts[:skipped]
  summary[:chunks_updated] = counts[:updated]
  summary
rescue StandardError => error
  log.warn(error.message)
  { success: false, error: error.message }
end

.scan_corpus(path:, extensions: nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 17

# Scans a corpus location via Helpers::Manifest and summarises the result.
#
# +extensions+ is only forwarded to Helpers::Manifest.scan when it is
# truthy, so the scanner's own default applies otherwise. No exceptions are
# rescued here; anything raised by the scan or by the aggregation
# propagates to the caller.
#
# @param path [Object] location handed to the scanner and echoed back under
#   +:path+
# @param extensions [Object, nil] optional filter forwarded to the scanner
#   (presumably a list of file extensions; confirm against
#   Helpers::Manifest.scan)
# @return [Hash] +{ success: true, path:, file_count:, total_bytes:, files: }+
#   where +file_count+ is the number of entries, +total_bytes+ is the sum of
#   each entry's +:size+ and +files+ lists each entry's +:path+
def scan_corpus(path:, extensions: nil)
  scan_args = extensions ? { path: path, extensions: extensions } : { path: path }
  manifest = Helpers::Manifest.scan(**scan_args)

  entry_count = manifest.size
  byte_total = manifest.sum { |entry| entry[:size] }
  file_paths = manifest.map { |entry| entry[:path] }

  { success: true, path: path, file_count: entry_count, total_bytes: byte_total, files: file_paths }
end