Module: Legion::Extensions::Knowledge::Runners::Ingest

Extended by:
Logging::Helper, Settings::Helper
Included in:
Client
Defined in:
lib/legion/extensions/knowledge/runners/ingest.rb

Overview

rubocop:disable Legion/Extension/RunnerIncludeHelpers

Constant Summary collapse

# JSON-Schema-style description of a filter verdict: an object with a boolean
# +relevant+ flag, a numeric +confidence+ and a string +reason+. Only
# +relevant+ and +confidence+ are required; +reason+ is optional.
# NOTE(review): presumably the structured-output schema used when filtering
# chunks; its usage is not visible here, so confirm against the caller.
# NOTE: +freeze+ is shallow, so the nested hashes and the +required+ array
# remain mutable.
FILTER_SCHEMA =
{
  type:       'object',
  properties: {
    relevant:   { type: 'boolean' },
    confidence: { type: 'number' },
    reason:     { type: 'string' }
  },
  required:   %w[relevant confidence]
}.freeze

Class Method Summary collapse

Class Method Details

.ingest_content(content:, source_type: :text, metadata: {}) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 119

# Ingests a piece of in-memory content that has no backing file.
#
# The content is wrapped in a single section labelled with +source_type+ and
# given a synthetic, unique +content://<source_type>/<uuid>+ source path. That
# section is chunked, filtered (always with +filter: true+), embedded and then
# upserted chunk by chunk (embedding and upsert both run with +force: false+).
#
# @param content [String] presumably text; handed to the chunker unchanged
# @param source_type [Symbol, String] label used for the heading, the section
#   path and the synthetic source path
# @param metadata [Hash] caller-supplied data echoed back unchanged in the result
# @return [Hash] +{ status: :ingested, chunks:, source_type:, metadata: }+ on
#   success, or +{ status: :failed, error:, source_type:, metadata: }+ when any
#   StandardError is raised (the error is reported via +handle_exception+ first)
def ingest_content(content:, source_type: :text, metadata: {})
  source_path = "content://#{source_type}/#{SecureRandom.uuid}"
  section = {
    content:      content,
    heading:      source_type.to_s,
    section_path: [source_type.to_s],
    source_file:  source_path
  }
  chunks = filter_chunks(Helpers::Chunker.chunk(sections: [section]), filter: true)
  paired = batch_embed_chunks(chunks, force: false)
  paired.each do |pair|
    # A missing :exists flag is treated as "not yet stored".
    upsert_chunk_with_embedding(pair[:chunk], pair[:embedding], force: false, exists: pair[:exists] || false)
  end
  # The value is spelled out (as for every other pair in this file) so the
  # literal does not depend on Ruby 3.1 value-omission shorthand.
  { status: :ingested, chunks: chunks.size, source_type: source_type, metadata: metadata }
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'knowledge.ingest.ingest_content', source_type: source_type)
  { status: :failed, error: e.message, source_type: source_type, metadata: metadata }
end

.ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true) ⇒ Object



42
43
44
45
46
47
48
49
50
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 42

# Entry point for corpus ingestion: dispatches to monitor-based ingestion when
# any monitors are supplied, otherwise to path-based ingestion.
#
# @param path [String, nil] corpus location; required when no monitors are given
# @param monitors [#any?, nil] monitors to ingest; takes precedence over +path+
#   when non-empty
# @param dry_run [Boolean] forwarded unchanged to the selected ingester
# @param force [Boolean] forwarded unchanged to the selected ingester
# @param filter [Boolean] forwarded unchanged to the selected ingester
# @return [Object] whatever the selected ingester returns, or
#   +{ success: false, error: }+ when an ArgumentError is raised (including the
#   missing-path case); other error classes are not rescued here
def ingest_corpus(path: nil, monitors: nil, dry_run: false, force: false, filter: true)
  run_opts = { dry_run: dry_run, force: force, filter: filter }

  if monitors&.any?
    ingest_monitors(monitors: monitors, **run_opts)
  elsif path.nil?
    raise ArgumentError, 'path is required when monitors is not provided'
  else
    ingest_corpus_path(path: path, **run_opts)
  end
rescue ArgumentError => err
  handle_exception(err, level: :warn, operation: 'knowledge.ingest.ingest_corpus')
  { success: false, error: err.message }
end

.ingest_file(file_path:, force: false, filter: true) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 136

# Ingests a single file (never as a dry run) and summarises the chunk counts.
#
# @param file_path [String] file to ingest; passed to +process_file+ as-is
# @param force [Boolean] forwarded unchanged to +process_file+
# @param filter [Boolean] forwarded unchanged to +process_file+
# @return [Hash] on success +{ success: true, file:, chunks_created:,
#   chunks_skipped:, chunks_updated: }+ built from the +:created+, +:skipped+
#   and +:updated+ entries of the processing result; on any StandardError
#   +{ success: false, error: }+ after reporting it via +handle_exception+
def ingest_file(file_path:, force: false, filter: true)
  outcome = process_file(file_path, dry_run: false, force: force, filter: filter)
  created = outcome[:created]
  skipped = outcome[:skipped]
  updated = outcome[:updated]

  { success: true, file: file_path, chunks_created: created, chunks_skipped: skipped, chunks_updated: updated }
rescue StandardError => err
  handle_exception(err, level: :warn, operation: 'knowledge.ingest.ingest_file', file_path: file_path)
  { success: false, error: err.message }
end

.scan_corpus(path:, extensions: nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/legion/extensions/knowledge/runners/ingest.rb', line 17

# Scans a corpus location via +Helpers::Manifest.scan+ and summarises the result.
#
# @param path [String] location to scan; also echoed back in the result
# @param extensions [Object, nil] forwarded to the scanner only when truthy;
#   otherwise the +:extensions+ option is omitted from the call entirely
# @return [Hash] +{ success: true, path:, file_count:, total_bytes:, files: }+
#   where +total_bytes+ sums each entry's +:size+ and +files+ lists each
#   entry's +:path+. Errors raised by the scanner are not rescued here.
def scan_corpus(path:, extensions: nil)
  scan_args = extensions ? { path: path, extensions: extensions } : { path: path }
  found = Helpers::Manifest.scan(**scan_args)

  file_count = found.size
  byte_total = found.sum { |entry| entry[:size] }
  file_paths = found.map { |entry| entry[:path] }

  { success: true, path: path, file_count: file_count, total_bytes: byte_total, files: file_paths }
end