Module: Legion::LLM::API::Namespaces::OpenAI::VectorStores::FileBatches

Extended by: Legion::Logging::Helper, Sinatra::Extension

Defined in: lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb

Constant Summary

SYNC_BATCH_LIMIT =

Instance Method Summary

#fetch_file_content(file_id) ⇒ Object
#format_batch(row) ⇒ Object
#format_vsf(row) ⇒ Object
#process_batch_files(_batch_id, store_id, file_ids, chunking_strategy) ⇒ Object
#store_chunks(vsf_id, store_id, file_id, chunks, embed_results) ⇒ Object

Instance Method Details

#fetch_file_content(file_id) ⇒ `Object`

# File 'lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb', line 228

# Looks up the stored content of an uploaded file.
#
# @param file_id [Object] value matched against the +id+ column of +llm_files+
# @return [Object, nil] the matching row's +:content+ value; +nil+ when the
#   storage layer reports no data connection, the +llm_files+ table does not
#   exist, no row matches, or an error was raised along the way (errors are
#   passed to +handle_exception+ and not re-raised)
def fetch_file_content(file_id)
  storage = Legion::LLM::VectorStore::Storage
  return unless storage.data_available? && storage.db.table_exists?(:llm_files)

  record = storage.db[:llm_files].where(id: file_id).first
  record&.dig(:content)
rescue StandardError => error
  handle_exception(error, level: :warn, handled: true,
                   operation: "llm.api.vector_stores.batches.fetch_content.#{file_id}")
  nil
end

#format_batch(row) ⇒ `Object`

# File 'lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb', line 257

# Builds the API representation of a file-batch row.
#
# @param row [Hash, nil] record read with the keys +:id+, +:vector_store_id+,
#   +:status+, +:file_counts_json+ and +:created_at+
# @return [Hash, nil] a hash tagged +object: 'vector_store.file_batch'+,
#   or +nil+ when +row+ is falsy
def format_batch(row)
  return unless row

  # Decoded counts are merged over zeroed defaults, so buckets absent from
  # the stored JSON still appear in the output.
  zeroed = { in_progress: 0, completed: 0, failed: 0, cancelled: 0, total: 0 }
  stored = Legion::JSON.load(row[:file_counts_json] || '{}')

  {
    id:              row[:id],
    object:          'vector_store.file_batch',
    vector_store_id: row[:vector_store_id],
    status:          row[:status].to_s,
    file_counts:     zeroed.merge(stored),
    created_at:      row[:created_at].to_i
  }
end

#format_vsf(row) ⇒ `Object`

# File 'lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb', line 273

# Builds the API representation of a vector-store-file row.
#
# @param row [Hash, nil] record read with the keys +:id+, +:vector_store_id+,
#   +:file_id+, +:status+, +:usage_bytes+, +:created_at+,
#   +:chunking_strategy_json+, +:last_error_json+ and +:attributes_json+
# @return [Hash, nil] a hash tagged +object: 'vector_store.file'+,
#   or +nil+ when +row+ is falsy
def format_vsf(row)
  return unless row

  # Decode the JSON columns up front; a missing strategy falls back to
  # {"type":"auto"}, missing attributes to {}, and a missing error to nil.
  strategy   = Legion::JSON.load(row[:chunking_strategy_json] || '{"type":"auto"}')
  error_json = row[:last_error_json]
  failure    = (Legion::JSON.load(error_json) if error_json)
  extras     = Legion::JSON.load(row[:attributes_json] || '{}')

  {
    id:                row[:id],
    object:            'vector_store.file',
    vector_store_id:   row[:vector_store_id],
    file_id:           row[:file_id],
    status:            row[:status].to_s,
    usage_bytes:       row[:usage_bytes].to_i,
    created_at:        row[:created_at].to_i,
    chunking_strategy: strategy,
    last_error:        failure,
    attributes:        extras
  }
end

#process_batch_files(_batch_id, store_id, file_ids, chunking_strategy) ⇒ `Object`

# File 'lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb', line 185

# Processes every file of a batch in turn and records one
# +llm_vector_store_files+ row per file.
#
# For each file id the content is fetched; when content is present it is
# chunked, embedded and stored, and the file row is written as 'completed'
# with the content's byte size. When no content is found the row is written
# as 'in_progress' with zero usage. Any error while handling a file is passed
# to +handle_exception+ and the loop moves on to the next file.
#
# @param _batch_id [Object] accepted but not used by this method
# @param store_id [Object] written to +vector_store_id+ on every row
# @param file_ids [Array] ids of the files to process; its size becomes +:total+
# @param chunking_strategy [Hash, Object] serialized onto each file row when it
#   is a Hash, otherwise '{"type":"auto"}' is recorded; it is not passed to
#   the chunker, which receives the text alone
# @return [Hash{Symbol=>Integer}] counts keyed by +:in_progress+, +:completed+,
#   +:failed+, +:cancelled+ and +:total+; each file lands in exactly one of the
#   first three buckets
def process_batch_files(_batch_id, store_id, file_ids, chunking_strategy)
  counts = { in_progress: 0, completed: 0, failed: 0, cancelled: 0, total: file_ids.size }
  chunking_json = chunking_strategy.is_a?(Hash) ? Legion::JSON.dump(chunking_strategy) : '{"type":"auto"}'
  ts = Legion::LLM::VectorStore::Storage.now_ts

  file_ids.each do |file_id|
    vsf_id  = Legion::LLM::VectorStore::Storage.generate_id('vsf')
    status  = 'in_progress'
    usage   = 0

    content_text = fetch_file_content(file_id)

    if content_text
      chunks        = Legion::LLM::VectorStore::Storage.chunk_text(content_text)
      embed_results = Legion::LLM::Call::Embeddings.generate_batch(texts: chunks, task: :document)
      store_chunks(vsf_id, store_id, file_id, chunks, embed_results)
      status = 'completed'
      usage  = content_text.bytesize
    end

    Legion::LLM::VectorStore::Storage.db[:llm_vector_store_files].insert(
      id:                     vsf_id,
      vector_store_id:        store_id,
      file_id:                file_id,
      status:                 status,
      usage_bytes:            usage,
      chunking_strategy_json: chunking_json,
      attributes_json:        '{}',
      last_error_json:        nil,
      created_at:             ts
    )

    # Tally only once the file row is persisted. Counting earlier would let a
    # failing insert register the same file as both completed/in_progress and
    # failed, pushing the bucket sum past :total.
    counts[status.to_sym] += 1
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true,
                     operation: "llm.api.vector_stores.batches.process_file.#{file_id}")
    counts[:failed] += 1
  end

  counts
end

#store_chunks(vsf_id, store_id, file_id, chunks, embed_results) ⇒ `Object`

# File 'lib/legion/llm/api/namespaces/openai/vector_stores/file_batches.rb', line 239

# Inserts one +llm_vector_store_chunks+ row per text chunk.
#
# @param vsf_id [Object] written to +vector_store_file_id+
# @param store_id [Object] written to +vector_store_id+
# @param file_id [Object] written to +file_id+
# @param chunks [Array] chunk texts; each chunk's position becomes +chunk_index+
# @param embed_results [Array] indexed in step with +chunks+; the +:vector+
#   entry of each element is JSON-encoded into +embedding_json+, and a missing
#   element or missing vector leaves that column nil
# @return [Array] +chunks+, as returned by +each_with_index+
def store_chunks(vsf_id, store_id, file_id, chunks, embed_results)
  storage   = Legion::LLM::VectorStore::Storage
  # One timestamp is taken up front and shared by every row of this call.
  stamped_at = storage.now_ts

  chunks.each_with_index do |text, position|
    vector = (embed_results[position] || {})[:vector]
    table  = storage.db[:llm_vector_store_chunks]

    table.insert(
      id:                   storage.generate_id('vsc'),
      vector_store_id:      store_id,
      vector_store_file_id: vsf_id,
      file_id:              file_id,
      chunk_index:          position,
      text:                 text,
      embedding_json:       (Legion::JSON.dump(vector) if vector),
      created_at:           stamped_at
    )
  end
end

Module: Legion::LLM::API::Namespaces::OpenAI::VectorStores::FileBatches

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#fetch_file_content(file_id) ⇒ Object

#format_batch(row) ⇒ Object

#format_vsf(row) ⇒ Object

#process_batch_files(_batch_id, store_id, file_ids, chunking_strategy) ⇒ Object

#store_chunks(vsf_id, store_id, file_id, chunks, embed_results) ⇒ Object

#fetch_file_content(file_id) ⇒ `Object`

#format_batch(row) ⇒ `Object`

#format_vsf(row) ⇒ `Object`

#process_batch_files(_batch_id, store_id, file_ids, chunking_strategy) ⇒ `Object`

#store_chunks(vsf_id, store_id, file_id, chunks, embed_results) ⇒ `Object`