Module: Legion::LLM::VectorStore::Storage

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/vector_store/storage.rb

Constant Summary collapse

MAX_SCAN_CHUNKS =
10_000

Class Method Summary collapse

Class Method Details

.chunk_text(text, max_chars: 512) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/legion/llm/vector_store/storage.rb', line 110

# Splits +text+ into chunks of at most +max_chars+ characters, packing whole
# paragraphs together where they fit.
#
# Paragraphs are runs of text separated by one or more blank lines; each is
# stripped and empty ones are dropped. Consecutive paragraphs are joined with
# a blank line ("\n\n") as long as the combined length stays within
# +max_chars+. A single paragraph that is itself longer than +max_chars+ is
# hard-split into +max_chars+-sized slices (at character boundaries, so a
# slice may end mid-word) so that no returned chunk exceeds the limit.
#
# @param text [String] the text to split
# @param max_chars [Integer] maximum length of a chunk, in characters
# @return [Array<String>] the chunks in document order; when +text+ contains
#   no non-blank paragraph, a one-element array holding the first
#   +max_chars+ characters of +text+
def chunk_text(text, max_chars: 512)
  paragraphs = text.split(/\n\n+/).map(&:strip).reject(&:empty?)

  # An oversized paragraph can never be packed under the limit, so slice it
  # up front instead of emitting it whole. A non-positive limit is left
  # alone because each_slice rejects it.
  segments = paragraphs.flat_map do |para|
    next [para] unless max_chars.positive? && para.length > max_chars

    para.each_char.each_slice(max_chars).map(&:join)
  end

  chunks  = []
  current = +''

  segments.each do |segment|
    if current.empty?
      current << segment
    elsif (current.length + segment.length + 2) <= max_chars
      # +2 accounts for the "\n\n" separator inserted between paragraphs.
      current << "\n\n" << segment
    else
      chunks << current.dup
      current.clear
      current << segment
    end
  end

  chunks << current unless current.empty?
  chunks.empty? ? [text[0, max_chars]] : chunks
end

.cosine_similarity(vec_a, vec_b) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/legion/llm/vector_store/storage.rb', line 96

# Computes the cosine similarity of two numeric vectors.
#
# Elements are coerced with +to_f+. Anything that cannot be compared — a
# non-Array argument (including nil), empty vectors, vectors of different
# sizes, or a zero-magnitude vector — yields 0.0 instead of raising.
#
# @param vec_a [Array, nil] first vector
# @param vec_b [Array, nil] second vector
# @return [Float] similarity clamped to the range -1.0..1.0, or 0.0 when the
#   inputs are not comparable
def cosine_similarity(vec_a, vec_b)
  # is_a?(Array) is false for nil, so this also covers missing vectors.
  comparable = vec_a.is_a?(Array) && vec_b.is_a?(Array) &&
               !vec_a.empty? && vec_a.size == vec_b.size
  return 0.0 unless comparable

  dot_product = vec_a.each_with_index.sum { |a, i| a.to_f * vec_b[i].to_f }
  norm_a = Math.sqrt(vec_a.sum { |v| v.to_f**2 })
  norm_b = Math.sqrt(vec_b.sum { |v| v.to_f**2 })
  norm_product = norm_a * norm_b
  return 0.0 if norm_product.zero?

  # Clamp away floating-point drift just outside the valid cosine range.
  (dot_product / norm_product).clamp(-1.0, 1.0)
end

.data_available? ⇒ Boolean

Returns:

  • (Boolean)


15
16
17
18
19
20
21
22
23
# File 'lib/legion/llm/vector_store/storage.rb', line 15

# Reports whether the Legion::Data layer is loaded and connected.
#
# Any StandardError raised while probing is passed to +handle_exception+
# (warn level, marked as handled) and treated as "not available".
#
# @return [Boolean] true only when Legion::Data is defined, responds to
#   +connected?+ and that call returns a truthy value; false otherwise
def data_available?
  if defined?(Legion::Data) && Legion::Data.respond_to?(:connected?) && Legion::Data.connected?
    true
  else
    false
  end
rescue StandardError => e
  handle_exception(e, level: :warn, handled: true, operation: 'llm.vector_store.dataAvailable?')
  false
end

.db ⇒ Object



25
26
27
# File 'lib/legion/llm/vector_store/storage.rb', line 25

# Returns the database handle exposed by Legion::Data.
#
# No availability check is performed here; callers in this module guard with
# +data_available?+ first.
#
# @return [Object] whatever +Legion::Data.db+ returns (presumably a
#   Sequel::Database, judging by the create_table?/add_index calls made on
#   it — TODO confirm)
def db
  Legion::Data.db
end

.ensure_tables! ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/legion/llm/vector_store/storage.rb', line 29

# Sets up the four tables used by the vector store: stores, store files,
# chunks and file batches, plus their lookup indexes.
#
# Returns immediately (nil) when +data_available?+ is false. Tables are
# declared through +db.create_table?+ (presumably Sequel's "create only if
# missing" variant — TODO confirm), and the extra indexes go through
# +safe_add_index+, which does not raise on failure.
#
# @return [Object, nil] nil when the data layer is unavailable; otherwise the
#   result of the final +log.debug+ call
# @raise [StandardError] any error raised during setup is reported through
#   +handle_exception+ at error level and then re-raised
def ensure_tables!
  return unless data_available?

  # Vector store records. Timestamps are Integer columns (presumably epoch
  # seconds as produced by now_ts — verify against callers).
  db.create_table?(:llm_vector_stores) do
    String   :id, primary_key: true
    String   :name
    String   :status, default: 'completed'
    String   :metadata_json, default: '{}'
    Integer  :usage_bytes, default: 0
    Integer  :expires_at
    Integer  :created_at
    Integer  :last_active_at
  end

  # Files attached to a vector store.
  db.create_table?(:llm_vector_store_files) do
    String   :id, primary_key: true
    String   :vector_store_id
    String   :file_id
    String   :status, default: 'in_progress'
    Integer  :usage_bytes, default: 0
    String   :chunking_strategy_json, default: '{"type":"auto"}'
    String   :attributes_json, default: '{}'
    String   :last_error_json
    Integer  :created_at
  end
  safe_add_index(:llm_vector_store_files, :vector_store_id, name: :idx_vsf_vector_store_id)
  safe_add_index(:llm_vector_store_files, :file_id, name: :idx_vsf_file_id)

  # Text chunks; the embedding is kept as a JSON string in a text column.
  db.create_table?(:llm_vector_store_chunks) do
    String   :id, primary_key: true
    String   :vector_store_id
    String   :vector_store_file_id
    String   :file_id
    Integer  :chunk_index
    String   :text, text: true
    String   :embedding_json, text: true
    Integer  :created_at
  end
  safe_add_index(:llm_vector_store_chunks, :vector_store_id, name: :idx_vsc_vector_store_id)
  safe_add_index(:llm_vector_store_chunks, :vector_store_file_id, name: :idx_vsc_vector_store_file_id)

  # File batches. Unlike the tables above, the index is declared inside the
  # table definition rather than through safe_add_index.
  db.create_table?(:llm_vector_store_batches) do
    String   :id, primary_key: true
    String   :vector_store_id
    String   :status, default: 'in_progress'
    String   :file_ids_json, default: '[]'
    String   :file_counts_json, default: '{"in_progress":0,"completed":0,"failed":0,"cancelled":0,"total":0}'
    Integer  :created_at
    index    :vector_store_id
  end

  # NOTE(review): this message is emitted unconditionally once setup finishes,
  # so "tables=created" does not by itself mean any table was newly created.
  log.debug('[llm][vector_store] action=ensure_tables tables=created')
rescue StandardError => e
  # Report, then propagate: callers see the original exception.
  handle_exception(e, level: :error, operation: 'llm.vector_store.ensure_tables')
  raise
end

.generate_id(prefix) ⇒ Object



88
89
90
# File 'lib/legion/llm/vector_store/storage.rb', line 88

# Builds a random identifier of the form "<prefix>_<20 hex characters>".
#
# @param prefix [#to_s] label placed before the underscore
# @return [String] the prefix, an underscore, and 10 random bytes rendered as
#   hexadecimal by SecureRandom
def generate_id(prefix)
  format('%<prefix>s_%<suffix>s', prefix: prefix, suffix: SecureRandom.hex(10))
end

.now_ts ⇒ Object



92
93
94
# File 'lib/legion/llm/vector_store/storage.rb', line 92

# Returns the current wall-clock time as whole seconds since the Unix epoch.
#
# @return [Integer] +Time.now+ truncated to an integer number of seconds
def now_ts
  Time.now.to_i
end

.safe_add_index(table, column, name:) ⇒ Object



133
134
135
136
137
# File 'lib/legion/llm/vector_store/storage.rb', line 133

# Adds an index on +column+ of +table+, treating any failure as non-fatal.
#
# The call is meant to be repeatable: when the index already exists the
# database error is absorbed. Because every StandardError is absorbed (not
# only "already exists"), the reason is written to the debug log so that an
# unrelated failure does not disappear without a trace.
#
# @param table [Symbol] table to index
# @param column [Symbol] column to index
# @param name [Symbol] explicit index name
# @return [Object, nil] the result of +db.add_index+, or nil when the attempt
#   failed and was skipped
def safe_add_index(table, column, name:)
  db.add_index(table, column, name: name)
rescue StandardError => e
  # Idempotent: table may already have this index.
  log.debug("[llm][vector_store] action=safe_add_index table=#{table} column=#{column} " \
            "index=#{name} skipped=true error_class=#{e.class} error=#{e.message}")
  nil
end