Module: IndexUtil::Document

Defined in:
lib/index_util/document.rb

Class Method Summary collapse

Class Method Details

.delete(db, document) ⇒ Object



24
25
26
27
28
29
30
31
32
33
# File 'lib/index_util/document.rb', line 24

# Removes a document row and the search-index entries of its fragments.
#
# Does nothing when no row in +documents+ matches the given document.
#
# @param db [Object] database handle; +db[:table]+ must return a dataset
#   responding to +where+, +first+, +all+ and +delete+
# @param document [#to_s] document identifier; compared in its string form
# @return [Object, nil] result of the final dataset delete, or nil when no
#   matching document row exists
def delete(db, document)
  record = db[:documents].where(document: document.to_s).first
  return unless record

  document_id = record.fetch(:id)
  fragments = db[:fragments].where(document_id: document_id).all

  # Each fragment is dropped from both the full-text and the vector index.
  fragments.each do |fragment|
    FragmentsFts.delete(db, fragment)
    VectorIndex.delete(db, fragment.fetch(:id))
  end

  # NOTE(review): rows in +fragments+ are not deleted here explicitly;
  # presumably the schema cascades from +documents+ — confirm.
  db[:documents].where(id: document_id).delete
end

.delete_stale(db, current_documents) ⇒ Object



17
18
19
20
21
22
# File 'lib/index_util/document.rb', line 17

# Deletes every indexed document that is not listed in +current_documents+.
#
# When +current_documents+ is empty no exclusion filter is applied, so every
# row in +documents+ is treated as stale and deleted.
#
# @param db [Object] database handle; +db[:documents]+ must return a dataset
#   responding to +exclude+ and +all+
# @param current_documents [Enumerable<#to_s>] identifiers of the documents
#   to keep; compared in their string form
# @return [Object] the collection of stale rows that was iterated
def delete_stale(db, current_documents)
  keep = current_documents.map(&:to_s)
  documents = db[:documents]
  stale = keep.empty? ? documents : documents.exclude(document: keep)

  stale.all.each do |row|
    delete(db, row.fetch(:document))
  end
end

.exist?(db, document) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
# File 'lib/index_util/document.rb', line 9

# Reports whether the +documents+ table holds a row for the given document.
#
# Probes the dataset for emptiness instead of counting matches: only the
# presence of a row matters, so there is no need to tally every match.
#
# @param db [Object] database handle; +db[:documents]+ must return a
#   Sequel-style dataset responding to +where+ and +empty?+
# @param document [#to_s] document identifier; compared in its string form
# @return [Boolean] true when at least one matching row exists
def exist?(db, document)
  !db[:documents].where(document: document.to_s).empty?
end

.replace_with_fragments(db, attributes) ⇒ Object

Raises:

  • (Error)


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/index_util/document.rb', line 35

# Replaces a document and all of its fragments inside one transaction.
#
# Any existing entry for the document is deleted first; then the document
# row, one row per fragment, one embedding row per fragment, and the
# matching full-text / vector index entries are inserted. All rows written
# by a single call share the same timestamp.
#
# @param db [Object] database handle responding to +transaction+ and +[]+
# @param attributes [Hash] must contain +:document+, +:checksum+,
#   +:fragments+, +:embeddings+ and +:profile+; each fragment must contain
#   +:section_id+, +:fragment_document+ and +:content+
# @raise [KeyError] when a required key is missing (+fetch+ without default)
# @raise [Error] when the number of fragments and embeddings differ
# @return [Object] whatever +db.transaction+ returns for the block
def replace_with_fragments(db, attributes)
  document, checksum, fragments, embeddings, profile =
    %i[document checksum fragments embeddings profile].map { |key| attributes.fetch(key) }

  if fragments.length != embeddings.length
    raise Error, "fragment and embedding counts differ"
  end

  db.transaction do
    delete(db, document)

    now = timestamp
    stamps = { created_at: now, updated_at: now }
    document_name = document.to_s
    document_id = db[:documents].insert(document: document_name, checksum: checksum.to_s, indexed_at: now, **stamps)

    fragments.zip(embeddings).each do |fragment, embedding|
      fragment_id = db[:fragments].insert(
        document_id: document_id,
        document: document_name,
        section_id: fragment.fetch(:section_id),
        fragment_document: fragment.fetch(:fragment_document),
        content: fragment.fetch(:content),
        **stamps
      )
      # Re-read the row so the full-text index receives it as stored.
      stored_fragment = db[:fragments].where(id: fragment_id).first

      profile_name = profile.to_s
      dimensions = embedding.length
      packed_vector = FragmentEmbedding.pack(embedding)
      vector_norm = FragmentEmbedding.norm(embedding)
      db[:fragment_embeddings].insert(
        fragment_id: fragment_id,
        profile: profile_name,
        dimensions: dimensions,
        vector: packed_vector,
        vector_norm: vector_norm,
        created_at: now
      )

      FragmentsFts.insert(db, stored_fragment)
      VectorIndex.insert(db, fragment_id, embedding)
    end
  end
end

.timestamp ⇒ Object



73
74
75
# File 'lib/index_util/document.rb', line 73

# Current wall-clock time in UTC, formatted as an ISO 8601 string.
#
# @return [String] e.g. "2024-01-01T12:34:56Z"
def timestamp
  current_utc = Time.now.getutc
  current_utc.iso8601
end

.unchanged?(db, document, checksum) ⇒ Boolean

Returns:

  • (Boolean)


13
14
15
# File 'lib/index_util/document.rb', line 13

# Reports whether the document is already stored with the given checksum.
#
# Probes the dataset for emptiness instead of counting matches: only the
# presence of a row matters, so there is no need to tally every match.
#
# @param db [Object] database handle; +db[:documents]+ must return a
#   Sequel-style dataset responding to +where+ and +empty?+
# @param document [#to_s] document identifier; compared in its string form
# @param checksum [#to_s] expected checksum; compared in its string form
# @return [Boolean] true when a row matches both document and checksum
def unchanged?(db, document, checksum)
  !db[:documents].where(document: document.to_s, checksum: checksum.to_s).empty?
end