Class: IndexUtil::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/index_util/index.rb

Constant Summary collapse

EMBED_MANY_LIMIT =
1000
DEFAULT_LIMIT =
5

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.cli(argv = ARGV) ⇒ Object



15
16
17
# File 'lib/index_util/index.rb', line 15

# Command-line entry point for the index.
#
# Hands the argument list to CLI.run and passes the receiver (self) as the
# index class the CLI should operate on.
#
# @param argv [Object] arguments forwarded unchanged to CLI.run (defaults to ARGV)
# @return [Object] whatever CLI.run returns
def cli(argv = ARGV)
  index_class = self
  CLI.run(argv, index_class: index_class)
end

Instance Method Details

#database_file ⇒ Object

Raises:
  • (Error)



20
21
22
# File 'lib/index_util/index.rb', line 20

# Hook that supplies the index database location.
#
# This implementation never returns: it raises with a message telling the
# receiver's class that it must define #database_file itself.
#
# @raise [Error] always
def database_file
  message = "#{self.class} must define #database_file"
  raise Error, message
end

#document_checksum(_document, content) ⇒ Object



32
33
34
# File 'lib/index_util/index.rb', line 32

# Fingerprints a document's content.
#
# The content is stringified with #to_s and hashed with SHA-256; the document
# argument itself is not consulted by this implementation.
#
# @param _document [Object] ignored here
# @param content [#to_s] content to hash
# @return [String] hexadecimal SHA-256 digest of content.to_s
def document_checksum(_document, content)
  text = content.to_s
  Digest::SHA256.new.update(text).hexdigest
end

#document_content(document) ⇒ Object



28
29
30
# File 'lib/index_util/index.rb', line 28

# Loads the raw content of a document.
#
# The document is stringified with #to_s and the result is used as a
# filesystem path; any error raised by File.read propagates to the caller.
#
# @param document [#to_s] value whose string form is the path to read
# @return [String] the file's contents
def document_content(document)
  path = document.to_s
  File.read(path)
end

#document_list ⇒ Object

Raises:
  • (Error)



24
25
26
# File 'lib/index_util/index.rb', line 24

# Hook that supplies the documents to index.
#
# This implementation never returns: it raises with a message telling the
# receiver's class that it must define #document_list itself.
#
# @raise [Error] always
def document_list
  message = "#{self.class} must define #document_list"
  raise Error, message
end

#document_postprocess(_fragment_document, content) ⇒ Object



40
41
42
# File 'lib/index_util/index.rb', line 40

# Post-processing hook for document content.
#
# This implementation is a pass-through: the content comes back untouched
# (the very same object) and the first argument is not used.
#
# @param _fragment_document [Object] ignored here
# @param content [Object] content to post-process
# @return [Object] the content argument itself
def document_postprocess(_fragment_document, content)
  processed = content
  processed
end

#document_sections(_document, content) ⇒ Object



36
37
38
# File 'lib/index_util/index.rb', line 36

# Splits a document's content into sections.
#
# This implementation does no splitting: it returns a one-entry hash whose
# only key is nil and whose value is the full content. The document argument
# is not used.
#
# @param _document [Object] ignored here
# @param content [Object] the document's content
# @return [Hash] a hash of the form { nil => content }
def document_sections(_document, content)
  sections = {}
  sections[nil] = content
  sections
end

#index_all!(progress: nil) ⇒ Object



48
49
50
51
52
53
54
55
# File 'lib/index_util/index.rb', line 48

# Rebuilds the database and indexes every listed document.
#
# Steps: optionally report a "Preparing index" step, rebuild the database at
# #database_file via Database.rebuild!, obtain the documents from
# list_documents, and index them with mode :all. The database handle is
# disconnected on the way out whether or not indexing succeeded.
#
# @param progress [Object, nil] optional reporter that receives #step calls;
#   nil disables reporting
# @return [Object] whatever index_documents returns
def index_all!(progress: nil)
  connection = nil
  begin
    unless progress.nil?
      progress.step("Preparing index", detail: "rebuilding #{database_file}")
    end
    connection = Database.rebuild!(database_file)
    listed = list_documents(progress)
    index_documents(connection, listed, mode: :all, progress: progress)
  ensure
    # connection is still nil when the rebuild itself failed.
    connection.disconnect unless connection.nil?
  end
end

#index_new!(progress: nil) ⇒ Object



57
58
59
60
61
62
63
64
# File 'lib/index_util/index.rb', line 57

# Indexes the listed documents in :new mode against the existing database.
#
# Steps: optionally report a "Preparing index" step, connect to the database
# at #database_file via Database.connect, obtain the documents from
# list_documents, and hand them to index_documents with mode :new. The
# database handle is disconnected on the way out whether or not indexing
# succeeded.
#
# @param progress [Object, nil] optional reporter that receives #step calls;
#   nil disables reporting
# @return [Object] whatever index_documents returns
def index_new!(progress: nil)
  connection = nil
  begin
    unless progress.nil?
      progress.step("Preparing index", detail: "opening #{database_file}")
    end
    connection = Database.connect(database_file)
    listed = list_documents(progress)
    index_documents(connection, listed, mode: :new, progress: progress)
  ensure
    # connection is still nil when connecting itself failed.
    connection.disconnect unless connection.nil?
  end
end

#index_update!(progress: nil) ⇒ Object



66
67
68
69
70
71
72
73
74
75
# File 'lib/index_util/index.rb', line 66

# Brings the existing index in line with the current document list.
#
# Steps: optionally report a "Preparing index" step, connect to the database
# at #database_file, obtain the documents from list_documents, optionally
# report an "Updating index" step, call Document.delete_stale with the
# connection and the documents, then run index_documents with mode :update.
# The database handle is disconnected on the way out whether or not the
# update succeeded.
#
# @param progress [Object, nil] optional reporter that receives #step calls;
#   nil disables reporting
# @return [Object] whatever index_documents returns
def index_update!(progress: nil)
  connection = nil
  begin
    unless progress.nil?
      progress.step("Preparing index", detail: "opening #{database_file}")
    end
    connection = Database.connect(database_file)
    listed = list_documents(progress)
    unless progress.nil?
      progress.step(
        "Updating index",
        current: 0,
        total: listed.length,
        detail: "removing stale documents"
      )
    end
    Document.delete_stale(connection, listed)
    index_documents(connection, listed, mode: :update, progress: progress)
  ensure
    # connection is still nil when connecting itself failed.
    connection.disconnect unless connection.nil?
  end
end

#query(query, limit: DEFAULT_LIMIT, verbose: false) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/index_util/index.rb', line 77

# Searches the index and returns reranked results.
#
# Connects to the database at #database_file, coerces the limit with
# Integer(), then gathers candidates from two sources, each capped at
# max(limit * 10, 50): VectorIndex.search using an embedding of the query
# text, and FragmentsFts.search using the query text itself. Those candidates
# are merged together with the result of #query_amendments and passed to
# rerank, whose result is returned. The database handle is disconnected on
# the way out whether or not the search succeeded.
#
# @param query [#to_s] the search text (stringified at each use)
# @param limit [Object] anything Integer() accepts; forwarded to rerank
# @param verbose [Object] forwarded unchanged to rerank
# @return [Object] whatever rerank returns
# @raise [ArgumentError, TypeError] when Integer() rejects the limit
def query(query, limit: DEFAULT_LIMIT, verbose: false)
  connection = nil
  begin
    connection = Database.connect(database_file)
    limit = Integer(limit)
    # Over-fetch from each source so the reranker has more than `limit` to choose from.
    pool_size = [limit * 10, 50].max
    embedding = EmbeddingUtil.embed(query.to_s)
    by_vector = VectorIndex.search(connection, embedding, limit: pool_size)
    by_keyword = FragmentsFts.search(connection, query.to_s, limit: pool_size)
    extras = query_amendments(query.to_s)
    merged = merge_candidates(by_vector, by_keyword, extras)
    rerank(query.to_s, merged, limit: limit, verbose: verbose)
  ensure
    # connection is still nil when connecting itself failed.
    connection.disconnect unless connection.nil?
  end
end

#query_amendments(_query) ⇒ Object



44
45
46
# File 'lib/index_util/index.rb', line 44

# Hook for contributing extra data to a search.
#
# This implementation contributes nothing: it ignores its argument and
# returns a new, empty hash on every call.
#
# @param _query [Object] ignored here
# @return [Hash] an empty hash
def query_amendments(_query)
  amendments = {}
  amendments
end