Class: IndexUtil::Index
- Inherits: Object
  - Object
  - IndexUtil::Index
- Defined in:
- lib/index_util/index.rb
Constant Summary collapse
- EMBED_MANY_LIMIT = 1000
- DEFAULT_LIMIT = 5
Class Method Summary collapse
Instance Method Summary collapse
- #database_file ⇒ Object
- #document_checksum(_document, content) ⇒ Object
- #document_content(document) ⇒ Object
- #document_list ⇒ Object
- #document_postprocess(_fragment_document, content) ⇒ Object
- #document_sections(_document, content) ⇒ Object
- #index_all!(progress: nil) ⇒ Object
- #index_new!(progress: nil) ⇒ Object
- #index_update!(progress: nil) ⇒ Object
- #query(query, limit: DEFAULT_LIMIT, verbose: false) ⇒ Object
- #query_amendments(_query) ⇒ Object
Class Method Details
Instance Method Details
#database_file ⇒ Object
20 21 22 |
# File 'lib/index_util/index.rb', line 20
#
# Placeholder implementation: it always raises, and the message names the
# receiver's class as the one that must define #database_file.
#
# @raise [Error] on every call
def database_file
  message = "#{self.class} must define #database_file"
  raise Error, message
end
#document_checksum(_document, content) ⇒ Object
32 33 34 |
# File 'lib/index_util/index.rb', line 32
#
# Computes the SHA-256 hex digest of +content+ after converting it with #to_s.
# The first argument is accepted but not read by this implementation.
#
# @param _document [Object] not used here
# @param content [#to_s] value whose string form is hashed
# @return [String] hexadecimal SHA-256 digest
def document_checksum(_document, content)
  text = content.to_s
  Digest::SHA256.hexdigest(text)
end
#document_content(document) ⇒ Object
28 29 30 |
# File 'lib/index_util/index.rb', line 28
#
# Reads and returns the contents of the file whose path is +document.to_s+.
#
# @param document [#to_s] value whose string form is used as the file path
# @return [String] whatever File.read returns for that path
def document_content(document)
  path = document.to_s
  File.read(path)
end
#document_list ⇒ Object
24 25 26 |
# File 'lib/index_util/index.rb', line 24
#
# Placeholder implementation: it always raises, and the message names the
# receiver's class as the one that must define #document_list.
#
# @raise [Error] on every call
def document_list
  message = "#{self.class} must define #document_list"
  raise Error, message
end
#document_postprocess(_fragment_document, content) ⇒ Object
40 41 42 |
# File 'lib/index_util/index.rb', line 40
#
# Pass-through implementation: hands +content+ back untouched and does not
# read the first argument.
#
# @param _fragment_document [Object] not used here
# @param content [Object] value to return
# @return [Object] the same +content+ object that was passed in
def document_postprocess(_fragment_document, content)
  content
end
#document_sections(_document, content) ⇒ Object
36 37 38 |
# File 'lib/index_util/index.rb', line 36
#
# Wraps the whole +content+ in a one-entry hash stored under the +nil+ key.
# The first argument is not read by this implementation.
#
# @param _document [Object] not used here
# @param content [Object] value stored under the +nil+ key
# @return [Hash] a new hash of the form { nil => content }
def document_sections(_document, content)
  sections = {}
  sections[nil] = content
  sections
end
#index_all!(progress: nil) ⇒ Object
48 49 50 51 52 53 54 55 |
# File 'lib/index_util/index.rb', line 48
#
# Rebuilds the database at #database_file with Database.rebuild!, obtains the
# documents from list_documents, and passes them to index_documents with
# mode: :all. The database handle is disconnected on the way out, including
# when an exception is raised after it was obtained.
#
# @param progress [#step, nil] optional reporter; when given it receives a
#   "Preparing index" step before the rebuild and is forwarded to the helpers
# @return [Object] the value returned by index_documents
def index_all!(progress: nil)
  unless progress.nil?
    progress.step("Preparing index", detail: "rebuilding #{database_file}")
  end
  connection = Database.rebuild!(database_file)
  index_documents(connection, list_documents(progress), mode: :all, progress: progress)
ensure
  # connection is still nil if the failure happened before the rebuild returned.
  connection.disconnect unless connection.nil?
end
#index_new!(progress: nil) ⇒ Object
57 58 59 60 61 62 63 64 |
# File 'lib/index_util/index.rb', line 57
#
# Opens the database at #database_file with Database.connect, obtains the
# documents from list_documents, and passes them to index_documents with
# mode: :new. The database handle is disconnected on the way out, including
# when an exception is raised after it was obtained.
#
# @param progress [#step, nil] optional reporter; when given it receives a
#   "Preparing index" step before connecting and is forwarded to the helpers
# @return [Object] the value returned by index_documents
def index_new!(progress: nil)
  unless progress.nil?
    progress.step("Preparing index", detail: "opening #{database_file}")
  end
  connection = Database.connect(database_file)
  index_documents(connection, list_documents(progress), mode: :new, progress: progress)
ensure
  # connection is still nil if the failure happened before connect returned.
  connection.disconnect unless connection.nil?
end
#index_update!(progress: nil) ⇒ Object
66 67 68 69 70 71 72 73 74 75 |
# File 'lib/index_util/index.rb', line 66
#
# Opens the database at #database_file with Database.connect, obtains the
# documents from list_documents, calls Document.delete_stale with the handle
# and that list, then passes the same list to index_documents with
# mode: :update. The database handle is disconnected on the way out, including
# when an exception is raised after it was obtained.
#
# @param progress [#step, nil] optional reporter; when given it receives a
#   "Preparing index" step and an "Updating index" step (current: 0,
#   total: number of documents) and is forwarded to the helpers
# @return [Object] the value returned by index_documents
def index_update!(progress: nil)
  reporting = !progress.nil?
  progress.step("Preparing index", detail: "opening #{database_file}") if reporting
  connection = Database.connect(database_file)
  docs = list_documents(progress)
  if reporting
    progress.step(
      "Updating index",
      current: 0,
      total: docs.length,
      detail: "removing stale documents"
    )
  end
  Document.delete_stale(connection, docs)
  index_documents(connection, docs, mode: :update, progress: progress)
ensure
  # connection is still nil if the failure happened before connect returned.
  connection.disconnect unless connection.nil?
end
#query(query, limit: DEFAULT_LIMIT, verbose: false) ⇒ Object
77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/index_util/index.rb', line 77
#
# Searches the index for +query+. It connects to the database at
# #database_file, gathers candidates from VectorIndex.search and
# FragmentsFts.search, combines them with the result of #query_amendments via
# merge_candidates, and returns what rerank produces. The database handle is
# disconnected on the way out, including when an exception is raised.
#
# NOTE(review): in this listing the name of the local holding the embedding,
# the method name after `EmbeddingUtil.`, and the second argument to
# VectorIndex.search were blank, leaving invalid Ruby. `embedding` and
# `embed` below are reconstructions — confirm against
# lib/index_util/index.rb before relying on them.
#
# @param query [#to_s] search text; its string form is used throughout
# @param limit [Integer, String] number of results requested; converted with
#   Integer()
# @param verbose [Boolean] forwarded unchanged to rerank
# @return [Object] the value returned by rerank
# @raise [ArgumentError, TypeError] when Integer() cannot convert +limit+
def query(query, limit: DEFAULT_LIMIT, verbose: false)
  db = Database.connect(database_file)
  limit = Integer(limit)
  # Each search is asked for ten times the requested limit, and never fewer
  # than 50 candidates.
  candidate_limit = [limit * 10, 50].max
  text = query.to_s
  embedding = EmbeddingUtil.embed(text)
  vector_candidates = VectorIndex.search(db, embedding, limit: candidate_limit)
  keyword_candidates = FragmentsFts.search(db, text, limit: candidate_limit)
  amendments = query_amendments(text)
  candidates = merge_candidates(vector_candidates, keyword_candidates, amendments)
  rerank(text, candidates, limit: limit, verbose: verbose)
ensure
  db&.disconnect
end
#query_amendments(_query) ⇒ Object
44 45 46 |
# File 'lib/index_util/index.rb', line 44
#
# Default implementation: ignores its argument and returns a new, empty hash
# on every call.
#
# @param _query [Object] not used here
# @return [Hash] an empty hash
def query_amendments(_query)
  {}
end