Module: Legion::Extensions::Knowledge::Helpers::Chunker

Defined in:
lib/legion/extensions/knowledge/helpers/chunker.rb

Constant Summary collapse

CHARS_PER_TOKEN = 4

Class Method Summary collapse

Class Method Details

.chunk(sections:, max_tokens: nil, overlap_tokens: nil) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/legion/extensions/knowledge/helpers/chunker.rb', line 14

# Splits every section and returns all resulting pieces as one flat list.
#
# Token budgets are turned into character budgets by multiplying them by
# CHARS_PER_TOKEN; the character budgets are what +split_section+ receives.
#
# @param sections [Enumerable] items to split; each one is handed unchanged
#   to +split_section+
# @param max_tokens [Integer, nil] token budget; when nil (or false) the
#   value of +settings_max_tokens+ is used, and 512 if that is also nil
# @param overlap_tokens [Integer, nil] overlap budget in tokens; when nil
#   (or false) the value of +settings_overlap_tokens+ is used, and 128 if
#   that is also nil
# @return [Array] the results of +split_section+ for every section,
#   concatenated in iteration order
#
# NOTE(review): +split_section+ and the +settings_*+ helpers are defined
# outside this listing; presumably the former returns an Array of chunks
# no longer than the character budget, with neighbouring chunks sharing
# the overlap — confirm against their definitions.
def chunk(sections:, max_tokens: nil, overlap_tokens: nil)
  # An explicit argument wins over the configured value, which wins over
  # the hard-coded default.
  token_limit   = max_tokens     || settings_max_tokens     || 512
  token_overlap = overlap_tokens || settings_overlap_tokens || 128

  char_limit   = token_limit * CHARS_PER_TOKEN
  char_overlap = token_overlap * CHARS_PER_TOKEN

  sections.each_with_object([]) do |section, collected|
    collected.concat(split_section(section, char_limit, char_overlap))
  end
end