Class: SemanticTextChunker::ChunkBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/semantic_text_chunker/chunk_builder.rb

Instance Method Summary collapse

Constructor Details

#initialize(sentences:, boundaries:, overlap_sentences:) ⇒ ChunkBuilder

Returns a new instance of ChunkBuilder.



3
4
5
6
7
# File 'lib/semantic_text_chunker/chunk_builder.rb', line 3

# Captures the inputs that #build later turns into chunk strings.
#
# @param sentences [Array<String>] sentences in document order; #build joins
#   slices of this collection with a single space
# @param boundaries [Array<Integer>] sentence indices used by #build as the
#   inclusive end of each chunk
# @param overlap_sentences [Integer] how many sentences #build steps back
#   from the previous boundary when starting the next chunk
def initialize(sentences:, boundaries:, overlap_sentences:)
  @sentences, @boundaries = sentences, boundaries
  @overlap_sentences = overlap_sentences
end

Instance Method Details

#build ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/semantic_text_chunker/chunk_builder.rb', line 9

# Joins the sentences into chunk strings, ending a chunk at every boundary
# index and at the last sentence.
#
# Each boundary is the inclusive index of the final sentence of a chunk.
# Every chunk after the first starts @overlap_sentences sentences before the
# sentence that follows the previous boundary, so consecutive chunks share
# that many sentences (clamped at the start of the document).
#
# Boundaries are normalised before use: indices outside the sentence range
# are ignored, duplicates (including a boundary equal to the last sentence
# index) are collapsed, and the remainder is processed in ascending order.
# A negative overlap is treated as zero.
#
# @return [Array<String>] the chunks in document order; empty when there are
#   no sentences. With no boundaries the single chunk is the plain
#   space-joined text; otherwise each chunk is stripped and blank chunks are
#   dropped.
def build
  return [] if @sentences.empty?
  return [@sentences.join(" ")] if @boundaries.empty?

  last_index = @sentences.size - 1
  # A negative overlap would push the next start past unconsumed sentences.
  overlap = [@overlap_sentences, 0].max

  # Always close the final chunk at the last sentence. Dropping out-of-range
  # points, de-duplicating and sorting guarantees start <= boundary below, so
  # the slice is never nil and no overlap-only trailing chunk is produced.
  split_points = (@boundaries + [last_index])
                 .select { |point| point.between?(0, last_index) }
                 .uniq
                 .sort

  chunks = []
  prev_end = nil

  split_points.each do |boundary|
    # First chunk starts at the beginning; later ones step back for overlap.
    start = prev_end ? [prev_end - overlap + 1, 0].max : 0

    chunk = @sentences[start..boundary].join(" ").strip
    chunks << chunk unless chunk.empty?
    prev_end = boundary
  end

  chunks
end