Module: Kernai::Parser

Defined in:
lib/kernai/parser.rb

Constant Summary collapse

BLOCK_PATTERN =

Canonical: <block type="TYPE" name="NAME">content</block>

%r{<block\s+type="([^"]+)"(?:\s+name="([^"]*)")?\s*>(.*?)</block>}m
SHORTHAND_TYPES =

Shorthand: <TYPE name="NAME">content</TYPE> (e.g. <final>answer</final>)

Block::TYPES.map(&:to_s).join('|')
SHORTHAND_PATTERN =
%r{<(#{SHORTHAND_TYPES})(?:\s+name="([^"]*)")?\s*>(.*?)</\1>}m

Class Method Summary collapse

Class Method Details

.parse(text) ⇒ Object

Scans the response text once and weaves blocks and text segments back in source order. The method is linear but touches a lot of locals (matches, positions, segments) which drives AbcSize up without adding real complexity. rubocop:disable Metrics/AbcSize



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/kernai/parser.rb', line 18

# Splits +text+ into structured blocks and the free text found between them.
#
# Two syntaxes are recognised: the canonical form matched by BLOCK_PATTERN
# and the shorthand form matched by SHORTHAND_PATTERN. A shorthand match
# whose opening tag starts inside a canonical block is ignored. Matches are
# then processed in order of their start offset.
#
# @param text [String] the raw text to scan
# @return [Hash] +:blocks+ holds one Block per match (type converted to a
#   Symbol); +:text_segments+ holds the substrings between matches, in
#   order, excluding segments that are empty or whitespace-only
def parse(text)
  blocks = []
  text_segments = []

  # Collect matches from both patterns together with their offsets.
  matches = []

  text.scan(BLOCK_PATTERN) do
    m = Regexp.last_match
    matches << { pos: m.begin(0), end_pos: m.end(0), type: m[1], name: m[2], content: m[3] }
  end

  text.scan(SHORTHAND_PATTERN) do
    m = Regexp.last_match
    start = m.begin(0)
    # Skip shorthand tags that open inside an already-recorded match.
    next if matches.any? { |existing| start >= existing[:pos] && start < existing[:end_pos] }

    matches << { pos: start, end_pos: m.end(0), type: m[1], name: m[2], content: m[3] }
  end

  matches.sort_by! { |m| m[:pos] }

  last_end = 0
  matches.each do |m|
    if m[:pos] > last_end
      segment = text[last_end...m[:pos]]
      text_segments << segment unless segment.strip.empty?
    end

    blocks << Block.new(type: m[:type].to_sym, content: m[:content], name: m[:name])
    # A shorthand element may enclose a canonical block; the enclosed match
    # ends earlier than the enclosing one. Never move the cursor backwards,
    # otherwise the outer closing tag would be reported as plain text.
    last_end = [last_end, m[:end_pos]].max
  end

  if last_end < text.length
    segment = text[last_end..]
    text_segments << segment unless segment.strip.empty?
  end

  { blocks: blocks, text_segments: text_segments }
end