Class: Rng::RncParser

Inherits:
Parslet::Parser
  • Object
show all
Defined in:
lib/rng/rnc_parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.convert_to_rng(tree) ⇒ Object

Convert parse tree to RNG XML



689
690
691
# File 'lib/rng/rnc_parser.rb', line 689

# Converts a parse tree to RNG XML by handing it to a fresh
# RncToRngConverter.
#
# @param tree the parse tree to convert
# @return whatever RncToRngConverter#convert returns for +tree+
def self.convert_to_rng(tree)
  converter = RncToRngConverter.new
  converter.convert(tree)
end

.extract_string(obj) ⇒ Object

Helper method to extract clean string without Parslet position markers



10
11
12
13
14
15
16
17
18
19
# File 'lib/rng/rnc_parser.rb', line 10

# Returns a plain string for +obj+, free of Parslet position markers.
#
# * objects responding to +str+ (e.g. Parslet::Slice) yield +obj.str+
# * String instances are returned as-is (same object)
# * anything else is converted with +to_s+
def self.extract_string(obj)
  return obj.str if obj.respond_to?(:str)
  return obj if obj.is_a?(String)

  obj.to_s
end

.parse(input) ⇒ Object



669
670
671
672
673
674
675
676
677
678
679
680
681
# File 'lib/rng/rnc_parser.rb', line 669

# Parses RNC source text into a Grammar.
#
# Pipeline: strip surrounding whitespace, resolve hex escapes via
# preprocess_hex_escapes, parse with a new parser instance, normalize the
# tree with ParseTreeProcessor, convert the normalized grammar tree to RNG
# XML, and build the result with Grammar.from_xml.
#
# @param input [String] RNC source text
# @return the object produced by Grammar.from_xml
def self.parse(input)
  rnc_parser = new
  raw_tree = rnc_parser.parse(preprocess_hex_escapes(input.strip))

  # Normalize, then take the grammar tree from the normalized result
  grammar_tree = ParseTreeProcessor.new(raw_tree).normalize.grammar_tree

  # RNG XML -> Grammar object
  Grammar.from_xml(convert_to_rng(grammar_tree))
end

.parse_file(file_path, base_dir = nil, visited_files = Set.new) ⇒ Object

Class method to parse a file with include resolution



595
596
597
# File 'lib/rng/rnc_parser.rb', line 595

# Parses an RNC file with include resolution by delegating to a fresh
# IncludeProcessor.
#
# @param file_path path of the file to parse
# @param base_dir passed through to IncludeProcessor#parse_file (default nil)
# @param visited_files passed through to IncludeProcessor#parse_file
#   (default: a new empty Set)
# @return whatever IncludeProcessor#parse_file returns
def self.parse_file(file_path, base_dir = nil, visited_files = Set.new)
  processor = IncludeProcessor.new
  processor.parse_file(file_path, base_dir, visited_files)
end

.preprocess_hex_escapes(input) ⇒ Object

Pre-process RNC input to resolve hex escapes (\x{HHHHHH}) outside of string literals. This allows keywords to contain hex escapes (e.g., \x{65}l\x{00065}ment = “element”). String literals keep their hex escapes for the parser to handle, because control characters like \x{A} (newline) are forbidden inside single-line quoted strings and must remain escaped.



605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
# File 'lib/rng/rnc_parser.rb', line 605

# Decodes \x{H...} escapes (1-6 hex digits) that appear outside string
# literals and comments, and returns the result as a new string.
#
# Copied through unchanged:
# * triple-quoted literals (""" ... """ and ''' ... ''')
# * single-line "..." and '...' literals; while looking for the closing
#   quote, a backslash causes the following character to be skipped
# * comments, from '#' up to (not including) the next newline
# * escapes that are malformed, name a surrogate (D800-DFFF), exceed
#   10FFFF, or name a code point below 0x20 (control characters are not
#   decoded outside strings)
#
# An unterminated literal extends to the end of the input.
#
# @param input [String] raw RNC source
# @return [String] source with eligible escapes replaced by their characters
def self.preprocess_hex_escapes(input)
  output = +''
  length = input.length
  pos = 0

  while pos < length
    char = input[pos]

    case char
    when '"', "'"
      fence = char * 3
      if input[pos, 3] == fence
        # Triple-quoted literal: ends at the next fence, or at end of input
        # when the fence is never closed.
        close_at = input.index(fence, pos + 3) || (length - 3)
        stop = close_at + 2
      else
        # Single-line literal: walk to the matching quote, stepping over
        # the character that follows a backslash.
        stop = pos + 1
        while stop < length && input[stop] != char
          stop += 1 if input[stop] == '\\' && stop + 1 < length
          stop += 1
        end
      end
      # A range running past the end of the string is truncated by String#[].
      output << input[pos..stop]
      pos = stop + 1
    when '#'
      # Comment: copy up to, but not including, the newline.
      line_end = input.index("\n", pos) || length
      output << input[pos...line_end]
      pos = line_end
    when '\\'
      # The longest valid escape is 10 characters (\x{ + 6 digits + }), so a
      # 10-character window is enough; this avoids scanning the rest of the
      # input for a closing brace on every stray backslash.
      escape = input[pos, 10].match(/\A\\x\{([0-9a-fA-F]{1,6})\}/)
      code_point = escape && escape[1].to_i(16)
      decodable = code_point &&
                  code_point.between?(0x20, 0x10FFFF) &&
                  !code_point.between?(0xD800, 0xDFFF)
      if decodable
        output << [code_point].pack('U')
        pos += escape[0].length
      else
        # Not a decodable escape: emit the backslash and carry on.
        output << char
        pos += 1
      end
    else
      output << char
      pos += 1
    end
  end

  output
end

.to_rnc(schema) ⇒ Object

Convert RNG schema to RNC



684
685
686
# File 'lib/rng/rnc_parser.rb', line 684

# Converts an RNG schema to RNC by handing it to a fresh RncBuilder.
#
# @param schema the RNG schema to convert
# @return whatever RncBuilder#build returns for +schema+
def self.to_rnc(schema)
  builder = RncBuilder.new
  builder.build(schema)
end

Instance Method Details

#keyword(kw) ⇒ Object

Match a keyword that may contain hex escapes. Hex escapes are resolved in pre-processing, so keywords match literally here. But we still need to handle the case where pre-processing didn’t happen.



49
50
51
# File 'lib/rng/rnc_parser.rb', line 49

# Builds the matcher for keyword +kw+ by passing it straight to +str+,
# i.e. the keyword text is matched literally.
def keyword(kw)
  literal = str(kw)
  literal
end