Class: Dms::Parser

Inherits:

Object

Object
Dms::Parser

show all

Defined in: lib/dms/parser.rb

Defined Under Namespace

Classes: HBody

Constant Summary collapse

SP = ———- Byte constants (faster than ?x.ord at call site) ———-

0x20

TAB =

0x09

LF =

0x0A

CR =

0x0D

HASH = ‘#’

0x23

SLASH = ‘/’

0x2F

STAR = ‘*’

0x2A

BANG = ‘!’ (unused)

0x21

PLUS = ‘+’

0x2B

MINUS = ‘-’

0x2D

COLON = ‘:’

0x3A

COMMA = ‘,’

0x2C

LBRACK = ‘[’

0x5B

RBRACK = ‘]’

0x5D

LBRACE = ‘{’

0x7B

RBRACE = ‘}’

0x7D

DOT = ‘.’

0x2E

UNDERSCORE = ‘_’

0x5F

BACKSLASH = ‘\’

0x5C

DQUOTE = ‘"’

0x22

SQUOTE = “'”

0x27

DIGIT0 =

0x30

DIGIT9 =

0x39

LOWER_A =

0x61

LOWER_F =

0x66

LOWER_Z =

0x7A

UPPER_A =

0x41

UPPER_F =

0x46

UPPER_Z =

0x5A

LOWER_X =

0x78

LOWER_O =

0x6F

LOWER_B =

0x62

LOWER_E =

0x65

UPPER_E =

0x45

LOWER_P =

0x70

LOWER_T =

0x74

LOWER_N =

0x6E

LOWER_U =

0x75

UPPER_U =

0x55

UPPER_T =

0x54

UPPER_Z_LETTER =

0x5A

LOWER_Z_LETTER =

0x7A

LOWER_R =

0x72

LOWER_F_LETTER =

0x66

HEX_DIGITS =

"0123456789abcdefABCDEF"

BUILT_IN_HEREDOC_MODIFIERS =

%w[_trim _fold_paragraphs].freeze

BARE_KEY_BYTE = Bare-key fast lookup: 256-entry table, value true iff that ASCII byte is a bare-key char (alnum, ‘_’, ‘-’).

Array.new(256, false)

RESERVED_SIGIL_BYTE = Reserved decorator sigils (SPEC tier-0): a body line whose first non-whitespace byte is one of these is a parse error. The check is line-start only; these chars are unrestricted inside string bodies, comments, and heredoc bodies. Underscore is intentionally NOT in this set (it remains a valid identifier-leading byte).

Array.new(256, false)

XID_CONTINUE_RANGES = XID_Continue snapshot, frozen at Unicode 15.1 per UAX #31 §2 default identifier syntax (XID_Continue \ Default_Ignorable_Code_Point). Embedded so accept/reject is identical across Ruby/Onigmo Unicode-data versions. 773 sorted, non-overlapping ranges; binary-searched by #xid_continue?.

[
  [0x00AA, 0x00AA],
  [0x00B5, 0x00B5],
  [0x00B7, 0x00B7],
  [0x00BA, 0x00BA],
  [0x00C0, 0x00D6],
  [0x00D8, 0x00F6],
  [0x00F8, 0x02C1],
  [0x02C6, 0x02D1],
  [0x02E0, 0x02E4],
  [0x02EC, 0x02EC],
  [0x02EE, 0x02EE],
  [0x0300, 0x034E],
  [0x0350, 0x0374],
  [0x0376, 0x0377],
  [0x037B, 0x037D],
  [0x037F, 0x037F],
  [0x0386, 0x038A],
  [0x038C, 0x038C],
  [0x038E, 0x03A1],
  [0x03A3, 0x03F5],
  [0x03F7, 0x0481],
  [0x0483, 0x0487],
  [0x048A, 0x052F],
  [0x0531, 0x0556],
  [0x0559, 0x0559],
  [0x0560, 0x0588],
  [0x0591, 0x05BD],
  [0x05BF, 0x05BF],
  [0x05C1, 0x05C2],
  [0x05C4, 0x05C5],
  [0x05C7, 0x05C7],
  [0x05D0, 0x05EA],
  [0x05EF, 0x05F2],
  [0x0610, 0x061A],
  [0x0620, 0x0669],
  [0x066E, 0x06D3],
  [0x06D5, 0x06DC],
  [0x06DF, 0x06E8],
  [0x06EA, 0x06FC],
  [0x06FF, 0x06FF],
  [0x0710, 0x074A],
  [0x074D, 0x07B1],
  [0x07C0, 0x07F5],
  [0x07FA, 0x07FA],
  [0x07FD, 0x07FD],
  [0x0800, 0x082D],
  [0x0840, 0x085B],
  [0x0860, 0x086A],
  [0x0870, 0x0887],
  [0x0889, 0x088E],
  [0x0898, 0x08E1],
  [0x08E3, 0x0963],
  [0x0966, 0x096F],
  [0x0971, 0x0983],
  [0x0985, 0x098C],
  [0x098F, 0x0990],
  [0x0993, 0x09A8],
  [0x09AA, 0x09B0],
  [0x09B2, 0x09B2],
  [0x09B6, 0x09B9],
  [0x09BC, 0x09C4],
  [0x09C7, 0x09C8],
  [0x09CB, 0x09CE],
  [0x09D7, 0x09D7],
  [0x09DC, 0x09DD],
  [0x09DF, 0x09E3],
  [0x09E6, 0x09F1],
  [0x09FC, 0x09FC],
  [0x09FE, 0x09FE],
  [0x0A01, 0x0A03],
  [0x0A05, 0x0A0A],
  [0x0A0F, 0x0A10],
  [0x0A13, 0x0A28],
  [0x0A2A, 0x0A30],
  [0x0A32, 0x0A33],
  [0x0A35, 0x0A36],
  [0x0A38, 0x0A39],
  [0x0A3C, 0x0A3C],
  [0x0A3E, 0x0A42],
  [0x0A47, 0x0A48],
  [0x0A4B, 0x0A4D],
  [0x0A51, 0x0A51],
  [0x0A59, 0x0A5C],
  [0x0A5E, 0x0A5E],
  [0x0A66, 0x0A75],
  [0x0A81, 0x0A83],
  [0x0A85, 0x0A8D],
  [0x0A8F, 0x0A91],
  [0x0A93, 0x0AA8],
  [0x0AAA, 0x0AB0],
  [0x0AB2, 0x0AB3],
  [0x0AB5, 0x0AB9],
  [0x0ABC, 0x0AC5],
  [0x0AC7, 0x0AC9],
  [0x0ACB, 0x0ACD],
  [0x0AD0, 0x0AD0],
  [0x0AE0, 0x0AE3],
  [0x0AE6, 0x0AEF],
  [0x0AF9, 0x0AFF],
  [0x0B01, 0x0B03],
  [0x0B05, 0x0B0C],
  [0x0B0F, 0x0B10],
  [0x0B13, 0x0B28],
  [0x0B2A, 0x0B30],
  [0x0B32, 0x0B33],
  [0x0B35, 0x0B39],
  [0x0B3C, 0x0B44],
  [0x0B47, 0x0B48],
  [0x0B4B, 0x0B4D],
  [0x0B55, 0x0B57],
  [0x0B5C, 0x0B5D],
  [0x0B5F, 0x0B63],
  [0x0B66, 0x0B6F],
  [0x0B71, 0x0B71],
  [0x0B82, 0x0B83],
  [0x0B85, 0x0B8A],
  [0x0B8E, 0x0B90],
  [0x0B92, 0x0B95],
  [0x0B99, 0x0B9A],
  [0x0B9C, 0x0B9C],
  [0x0B9E, 0x0B9F],
  [0x0BA3, 0x0BA4],
  [0x0BA8, 0x0BAA],
  [0x0BAE, 0x0BB9],
  [0x0BBE, 0x0BC2],
  [0x0BC6, 0x0BC8],
  [0x0BCA, 0x0BCD],
  [0x0BD0, 0x0BD0],
  [0x0BD7, 0x0BD7],
  [0x0BE6, 0x0BEF],
  [0x0C00, 0x0C0C],
  [0x0C0E, 0x0C10],
  [0x0C12, 0x0C28],
  [0x0C2A, 0x0C39],
  [0x0C3C, 0x0C44],
  [0x0C46, 0x0C48],
  [0x0C4A, 0x0C4D],
  [0x0C55, 0x0C56],
  [0x0C58, 0x0C5A],
  [0x0C5D, 0x0C5D],
  [0x0C60, 0x0C63],
  [0x0C66, 0x0C6F],
  [0x0C80, 0x0C83],
  [0x0C85, 0x0C8C],
  [0x0C8E, 0x0C90],
  [0x0C92, 0x0CA8],
  [0x0CAA, 0x0CB3],
  [0x0CB5, 0x0CB9],
  [0x0CBC, 0x0CC4],
  [0x0CC6, 0x0CC8],
  [0x0CCA, 0x0CCD],
  [0x0CD5, 0x0CD6],
  [0x0CDD, 0x0CDE],
  [0x0CE0, 0x0CE3],
  [0x0CE6, 0x0CEF],
  [0x0CF1, 0x0CF3],
  [0x0D00, 0x0D0C],
  [0x0D0E, 0x0D10],
  [0x0D12, 0x0D44],
  [0x0D46, 0x0D48],
  [0x0D4A, 0x0D4E],
  [0x0D54, 0x0D57],
  [0x0D5F, 0x0D63],
  [0x0D66, 0x0D6F],
  [0x0D7A, 0x0D7F],
  [0x0D81, 0x0D83],
  [0x0D85, 0x0D96],
  [0x0D9A, 0x0DB1],
  [0x0DB3, 0x0DBB],
  [0x0DBD, 0x0DBD],
  [0x0DC0, 0x0DC6],
  [0x0DCA, 0x0DCA],
  [0x0DCF, 0x0DD4],
  [0x0DD6, 0x0DD6],
  [0x0DD8, 0x0DDF],
  [0x0DE6, 0x0DEF],
  [0x0DF2, 0x0DF3],
  [0x0E01, 0x0E3A],
  [0x0E40, 0x0E4E],
  [0x0E50, 0x0E59],
  [0x0E81, 0x0E82],
  [0x0E84, 0x0E84],
  [0x0E86, 0x0E8A],
  [0x0E8C, 0x0EA3],
  [0x0EA5, 0x0EA5],
  [0x0EA7, 0x0EBD],
  [0x0EC0, 0x0EC4],
  [0x0EC6, 0x0EC6],
  [0x0EC8, 0x0ECE],
  [0x0ED0, 0x0ED9],
  [0x0EDC, 0x0EDF],
  [0x0F00, 0x0F00],
  [0x0F18, 0x0F19],
  [0x0F20, 0x0F29],
  [0x0F35, 0x0F35],
  [0x0F37, 0x0F37],
  [0x0F39, 0x0F39],
  [0x0F3E, 0x0F47],
  [0x0F49, 0x0F6C],
  [0x0F71, 0x0F84],
  [0x0F86, 0x0F97],
  [0x0F99, 0x0FBC],
  [0x0FC6, 0x0FC6],
  [0x1000, 0x1049],
  [0x1050, 0x109D],
  [0x10A0, 0x10C5],
  [0x10C7, 0x10C7],
  [0x10CD, 0x10CD],
  [0x10D0, 0x10FA],
  [0x10FC, 0x115E],
  [0x1161, 0x1248],
  [0x124A, 0x124D],
  [0x1250, 0x1256],
  [0x1258, 0x1258],
  [0x125A, 0x125D],
  [0x1260, 0x1288],
  [0x128A, 0x128D],
  [0x1290, 0x12B0],
  [0x12B2, 0x12B5],
  [0x12B8, 0x12BE],
  [0x12C0, 0x12C0],
  [0x12C2, 0x12C5],
  [0x12C8, 0x12D6],
  [0x12D8, 0x1310],
  [0x1312, 0x1315],
  [0x1318, 0x135A],
  [0x135D, 0x135F],
  [0x1369, 0x1371],
  [0x1380, 0x138F],
  [0x13A0, 0x13F5],
  [0x13F8, 0x13FD],
  [0x1401, 0x166C],
  [0x166F, 0x167F],
  [0x1681, 0x169A],
  [0x16A0, 0x16EA],
  [0x16EE, 0x16F8],
  [0x1700, 0x1715],
  [0x171F, 0x1734],
  [0x1740, 0x1753],
  [0x1760, 0x176C],
  [0x176E, 0x1770],
  [0x1772, 0x1773],
  [0x1780, 0x17B3],
  [0x17B6, 0x17D3],
  [0x17D7, 0x17D7],
  [0x17DC, 0x17DD],
  [0x17E0, 0x17E9],
  [0x1810, 0x1819],
  [0x1820, 0x1878],
  [0x1880, 0x18AA],
  [0x18B0, 0x18F5],
  [0x1900, 0x191E],
  [0x1920, 0x192B],
  [0x1930, 0x193B],
  [0x1946, 0x196D],
  [0x1970, 0x1974],
  [0x1980, 0x19AB],
  [0x19B0, 0x19C9],
  [0x19D0, 0x19DA],
  [0x1A00, 0x1A1B],
  [0x1A20, 0x1A5E],
  [0x1A60, 0x1A7C],
  [0x1A7F, 0x1A89],
  [0x1A90, 0x1A99],
  [0x1AA7, 0x1AA7],
  [0x1AB0, 0x1ABD],
  [0x1ABF, 0x1ACE],
  [0x1B00, 0x1B4C],
  [0x1B50, 0x1B59],
  [0x1B6B, 0x1B73],
  [0x1B80, 0x1BF3],
  [0x1C00, 0x1C37],
  [0x1C40, 0x1C49],
  [0x1C4D, 0x1C7D],
  [0x1C80, 0x1C88],
  [0x1C90, 0x1CBA],
  [0x1CBD, 0x1CBF],
  [0x1CD0, 0x1CD2],
  [0x1CD4, 0x1CFA],
  [0x1D00, 0x1F15],
  [0x1F18, 0x1F1D],
  [0x1F20, 0x1F45],
  [0x1F48, 0x1F4D],
  [0x1F50, 0x1F57],
  [0x1F59, 0x1F59],
  [0x1F5B, 0x1F5B],
  [0x1F5D, 0x1F5D],
  [0x1F5F, 0x1F7D],
  [0x1F80, 0x1FB4],
  [0x1FB6, 0x1FBC],
  [0x1FBE, 0x1FBE],
  [0x1FC2, 0x1FC4],
  [0x1FC6, 0x1FCC],
  [0x1FD0, 0x1FD3],
  [0x1FD6, 0x1FDB],
  [0x1FE0, 0x1FEC],
  [0x1FF2, 0x1FF4],
  [0x1FF6, 0x1FFC],
  [0x203F, 0x2040],
  [0x2054, 0x2054],
  [0x2071, 0x2071],
  [0x207F, 0x207F],
  [0x2090, 0x209C],
  [0x20D0, 0x20DC],
  [0x20E1, 0x20E1],
  [0x20E5, 0x20F0],
  [0x2102, 0x2102],
  [0x2107, 0x2107],
  [0x210A, 0x2113],
  [0x2115, 0x2115],
  [0x2118, 0x211D],
  [0x2124, 0x2124],
  [0x2126, 0x2126],
  [0x2128, 0x2128],
  [0x212A, 0x2139],
  [0x213C, 0x213F],
  [0x2145, 0x2149],
  [0x214E, 0x214E],
  [0x2160, 0x2188],
  [0x2C00, 0x2CE4],
  [0x2CEB, 0x2CF3],
  [0x2D00, 0x2D25],
  [0x2D27, 0x2D27],
  [0x2D2D, 0x2D2D],
  [0x2D30, 0x2D67],
  [0x2D6F, 0x2D6F],
  [0x2D7F, 0x2D96],
  [0x2DA0, 0x2DA6],
  [0x2DA8, 0x2DAE],
  [0x2DB0, 0x2DB6],
  [0x2DB8, 0x2DBE],
  [0x2DC0, 0x2DC6],
  [0x2DC8, 0x2DCE],
  [0x2DD0, 0x2DD6],
  [0x2DD8, 0x2DDE],
  [0x2DE0, 0x2DFF],
  [0x3005, 0x3007],
  [0x3021, 0x302F],
  [0x3031, 0x3035],
  [0x3038, 0x303C],
  [0x3041, 0x3096],
  [0x3099, 0x309A],
  [0x309D, 0x309F],
  [0x30A1, 0x30FF],
  [0x3105, 0x312F],
  [0x3131, 0x3163],
  [0x3165, 0x318E],
  [0x31A0, 0x31BF],
  [0x31F0, 0x31FF],
  [0x3400, 0x4DBF],
  [0x4E00, 0xA48C],
  [0xA4D0, 0xA4FD],
  [0xA500, 0xA60C],
  [0xA610, 0xA62B],
  [0xA640, 0xA66F],
  [0xA674, 0xA67D],
  [0xA67F, 0xA6F1],
  [0xA717, 0xA71F],
  [0xA722, 0xA788],
  [0xA78B, 0xA7CA],
  [0xA7D0, 0xA7D1],
  [0xA7D3, 0xA7D3],
  [0xA7D5, 0xA7D9],
  [0xA7F2, 0xA827],
  [0xA82C, 0xA82C],
  [0xA840, 0xA873],
  [0xA880, 0xA8C5],
  [0xA8D0, 0xA8D9],
  [0xA8E0, 0xA8F7],
  [0xA8FB, 0xA8FB],
  [0xA8FD, 0xA92D],
  [0xA930, 0xA953],
  [0xA960, 0xA97C],
  [0xA980, 0xA9C0],
  [0xA9CF, 0xA9D9],
  [0xA9E0, 0xA9FE],
  [0xAA00, 0xAA36],
  [0xAA40, 0xAA4D],
  [0xAA50, 0xAA59],
  [0xAA60, 0xAA76],
  [0xAA7A, 0xAAC2],
  [0xAADB, 0xAADD],
  [0xAAE0, 0xAAEF],
  [0xAAF2, 0xAAF6],
  [0xAB01, 0xAB06],
  [0xAB09, 0xAB0E],
  [0xAB11, 0xAB16],
  [0xAB20, 0xAB26],
  [0xAB28, 0xAB2E],
  [0xAB30, 0xAB5A],
  [0xAB5C, 0xAB69],
  [0xAB70, 0xABEA],
  [0xABEC, 0xABED],
  [0xABF0, 0xABF9],
  [0xAC00, 0xD7A3],
  [0xD7B0, 0xD7C6],
  [0xD7CB, 0xD7FB],
  [0xF900, 0xFA6D],
  [0xFA70, 0xFAD9],
  [0xFB00, 0xFB06],
  [0xFB13, 0xFB17],
  [0xFB1D, 0xFB28],
  [0xFB2A, 0xFB36],
  [0xFB38, 0xFB3C],
  [0xFB3E, 0xFB3E],
  [0xFB40, 0xFB41],
  [0xFB43, 0xFB44],
  [0xFB46, 0xFBB1],
  [0xFBD3, 0xFC5D],
  [0xFC64, 0xFD3D],
  [0xFD50, 0xFD8F],
  [0xFD92, 0xFDC7],
  [0xFDF0, 0xFDF9],
  [0xFE20, 0xFE2F],
  [0xFE33, 0xFE34],
  [0xFE4D, 0xFE4F],
  [0xFE71, 0xFE71],
  [0xFE73, 0xFE73],
  [0xFE77, 0xFE77],
  [0xFE79, 0xFE79],
  [0xFE7B, 0xFE7B],
  [0xFE7D, 0xFE7D],
  [0xFE7F, 0xFEFC],
  [0xFF10, 0xFF19],
  [0xFF21, 0xFF3A],
  [0xFF3F, 0xFF3F],
  [0xFF41, 0xFF5A],
  [0xFF65, 0xFF9F],
  [0xFFA1, 0xFFBE],
  [0xFFC2, 0xFFC7],
  [0xFFCA, 0xFFCF],
  [0xFFD2, 0xFFD7],
  [0xFFDA, 0xFFDC],
  [0x10000, 0x1000B],
  [0x1000D, 0x10026],
  [0x10028, 0x1003A],
  [0x1003C, 0x1003D],
  [0x1003F, 0x1004D],
  [0x10050, 0x1005D],
  [0x10080, 0x100FA],
  [0x10140, 0x10174],
  [0x101FD, 0x101FD],
  [0x10280, 0x1029C],
  [0x102A0, 0x102D0],
  [0x102E0, 0x102E0],
  [0x10300, 0x1031F],
  [0x1032D, 0x1034A],
  [0x10350, 0x1037A],
  [0x10380, 0x1039D],
  [0x103A0, 0x103C3],
  [0x103C8, 0x103CF],
  [0x103D1, 0x103D5],
  [0x10400, 0x1049D],
  [0x104A0, 0x104A9],
  [0x104B0, 0x104D3],
  [0x104D8, 0x104FB],
  [0x10500, 0x10527],
  [0x10530, 0x10563],
  [0x10570, 0x1057A],
  [0x1057C, 0x1058A],
  [0x1058C, 0x10592],
  [0x10594, 0x10595],
  [0x10597, 0x105A1],
  [0x105A3, 0x105B1],
  [0x105B3, 0x105B9],
  [0x105BB, 0x105BC],
  [0x10600, 0x10736],
  [0x10740, 0x10755],
  [0x10760, 0x10767],
  [0x10780, 0x10785],
  [0x10787, 0x107B0],
  [0x107B2, 0x107BA],
  [0x10800, 0x10805],
  [0x10808, 0x10808],
  [0x1080A, 0x10835],
  [0x10837, 0x10838],
  [0x1083C, 0x1083C],
  [0x1083F, 0x10855],
  [0x10860, 0x10876],
  [0x10880, 0x1089E],
  [0x108E0, 0x108F2],
  [0x108F4, 0x108F5],
  [0x10900, 0x10915],
  [0x10920, 0x10939],
  [0x10980, 0x109B7],
  [0x109BE, 0x109BF],
  [0x10A00, 0x10A03],
  [0x10A05, 0x10A06],
  [0x10A0C, 0x10A13],
  [0x10A15, 0x10A17],
  [0x10A19, 0x10A35],
  [0x10A38, 0x10A3A],
  [0x10A3F, 0x10A3F],
  [0x10A60, 0x10A7C],
  [0x10A80, 0x10A9C],
  [0x10AC0, 0x10AC7],
  [0x10AC9, 0x10AE6],
  [0x10B00, 0x10B35],
  [0x10B40, 0x10B55],
  [0x10B60, 0x10B72],
  [0x10B80, 0x10B91],
  [0x10C00, 0x10C48],
  [0x10C80, 0x10CB2],
  [0x10CC0, 0x10CF2],
  [0x10D00, 0x10D27],
  [0x10D30, 0x10D39],
  [0x10E80, 0x10EA9],
  [0x10EAB, 0x10EAC],
  [0x10EB0, 0x10EB1],
  [0x10EFD, 0x10F1C],
  [0x10F27, 0x10F27],
  [0x10F30, 0x10F50],
  [0x10F70, 0x10F85],
  [0x10FB0, 0x10FC4],
  [0x10FE0, 0x10FF6],
  [0x11000, 0x11046],
  [0x11066, 0x11075],
  [0x1107F, 0x110BA],
  [0x110C2, 0x110C2],
  [0x110D0, 0x110E8],
  [0x110F0, 0x110F9],
  [0x11100, 0x11134],
  [0x11136, 0x1113F],
  [0x11144, 0x11147],
  [0x11150, 0x11173],
  [0x11176, 0x11176],
  [0x11180, 0x111C4],
  [0x111C9, 0x111CC],
  [0x111CE, 0x111DA],
  [0x111DC, 0x111DC],
  [0x11200, 0x11211],
  [0x11213, 0x11237],
  [0x1123E, 0x11241],
  [0x11280, 0x11286],
  [0x11288, 0x11288],
  [0x1128A, 0x1128D],
  [0x1128F, 0x1129D],
  [0x1129F, 0x112A8],
  [0x112B0, 0x112EA],
  [0x112F0, 0x112F9],
  [0x11300, 0x11303],
  [0x11305, 0x1130C],
  [0x1130F, 0x11310],
  [0x11313, 0x11328],
  [0x1132A, 0x11330],
  [0x11332, 0x11333],
  [0x11335, 0x11339],
  [0x1133B, 0x11344],
  [0x11347, 0x11348],
  [0x1134B, 0x1134D],
  [0x11350, 0x11350],
  [0x11357, 0x11357],
  [0x1135D, 0x11363],
  [0x11366, 0x1136C],
  [0x11370, 0x11374],
  [0x11400, 0x1144A],
  [0x11450, 0x11459],
  [0x1145E, 0x11461],
  [0x11480, 0x114C5],
  [0x114C7, 0x114C7],
  [0x114D0, 0x114D9],
  [0x11580, 0x115B5],
  [0x115B8, 0x115C0],
  [0x115D8, 0x115DD],
  [0x11600, 0x11640],
  [0x11644, 0x11644],
  [0x11650, 0x11659],
  [0x11680, 0x116B8],
  [0x116C0, 0x116C9],
  [0x11700, 0x1171A],
  [0x1171D, 0x1172B],
  [0x11730, 0x11739],
  [0x11740, 0x11746],
  [0x11800, 0x1183A],
  [0x118A0, 0x118E9],
  [0x118FF, 0x11906],
  [0x11909, 0x11909],
  [0x1190C, 0x11913],
  [0x11915, 0x11916],
  [0x11918, 0x11935],
  [0x11937, 0x11938],
  [0x1193B, 0x11943],
  [0x11950, 0x11959],
  [0x119A0, 0x119A7],
  [0x119AA, 0x119D7],
  [0x119DA, 0x119E1],
  [0x119E3, 0x119E4],
  [0x11A00, 0x11A3E],
  [0x11A47, 0x11A47],
  [0x11A50, 0x11A99],
  [0x11A9D, 0x11A9D],
  [0x11AB0, 0x11AF8],
  [0x11C00, 0x11C08],
  [0x11C0A, 0x11C36],
  [0x11C38, 0x11C40],
  [0x11C50, 0x11C59],
  [0x11C72, 0x11C8F],
  [0x11C92, 0x11CA7],
  [0x11CA9, 0x11CB6],
  [0x11D00, 0x11D06],
  [0x11D08, 0x11D09],
  [0x11D0B, 0x11D36],
  [0x11D3A, 0x11D3A],
  [0x11D3C, 0x11D3D],
  [0x11D3F, 0x11D47],
  [0x11D50, 0x11D59],
  [0x11D60, 0x11D65],
  [0x11D67, 0x11D68],
  [0x11D6A, 0x11D8E],
  [0x11D90, 0x11D91],
  [0x11D93, 0x11D98],
  [0x11DA0, 0x11DA9],
  [0x11EE0, 0x11EF6],
  [0x11F00, 0x11F10],
  [0x11F12, 0x11F3A],
  [0x11F3E, 0x11F42],
  [0x11F50, 0x11F59],
  [0x11FB0, 0x11FB0],
  [0x12000, 0x12399],
  [0x12400, 0x1246E],
  [0x12480, 0x12543],
  [0x12F90, 0x12FF0],
  [0x13000, 0x1342F],
  [0x13440, 0x13455],
  [0x14400, 0x14646],
  [0x16800, 0x16A38],
  [0x16A40, 0x16A5E],
  [0x16A60, 0x16A69],
  [0x16A70, 0x16ABE],
  [0x16AC0, 0x16AC9],
  [0x16AD0, 0x16AED],
  [0x16AF0, 0x16AF4],
  [0x16B00, 0x16B36],
  [0x16B40, 0x16B43],
  [0x16B50, 0x16B59],
  [0x16B63, 0x16B77],
  [0x16B7D, 0x16B8F],
  [0x16E40, 0x16E7F],
  [0x16F00, 0x16F4A],
  [0x16F4F, 0x16F87],
  [0x16F8F, 0x16F9F],
  [0x16FE0, 0x16FE1],
  [0x16FE3, 0x16FE4],
  [0x16FF0, 0x16FF1],
  [0x17000, 0x187F7],
  [0x18800, 0x18CD5],
  [0x18D00, 0x18D08],
  [0x1AFF0, 0x1AFF3],
  [0x1AFF5, 0x1AFFB],
  [0x1AFFD, 0x1AFFE],
  [0x1B000, 0x1B122],
  [0x1B132, 0x1B132],
  [0x1B150, 0x1B152],
  [0x1B155, 0x1B155],
  [0x1B164, 0x1B167],
  [0x1B170, 0x1B2FB],
  [0x1BC00, 0x1BC6A],
  [0x1BC70, 0x1BC7C],
  [0x1BC80, 0x1BC88],
  [0x1BC90, 0x1BC99],
  [0x1BC9D, 0x1BC9E],
  [0x1CF00, 0x1CF2D],
  [0x1CF30, 0x1CF46],
  [0x1D165, 0x1D169],
  [0x1D16D, 0x1D172],
  [0x1D17B, 0x1D182],
  [0x1D185, 0x1D18B],
  [0x1D1AA, 0x1D1AD],
  [0x1D242, 0x1D244],
  [0x1D400, 0x1D454],
  [0x1D456, 0x1D49C],
  [0x1D49E, 0x1D49F],
  [0x1D4A2, 0x1D4A2],
  [0x1D4A5, 0x1D4A6],
  [0x1D4A9, 0x1D4AC],
  [0x1D4AE, 0x1D4B9],
  [0x1D4BB, 0x1D4BB],
  [0x1D4BD, 0x1D4C3],
  [0x1D4C5, 0x1D505],
  [0x1D507, 0x1D50A],
  [0x1D50D, 0x1D514],
  [0x1D516, 0x1D51C],
  [0x1D51E, 0x1D539],
  [0x1D53B, 0x1D53E],
  [0x1D540, 0x1D544],
  [0x1D546, 0x1D546],
  [0x1D54A, 0x1D550],
  [0x1D552, 0x1D6A5],
  [0x1D6A8, 0x1D6C0],
  [0x1D6C2, 0x1D6DA],
  [0x1D6DC, 0x1D6FA],
  [0x1D6FC, 0x1D714],
  [0x1D716, 0x1D734],
  [0x1D736, 0x1D74E],
  [0x1D750, 0x1D76E],
  [0x1D770, 0x1D788],
  [0x1D78A, 0x1D7A8],
  [0x1D7AA, 0x1D7C2],
  [0x1D7C4, 0x1D7CB],
  [0x1D7CE, 0x1D7FF],
  [0x1DA00, 0x1DA36],
  [0x1DA3B, 0x1DA6C],
  [0x1DA75, 0x1DA75],
  [0x1DA84, 0x1DA84],
  [0x1DA9B, 0x1DA9F],
  [0x1DAA1, 0x1DAAF],
  [0x1DF00, 0x1DF1E],
  [0x1DF25, 0x1DF2A],
  [0x1E000, 0x1E006],
  [0x1E008, 0x1E018],
  [0x1E01B, 0x1E021],
  [0x1E023, 0x1E024],
  [0x1E026, 0x1E02A],
  [0x1E030, 0x1E06D],
  [0x1E08F, 0x1E08F],
  [0x1E100, 0x1E12C],
  [0x1E130, 0x1E13D],
  [0x1E140, 0x1E149],
  [0x1E14E, 0x1E14E],
  [0x1E290, 0x1E2AE],
  [0x1E2C0, 0x1E2F9],
  [0x1E4D0, 0x1E4F9],
  [0x1E7E0, 0x1E7E6],
  [0x1E7E8, 0x1E7EB],
  [0x1E7ED, 0x1E7EE],
  [0x1E7F0, 0x1E7FE],
  [0x1E800, 0x1E8C4],
  [0x1E8D0, 0x1E8D6],
  [0x1E900, 0x1E94B],
  [0x1E950, 0x1E959],
  [0x1EE00, 0x1EE03],
  [0x1EE05, 0x1EE1F],
  [0x1EE21, 0x1EE22],
  [0x1EE24, 0x1EE24],
  [0x1EE27, 0x1EE27],
  [0x1EE29, 0x1EE32],
  [0x1EE34, 0x1EE37],
  [0x1EE39, 0x1EE39],
  [0x1EE3B, 0x1EE3B],
  [0x1EE42, 0x1EE42],
  [0x1EE47, 0x1EE47],
  [0x1EE49, 0x1EE49],
  [0x1EE4B, 0x1EE4B],
  [0x1EE4D, 0x1EE4F],
  [0x1EE51, 0x1EE52],
  [0x1EE54, 0x1EE54],
  [0x1EE57, 0x1EE57],
  [0x1EE59, 0x1EE59],
  [0x1EE5B, 0x1EE5B],
  [0x1EE5D, 0x1EE5D],
  [0x1EE5F, 0x1EE5F],
  [0x1EE61, 0x1EE62],
  [0x1EE64, 0x1EE64],
  [0x1EE67, 0x1EE6A],
  [0x1EE6C, 0x1EE72],
  [0x1EE74, 0x1EE77],
  [0x1EE79, 0x1EE7C],
  [0x1EE7E, 0x1EE7E],
  [0x1EE80, 0x1EE89],
  [0x1EE8B, 0x1EE9B],
  [0x1EEA1, 0x1EEA3],
  [0x1EEA5, 0x1EEA9],
  [0x1EEAB, 0x1EEBB],
  [0x1FBF0, 0x1FBF9],
  [0x20000, 0x2A6DF],
  [0x2A700, 0x2B739],
  [0x2B740, 0x2B81D],
  [0x2B820, 0x2CEA1],
  [0x2CEB0, 0x2EBE0],
  [0x2EBF0, 0x2EE5D],
  [0x2F800, 0x2FA1D],
  [0x30000, 0x3134A],
  [0x31350, 0x323AF],
].freeze

LABEL_START_BYTE = Label-start (used for heredoc labels and modifier names): underscore or ASCII alpha.

Array.new(256, false)

LABEL_CONT_BYTE = Label-cont: alpha + digit + underscore.

Array.new(256, false)

DIGIT_BYTE = ASCII digit lookup.

Array.new(256, false)

HEX_BYTE = Hex digit lookup.

Array.new(256, false)

VALUE_TERMINATOR_BYTE = Value-terminator lookup (whitespace, EOL, comments, flow-end markers).

Array.new(256, false)

INT64_MIN =

-(2**63)

INT64_MAX =

2**63 - 1

Instance Attribute Summary collapse

#comments ⇒ Object readonly

———- Init ———-.
#original_forms ⇒ Object readonly

———- Init ———-.

Class Method Summary collapse

._parse_document_with_mode(src, lite, ignore_order = false) ⇒ Object
.parse_document(src) ⇒ Object

———- Public entry ———-.
.parse_document_unordered(src) ⇒ Object

Unordered full-mode parse (SPEC §“Unordered tables”).
.parse_front_matter_only(src) ⇒ Object

SPEC §Front-matter-only decode.
.parse_lite_document(src) ⇒ Object

Lite-mode parse: same data tree, no comment AST, no original_forms.
.parse_lite_document_unordered(src) ⇒ Object

Unordered lite-mode parse (SPEC §“Unordered tables”).

Instance Method Summary collapse

#advance_line ⇒ Object
#apply_modifiers(s, mods) ⇒ Object
#apply_trim(s, chars, where_s, replacement) ⇒ Object
#capture_inner_block_comments ⇒ Object

———- Value dispatch ———-.
#col ⇒ Object

1-based char column.
#collect_heredoc_body(terminator) ⇒ Object
#consume_after_value(allow_eof) ⇒ Object

———- Post-value ———-.
#consume_eol ⇒ Object
#days_in_month(y, m) ⇒ Object
#eof? ⇒ Boolean
#err(msg) ⇒ Object
#err_at(line, line_start, byte_pos, msg) ⇒ Object
#finalize_table(t) ⇒ Object

Shuffle an UnorderedHash in place at end-of-build so callers cannot rely on insertion order.
#flush_pending_as_floating ⇒ Object

———- Pending leading flushers ———-.
#flush_pending_as_leading_on_current ⇒ Object
#fold_paragraphs(s) ⇒ Object
#initialize(src, lite: false, ignore_order: false) ⇒ Parser constructor

A new instance of Parser.
#line_starts_kvpair? ⇒ Boolean
#looks_like_date_prefix_at?(p) ⇒ Boolean

———- Numbers & datetimes ———-.
#looks_like_time_prefix_at?(p) ⇒ Boolean
#looks_like_time_str?(s) ⇒ Boolean
#measure_line_indent ⇒ Object

———- Block parsers ———-.
#new_table ⇒ Object

Allocate a fresh table (Hash by default, UnorderedHash when `ignore_order` is set).
#parse_bare_key ⇒ Object
#parse_basic_string_value ⇒ Object

———- Strings ———-.
#parse_block_value(indent) ⇒ Object
#parse_body ⇒ Object
#parse_body_as_table ⇒ Object
#parse_bool_value ⇒ Object
#parse_datetime_value ⇒ Object
#parse_dec_float(s) ⇒ Object
#parse_float_lit(s) ⇒ Object
#parse_flow_array ⇒ Object

———- Flow forms ———-.
#parse_flow_table ⇒ Object
#parse_front_matter ⇒ Object

———- Document entry ———-.
#parse_heredoc_basic ⇒ Object
#parse_heredoc_label ⇒ Object
#parse_heredoc_literal ⇒ Object
#parse_heredoc_modifiers ⇒ Object
#parse_inf_value ⇒ Object
#parse_inline_value_in_flow ⇒ Object
#parse_inline_value_or_heredoc ⇒ Object
#parse_integer_lit(s) ⇒ Object
#parse_key ⇒ Object

———- Keys ———-.
#parse_kvpair(parent_indent) ⇒ Object

———- kvpair ———-.
#parse_list_block(indent) ⇒ Object
#parse_list_item_value(list_indent) ⇒ Object
#parse_literal_string_value ⇒ Object
#parse_local_time_value ⇒ Object
#parse_modifier_call_args ⇒ Object
#parse_nan_value ⇒ Object
#parse_nondec_float(s) ⇒ Object
#parse_number_or_datetime ⇒ Object
#parse_one_modifier ⇒ Object
#parse_table_block(indent) ⇒ Object
#peek_after_plus_is_space_or_eol? ⇒ Boolean
#peek_byte ⇒ Object

peek: returns the byte at @pos as Integer, or nil at EOF.
#peek_char_byte_safe ⇒ Object

peek_char: returns the character at @pos as a String.
#per_line_edges(s, char_set, replacement) ⇒ Object
#read_c_block_comment ⇒ Object
#read_hash_block_comment ⇒ Object
#read_hex_codepoint(n) ⇒ Object
#read_line_comment_to_eol ⇒ Object

———- Raw comment readers ———-.
#record_form(lit) ⇒ Object

———- Original-form recording ———-.
#reject_reserved_sigil_at_line_start! ⇒ Object

SPEC tier-0: reject reserved decorator sigils at line-start position.
#replace_all_runs(s, char_set, replacement) ⇒ Object
#replace_leading_run(s, char_set, replacement) ⇒ Object
#replace_trailing_run(s, char_set, replacement) ⇒ Object
#scan_number_token ⇒ Object
#skip_flow_ws ⇒ Object
#skip_inline_ws ⇒ Object

———- Whitespace / EOL ———-.
#skip_trivia ⇒ Object
#starts_bytes?(s) ⇒ Boolean
#strip_indent_and_continuations(body, allow_cont) ⇒ Object

———- Heredoc body processing ———-.
#utf8_char_len(b) ⇒ Object
#valid_underscores?(s) ⇒ Boolean

———- Numeric helpers ———-.
#validate_date(s) ⇒ Object
#validate_heredoc_basic_surrogates(body) ⇒ Object

SPEC §basic-string escapes: a `\uXXXX` / `\UXXXXXXXX` escape whose decoded value falls in the surrogate range U+D800..U+DFFF is a parse error.
#validate_time(s) ⇒ Object
#xid_continue?(cp) ⇒ Boolean

Frozen XID_Continue test (Unicode 15.1, UAX #31 §2).

Constructor Details

#initialize(src, lite: false, ignore_order: false) ⇒ `Parser`

Returns a new instance of Parser.

# File 'lib/dms/parser.rb', line 1004

# Sets up parser state over +src+ without consuming any input.
#
# A leading U+FEFF is deliberately not stripped here: the original notes
# that _parse_document_with_mode rejects it before construction.
#
# @param src [String] source text; only its byte size is read here
# @param lite [Boolean] lite mode: skip comment-AST and original_forms
#   bookkeeping (same grammar, same errors)
# @param ignore_order [Boolean] unordered mode: body tables are built as
#   UnorderedHash and shuffled at end-of-build
def initialize(src, lite: false, ignore_order: false)
  @src = src
  # The cursor is a byte offset, not a character offset. The two coincide
  # for pure-ASCII input; multi-byte characters only occur inside string
  # bodies and non-ASCII keys, which are sliced with byteslice.
  @len = src.bytesize
  @pos, @line, @line_start = 0, 1, 0

  # Collected output and bookkeeping, all starting empty.
  @comments, @pending_leading, @path, @original_forms = [], [], [], []
  @record_forms = true

  # Mode switches (see the parameter docs above).
  @lite = lite
  @ignore_order = ignore_order
end

Instance Attribute Details

#comments ⇒ `Object` (readonly)

———- Init ———-



1002
1003
1004

# File 'lib/dms/parser.rb', line 1002

# Read-only accessor for the comment records gathered by the parser.
#
# @return [Object] the current value of @comments
def comments
  return @comments
end

#original_forms ⇒ `Object` (readonly)

———- Init ———-



1002
1003
1004

# File 'lib/dms/parser.rb', line 1002

# Read-only accessor for the original-form records kept by the parser.
#
# @return [Object] the current value of @original_forms
def original_forms
  return @original_forms
end

Class Method Details

._parse_document_with_mode(src, lite, ignore_order = false) ⇒ `Object`

# File 'lib/dms/parser.rb', line 931

# Shared implementation behind the public parse_* entry points.
#
# Prepares +src+ (UTF-8 tagging/transcoding, BOM and NUL rejection, NFC
# normalization), then runs front-matter and body parsing on a fresh parser
# and wraps the results in a Document.
#
# The caller's string is never modified: binary input is re-tagged on a copy.
#
# @param src [String] DMS source text in any encoding
# @param lite [Boolean] forwarded to the parser's +lite:+ option
# @param ignore_order [Boolean] forwarded to the parser's +ignore_order:+ option
# @return [Document] Document.new(meta, body, comments, original_forms)
# @raise [DecodeError] if the source starts with U+FEFF or contains U+0000
def self._parse_document_with_mode(src, lite, ignore_order = false)
  if src.encoding == Encoding::BINARY
    # Binary (ASCII-8BIT) bytes are taken to be UTF-8. Re-tag a copy so the
    # caller's object keeps its own encoding.
    src = src.dup.force_encoding(Encoding::UTF_8)
  elsif src.encoding != Encoding::UTF_8
    src = src.encode("UTF-8")
  elsif src.frozen?
    src = src.dup
  end
  # SPEC §"UTF-8 only, NFC-normalized": DMS source is plain UTF-8 with no
  # byte-order mark. A leading U+FEFF is rejected rather than silently
  # consumed so encoding mistakes surface loudly. Only offset 0 is checked;
  # BOMs inside string/heredoc bodies are fine. The code point is written as
  # an escape because a literal BOM is invisible in source.
  if src.start_with?("\uFEFF")
    raise DecodeError.new(1, 1, "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8")
  end
  # String#index returns a *character* offset, so the prefix is sliced by
  # characters too; line and column then stay correct after multi-byte text.
  nul = src.index("\0")
  if nul
    prefix = src[0, nul]
    line = 1 + prefix.count("\n")
    last_nl = prefix.rindex("\n")
    col = last_nl ? (nul - last_nl) : (nul + 1)
    raise DecodeError.new(line, col, "U+0000 (NUL) is not allowed in DMS source")
  end
  # NFC-normalize unless ASCII-only (where normalization is a no-op).
  src = src.unicode_normalize(:nfc) unless src.ascii_only?
  parser = new(src, lite: lite, ignore_order: ignore_order)
  meta = parser.parse_front_matter
  body = parser.parse_body
  Document.new(meta, body, parser.comments, parser.original_forms)
end

.parse_document(src) ⇒ `Object`

———- Public entry ———-



905
906
907

# File 'lib/dms/parser.rb', line 905

# Public entry point: full-mode, order-preserving parse of +src+.
#
# @param src [String] DMS source text
# @return [Object] the result of _parse_document_with_mode
def self.parse_document(src)
  lite = false
  ignore_order = false
  _parse_document_with_mode(src, lite, ignore_order)
end

.parse_document_unordered(src) ⇒ `Object`

Unordered full-mode parse (SPEC §“Unordered tables”). Every body `Hash` is replaced by an `UnorderedHash`; iteration order is arbitrary. Comments + original_forms are still recorded, but `Dms.encode` will refuse to round-trip the result — use `Dms.encode_lite` for canonical emit instead.



920
921
922

# File 'lib/dms/parser.rb', line 920

# Full-mode parse with unordered tables (lite off, ignore_order on).
#
# @param src [String] DMS source text
# @return [Object] the result of _parse_document_with_mode
def self.parse_document_unordered(src)
  lite = false
  ignore_order = true
  _parse_document_with_mode(src, lite, ignore_order)
end

.parse_front_matter_only(src) ⇒ `Object`

SPEC §Front-matter-only decode. Decodes the leading `+++ … +++` block and stops — body bytes after the closer are not tokenized, so body-only errors (duplicate body keys, unterminated body heredoc, etc.) are not surfaced here. Front-matter validation is byte-identical to a full decode: open/close on their own lines, `_dms_tier` is type-checked, unknown reserved keys rejected, unterminated front matter is a parse error.

Returns the front-matter Hash (possibly empty) when an opener is present, or nil when the document has no front matter at all. Always runs in lite mode — no comment AST, no original_forms.

# File 'lib/dms/parser.rb', line 972

# Decodes only the front matter of +src+, always in lite mode.
#
# Applies the same source preparation as a full decode (UTF-8
# tagging/transcoding, BOM and NUL rejection, NFC normalization), then
# returns whatever the parser's parse_front_matter yields; parse_body is
# never called.
#
# The caller's string is never modified: binary input is re-tagged on a copy.
#
# @param src [String] DMS source text in any encoding
# @return [Object] the value returned by parse_front_matter
# @raise [DecodeError] if the source starts with U+FEFF or contains U+0000
def self.parse_front_matter_only(src)
  if src.encoding == Encoding::BINARY
    # Binary (ASCII-8BIT) bytes are taken to be UTF-8. Re-tag a copy so the
    # caller's object keeps its own encoding.
    src = src.dup.force_encoding(Encoding::UTF_8)
  elsif src.encoding != Encoding::UTF_8
    src = src.encode("UTF-8")
  elsif src.frozen?
    src = src.dup
  end
  # Written as an escape because a literal BOM is invisible in source.
  if src.start_with?("\uFEFF")
    raise DecodeError.new(1, 1, "BOM (U+FEFF) at file start is not allowed; DMS source is plain UTF-8")
  end
  # The NUL scan covers the whole buffer, not just the front-matter region:
  # it is cheap, and a NUL anywhere is reported exactly as the full decoder
  # would report it. String#index returns a *character* offset, so the
  # prefix is sliced by characters too; line and column then stay correct
  # after multi-byte text.
  nul = src.index("\0")
  if nul
    prefix = src[0, nul]
    line = 1 + prefix.count("\n")
    last_nl = prefix.rindex("\n")
    col = last_nl ? (nul - last_nl) : (nul + 1)
    raise DecodeError.new(line, col, "U+0000 (NUL) is not allowed in DMS source")
  end
  # NFC-normalize unless ASCII-only (where normalization is a no-op).
  src = src.unicode_normalize(:nfc) unless src.ascii_only?
  parser = new(src, lite: true, ignore_order: false)
  parser.parse_front_matter
end

.parse_lite_document(src) ⇒ `Object`

Lite-mode parse: same data tree, no comment AST, no original_forms. Not suitable for to_dms round-trip. SPEC §Parsing modes — full and lite.



911
912
913

# File 'lib/dms/parser.rb', line 911

# Lite-mode, order-preserving parse (lite on, ignore_order off).
#
# @param src [String] DMS source text
# @return [Object] the result of _parse_document_with_mode
def self.parse_lite_document(src)
  lite = true
  ignore_order = false
  _parse_document_with_mode(src, lite, ignore_order)
end

.parse_lite_document_unordered(src) ⇒ `Object`

Unordered lite-mode parse (SPEC §“Unordered tables”). The `(unordered, lite)` combo is the fastest read-only path for ports that ship a hash-only backing.



927
928
929

# File 'lib/dms/parser.rb', line 927

# Lite-mode parse with unordered tables (lite on, ignore_order on).
#
# @param src [String] DMS source text
# @return [Object] the result of _parse_document_with_mode
def self.parse_lite_document_unordered(src)
  lite = true
  ignore_order = true
  _parse_document_with_mode(src, lite, ignore_order)
end

Instance Method Details

#advance_line ⇒ `Object`

# File 'lib/dms/parser.rb', line 1102

# Records that a new line begins at the current cursor: bumps the line
# counter and remembers @pos as the byte offset where that line starts.
#
# @return [Integer] the new @line_start (value of the final assignment)
def advance_line
  @line = @line.succ
  @line_start = @pos
end

#apply_modifiers(s, mods) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2833

# Runs each modifier in +mods+ over the text, feeding the output of one into
# the next, and returns the final text.
#
# Each modifier is indexed with :name and :args. "_fold_paragraphs" accepts
# no arguments; "_trim" accepts (chars, where) or (chars, where, replacement),
# all strings, with replacement defaulting to "".
#
# @param s [String] starting text
# @param mods [Enumerable] modifier records, applied in order
# @return [String] the text after every modifier has run
# @raise [RuntimeError] on an unknown modifier name, a wrong argument count,
#   or a non-string argument
def apply_modifiers(s, mods)
  mods.reduce(s) do |text, mod|
    name = mod[:name]
    if name == "_fold_paragraphs"
      raise "fold_paragraphs() takes no arguments" unless mod[:args].empty?
      fold_paragraphs(text)
    elsif name == "_trim"
      argv = mod[:args]
      raise "trim(chars, where, replacement = \"\") expects 2 or 3 arguments" unless argv.length.between?(2, 3)
      chars, where = argv[0], argv[1]
      raise "trim: first argument (chars) must be a string" unless chars.is_a?(String)
      raise "trim: second argument (where) must be a string" unless where.is_a?(String)
      replacement =
        if argv.length == 3
          raise "trim: third argument (replacement) must be a string" unless argv[2].is_a?(String)
          argv[2]
        else
          ""
        end
      apply_trim(text, chars, where, replacement)
    else
      raise "unknown modifier: #{name}"
    end
  end
end

#apply_trim(s, chars, where_s, replacement) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2817

# Applies one trim modifier to +s+.
#
# +where_s+ is scanned for the flag characters "*", "|", "<" and ">":
#   "*"  delegate to replace_all_runs and return its result, ignoring the
#        other flags
#   "|"  per_line_edges
#   "<"  replace_leading_run
#   ">"  replace_trailing_run
# Without "*", the selected steps run in the order pipe, "<", ">".
#
# @param s [String] text to trim
# @param chars [String] characters forming the trim set (deduplicated)
# @param where_s [String] flag string
# @param replacement [String] passed through to the helper calls
# @return [String] +s+ itself when +chars+ is empty or no flag is present,
#   otherwise the result of the selected helper(s)
def apply_trim(s, chars, where_s, replacement)
  return s if chars.empty?

  everywhere, per_line, leading, trailing =
    ["*", "|", "<", ">"].map { |flag| where_s.include?(flag) }
  return s unless everywhere || per_line || leading || trailing

  trim_set = chars.chars.uniq
  return replace_all_runs(s, trim_set, replacement) if everywhere

  result = s
  result = per_line_edges(result, trim_set, replacement) if per_line
  result = replace_leading_run(result, trim_set, replacement) if leading
  result = replace_trailing_run(result, trim_set, replacement) if trailing
  result
end

#capture_inner_block_comments ⇒ `Object`

———- Value dispatch ———-

# File 'lib/dms/parser.rb', line 1809

# Consumes every `/* ... */` comment that starts at the cursor, one after
# another. Each is read with read_c_block_comment and followed by
# skip_inline_ws; outside lite mode it is also recorded as an :inner comment
# tagged with a frozen copy of the current path.
#
# @return [nil]
def capture_inner_block_comments
  while @src.getbyte(@pos) == SLASH && @src.getbyte(@pos + 1) == STAR
    text = read_c_block_comment
    unless @lite
      @comments << AttachedComment.new(Comment.new(text, :block), :inner, @path.dup.freeze)
    end
    skip_inline_ws
  end
end

#col ⇒ `Object`

1-based char column. For pure-ASCII sources this is the byte offset within the line plus one; for mixed UTF-8 we count the chars between the start of the line and the current position.

# File 'lib/dms/parser.rb', line 1053

# 1-based character column of the cursor on the current line.
#
# When the whole source is ASCII the byte distance from the line start is
# already the character distance; otherwise the bytes between the line start
# and the cursor are sliced out and counted as characters.
#
# @return [Integer]
def col
  offset = @pos - @line_start
  if @src.ascii_only?
    offset + 1
  else
    @src.byteslice(@line_start, offset).length + 1
  end
end

#collect_heredoc_body(terminator) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2678

# Reads raw heredoc lines from the cursor until a line whose stripped
# content equals +terminator+.
#
# Every non-terminator line is collected as
# [raw_text, line_number, line_start_offset]. The terminator line is not
# collected and its line ending is not consumed; the number of leading
# spaces on it becomes the second HBody argument.
#
# @param terminator [String] text that, alone on a line (ignoring
#   surrounding whitespace), ends the body
# @return [HBody] HBody.new(lines, strip_depth)
# @raise [DecodeError] "unterminated heredoc" (located where this call
#   started) if input ends first, or a bare-CR error if a carriage return
#   without a following LF appears in the body
def collect_heredoc_body(terminator)
  lines = []
  # Remember where the body started so an unterminated heredoc is reported
  # there rather than at EOF.
  sl = @line; sls = @line_start; sp = @pos
  s = @src
  n = @len
  loop do
    raise err_at(sl, sls, sp, "unterminated heredoc") if @pos >= n
    line_begin = @pos
    # Scan to the end of the current line (LF, CR, or EOF).
    while @pos < n
      b = s.getbyte(@pos)
      break if b == LF || b == CR
      @pos += 1
    end
    raw = s.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    this_line = @line; this_lstart = @line_start
    if raw.strip == terminator
      strip_depth = 0
      raw.each_char do |c|
        break unless c == " "
        strip_depth += 1
      end
      return HBody.new(lines, strip_depth)
    end
    lines << [raw, this_line, this_lstart]
    next if consume_eol
    # consume_eol only accepts LF or CR LF. If it declined and we are not at
    # EOF, the cursor sits on a lone CR; without this guard the loop would
    # never advance and would append empty lines forever.
    raise err("bare CR (carriage return without LF) is not allowed in heredoc body") if @pos < n
  end
end

#consume_after_value(allow_eof) ⇒ `Object`

———- Post-value ———-

# File 'lib/dms/parser.rb', line 2965

# Consumes whatever may follow a value on its line: optional inline
# whitespace, trailing comments, then the line terminator.
#
# - `#` and `//` line comments must be preceded by whitespace; each is
#   recorded as a :trailing comment (unless in lite mode) and ends the scan.
# - `/* ... */` block comments are recorded as :trailing and the scan
#   continues, so several may appear before the end of the line.
# - A '#' that begins "###" is not treated as a comment here.
#
# After the scan the cursor must be at end of input, LF, or CRLF; anything
# else raises the DecodeError built by #err.
#
# NOTE(review): `allow_eof` is not consulted anywhere in this body; end of
# input is accepted regardless of its value. Confirm whether that is intended.
def consume_after_value(allow_eof)
  loop do
    ws_start = @pos
    skip_inline_ws
    # Line comments are only legal when separated from the value by whitespace.
    had_ws = @pos > ws_start
    b = @src.getbyte(@pos)
    if b == HASH && !starts_bytes?("###")
      raise err("expected whitespace before '#' comment") unless had_ws
      raw = read_line_comment_to_eol
      @comments << AttachedComment.new(Comment.new(raw, :line), :trailing, @path.dup.freeze) unless @lite
      break
    elsif b == SLASH && @src.getbyte(@pos + 1) == SLASH
      raise err("expected whitespace before '//' comment") unless had_ws
      raw = read_line_comment_to_eol
      @comments << AttachedComment.new(Comment.new(raw, :line), :trailing, @path.dup.freeze) unless @lite
      break
    elsif b == SLASH && @src.getbyte(@pos + 1) == STAR
      # Block comments do not end the line, so keep scanning after one.
      raw = read_c_block_comment
      @comments << AttachedComment.new(Comment.new(raw, :block), :trailing, @path.dup.freeze) unless @lite
      next
    else
      break
    end
  end
  b = @src.getbyte(@pos)
  # End of input is accepted as a valid end of line.
  return if b.nil?
  if b == LF
    @pos += 1; advance_line; return
  end
  if b == CR && @src.getbyte(@pos + 1) == LF
    @pos += 2; advance_line; return
  end
  # Non-ASCII bytes are shown as '?' so the message never holds a partial UTF-8 sequence.
  raise err("unexpected character '#{b < 128 ? b.chr : '?'}' after value")
end

#consume_eol ⇒ `Object`

# File 'lib/dms/parser.rb', line 1121

# Consumes one line terminator (LF or CRLF) at the cursor and advances the
# line bookkeeping. Returns true when a terminator was consumed, false when
# the cursor is not at one (a CR not followed by LF is not consumed).
def consume_eol
  width =
    case @src.getbyte(@pos)
    when LF then 1
    when CR then (@src.getbyte(@pos + 1) == LF ? 2 : 0)
    else 0
    end
  return false if width.zero?
  @pos += width
  advance_line
  true
end

#days_in_month(y, m) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2249

# Returns the number of days in month `m` (1..12) of year `y` using the
# Gregorian leap-year rule. Returns 0 for any month outside 1..12.
def days_in_month(y, m)
  return 31 if [1, 3, 5, 7, 8, 10, 12].include?(m)
  return 30 if [4, 6, 9, 11].include?(m)
  return 0 unless 2 == m

  divisible_by = ->(d) { y % d == 0 }
  leap_year = (divisible_by.call(4) && !divisible_by.call(100)) || divisible_by.call(400)
  leap_year ? 29 : 28
end

#eof? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1100

# True once the cursor has reached or passed the end of the input.
def eof?
  @pos >= @len
end

#err(msg) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1059

# Builds (does not raise) a DecodeError carrying the current line, the
# current character column, and `msg`.
def err(msg)
  DecodeError.new(@line, col, msg)
end

#err_at(line, line_start, byte_pos, msg) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1061

# Builds (does not raise) a DecodeError for an explicit position.
#
# line       - line number to report
# line_start - byte offset where that line begins
# byte_pos   - byte offset of the error within the source
# msg        - error message
#
# The reported column is 1-based and counted in characters; for an
# all-ASCII source the byte distance is used directly.
def err_at(line, line_start, byte_pos, msg)
  width = byte_pos - line_start
  width = @src.byteslice(line_start, width).length unless @src.ascii_only?
  DecodeError.new(line, width + 1, msg)
end

#finalize_table(t) ⇒ `Object`

Shuffle an UnorderedHash in place at end-of-build so callers cannot rely on insertion order. No-op for a plain `Hash`, for tables with fewer than two entries, or when order is not being ignored.

# File 'lib/dms/parser.rb', line 1039

# Re-inserts the entries of `t` in a shuffled key order, in place, and
# returns `t`. Only applies when @ignore_order is set, `t` is an
# UnorderedHash, and it holds more than one entry; otherwise `t` is
# returned untouched.
def finalize_table(t)
  shuffle_needed = @ignore_order && t.is_a?(UnorderedHash) && t.size > 1
  return t unless shuffle_needed

  reordered = t.keys.shuffle.map { |key| [key, t[key]] }
  t.clear
  reordered.each { |key, value| t[key] = value }
  t
end

#flush_pending_as_floating ⇒ `Object`

———- Pending leading flushers ———-

# File 'lib/dms/parser.rb', line 1184

# Moves every pending leading comment into @comments as a :floating comment
# attached to a frozen snapshot of the current path, and empties the pending
# list. Does nothing when no comments are pending.
def flush_pending_as_floating
  return if @pending_leading.empty?

  queued, @pending_leading = @pending_leading, []
  owner = @path.dup.freeze
  queued.each { |comment| @comments << AttachedComment.new(comment, :floating, owner) }
end

#flush_pending_as_leading_on_current ⇒ `Object`

# File 'lib/dms/parser.rb', line 1194

# Moves every pending leading comment into @comments as a :leading comment
# attached to a frozen snapshot of the current path, and empties the pending
# list. Does nothing when no comments are pending.
def flush_pending_as_leading_on_current
  return if @pending_leading.empty?

  queued, @pending_leading = @pending_leading, []
  owner = @path.dup.freeze
  queued.each { |comment| @comments << AttachedComment.new(comment, :leading, owner) }
end

#fold_paragraphs(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2769

# Splits `s` into paragraphs at blank lines ("\n\n"), joins the non-empty
# lines of each paragraph with a single space, and rejoins the paragraphs
# with a single "\n".
def fold_paragraphs(s)
  folded = s.split("\n\n", -1).map do |paragraph|
    kept = paragraph.split("\n").reject { |line| line.empty? }
    kept.join(" ")
  end
  folded.join("\n")
end

#line_starts_kvpair? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1443

# Pure lookahead (never moves @pos): reports whether the text at the cursor
# looks like `key:` followed by whitespace, a line ending, or end of input.
#
# The key may be a double-quoted string, a single-quoted string, or a bare
# key made of BARE_KEY_BYTE ASCII bytes and non-ASCII characters accepted by
# #xid_continue?. Quoted keys are only skimmed for their closing quote here;
# their contents are not validated.
def line_starts_kvpair?
  p = @pos
  s = @src
  n = @len
  first = s.getbyte(p)
  if first == DQUOTE
    p += 1
    while p < n
      b = s.getbyte(p)
      if b == BACKSLASH
        # Skip the backslash and the byte it escapes so an escaped quote
        # does not end the scan.
        p += 2
      elsif b == DQUOTE
        p += 1
        break
      elsif b == LF || b == CR
        # A quoted key cannot cross a line boundary.
        return false
      else
        p += 1
      end
    end
  elsif first == SQUOTE
    # Single-quoted keys have no escapes: scan to the next quote.
    p += 1
    while p < n
      b = s.getbyte(p)
      if b == SQUOTE
        p += 1
        break
      elsif b == LF || b == CR
        return false
      else
        p += 1
      end
    end
  else
    any_chars = false
    while p < n
      b = s.getbyte(p)
      if b < 128
        break unless BARE_KEY_BYTE[b]
        p += 1
        any_chars = true
      else
        # Non-ASCII byte: walk a full UTF-8 char and XID_Continue-test it.
        ch_len = utf8_char_len(b)
        ch = s.byteslice(p, ch_len).force_encoding(Encoding::UTF_8)
        break unless xid_continue?(ch.ord)
        p += ch_len
        any_chars = true
      end
    end
    # An empty bare key is not a key.
    return false unless any_chars
  end
  # The key must be followed directly by ':' ...
  return false if p >= n || s.getbyte(p) != COLON
  # ... and the ':' by whitespace, a line ending, or end of input.
  nxt = s.getbyte(p + 1)
  nxt.nil? || nxt == SP || nxt == TAB || nxt == LF || nxt == CR
end

#looks_like_date_prefix_at?(p) ⇒ `Boolean`

———- Numbers & datetimes ———-

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1919

# Reports whether the ten bytes starting at byte offset `p` have the shape
# DDDD-DD-DD (digits and '-' separators only). No range validation of the
# year, month, or day is done here.
def looks_like_date_prefix_at?(p)
  return false if p + 10 > @len

  src = @src
  # Offsets 4 and 7 hold the '-' separators; every other offset is a digit.
  10.times do |off|
    byte = src.getbyte(p + off)
    if off == 4 || off == 7
      return false unless byte == MINUS
    else
      return false unless DIGIT_BYTE[byte]
    end
  end
  true
end

#looks_like_time_prefix_at?(p) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1935

# Reports whether the eight bytes starting at byte offset `p` have the shape
# DD:DD:DD (digits and ':' separators only). No range validation of the
# hour, minute, or second is done here.
def looks_like_time_prefix_at?(p)
  return false if p + 8 > @len

  src = @src
  # Offsets 2 and 5 hold the ':' separators; every other offset is a digit.
  8.times do |off|
    byte = src.getbyte(p + off)
    if off == 2 || off == 5
      return false unless byte == COLON
    else
      return false unless DIGIT_BYTE[byte]
    end
  end
  true
end

#looks_like_time_str?(s) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 2371

# Reports whether string `s` begins with the shape DD:DD:DD, where D is an
# ASCII digit. Anything after the first eight characters is ignored.
def looks_like_time_str?(s)
  return false if s.length < 8

  ascii_digit = ->(ch) { ch >= "0" && ch <= "9" }
  [0, 1, 3, 4, 6, 7].all? { |i| ascii_digit.call(s[i]) } && s[2] == ":" && s[5] == ":"
end

#measure_line_indent ⇒ `Object`

———- Block parsers ———-

# File 'lib/dms/parser.rb', line 1534

# Counts the space bytes at the start of the current line (from
# @line_start). Only SP counts; a tab or any other byte ends the run.
def measure_line_indent
  cursor = @line_start
  cursor += 1 while cursor < @len && @src.getbyte(cursor) == SP
  cursor - @line_start
end

#new_table ⇒ `Object`

Allocate a fresh table (Hash by default, UnorderedHash when `ignore_order` is set). Used for body tables, list-item tables, and flow tables. Front-matter `meta` is excluded — the front-matter block is always insertion-ordered regardless of body mode.



1033
1034
1035

# File 'lib/dms/parser.rb', line 1033

# Returns a new empty table: an UnorderedHash when @ignore_order is set,
# otherwise a plain Hash.
def new_table
  return UnorderedHash.new if @ignore_order

  {}
end

#parse_bare_key ⇒ `Object`

# File 'lib/dms/parser.rb', line 1783

# Parses an unquoted key at the cursor and returns it as a UTF-8 string,
# advancing @pos past it. ASCII bytes are accepted via BARE_KEY_BYTE;
# non-ASCII characters are decoded one at a time and accepted when
# #xid_continue? approves their code point. Raises the DecodeError built by
# #err("expected key") when no key characters are present; @pos is left
# unchanged in that case.
def parse_bare_key
  src = @src
  limit = @len
  first = @pos
  cursor = first
  while cursor < limit
    byte = src.getbyte(cursor)
    if byte >= 128
      # Multi-byte character: decode it and test the whole code point.
      width = utf8_char_len(byte)
      char = src.byteslice(cursor, width).force_encoding(Encoding::UTF_8)
      break unless xid_continue?(char.ord)
      cursor += width
    elsif BARE_KEY_BYTE[byte]
      cursor += 1
    else
      break
    end
  end
  raise err("expected key") if cursor == first
  @pos = cursor
  src.byteslice(first, cursor - first).force_encoding(Encoding::UTF_8)
end

#parse_basic_string_value ⇒ `Object`

———- Strings ———-

# File 'lib/dms/parser.rb', line 2411

# Parses a double-quoted basic string whose opening quote is at the cursor
# and returns its decoded value, leaving @pos just past the closing quote.
#
# Runs of ordinary bytes are copied in bulk; backslash escapes
# (\" \\ \n \t \r \b \f \uXXXX \UXXXXXXXX) are decoded individually. A
# non-ASCII result is NFC-normalized before being returned.
#
# Raises (via #err / #err_at):
# - "unterminated string" (positioned at the opening quote) on end of input
# - "strings cannot span lines" on LF or CR inside the string
# - "unterminated escape" when input ends right after a backslash
# - "invalid escape '\X'" for any other escape character
def parse_basic_string_value
  # Remember the opener so "unterminated string" points at it.
  sl = @line; sls = @line_start; sp = @pos
  @pos += 1 # opening "
  out = +""
  out.force_encoding(Encoding::UTF_8)
  s = @src
  # Start of the current run of bytes that need no decoding.
  run_start = @pos
  loop do
    b = s.getbyte(@pos)
    if b.nil?
      raise err_at(sl, sls, sp, "unterminated string")
    elsif b == LF || b == CR
      raise err("strings cannot span lines")
    elsif b == DQUOTE
      out << s.byteslice(run_start, @pos - run_start).force_encoding(Encoding::UTF_8) if @pos > run_start
      @pos += 1
      # NFC re-normalize after escape decoding (only needed for non-ASCII output).
      return out.ascii_only? ? out : out.unicode_normalize(:nfc)
    elsif b == BACKSLASH
      # Flush the pending run before decoding the escape.
      out << s.byteslice(run_start, @pos - run_start).force_encoding(Encoding::UTF_8) if @pos > run_start
      @pos += 1
      esc = s.getbyte(@pos)
      @pos += 1 unless esc.nil?
      case esc
      when DQUOTE then out << '"'
      when BACKSLASH then out << "\\"
      when LOWER_N then out << "\n"
      when LOWER_T then out << "\t"
      when LOWER_R then out << "\r"
      when LOWER_B then out << "\b"
      when LOWER_F_LETTER then out << "\f"
      when LOWER_U then out << read_hex_codepoint(4)
      when UPPER_U then out << read_hex_codepoint(8)
      when nil then raise err("unterminated escape")
      else
        # Integer#chr on a byte >= 0x80 yields a binary fragment of a UTF-8
        # sequence, which would turn the whole message into a binary string.
        # Show '?' instead, as consume_after_value does.
        shown = esc < 128 ? esc.chr : "?"
        raise err("invalid escape '\\#{shown}'")
      end
      run_start = @pos
    else
      @pos += 1
    end
  end
end

#parse_block_value(indent) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1627

# Parses the block that starts on the current line at column `indent`.
# The cursor is moved to that column; a '+' list marker there (followed by
# space or end of line) selects a list block, anything else a table block.
def parse_block_value(indent)
  @pos = @line_start + indent
  list_marker = @src.getbyte(@pos) == PLUS && peek_after_plus_is_space_or_eol?
  list_marker ? parse_list_block(indent) : parse_table_block(indent)
end

#parse_body ⇒ `Object`

# File 'lib/dms/parser.rb', line 1394

# Parses the document body and returns its root value.
#
# An empty body yields a new empty table. Otherwise the root is a list
# (line starts with a '+' marker), a table (line starts with `key:`), or a
# single scalar/flow value. Nothing but trivia may follow the root. Any
# comments still pending at the end are flushed as floating comments.
#
# Raises the DecodeError built by #err on leading indentation at the root or
# on trailing content after the root value.
def parse_body
  skip_trivia
  if eof?
    flush_pending_as_floating
    return new_table
  end

  lead = @src.getbyte(@pos)
  raise err("unexpected indentation at document root") if lead == SP || lead == TAB
  reject_reserved_sigil_at_line_start!

  root, trailing_msg =
    if lead == PLUS && peek_after_plus_is_space_or_eol?
      [parse_list_block(0), "trailing content after list root"]
    elsif line_starts_kvpair?
      [parse_table_block(0), "trailing content after table root"]
    else
      scalar = parse_inline_value_or_heredoc
      consume_after_value(true)
      [scalar, "scalar root cannot be followed by more content"]
    end

  skip_trivia
  raise err(trailing_msg) unless eof?
  flush_pending_as_floating
  root
end

#parse_body_as_table ⇒ `Object`

# File 'lib/dms/parser.rb', line 1371

# Parses the whole input as a single root table (used for front matter) and
# returns it. An empty input yields a new empty table after flushing pending
# comments as floating.
#
# Raises the DecodeError built by #err when the first content line is
# indented, starts a list, is not a `key:` line, or when anything other than
# trivia follows the table.
def parse_body_as_table
  skip_trivia
  if eof?
    flush_pending_as_floating
    return new_table
  end

  lead = @src.getbyte(@pos)
  raise err("unexpected indentation inside front matter") if lead == SP || lead == TAB
  reject_reserved_sigil_at_line_start!
  raise err("front matter block cannot have a list root") if lead == PLUS && peek_after_plus_is_space_or_eol?
  raise err("front matter block must be a table") unless line_starts_kvpair?

  table = parse_table_block(0)
  skip_trivia
  raise err("trailing content inside front matter") unless eof?
  table
end

#parse_bool_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 1875

# Parses the literal `true` or `false` at the cursor and returns the
# matching boolean, advancing @pos past it. The literal must be followed by
# end of input or a VALUE_TERMINATOR_BYTE; otherwise the DecodeError built
# by #err("expected value") is raised.
def parse_bool_value
  src = @src
  at = @pos
  width =
    if src.byteslice(at, 4) == "true" then 4
    elsif src.byteslice(at, 5) == "false" then 5
    end
  if width
    follower = src.getbyte(at + width)
    if follower.nil? || VALUE_TERMINATOR_BYTE[follower]
      @pos = at + width
      return width == 4
    end
  end
  raise err("expected value")
end

#parse_datetime_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 2283

# Parses a date or datetime literal at the cursor and returns a LocalDate,
# LocalDateTime, or OffsetDateTime, advancing @pos past the literal.
#
# Accepted shapes: YYYY-MM-DD, optionally followed by 'T' and HH:MM:SS,
# optionally followed by '.' and 1-9 fractional digits, optionally followed
# by 'Z'/'z' or a ±HH:MM offset.
#
# Raises the DecodeError built by #err for: invalid date or time fields (the
# message comes from validate_date / validate_time), a lowercase 't'
# separator, a space-separated time, a missing or malformed time after 'T',
# bad fractional digits, a bad or out-of-range offset, or an unexpected
# character after the literal.
def parse_datetime_value
  # A literal can never cross a line ending, so work on the current line
  # only. Copying the whole remainder of the document for every datetime
  # made parsing quadratic on inputs with many of them. LF/CR and
  # end-of-slice are handled the same by every check below, so the outcome
  # is unchanged.
  eol = @pos
  while eol < @len
    eb = @src.getbyte(eol)
    break if eb == LF || eb == CR
    eol += 1
  end
  rest = @src.byteslice(@pos, eol - @pos).force_encoding(Encoding::UTF_8)
  date = rest[0, 10]
  begin
    validate_date(date)
  rescue StandardError => e
    raise err(e.message)
  end
  # Guard against a nil slice should the line be shorter than a full date.
  rest2 = rest[10..] || ""
  if !rest2.start_with?("T") && !rest2.start_with?(" ")
    if rest2.start_with?("t")
      raise err("date and time separator must be uppercase 'T' (lowercase 't' not permitted)")
    end
    after = rest2[0]
    unless after.nil? || after == " " || after == "\t" || after == "\n" || after == "\r" ||
           after == "#" || after == "/" || after == "," || after == "]" || after == "}"
      raise err("invalid character after date")
    end
    @pos += 10
    return LocalDate.new(date)
  end
  if rest2.start_with?(" ")
    # A space may only be ordinary trailing whitespace; a digit after it
    # means the author tried to write "date time" without the 'T'.
    i = 0
    while i < rest2.length && (rest2[i] == " " || rest2[i] == "\t")
      i += 1
    end
    if i < rest2.length && rest2[i] >= "0" && rest2[i] <= "9"
      raise err("date and time must be separated by 'T' (space not permitted)")
    end
    @pos += 10
    return LocalDate.new(date)
  end
  after_t = rest2[1..]
  raise err("expected HH:MM:SS after 'T'") unless looks_like_time_str?(after_t)
  time_str = after_t[0, 8]
  begin
    validate_time(time_str)
  rescue StandardError => e
    raise err(e.message)
  end
  # date (10) + 'T' (1) + HH:MM:SS (8); every consumed character is ASCII,
  # so character counts equal byte counts when advancing @pos.
  consumed = 10 + 1 + 8
  after_time = rest[consumed..]
  frac_len = 0
  if after_time.start_with?(".")
    k = 1
    while k < after_time.length && after_time[k] >= "0" && after_time[k] <= "9"
      k += 1
    end
    digits = k - 1
    raise err("expected fractional digits after '.'") if digits == 0
    raise err("fractional seconds limited to 9 digits (nanosecond precision)") if digits > 9
    frac_len = k
  end
  consumed += frac_len
  after_frac = rest[consumed..]
  if after_frac.start_with?("Z") || after_frac.start_with?("z")
    consumed += 1
    s = rest[0, consumed]
    @pos += consumed
    return OffsetDateTime.new(s)
  end
  if after_frac.start_with?("+") || after_frac.start_with?("-")
    if after_frac.length < 6 ||
       !(after_frac[1] >= "0" && after_frac[1] <= "9") ||
       !(after_frac[2] >= "0" && after_frac[2] <= "9") ||
       after_frac[3] != ":" ||
       !(after_frac[4] >= "0" && after_frac[4] <= "9") ||
       !(after_frac[5] >= "0" && after_frac[5] <= "9")
      raise err("invalid offset; expected ±HH:MM")
    end
    oh = after_frac[1, 2].to_i
    om = after_frac[4, 2].to_i
    raise err("offset out of range") if oh > 23 || om > 59
    consumed += 6
    s = rest[0, consumed]
    @pos += consumed
    return OffsetDateTime.new(s)
  end
  after = after_frac[0]
  unless after.nil? || after == " " || after == "\t" || after == "\n" || after == "\r" ||
         after == "#" || after == "/" || after == "," || after == "]" || after == "}"
    raise err("invalid character after datetime")
  end
  s = rest[0, consumed]
  @pos += consumed
  LocalDateTime.new(s)
end

#parse_dec_float(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2181

# Converts an unsigned decimal float literal (sign already removed by the
# caller) to a Float.
#
# The mantissa must contain '.', with at least one digit on each side;
# underscores are allowed in the mantissa subject to valid_underscores?.
# An optional exponent follows 'e' or 'E': one optional sign, then one or
# more digits, no underscores.
#
# Raises RuntimeError with a descriptive message for each violation.
def parse_dec_float(s)
  e_idx = s.each_char.find_index { |c| c == "e" || c == "E" }
  m = e_idx.nil? ? s : s[0, e_idx]
  e = e_idx.nil? ? nil : s[e_idx + 1..]
  raise "decimal float requires '.'" unless m.include?(".")
  ip, fp = m.split(".", 2)
  raise "decimal float requires digit on both sides of '.'" if ip.empty? || fp.nil? || fp.empty?
  mantissa_char = ->(c) { c == "_" || (c >= "0" && c <= "9") }
  raise "invalid character in mantissa" unless ip.each_char.all?(&mantissa_char)
  raise "invalid character in mantissa" unless fp.each_char.all?(&mantissa_char)
  raise "bad underscore in mantissa" unless valid_underscores?(ip) && valid_underscores?(fp)
  full = ip.delete("_") + "." + fp.delete("_")
  if e
    # Strip at most one leading sign; whatever remains must be digits only.
    es_clean = e.sub(/\A[+-]/, "")
    raise "underscore not allowed in exponent" if es_clean.include?("_")
    # Checking the stripped text rejects a sign in any other position
    # ("1+2", "+-5"), which otherwise reached Float() and failed with
    # Ruby's generic ArgumentError message.
    raise "invalid character in exponent" unless es_clean.each_char.all? { |c| c >= "0" && c <= "9" }
    raise "empty exponent" if es_clean.empty?
    full = "#{full}e#{e}"
  end
  Float(full)
end

#parse_float_lit(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2164

# Converts a float literal with an optional leading sign to a Float.
# Literals whose unsigned part starts with 0x, 0o, or 0b are handed to
# parse_nondec_float; everything else goes to parse_dec_float. The sign is
# applied by multiplying with -1.0 or 1.0.
def parse_float_lit(s)
  negative = s.start_with?("-")
  unsigned = (negative || s.start_with?("+")) ? s[1..] : s
  magnitude =
    if unsigned.start_with?("0x", "0o", "0b")
      parse_nondec_float(unsigned)
    else
      parse_dec_float(unsigned)
    end
  (negative ? -1.0 : 1.0) * magnitude
end

#parse_flow_array ⇒ `Object`

———- Flow forms ———-

# File 'lib/dms/parser.rb', line 2862

# Parses a flow array (`[a, b, ...]`) whose '[' is at the cursor and returns
# its items as an Array, leaving @pos just past the closing ']'.
#
# Items are separated by ','; a trailing comma before ']' is accepted. While
# an item is being parsed its index is pushed onto @path, and popped again
# even if parsing raises.
#
# Raises the DecodeError built by #err for an unterminated array or an
# unexpected byte where ',' or ']' was expected.
def parse_flow_array
  @pos += 1 # [
  items = []
  loop do
    skip_flow_ws
    if @src.getbyte(@pos) == RBRACK
      @pos += 1
      return items
    end
    idx = items.length
    @path.push(idx)
    begin
      v = parse_inline_value_in_flow
    ensure
      @path.pop
    end
    items << v
    skip_flow_ws
    b = @src.getbyte(@pos)
    if b == COMMA
      @pos += 1
    elsif b == RBRACK
      @pos += 1
      return items
    elsif b.nil?
      raise err("unterminated flow array")
    else
      # Integer#chr on a byte >= 0x80 yields a binary fragment of a UTF-8
      # sequence, which would turn the whole message into a binary string.
      # Show '?' instead, as consume_after_value does.
      shown = b < 128 ? b.chr : "?"
      raise err("unexpected '#{shown}' in flow array; expected ',' or ']'")
    end
  end
end

#parse_flow_table ⇒ `Object`

# File 'lib/dms/parser.rb', line 2894

# Parses a flow table (`{k: v, ...}`) whose '{' is at the cursor and returns
# the table (passed through finalize_table), leaving @pos just past the
# closing '}'.
#
# Each entry is `key: value`; the ':' must be followed by whitespace or a
# line ending. Entries are separated by ','; a trailing comma before '}' is
# accepted. While a value is being parsed its key is pushed onto @path, and
# popped again even if parsing raises.
#
# Raises the DecodeError built by #err for a missing ':', missing whitespace
# after ':', a duplicate key, an unterminated table, or an unexpected byte
# where ',' or '}' was expected.
def parse_flow_table
  @pos += 1 # {
  t = new_table
  loop do
    skip_flow_ws
    if @src.getbyte(@pos) == RBRACE
      @pos += 1
      return finalize_table(t)
    end
    key = parse_key
    raise err("expected ':' after flow-table key") unless @src.getbyte(@pos) == COLON
    @pos += 1
    b = @src.getbyte(@pos)
    unless b == SP || b == TAB || b == LF || b == CR
      raise err("expected whitespace after ':'")
    end
    skip_flow_ws
    @path.push(key)
    begin
      v = parse_inline_value_in_flow
    ensure
      @path.pop
    end
    # The duplicate check runs after the value has been parsed.
    raise err("duplicate key: #{key}") if t.key?(key)
    t[key] = v
    skip_flow_ws
    b = @src.getbyte(@pos)
    if b == COMMA
      @pos += 1
    elsif b == RBRACE
      @pos += 1
      return finalize_table(t)
    elsif b.nil?
      raise err("unterminated flow table")
    else
      # Integer#chr on a byte >= 0x80 yields a binary fragment of a UTF-8
      # sequence, which would turn the whole message into a binary string.
      # Show '?' instead, as consume_after_value does.
      shown = b < 128 ? b.chr : "?"
      raise err("unexpected '#{shown}' in flow table; expected ',' or '}'")
    end
  end
end

#parse_front_matter ⇒ `Object`

———- Document entry ———-

# File 'lib/dms/parser.rb', line 1290

# Parses an optional `+++ ... +++` front-matter block at the cursor.
#
# Returns nil, with the cursor, pending comments, and recorded comments
# restored, when the next non-trivia content is not a `+++` opener.
# Otherwise the lines between the fences are parsed by a fresh sub-parser as
# a table, and a Hash of the non-reserved keys is returned. Comments and
# original-literal forms recorded by the sub-parser are re-attached under a
# leading "__fm__" path segment; those attached to reserved ('_'-prefixed)
# keys are demoted to floating comments or dropped respectively.
#
# Reserved keys: only `_dms_tier` is recognized, and only the value 0 is
# accepted by this method.
#
# Raises DecodeError when the opener is not alone on its line, the closing
# fence is missing, a line ends with a bare carriage return, or a reserved
# key is unknown or has an invalid value.
def parse_front_matter
  save_pos = @pos; save_line = @line; save_lstart = @line_start
  save_pending = @pending_leading.length
  save_comments = @comments.length
  skip_trivia
  unless starts_bytes?("+++")
    # Not front matter: undo everything skip_trivia did.
    @pos = save_pos; @line = save_line; @line_start = save_lstart
    @pending_leading.slice!(save_pending..)
    @comments.slice!(save_comments..)
    return nil
  end
  # Any trailing content on the opener line is a parse error
  # (SPEC §Front matter: "each `+++` must appear on its own line,
  # with no trailing content"). Advance past `+++` and let the
  # strict EOL check below diagnose.
  opener_line = @line; opener_lstart = @line_start; opener_pos = @pos
  # Errors about the block as a whole are reported at the opener.
  fm_err = ->(msg) { DecodeError.new(opener_line, opener_pos - opener_lstart + 1, msg) }
  @pos += 3
  skip_inline_ws
  unless consume_eol || eof?
    raise err("front matter opener must be on its own line")
  end
  inner_buf = +""
  inner_buf.force_encoding(Encoding::UTF_8)
  loop do
    if eof?
      raise fm_err.call("unterminated front matter: missing closing '+++'")
    end
    line_begin = @pos
    while @pos < @len
      b = @src.getbyte(@pos)
      break if b == LF || b == CR
      @pos += 1
    end
    line_text = @src.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    if line_text.strip == "+++"
      consume_eol
      break
    end
    inner_buf << line_text
    if consume_eol
      inner_buf << "\n"
    elsif !eof?
      # consume_eol accepts only LF or CRLF, so the cursor is on a lone CR.
      # Without this guard the loop would never advance.
      raise err("bare carriage return is not a valid line ending")
    end
  end
  sub = self.class.new(inner_buf, lite: @lite)
  table = sub.parse_body_as_table
  meta = {}
  table.each do |k, v|
    if k.start_with?("_")
      if k == "_dms_tier"
        # true/false are not Integers in Ruby, so this also rejects booleans.
        unless v.is_a?(Integer)
          raise fm_err.call("_dms_tier must be a non-negative integer")
        end
        raise fm_err.call("_dms_tier must be non-negative") if v < 0
        if v >= 2
          raise fm_err.call("_dms_tier: #{v} is not supported (only tier 0 and 1 are defined)")
        end
        if v == 1
          raise fm_err.call("_dms_tier: 1 requires tier-1 decode mode (use --tier=1)")
        end
      else
        raise fm_err.call("unknown reserved key: #{k}")
      end
    else
      meta[k] = v
    end
  end
  sub.comments.each do |ac|
    attached_to_reserved = !ac.path.empty? && ac.path[0].is_a?(String) && ac.path[0].start_with?("_")
    if attached_to_reserved
      # The reserved key is not part of meta, so its comment has no anchor.
      @comments << AttachedComment.new(ac.comment, :floating, ["__fm__"].freeze)
      next
    end
    @comments << AttachedComment.new(ac.comment, ac.position, (["__fm__"] + ac.path).freeze)
  end
  sub.original_forms.each do |path, lit|
    next if !path.empty? && path[0].is_a?(String) && path[0].start_with?("_")
    @original_forms << [(["__fm__"] + path).freeze, lit]
  end
  meta
end

#parse_heredoc_basic ⇒ `Object`

# File 'lib/dms/parser.rb', line 2509

# Parses a basic (`"""`) heredoc whose opener is at the cursor and returns
# the resulting string.
#
# Steps: skip the three quotes, read the optional label and modifiers,
# require end of line, collect body lines up to the terminator (the label,
# or `"""` when there is none), reject surrogate escapes, strip indentation
# and process continuations, apply the modifiers, and record the original
# literal form. A non-ASCII result is NFC-normalized.
#
# Raises the DecodeError built by #err when the opener line has trailing
# content or when applying a modifier fails.
def parse_heredoc_basic
  @pos += 3
  label = parse_heredoc_label
  modifiers = parse_heredoc_modifiers
  skip_inline_ws
  opener_ended = consume_eol || eof?
  raise err("heredoc opener must be followed by end of line") unless opener_ended

  body = collect_heredoc_body(label.empty? ? '"""' : label)
  # Basic heredoc bodies decode the same escapes as basic strings, so
  # surrogate code points in \u / \U escapes are rejected here as well.
  validate_heredoc_basic_surrogates(body)
  stripped = strip_indent_and_continuations(body, true)
  text =
    begin
      apply_modifiers(stripped, modifiers)
    rescue StandardError => e
      raise err(e.message)
    end

  calls = modifiers.map { |mod| HeredocModifierCall.new(mod[:name], mod[:args]) }
  form = StringForm.heredoc(:basic_triple, (label.empty? ? nil : label), calls)
  record_form(OriginalLiteral.string(form))
  return text if text.ascii_only?

  text.unicode_normalize(:nfc)
end

#parse_heredoc_label ⇒ `Object`

# File 'lib/dms/parser.rb', line 2608

# Reads an optional heredoc label at the cursor. Returns "" (without moving
# @pos) when the byte at the cursor is not a LABEL_START_BYTE; otherwise
# consumes the run of LABEL_CONT_BYTE bytes and returns it as a UTF-8 string.
def parse_heredoc_label
  first = @src.getbyte(@pos)
  return "" unless first && LABEL_START_BYTE[first]

  origin = @pos
  loop do
    byte = @src.getbyte(@pos)
    break unless byte && LABEL_CONT_BYTE[byte]
    @pos += 1
  end
  @src.byteslice(origin, @pos - origin).force_encoding(Encoding::UTF_8)
end

#parse_heredoc_literal ⇒ `Object`

# File 'lib/dms/parser.rb', line 2587

# Parses a literal (`'''`) heredoc whose opener is at the cursor and returns
# the resulting string.
#
# Steps: skip the three quotes, read the optional label and modifiers,
# require end of line, collect body lines up to the terminator (the label,
# or `'''` when there is none), strip indentation (the second argument to
# strip_indent_and_continuations is false here), apply the modifiers, and
# record the original literal form.
#
# Raises the DecodeError built by #err when the opener line has trailing
# content or when applying a modifier fails.
def parse_heredoc_literal
  @pos += 3
  label = parse_heredoc_label
  modifiers = parse_heredoc_modifiers
  skip_inline_ws
  opener_ended = consume_eol || eof?
  raise err("heredoc opener must be followed by end of line") unless opener_ended

  body = collect_heredoc_body(label.empty? ? "'''" : label)
  stripped = strip_indent_and_continuations(body, false)
  text =
    begin
      apply_modifiers(stripped, modifiers)
    rescue StandardError => e
      raise err(e.message)
    end

  calls = modifiers.map { |mod| HeredocModifierCall.new(mod[:name], mod[:args]) }
  form = StringForm.heredoc(:literal_triple, (label.empty? ? nil : label), calls)
  record_form(OriginalLiteral.string(form))
  text
end

#parse_heredoc_modifiers ⇒ `Object`

# File 'lib/dms/parser.rb', line 2618

# Reads zero or more heredoc modifiers following the label and returns them
# as an Array (in source order). Each modifier must be preceded by inline
# whitespace. When no further modifier follows, the cursor is rewound to
# before the whitespace that was just skipped.
def parse_heredoc_modifiers
  collected = []
  loop do
    mark = @pos
    skip_inline_ws
    byte = @src.getbyte(@pos)
    unless byte && LABEL_START_BYTE[byte]
      # Leave the whitespace for the caller.
      @pos = mark
      return collected
    end
    raise err("modifier must be preceded by whitespace") unless @pos > mark
    collected << parse_one_modifier
  end
end

#parse_inf_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 1895

# Parses the literal `inf` at the cursor and returns Float::INFINITY,
# advancing @pos past it. The literal must be followed by end of input or a
# VALUE_TERMINATOR_BYTE; otherwise the DecodeError built by
# #err("expected 'inf'") is raised.
def parse_inf_value
  follower = @src.getbyte(@pos + 3)
  matched = @src.byteslice(@pos, 3) == "inf" &&
            (follower.nil? || VALUE_TERMINATOR_BYTE[follower])
  raise err("expected 'inf'") unless matched

  @pos += 3
  Float::INFINITY
end

#parse_inline_value_in_flow ⇒ `Object`

# File 'lib/dms/parser.rb', line 2953

# Parses one value inside a flow array or flow table. Triple-quoted heredoc
# openers (`"""` or `'''`) are rejected with a DecodeError; everything else
# is delegated to parse_inline_value_or_heredoc.
def parse_inline_value_in_flow
  opener = @src.getbyte(@pos)
  heredoc_opener =
    (opener == DQUOTE && starts_bytes?('"""')) ||
    (opener == SQUOTE && starts_bytes?("'''"))
  raise err("heredocs are not allowed inside flow forms") if heredoc_opener

  parse_inline_value_or_heredoc
end

#parse_inline_value_or_heredoc ⇒ `Object`

# File 'lib/dms/parser.rb', line 1821

# Parses the value that starts at the cursor and returns it, dispatching on
# the first byte:
#
#   digits     - fast path for plain decimal integers, else number/datetime
#   '"' / "'"  - heredoc when tripled, otherwise basic / literal string
#   '[' / '{'  - flow array / flow table
#   't' / 'f'  - boolean
#   'i' / 'n'  - inf / nan
#   '+' / '-'  - number or datetime
#
# Raises the DecodeError built by #err when the cursor is at end of input or
# the first byte cannot start a value.
def parse_inline_value_or_heredoc
  b = @src.getbyte(@pos)
  # Fast path: plain decimal integer. Most hot-loop benchmarks parse
  # millions of these, so we recognize "[0-9]+ <terminator>" inline,
  # skipping number_or_datetime's full lookahead/scanner setup.
  if b && b >= DIGIT0 && b <= DIGIT9
    s = @src
    n = @len
    start = @pos
    p = start + 1
    while p < n
      bb = s.getbyte(p)
      break unless bb >= DIGIT0 && bb <= DIGIT9
      p += 1
    end
    # If next byte is a non-numeric value terminator and the token
    # length is safely within i64 (<=18 digits) and not a date/time
    # prefix, take the fast path.
    len = p - start
    if len <= 18 && (p >= n || VALUE_TERMINATOR_BYTE[s.getbyte(p)])
      # Reject leading-zero on multi-digit (e.g. "012") via slow path.
      if !(s.getbyte(start) == DIGIT0 && len > 1)
        @pos = p
        return s.byteslice(start, len).to_i
      end
    end
  end
  case b
  when DQUOTE
    return parse_heredoc_basic if starts_bytes?('"""')
    return parse_basic_string_value
  when SQUOTE
    return parse_heredoc_literal if starts_bytes?("'''")
    v = parse_literal_string_value
    record_form(OriginalLiteral.string(StringForm.literal))
    return v
  when LBRACK
    return parse_flow_array
  when LBRACE
    return parse_flow_table
  when LOWER_T, LOWER_F_LETTER
    return parse_bool_value
  when 0x69 # 'i'
    return parse_inf_value
  when LOWER_N
    return parse_nan_value
  end
  if b && (b == PLUS || b == MINUS || (b >= DIGIT0 && b <= DIGIT9))
    return parse_number_or_datetime
  end
  raise err("expected value") if b.nil?
  # Integer#chr on a byte >= 0x80 yields a binary fragment of a UTF-8
  # sequence, which would turn the whole message into a binary string.
  # Show '?' instead, as consume_after_value does.
  shown = b < 128 ? b.chr : "?"
  raise err("unexpected character '#{shown}' in value")
end

#parse_integer_lit(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2088

# Converts an integer literal string to an Integer.
#
# Accepts an optional leading '+' or '-', then either decimal digits or a
# lowercase radix prefix (0x, 0o, 0b) followed by digits of that base.
# Underscores are allowed only between two digits. Decimal literals may not
# have leading zeros. The result must fit in INT64_MIN..INT64_MAX.
#
# Raises RuntimeError with a descriptive message for each violation.
def parse_integer_lit(s)
  # Fast path: pure decimal digits, no underscore, no sign or just leading '-',
  # length such that no overflow check is needed (<=18 digits for unsigned,
  # <=19 with leading minus). Skip the per-char validation loop.
  bs = s.bytesize
  if bs > 0
    first = s.getbyte(0)
    first_digit_idx = (first == MINUS) ? 1 : 0
    digit_count = bs - first_digit_idx
    # 18 digits never overflow signed i64 (max 9.22e18); negatives same.
    if digit_count > 0 && digit_count <= 18
      fc = s.getbyte(first_digit_idx)
      if fc >= DIGIT0 && fc <= DIGIT9
        # Verify every remaining byte is a digit; anything else (underscore,
        # radix prefix letter, '+') falls through to the slow path.
        ok = true
        i = first_digit_idx + 1
        while i < bs
          b = s.getbyte(i)
          unless b >= DIGIT0 && b <= DIGIT9
            ok = false
            break
          end
          i += 1
        end
        if ok
          if fc == DIGIT0 && digit_count > 1
            raise "leading zeros are not allowed on decimal integers"
          end
          return s.to_i
        end
      end
    end
  end
  # Slow path: full parser for hex/oct/bin/underscored/edge cases.
  if s.start_with?("-")
    sign = -1; rest = s[1..]
  elsif s.start_with?("+")
    sign = 1; rest = s[1..]
  else
    sign = 1; rest = s
  end
  raise "hex prefix must be lowercase '0x'" if rest.start_with?("0X")
  if rest.start_with?("0x")
    radix = 16; body = rest[2..]
  elsif rest.start_with?("0o")
    radix = 8; body = rest[2..]
  elsif rest.start_with?("0b")
    radix = 2; body = rest[2..]
  else
    radix = 10; body = rest
  end
  raise "empty number" if body.empty?
  raise "underscore must be between digits" if body.start_with?("_") || body.end_with?("_")
  if radix == 10 && rest.length > 1 && rest.start_with?("0")
    raise "leading zeros are not allowed on decimal integers"
  end
  # The first `radix` characters of this string are the valid digits for the base.
  digit_chars = "0123456789abcdef"[0, radix]
  clean = +""
  prev_is_digit = false
  body.each_char do |c|
    if c == "_"
      # An underscore must directly follow a digit (rejects "__" as well).
      raise "underscore must be between digits" unless prev_is_digit
      prev_is_digit = false
    else
      # Digits are compared case-insensitively, so uppercase hex digits pass.
      unless digit_chars.include?(c.downcase)
        raise "invalid digit '#{c}' for base #{radix}"
      end
      clean << c
      prev_is_digit = true
    end
  end
  raise "underscore must be between digits" unless prev_is_digit
  n = sign * clean.to_i(radix)
  raise "integer out of i64 range" if n < INT64_MIN || n > INT64_MAX
  n
end

#parse_key ⇒ `Object`

———- Keys ———-

# File 'lib/dms/parser.rb', line 1757

# Parses a key at the cursor and returns it as a string. A key is a basic
# (double-quoted) string, a literal (single-quoted) string, or a bare key.
# Triple-quoted openers are rejected. While a quoted key is being parsed,
# @record_forms is switched off and restored afterwards, even if parsing
# raises.
#
# Raises the DecodeError built by #err at end of input or on a
# triple-quoted opener.
def parse_key
  opener = @src.getbyte(@pos)
  case opener
  when nil
    raise err("expected key")
  when DQUOTE
    raise err("triple-quoted strings are not allowed as keys") if starts_bytes?('"""')
    string_parser = :parse_basic_string_value
  when SQUOTE
    raise err("triple-quoted strings are not allowed as keys") if starts_bytes?("'''")
    string_parser = :parse_literal_string_value
  else
    return parse_bare_key
  end

  previous = @record_forms
  @record_forms = false
  begin
    send(string_parser)
  ensure
    @record_forms = previous
  end
end

#parse_kvpair(parent_indent) ⇒ `Object`

———- kvpair ———-

# File 'lib/dms/parser.rb', line 1670

# Parses one `key: value` entry starting at the cursor and returns
# [key, value].
#
# parent_indent - indent of the line holding the key; a child block on the
#                 following lines must be indented strictly deeper.
#
# The value is either inline on the same line (followed by optional trailing
# comments and the line ending) or, when nothing but whitespace and block
# comments follows the ':', an indented child block. The key is pushed onto
# @path while its value is parsed and popped on every normal return path;
# it is not popped when an error is raised.
#
# Raises the DecodeError built by #err for a missing ':', missing whitespace
# after ':', or a missing / insufficiently indented child block.
def parse_kvpair(parent_indent)
  # Inlined parse_key fast path for bare ASCII keys (common hot-loop case).
  # Only takes the fast path when the *next* byte after the key run is
  # ASCII too — otherwise the key may include trailing unicode chars
  # the slow path needs to consume.
  s = @src
  n = @len
  start = @pos
  b0 = s.getbyte(start)
  took_fast = false
  if b0 && b0 < 128 && BARE_KEY_BYTE[b0]
    p = start + 1
    while p < n
      bb = s.getbyte(p)
      break unless bb && bb < 128 && BARE_KEY_BYTE[bb]
      p += 1
    end
    # Only commit fast path if next byte is ASCII (i.e. truly key end).
    nb = (p < n) ? s.getbyte(p) : nil
    if nb.nil? || nb < 128
      @pos = p
      key = s.byteslice(start, p - start).force_encoding(Encoding::UTF_8)
      took_fast = true
    end
  end
  key = parse_key unless took_fast
  raise err("expected ':' after key") if @src.getbyte(@pos) != COLON
  @path.push(key)
  # Comments collected before this line become leading comments of this key.
  flush_pending_as_leading_on_current unless @pending_leading.empty?
  @pos += 1 # consume ':'
  b = @src.getbyte(@pos)
  if b == SP || b == TAB
    @pos += 1
    skip_inline_ws
    # Only enter the comment-capture loop if we see '/' (cheap byte check).
    capture_inner_block_comments if @src.getbyte(@pos) == SLASH
    nb = @src.getbyte(@pos)
    if nb.nil? || nb == LF || nb == CR
      # Nothing but whitespace/comments after ':' — the value is a child block.
      consume_eol
      skip_trivia
      raise err("expected indented child block") if @pos >= @len
      child_indent = measure_line_indent
      raise err("expected indented child block") if child_indent <= parent_indent
      v = parse_block_value(child_indent)
      @path.pop
      return [key, v]
    end
    v = parse_inline_value_or_heredoc
    # Fast path peek: consume optional inline ws, then if next byte is
    # LF we just jump past it. Anything else (including comments) falls
    # back to the full consume_after_value, which needs to see the ws.
    s2 = @src
    p2 = @pos
    while (bb = s2.getbyte(p2)) == SP || bb == TAB
      p2 += 1
    end
    if bb == LF
      @pos = p2 + 1
      advance_line
      @path.pop
      return [key, v]
    end
    if bb.nil?
      # End of input right after the value (and optional whitespace).
      @pos = p2
      @path.pop
      return [key, v]
    end
    # leave @pos before the ws so trailing-comment whitespace check sees it
    consume_after_value(false)
    @path.pop
    return [key, v]
  end
  if b.nil? || b == LF || b == CR
    # ':' directly followed by a line ending — the value is a child block.
    consume_eol
    skip_trivia
    raise err("expected indented child block") if @pos >= @len
    child_indent = measure_line_indent
    raise err("expected indented child block") if child_indent <= parent_indent
    v = parse_block_value(child_indent)
    @path.pop
    return [key, v]
  end
  raise err("expected whitespace after ':'")
end

#parse_list_block(indent) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1572

# Parses a block-style list whose '+' markers sit at column `indent`.
#
# Items are collected for as long as consecutive (trivia-separated) lines are
# indented exactly `indent` spaces and begin with '+'. The loop ends at EOF,
# at a line indented less than `indent`, or at a line that does not start
# with '+'. A line indented deeper than `indent` is an error.
#
# While an item is being parsed its index is pushed on @path; the index is
# always popped again, even when parsing raises.
#
# Returns the Array of parsed item values.
def parse_list_block(indent)
  elements = []
  loop do
    skip_trivia
    break if @pos >= @len
    line_indent = measure_line_indent
    break if line_indent < indent
    unless line_indent == indent
      raise err_at(@line, @line_start, @line_start + indent,
                   "inconsistent indent: expected #{indent} spaces, got #{line_indent}")
    end
    @pos = @line_start + indent
    reject_reserved_sigil_at_line_start!
    break unless @src.getbyte(@pos) == PLUS
    @path.push(elements.length)
    flush_pending_as_leading_on_current unless @pending_leading.empty?
    begin
      @pos += 1 # step over '+'
      after_marker = @src.getbyte(@pos)
      has_gap = after_marker == SP || after_marker == TAB
      unless has_gap || after_marker.nil? || after_marker == LF || after_marker == CR
        raise err("expected space after '+'")
      end
      # An inline value follows only when the marker is followed by
      # whitespace and then something other than end-of-line / EOF.
      inline_follows = false
      if has_gap
        @pos += 1
        skip_inline_ws
        capture_inner_block_comments
        upcoming = @src.getbyte(@pos)
        inline_follows = !(upcoming.nil? || upcoming == LF || upcoming == CR)
      end
      element =
        if inline_follows
          parse_list_item_value(indent)
        else
          # Bare '+' marker: the value is the more deeply indented block
          # that must follow on the next content line.
          consume_eol
          skip_trivia
          raise err("expected indented block after empty '+' marker") if @pos >= @len
          nested_indent = measure_line_indent
          raise err("expected indented block after empty '+' marker") if nested_indent <= indent
          parse_block_value(nested_indent)
        end
    ensure
      @path.pop
    end
    elements << element
  end
  flush_pending_as_floating
  elements
end

#parse_list_item_value(list_indent) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1635

# Parses the value that follows "+ " on a list-item line.
#
# If the rest of the line does not start a key/value pair, the value is a
# single inline value (or heredoc). Otherwise the item is a table: the first
# pair is read from the current column and every following line indented to
# exactly that column contributes a sibling pair.
#
# `list_indent` is accepted for interface compatibility; this method does not
# read it.
#
# Raises on a misaligned sibling key, on a '+' at the sibling-key column, and
# on duplicate keys.
def parse_list_item_value(list_indent)
  unless line_starts_kvpair?
    scalar = parse_inline_value_or_heredoc
    consume_after_value(false)
    return scalar
  end

  anchor_col = col - 1
  first_key, first_val = parse_kvpair(anchor_col)
  table = new_table
  table[first_key] = first_val
  loop do
    skip_trivia
    break if @pos >= @len
    line_indent = measure_line_indent
    break if line_indent < anchor_col
    unless line_indent == anchor_col
      raise err_at(@line, @line_start, @line_start + anchor_col,
                   "list-item table sibling key must align with first key")
    end
    @pos = @line_start + anchor_col
    reject_reserved_sigil_at_line_start!
    raise err("'+' marker at sibling-key column is ambiguous") if @src.getbyte(@pos) == PLUS
    break unless line_starts_kvpair?
    key, val = parse_kvpair(anchor_col)
    raise err("duplicate key: #{key}") if table.key?(key)
    table[key] = val
  end
  flush_pending_as_floating
  finalize_table(table)
end

#parse_literal_string_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 2455

# Reads a single-quoted literal string starting at @pos (which must sit on
# the opening quote). No escape processing is performed: the bytes between
# the quotes are returned verbatim as a UTF-8 String and @pos is left just
# past the closing quote.
#
# Raises if an LF or CR byte is met before the closing quote, or if the input
# ends first (that error is reported at the opening quote).
def parse_literal_string_value
  open_line = @line
  open_line_start = @line_start
  open_pos = @pos
  @pos += 1 # skip the opening quote
  body_start = @pos
  src = @src
  limit = @len
  until @pos >= limit
    case src.getbyte(@pos)
    when SQUOTE
      text = src.byteslice(body_start, @pos - body_start).force_encoding(Encoding::UTF_8)
      @pos += 1
      return text
    when LF, CR
      raise err("strings cannot span lines")
    end
    @pos += 1
  end
  raise err_at(open_line, open_line_start, open_pos, "unterminated string")
end

#parse_local_time_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 2378

# Parses a local time literal `HH:MM:SS[.fraction]` at @pos and returns a
# LocalTime built from the matched lexeme.
#
# The literal is scanned in place, byte by byte. Only a bounded window is
# copied for the HH:MM:SS check; the previous implementation duplicated the
# entire remainder of the document for every time value, which made parsing
# quadratic in documents with many times.
#
# Raises (via #err) when #validate_time rejects the first eight characters,
# when '.' is not followed by 1..9 digits, or when the literal is followed by
# a byte other than whitespace, EOL, '#', '/', ',', ']' or '}' (EOF is fine).
# @pos is advanced past the literal only on success.
def parse_local_time_value
  src = @src
  limit = @len
  # Eight characters occupy at most 32 bytes in UTF-8, so this window always
  # contains the same first eight characters as the full remainder would.
  window = src.byteslice(@pos, [limit - @pos, 32].min).force_encoding(Encoding::UTF_8)
  time_str = window[0, 8]
  begin
    validate_time(time_str)
  rescue StandardError => e
    raise err(e.message)
  end
  # validate_time only lets ASCII digits and ':' through, so the eight
  # characters are exactly eight bytes and byte offsets are safe from here.
  cursor = @pos + 8
  if cursor < limit && src.getbyte(cursor) == DOT
    frac_start = cursor + 1
    cursor = frac_start
    while cursor < limit
      byte = src.getbyte(cursor)
      break unless byte >= DIGIT0 && byte <= DIGIT9
      cursor += 1
    end
    digits = cursor - frac_start
    raise err("expected fractional digits after '.'") if digits == 0
    raise err("fractional seconds limited to 9 digits") if digits > 9
  end
  nxt = cursor < limit ? src.getbyte(cursor) : nil
  unless nxt.nil? || nxt == SP || nxt == TAB || nxt == LF || nxt == CR ||
         nxt == HASH || nxt == SLASH || nxt == COMMA || nxt == RBRACK || nxt == RBRACE
    raise err("invalid character after time")
  end
  lexeme = src.byteslice(@pos, cursor - @pos).force_encoding(Encoding::UTF_8)
  @pos = cursor
  LocalTime.new(lexeme)
end

#parse_modifier_call_args ⇒ `Object`

# File 'lib/dms/parser.rb', line 2653

# Parses the argument list of a modifier call. @pos must sit just after the
# opening '('. Arguments are inline values separated by ','; inline
# whitespace around them is skipped. A ')' is accepted both directly after
# '(' and directly after a ',' (so an empty list and a trailing comma both
# parse).
#
# Returns the Array of parsed argument values with @pos just past ')'.
# Raises when the input ends or any other byte appears where ',' or ')' is
# expected.
def parse_modifier_call_args
  close_paren = 0x29 # ')'
  collected = []
  loop do
    skip_inline_ws
    lookahead = @src.getbyte(@pos)
    if lookahead == close_paren
      @pos += 1
      break
    end
    raise err("expected ',' or ')' in modifier args") if lookahead.nil?
    collected << parse_inline_value_or_heredoc
    skip_inline_ws
    case @src.getbyte(@pos)
    when COMMA
      @pos += 1
    when close_paren
      @pos += 1
      break
    else
      raise err("expected ',' or ')' in modifier args")
    end
  end
  collected
end

#parse_nan_value ⇒ `Object`

# File 'lib/dms/parser.rb', line 1906

# Parses the literal `nan` at @pos and returns Float::NAN.
#
# The three bytes must be followed by EOF or by a byte flagged in
# VALUE_TERMINATOR_BYTE; otherwise (or when the bytes are not "nan") an
# error is raised and @pos is left unchanged.
def parse_nan_value
  raise err("expected 'nan'") unless @src.byteslice(@pos, 3) == "nan"
  follower = @src.getbyte(@pos + 3)
  raise err("expected 'nan'") unless follower.nil? || VALUE_TERMINATOR_BYTE[follower]
  @pos += 3
  Float::NAN
end

#parse_nondec_float(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2208

# Converts a non-decimal float lexeme (`0x` / `0o` / `0b` prefix, mantissa
# with an optional '.', mandatory `p` binary exponent) to a Float.
#
# s - the lexeme String, e.g. "0x1.8p3".
#
# The value is (integer part + fractional part) * 2**exponent, where the
# mantissa digits are read in the prefix's radix and the exponent is always
# a base-10 integer.
#
# Raises RuntimeError with a descriptive message for a missing prefix or
# exponent, underscores in the exponent, misplaced underscores or invalid
# digits in the mantissa, and a '.' lacking digits on either side. Raises
# ArgumentError (from Integer) when the exponent characters do not form a
# valid signed decimal integer.
def parse_nondec_float(s)
  radix =
    if s.start_with?("0x") then 16
    elsif s.start_with?("0o") then 8
    elsif s.start_with?("0b") then 2
    else raise "non-decimal float prefix required"
    end
  rest = s[2..]
  p_idx = rest.index("p")
  raise "non-decimal float requires 'p' exponent" if p_idx.nil?
  mant = rest[0, p_idx]
  exp_str = rest[p_idx + 1..]
  raise "empty exponent" if exp_str.nil? || exp_str.empty?
  raise "underscore not allowed in exponent" if exp_str.include?("_")
  raise "invalid exponent character" unless exp_str.each_char.all? { |c| c == "+" || c == "-" || (c >= "0" && c <= "9") }
  # Explicit base 10: a bare Integer(str) honours radix prefixes, so an
  # exponent written with a leading zero ("010") would be read as octal (8)
  # and "08" would be rejected outright.
  exp = Integer(exp_str, 10)
  if mant.include?(".")
    ip, fp = mant.split(".", 2)
    raise "digit required on both sides of '.'" if ip.empty? || fp.nil? || fp.empty?
  else
    ip = mant
    fp = ""
  end
  raise "bad underscore in mantissa" unless valid_underscores?(ip) && valid_underscores?(fp)
  ip_clean = ip.delete("_")
  fp_clean = fp.delete("_")
  digit_chars = "0123456789abcdef"[0, radix]
  raise "invalid digit for base #{radix}" unless ip_clean.each_char.all? { |c| digit_chars.include?(c.downcase) }
  raise "invalid digit for base #{radix}" unless fp_clean.each_char.all? { |c| digit_chars.include?(c.downcase) }
  int_val = ip_clean.empty? ? 0 : ip_clean.to_i(radix)
  # Accumulate the fractional digits: digit k contributes d / radix**k.
  frac_val = 0.0
  div = radix.to_f
  fp_clean.each_char do |c|
    frac_val += c.to_i(radix) / div
    div *= radix
  end
  (int_val + frac_val) * (2.0 ** exp)
end

#parse_number_or_datetime ⇒ `Object`

# File 'lib/dms/parser.rb', line 1949

# Parses the numeric-looking value at @pos. Dispatch order:
#   1. unsigned input that looks like a date prefix -> parse_datetime_value
#   2. unsigned input that looks like a time prefix -> parse_local_time_value
#   3. "+inf" / "-inf" followed by EOF or a value terminator -> +/-Infinity
#   4. otherwise the token from scan_number_token, parsed as a float or an
#      integer depending on the scanner's verdict.
#
# @pos is advanced past the token only after it has been parsed
# successfully. Literal-parsing failures are re-raised through #err; float
# failures are additionally prefixed with "invalid float: <lexeme>".
def parse_number_or_datetime
  s = @src
  p = @pos
  first = s.getbyte(p)
  starts_sign = first == PLUS || first == MINUS
  if !starts_sign && looks_like_date_prefix_at?(p)
    return parse_datetime_value
  end
  if !starts_sign && looks_like_time_prefix_at?(p)
    return parse_local_time_value
  end
  # Signed infinity: sign byte + "inf" is 4 bytes, and must not run into
  # further value characters.
  if starts_sign && s.byteslice(p + 1, 3) == "inf"
    after = s.getbyte(p + 4)
    if after.nil? || VALUE_TERMINATOR_BYTE[after]
      neg = first == MINUS
      @pos += 4
      return neg ? -Float::INFINITY : Float::INFINITY
    end
  end
  # scan_number_token only measures the token; it does not move @pos.
  tok_len, is_float = scan_number_token
  lex = s.byteslice(p, tok_len).force_encoding(Encoding::UTF_8)
  if is_float
    f =
      begin
        parse_float_lit(lex)
      rescue StandardError => e
        raise err("invalid float: #{lex} (#{e.message})")
      end
    @pos += tok_len
    return f
  end
  n =
    begin
      parse_integer_lit(lex)
    rescue StandardError => e
      raise err(e.message)
    end
  @pos += tok_len
  # Record original lexeme only if it differs from canonical form.
  # Fast cheap test: if lex contains '_', '+', or starts with '0' followed
  # by a non-digit (hex/oct/bin prefix marker), it's non-canonical. Simple
  # decimal integers like "42" / "-7" map directly to n.to_s and need no
  # entry; skip the to_s allocation in that common case.
  # NOTE(review): unlike #record_form, this site checks only @record_forms
  # and not @lite -- confirm that is intentional.
  if @record_forms
    bs = lex.bytesize
    first = lex.getbyte(0)
    possibly_non_canonical =
      lex.include?("_") || first == PLUS ||
      (first == DIGIT0 && bs > 1) ||
      (first == MINUS && bs > 1 && lex.getbyte(1) == DIGIT0)
    if possibly_non_canonical && lex != n.to_s
      @original_forms << [@path.dup.freeze, OriginalLiteral.integer(lex)]
    end
  end
  n
end

#parse_one_modifier ⇒ `Object`

# File 'lib/dms/parser.rb', line 2635

# Parses one modifier call of the form `name(args...)` starting at @pos.
#
# The name is the run of bytes flagged in LABEL_CONT_BYTE; it must be
# followed immediately by '('. Original-form recording (@record_forms) is
# switched off while the arguments are parsed and restored afterwards, even
# if argument parsing raises.
#
# Returns a Hash `{ name: String, args: Array }`.
def parse_one_modifier
  name_start = @pos
  loop do
    byte = @src.getbyte(@pos)
    break unless byte && LABEL_CONT_BYTE[byte]
    @pos += 1
  end
  modifier_name = @src.byteslice(name_start, @pos - name_start).force_encoding(Encoding::UTF_8)
  open_paren = 0x28 # '('
  raise err("modifiers require parentheses") unless @src.getbyte(@pos) == open_paren
  @pos += 1
  previous_flag = @record_forms
  @record_forms = false
  call_args =
    begin
      parse_modifier_call_args
    ensure
      @record_forms = previous_flag
    end
  { name: modifier_name, args: call_args }
end

#parse_table_block(indent) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1545

# Parses a block-style table whose keys all start at column `indent`.
#
# Key/value pairs are read for as long as consecutive (trivia-separated)
# lines are indented exactly `indent` spaces. The loop ends at EOF or at a
# line indented less than `indent`; a deeper line is an error, as is a
# repeated key.
#
# Returns the result of finalize_table on the collected table.
def parse_table_block(indent)
  table = new_table
  loop do
    skip_trivia
    break if @pos >= @len
    # Count the leading spaces of the current line in place.
    cursor = @line_start
    cursor += 1 while cursor < @len && @src.getbyte(cursor) == SP
    width = cursor - @line_start
    break if width < indent
    unless width == indent
      raise err_at(@line, @line_start, @line_start + indent,
                   "inconsistent indent: expected #{indent} spaces, got #{width}")
    end
    @pos = @line_start + indent
    reject_reserved_sigil_at_line_start!
    key, value = parse_kvpair(indent)
    raise err("duplicate key: #{key}") if table.key?(key)
    table[key] = value
  end
  flush_pending_as_floating
  finalize_table(table)
end

#peek_after_plus_is_space_or_eol? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1425

# True when the byte right after @pos is a space, tab, LF or CR, or when
# there is no such byte (end of input). @pos is not moved.
def peek_after_plus_is_space_or_eol?
  case @src.getbyte(@pos + 1)
  when nil, SP, TAB, LF, CR then true
  else false
  end
end

#peek_byte ⇒ `Object`

peek: returns the byte at @pos as Integer, or nil at EOF.



1073
1074
1075

# File 'lib/dms/parser.rb', line 1073

# Returns the byte at the current position as an Integer, or nil when @pos
# is at or beyond the end of @src. Does not advance.
def peek_byte
  cursor = @pos
  @src.getbyte(cursor)
end

#peek_char_byte_safe ⇒ `Object`

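peek_char_byte_safe: returns the character at @pos as a String, or nil at EOF. Used in error messages and a handful of dispatch sites. A single-byte ASCII character is returned directly (the common case); for a non-ASCII lead byte the full multi-byte UTF-8 character is read.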

# File 'lib/dms/parser.rb', line 1080

# Returns the character starting at @pos as a String, or nil at end of
# input. A byte below 128 is returned directly; otherwise up to four bytes
# are sliced, tagged as UTF-8, and the first character of that slice is
# returned. Does not advance @pos.
def peek_char_byte_safe
  lead = @src.getbyte(@pos)
  if lead.nil?
    nil
  elsif lead < 128
    lead.chr
  else
    window = @src.byteslice(@pos, 4)
    window.force_encoding(Encoding::UTF_8)[0]
  end
end

#per_line_edges(s, char_set, replacement) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2810

# Applies replace_leading_run and then replace_trailing_run to every line of
# `s` independently. Lines are split on "\n" with trailing empty fields kept,
# and joined back with "\n".
def per_line_edges(s, char_set, replacement)
  rebuilt = s.split("\n", -1).map do |line|
    head_fixed = replace_leading_run(line, char_set, replacement)
    replace_trailing_run(head_fixed, char_set, replacement)
  end
  rebuilt.join("\n")
end

#read_c_block_comment ⇒ `Object`

# File 'lib/dms/parser.rb', line 1220

# Reads a `/* ... */` block comment whose opening "/*" starts at @pos.
#
# Comments nest: every inner "/*" must be matched by its own "*/". LF and
# CRLF inside the comment go through advance_line so line tracking stays
# correct. Returns the raw comment text, delimiters included, as a UTF-8
# String and leaves @pos just past the final "*/".
#
# Raises (located at the opener) when input ends before the comment closes.
def read_c_block_comment
  open_line = @line
  open_line_start = @line_start
  open_pos = @pos
  @pos += 2 # skip the opening "/*"
  src = @src
  limit = @len
  nesting = 1
  until nesting.zero?
    if @pos >= limit
      raise err_at(open_line, open_line_start, open_pos, "unterminated /* block comment")
    end
    here = src.getbyte(@pos)
    following = src.getbyte(@pos + 1)
    if here == SLASH && following == STAR
      @pos += 2
      nesting += 1
    elsif here == STAR && following == SLASH
      @pos += 2
      nesting -= 1
    elsif here == LF
      @pos += 1
      advance_line
    elsif here == CR && following == LF
      @pos += 2
      advance_line
    else
      @pos += 1
    end
  end
  src.byteslice(open_pos, @pos - open_pos).force_encoding(Encoding::UTF_8)
end

#read_hash_block_comment ⇒ `Object`

# File 'lib/dms/parser.rb', line 1246

# Reads a `###` block comment whose opener starts at @pos.
#
# The opener may be followed immediately by a label (a run of
# LABEL_CONT_BYTE bytes that must begin with a letter or underscore) and
# must otherwise be alone on its line. The comment ends at the first later
# line whose stripped text equals the label, or "###" when no label was
# given.
#
# Returns the raw text from the opener through the terminator line (without
# that line's end-of-line) as a UTF-8 String. Raises for a bad label, for
# trailing content after the opener, and when input ends before the
# terminator is found.
def read_hash_block_comment
  # Remember where the opener is so errors can point back at it.
  sl = @line; sls = @line_start; sp = @pos
  @pos += 3
  ls = @pos
  s = @src
  n = @len
  # Collect the optional label that directly follows "###".
  while @pos < n
    b = s.getbyte(@pos)
    break unless LABEL_CONT_BYTE[b]
    @pos += 1
  end
  label = s.byteslice(ls, @pos - ls).force_encoding(Encoding::UTF_8)
  if !label.empty?
    first = label.getbyte(0)
    unless first == UNDERSCORE || (first >= LOWER_A && first <= LOWER_Z) || (first >= UPPER_A && first <= UPPER_Z)
      raise err_at(sl, sls, sp, "block comment label must start with a letter or underscore")
    end
  end
  terminator = label.empty? ? "###" : label
  skip_inline_ws
  # consume_eol's result is used as a boolean here: either an EOL was
  # consumed or we must already be at EOF.
  unless consume_eol || eof?
    raise err("block comment opener must be on its own line")
  end
  # Walk the body one line at a time until the terminator line shows up.
  loop do
    if eof?
      raise err_at(sl, sls, sp, "unterminated ### block comment")
    end
    line_begin = @pos
    while @pos < n
      b = s.getbyte(@pos)
      break if b == LF || b == CR
      @pos += 1
    end
    line_text = s.byteslice(line_begin, @pos - line_begin).force_encoding(Encoding::UTF_8)
    # Capture the end of the line before the EOL is consumed so the returned
    # text stops at the terminator itself.
    line_end = @pos
    consume_eol
    if line_text.strip == terminator
      return s.byteslice(sp, line_end - sp).force_encoding(Encoding::UTF_8)
    end
  end
end

#read_hex_codepoint(n) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2476

# Reads exactly `n` hex digits at @pos (the digits of a unicode escape) and
# returns the corresponding character as a UTF-8 String.
#
# n - number of hex digits to consume.
#
# @pos is advanced past the digits once they have been validated as hex,
# i.e. before the value checks below run.
#
# Raises when fewer than `n` bytes remain, when any of them is not a hex
# digit, when the value is zero, when it is a surrogate (U+D800..U+DFFF),
# or when Integer#chr rejects it with RangeError.
def read_hex_codepoint(n)
  raise err("expected #{n} hex digits in unicode escape") if @pos + n > @len
  digits = @src.byteslice(@pos, n).force_encoding(Encoding::UTF_8)
  unless digits.each_byte.all? { |byte| HEX_BYTE[byte] }
    raise err("invalid hex in unicode escape: #{digits}")
  end
  code = digits.to_i(16)
  @pos += n
  # SPEC: U+0000 is forbidden anywhere in DMS source, including via escape
  # decoding, so neither the 4-digit nor the 8-digit all-zero escape may
  # slip through.
  raise err("\\u0000 escape forbidden") if code == 0
  if code >= 0xD800 && code <= 0xDFFF
    raise err(format("surrogate codepoint U+%04X in escape", code))
  end
  begin
    code.chr(Encoding::UTF_8)
  rescue RangeError
    raise err("unicode escape is not a scalar value")
  end
end

#read_line_comment_to_eol ⇒ `Object`

———- Raw comment readers ———-

# File 'lib/dms/parser.rb', line 1206

# Returns the text from @pos up to (not including) the next LF or CR byte,
# or up to @len when there is none, as a UTF-8 String. @pos is left on the
# line terminator (or at @len); the terminator itself is not consumed.
def read_line_comment_to_eol
  src = @src
  limit = @len
  from = @pos
  stop = from
  until stop >= limit
    byte = src.getbyte(stop)
    break if byte == LF || byte == CR
    stop += 1
  end
  @pos = stop
  src.byteslice(from, stop - from).force_encoding(Encoding::UTF_8)
end

#record_form(lit) ⇒ `Object`

———- Original-form recording ———-

# File 'lib/dms/parser.rb', line 3002

# Appends `[frozen copy of @path, lit]` to @original_forms, but only when
# the parser is not in lite mode and form recording is currently enabled.
# Returns nil when nothing is recorded.
def record_form(lit)
  if !@lite && @record_forms
    location = @path.dup.freeze
    @original_forms << [location, lit]
  end
end

#reject_reserved_sigil_at_line_start! ⇒ `Object`

SPEC tier-0: reject reserved decorator sigils at line-start position. Caller has already consumed leading whitespace + trivia, so @pos sits on the first non-whitespace byte of a body line. If that byte is one of the 17 reserved sigils (! @ $ % ^ & * | ~ ‘ . , > < ? ; =), raise. The check is only valid here — string bodies, comments, and heredoc bodies are parsed by their own readers and never reach this dispatch.

# File 'lib/dms/parser.rb', line 1436

# Raises when the byte at @pos is flagged in RESERVED_SIGIL_BYTE. Does
# nothing (returns nil) at or past @len, or when the byte is not reserved.
# The caller is expected to have positioned @pos on the first
# non-whitespace byte of a body line.
def reject_reserved_sigil_at_line_start!
  byte = @pos < @len ? @src.getbyte(@pos) : nil
  return unless byte && RESERVED_SIGIL_BYTE[byte]
  raise err("reserved decorator sigil '#{byte.chr}' at line start is not allowed")
end

#replace_all_runs(s, char_set, replacement) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2774

# Returns a new String in which every maximal run of characters contained
# in `char_set` is replaced by a single `replacement`; all other characters
# are copied through unchanged.
#
# char_set only needs to respond to include?(char).
def replace_all_runs(s, char_set, replacement)
  result = +""
  inside_run = false
  s.each_char do |ch|
    if char_set.include?(ch)
      # Emit the replacement once, at the first character of the run.
      result << replacement unless inside_run
      inside_run = true
    else
      result << ch
      inside_run = false
    end
  end
  result
end

#replace_leading_run(s, char_set, replacement) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2792

# Replaces the leading run of characters contained in `char_set` with one
# `replacement`. When `s` does not start with such a character, `s` itself
# is returned (same object).
def replace_leading_run(s, char_set, replacement)
  run_length = s.each_char.take_while { |ch| char_set.include?(ch) }.length
  return s if run_length.zero?
  replacement + s[run_length..]
end

#replace_trailing_run(s, char_set, replacement) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2801

# Replaces the trailing run of characters contained in `char_set` with one
# `replacement`. When `s` does not end with such a character, `s` itself is
# returned (same object).
def replace_trailing_run(s, char_set, replacement)
  run_length = s.chars.reverse.take_while { |ch| char_set.include?(ch) }.length
  return s if run_length.zero?
  s[0, s.length - run_length] + replacement
end

#scan_number_token ⇒ `Object`

# File 'lib/dms/parser.rb', line 2006

# Measures the numeric token that starts at @pos without moving @pos.
#
# Returns a two-element Array `[byte_length, is_float]`. The scan is
# deliberately permissive (it does not check underscore placement, digit
# validity for the radix, or that an exponent has digits); the caller hands
# the resulting lexeme to the literal parsers for validation.
def scan_number_token
  s = @src
  n = @len
  i = @pos
  start = i
  first = s.getbyte(i)
  # Optional leading sign.
  if first == PLUS || first == MINUS
    i += 1
  end
  # A radix prefix is '0' followed by lowercase x, o or b.
  is_prefixed = false
  if i + 1 < n && s.getbyte(i) == DIGIT0
    nb = s.getbyte(i + 1)
    if nb == LOWER_X || nb == LOWER_O || nb == LOWER_B
      is_prefixed = true
    end
  end
  saw_dot = false; saw_p = false; saw_e = false
  if is_prefixed
    i += 2
    # Prefixed form: digits/underscores, at most one '.', and at most one
    # 'p' exponent marker (optionally followed by a sign). A '.' is only
    # accepted before the 'p'.
    while i < n
      b = s.getbyte(i)
      if b == UNDERSCORE || HEX_BYTE[b]
        i += 1
      elsif b == DOT && !saw_dot && !saw_p
        saw_dot = true; i += 1
      elsif b == LOWER_P && !saw_p
        saw_p = true; i += 1
        nb = s.getbyte(i)
        if nb == PLUS || nb == MINUS
          i += 1
        end
      # NOTE(review): presumably unreachable if HEX_BYTE flags the decimal
      # digits, since the first branch would already have taken them --
      # confirm against the HEX_BYTE table.
      elsif saw_p && DIGIT_BYTE[b]
        i += 1
      else
        break
      end
    end
    # A prefixed token is a float when it has a '.' or a 'p' exponent.
    return [i - start, saw_dot || saw_p]
  end
  # Decimal form: digits/underscores, at most one '.', and at most one
  # 'e'/'E' exponent marker (optionally followed by a sign). A '.' is only
  # accepted before the exponent.
  while i < n
    b = s.getbyte(i)
    if DIGIT_BYTE[b] || b == UNDERSCORE
      i += 1
    elsif b == DOT && !saw_dot && !saw_e
      saw_dot = true; i += 1
    elsif (b == LOWER_E || b == UPPER_E) && !saw_e
      saw_e = true; i += 1
      nb = s.getbyte(i)
      if nb == PLUS || nb == MINUS
        i += 1
      end
    else
      break
    end
  end
  # A decimal token is a float when it has a '.' or an exponent.
  [i - start, saw_dot || saw_e]
end

#skip_flow_ws ⇒ `Object`

# File 'lib/dms/parser.rb', line 2934

# Skips whitespace inside a flow form: spaces, tabs, LF and CRLF (line
# breaks go through advance_line). Stops, without consuming, at the first
# other byte or at end of input.
#
# Raises when it meets the start of a comment ('#', "//" or "/*"), since
# comments are not allowed inside flow forms.
def skip_flow_ws
  loop do
    byte = @src.getbyte(@pos)
    case byte
    when SP, TAB
      @pos += 1
    when LF
      @pos += 1
      advance_line
    when HASH
      raise err("comments not allowed inside flow forms")
    else
      following = @src.getbyte(@pos + 1)
      if byte == CR && following == LF
        @pos += 2
        advance_line
      elsif byte == SLASH && (following == SLASH || following == STAR)
        raise err("comments not allowed inside flow forms")
      else
        return
      end
    end
  end
end

#skip_inline_ws ⇒ `Object`

———- Whitespace / EOL ———-

# File 'lib/dms/parser.rb', line 1109

# Advances @pos past any run of spaces and tabs, never beyond @len. Line
# terminators are not consumed.
def skip_inline_ws
  cursor = @pos
  cursor += 1 while cursor < @len && ((byte = @src.getbyte(cursor)) == SP || byte == TAB)
  @pos = cursor
end

#skip_trivia ⇒ `Object`

# File 'lib/dms/parser.rb', line 1136

# Skips blank lines and comments, one line per loop iteration.
#
# On return @pos is reset to the position where the current iteration began
# (before any inline whitespace was skipped), either because content was
# found or because the input ended.
#
# Comments met along the way are appended to @pending_leading (unless the
# parser is in lite mode); a blank line triggers flush_pending_as_floating
# first. Raises on a CR that is not followed by LF.
def skip_trivia
  loop do
    line_start_pos = @pos
    skip_inline_ws
    b = @src.getbyte(@pos)
    if b.nil?
      # End of input: undo the whitespace skip and stop.
      @pos = line_start_pos
      return
    elsif b == LF
      # Blank line.
      flush_pending_as_floating
      @pos += 1; advance_line
    elsif b == CR
      if @src.getbyte(@pos + 1) != LF
        raise err("bare CR is not a valid line terminator")
      end
      # Blank line terminated by CRLF.
      flush_pending_as_floating
      @pos += 2; advance_line
    elsif b == HASH
      if starts_bytes?("###")
        raw = read_hash_block_comment
        @pending_leading << Comment.new(raw, :block) unless @lite
      else
        # '#' line comment: read to end of line, then consume the EOL.
        raw = read_line_comment_to_eol
        consume_eol
        @pending_leading << Comment.new(raw, :line) unless @lite
      end
    elsif b == SLASH
      n2 = @src.getbyte(@pos + 1)
      if n2 == SLASH
        # '//' line comment.
        raw = read_line_comment_to_eol
        consume_eol
        @pending_leading << Comment.new(raw, :line) unless @lite
      elsif n2 == STAR
        # '/* ... */' block comment.
        raw = read_c_block_comment
        @pending_leading << Comment.new(raw, :block) unless @lite
      else
        # A lone '/' is not a comment: treat it as content.
        @pos = line_start_pos
        return
      end
    else
      # Content: rewind to where this iteration started and stop.
      @pos = line_start_pos
      return
    end
  end
end

#starts_bytes?(s) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1088

# True when the bytes of `s` appear in @src starting at @pos. Returns false
# when fewer than s.bytesize bytes remain before @len. The comparison is
# byte-for-byte, so string encodings play no part.
def starts_bytes?(s)
  width = s.bytesize
  return false if @pos + width > @len
  width.times.all? { |offset| @src.getbyte(@pos + offset) == s.getbyte(offset) }
end

#strip_indent_and_continuations(body, allow_cont) ⇒ `Object`

———- Heredoc body processing ———-

# File 'lib/dms/parser.rb', line 2710

# Builds the text of a heredoc from its collected body lines.
#
# body       - object exposing `lines` (each yielding text, line number and
#              line start) and `strip_depth`; presumably an HBody -- confirm
#              against the caller.
# allow_cont - when true, a line ending in an unescaped backslash is
#              spliced onto the following line.
#
# For every non-blank line the first `strip_depth` characters are removed; a
# line with fewer leading spaces than that raises DecodeError. Lines
# consisting only of spaces and tabs become empty. Lines are joined with
# "\n" unless the previous line ended in a continuation.
#
# Returns the assembled UTF-8 String. Raises DecodeError when the last line
# ends in a continuation.
def strip_indent_and_continuations(body, allow_cont)
  out = +""
  out.force_encoding(Encoding::UTF_8)
  first = true
  # True when the previous line ended with a line continuation.
  pending = false
  last_pos = [1, 0]
  body.lines.each do |text, line_no, line_start|
    last_pos = [line_no, line_start]
    is_blank = text.each_char.all? { |c| c == " " || c == "\t" }
    if is_blank
      stripped = ""
    else
      # Only spaces count towards the indent (tabs do not).
      leading = 0
      text.each_char do |c|
        break unless c == " "
        leading += 1
      end
      if leading < body.strip_depth
        raise DecodeError.new(line_no, leading + 1,
                             "heredoc body line indented #{leading} spaces, less than strip depth #{body.strip_depth}")
      end
      stripped = text[body.strip_depth..]
    end
    piece = stripped
    splice = false
    if allow_cont
      # Look for a backslash at the end of the line, ignoring trailing
      # spaces and tabs.
      trimmed_end = piece.sub(/[ \t]+\z/, "")
      idx = trimmed_end.rindex("\\")
      if idx && idx == trimmed_end.length - 1
        # Count the backslashes immediately before the final one: an even
        # count means the final backslash is not itself escaped.
        preceding = 0
        k = idx - 1
        while k >= 0 && trimmed_end[k] == "\\"
          preceding += 1
          k -= 1
        end
        if preceding.even?
          piece = trimmed_end[0, idx]
          splice = true
        end
      end
    end
    if first
      out << piece
      first = false
    elsif pending
      # Continuation target: drop its leading whitespace and append with no
      # newline; a blank target contributes nothing.
      trimmed_start = piece.sub(/\A[ \t]+/, "")
      out << trimmed_start unless is_blank
    else
      out << "\n"
      out << piece
    end
    pending = splice
  end
  if pending
    raise DecodeError.new(last_pos[0], 1, "trailing line continuation has nothing to splice to")
  end
  out
end

#utf8_char_len(b) ⇒ `Object`

# File 'lib/dms/parser.rb', line 1500

# Maps a byte value to a UTF-8 sequence length:
#   below 0x80 -> 1, below 0xE0 -> 2, below 0xF0 -> 3, anything else -> 4.
# Note that bytes in 0x80..0xBF also map to 2.
def utf8_char_len(b)
  if b < 0x80 then 1
  elsif b < 0xE0 then 2
  elsif b < 0xF0 then 3
  else 4
  end
end

#valid_underscores?(s) ⇒ `Boolean`

———- Numeric helpers ———-

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 2066

# True when the underscores in `s` are placed legally: none at the very
# start, none at the very end, and never two in a row. An empty string is
# valid.
def valid_underscores?(s)
  return true if s.empty?
  return false if s.start_with?("_") || s.end_with?("_")
  !s.include?("__")
end

#validate_date(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2260

# Checks that `s` is a calendar date written as YYYY-MM-DD.
#
# Raises RuntimeError when the shape is wrong, when a digit position holds
# a non-digit, when the month is outside 1..12, or when the day is outside
# 1..days_in_month(year, month). Returns nil for a valid date.
def validate_date(s)
  well_formed = s.length == 10 && s[4] == "-" && s[7] == "-"
  raise "invalid date format" unless well_formed
  digit_chars = [0, 1, 2, 3, 5, 6, 8, 9].map { |index| s[index] }
  raise "date must be all digits" unless digit_chars.all? { |ch| ch >= "0" && ch <= "9" }
  year = s[0, 4].to_i
  month = s[5, 2].to_i
  day = s[8, 2].to_i
  raise "month out of range" unless month.between?(1, 12)
  raise "day out of range" unless day.between?(1, days_in_month(year, month))
end

#validate_heredoc_basic_surrogates(body) ⇒ `Object`

SPEC §basic-string escapes: a `\uXXXX` / `\UXXXXXXXX` escape whose decoded value falls in the surrogate range U+D800..U+DFFF is a parse error. The basic-string lexer enforces this inline; heredoc bodies are collected raw, so we scan the body for surrogate escapes here.

# File 'lib/dms/parser.rb', line 2539

# Scans every line of a raw heredoc body for unicode escapes (a backslash
# followed by 'u' + 4 hex digits or 'U' + 8 hex digits) and raises
# DecodeError when the escaped value lies in the surrogate range
# U+D800..U+DFFF.
#
# body - object whose `lines` yields (text, line number, line start) for
#        each body line.
#
# Escapes that are incomplete or contain non-hex characters are ignored
# here.
def validate_heredoc_basic_surrogates(body)
  body.lines.each do |text, line_no, line_start|
    # Work on a binary copy so indexing is strictly per byte.
    bytes = text.b
    i = 0
    len = bytes.bytesize
    while i < len
      if bytes.getbyte(i) == BACKSLASH
        # Measure the whole run of consecutive backslashes.
        j = i
        while j < len && bytes.getbyte(j) == BACKSLASH
          j += 1
        end
        run = j - i
        # In an odd-length run the last backslash is unpaired, so it
        # introduces an escape with the byte that follows.
        if run.odd? && j < len
          intro = bytes.getbyte(j)
          n = if intro == LOWER_U then 4
              elsif intro == UPPER_U then 8
              else 0
              end
          if n > 0 && j + 1 + n <= len
            hex = bytes.byteslice(j + 1, n)
            ok = true
            k = 0
            while k < n
              unless HEX_BYTE[hex.getbyte(k)]
                ok = false
                break
              end
              k += 1
            end
            if ok
              cp = hex.to_i(16)
              if cp >= 0xD800 && cp <= 0xDFFF
                # Report at the backslash that introduces the escape: its
                # byte offset within `text`, plus one.
                # NOTE(review): line_start is not used here, so the column
                # is relative to `text` -- confirm that is what DecodeError
                # expects.
                esc_off = j - 1
                column = esc_off + 1
                raise DecodeError.new(line_no, column,
                                     format("surrogate codepoint U+%04X in escape", cp))
              end
            end
          end
        end
        # Resume right after the backslash run.
        i = j
      else
        i += 1
      end
    end
  end
end

#validate_time(s) ⇒ `Object`

# File 'lib/dms/parser.rb', line 2271

# Checks that `s` is a time of day written as HH:MM:SS.
#
# Raises RuntimeError when the shape is wrong, when a digit position holds
# a non-digit, or when hour > 23, minute > 59 or second > 59. Returns nil
# for a valid time.
def validate_time(s)
  well_formed = s.length == 8 && s[2] == ":" && s[5] == ":"
  raise "invalid time format" unless well_formed
  digit_chars = [0, 1, 3, 4, 6, 7].map { |index| s[index] }
  raise "time must be all digits" unless digit_chars.all? { |ch| ch >= "0" && ch <= "9" }
  hour = s[0, 2].to_i
  minute = s[3, 2].to_i
  second = s[6, 2].to_i
  raise "hour out of range" if hour > 23
  raise "minute out of range" if minute > 59
  raise "second out of range (leap seconds not supported)" if second > 59
end

#xid_continue?(cp) ⇒ `Boolean`

Frozen XID_Continue test (Unicode 15.1, UAX #31 §2). ASCII fast path: the bare-key ASCII set is handled by BARE_KEY_BYTE before this is called, so any cp < 0x80 reaching here is not an XID_Continue char for our purposes (we already accepted alnum/_/- and rejected the rest). Binary-search the sorted, non-overlapping range table.

Returns:

(Boolean)

# File 'lib/dms/parser.rb', line 1513

# True when code point `cp` falls inside one of the inclusive [low, high]
# pairs of XID_CONTINUE_RANGES. Code points below 0x80 always yield false.
#
# The table is binary-searched, so it must be sorted and non-overlapping.
def xid_continue?(cp)
  return false if cp < 0x80
  hit = XID_CONTINUE_RANGES.bsearch do |span|
    # Find-any mode: negative -> look left, positive -> look right, 0 -> hit.
    if cp < span[0]
      -1
    elsif cp > span[1]
      1
    else
      0
    end
  end
  !hit.nil?
end

Class: Dms::Parser

Defined Under Namespace

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src, lite: false, ignore_order: false) ⇒ Parser

Instance Attribute Details

#comments ⇒ Object (readonly)

#original_forms ⇒ Object (readonly)

Class Method Details

._parse_document_with_mode(src, lite, ignore_order = false) ⇒ Object

.parse_document(src) ⇒ Object

.parse_document_unordered(src) ⇒ Object

.parse_front_matter_only(src) ⇒ Object

.parse_lite_document(src) ⇒ Object

.parse_lite_document_unordered(src) ⇒ Object

Instance Method Details

#advance_line ⇒ Object

#apply_modifiers(s, mods) ⇒ Object

#apply_trim(s, chars, where_s, replacement) ⇒ Object

#capture_inner_block_comments ⇒ Object

#col ⇒ Object

#collect_heredoc_body(terminator) ⇒ Object

#consume_after_value(allow_eof) ⇒ Object

#consume_eol ⇒ Object

#days_in_month(y, m) ⇒ Object

#eof? ⇒ Boolean

#err(msg) ⇒ Object

#err_at(line, line_start, byte_pos, msg) ⇒ Object

#finalize_table(t) ⇒ Object

#flush_pending_as_floating ⇒ Object

#flush_pending_as_leading_on_current ⇒ Object

#fold_paragraphs(s) ⇒ Object

#line_starts_kvpair? ⇒ Boolean

#looks_like_date_prefix_at?(p) ⇒ Boolean

#looks_like_time_prefix_at?(p) ⇒ Boolean

#looks_like_time_str?(s) ⇒ Boolean

#measure_line_indent ⇒ Object

#new_table ⇒ Object

#parse_bare_key ⇒ Object

#parse_basic_string_value ⇒ Object

#parse_block_value(indent) ⇒ Object

#parse_body ⇒ Object

#parse_body_as_table ⇒ Object

#parse_bool_value ⇒ Object

#parse_datetime_value ⇒ Object

#parse_dec_float(s) ⇒ Object

#parse_float_lit(s) ⇒ Object

#parse_flow_array ⇒ Object

#parse_flow_table ⇒ Object

#parse_front_matter ⇒ Object

#parse_heredoc_basic ⇒ Object

#parse_heredoc_label ⇒ Object

#parse_heredoc_literal ⇒ Object

#parse_heredoc_modifiers ⇒ Object

#parse_inf_value ⇒ Object

#parse_inline_value_in_flow ⇒ Object

#parse_inline_value_or_heredoc ⇒ Object

#parse_integer_lit(s) ⇒ Object

#parse_key ⇒ Object

#parse_kvpair(parent_indent) ⇒ Object

#parse_list_block(indent) ⇒ Object

#parse_list_item_value(list_indent) ⇒ Object

#parse_literal_string_value ⇒ Object

#parse_local_time_value ⇒ Object

#parse_modifier_call_args ⇒ Object

#parse_nan_value ⇒ Object

#parse_nondec_float(s) ⇒ Object

#parse_number_or_datetime ⇒ Object

#parse_one_modifier ⇒ Object

#parse_table_block(indent) ⇒ Object

#peek_after_plus_is_space_or_eol? ⇒ Boolean

#peek_byte ⇒ Object

#peek_char_byte_safe ⇒ Object

#per_line_edges(s, char_set, replacement) ⇒ Object

#read_c_block_comment ⇒ Object

#read_hash_block_comment ⇒ Object

#read_hex_codepoint(n) ⇒ Object

#initialize(src, lite: false, ignore_order: false) ⇒ `Parser`

#comments ⇒ `Object` (readonly)

#original_forms ⇒ `Object` (readonly)

._parse_document_with_mode(src, lite, ignore_order = false) ⇒ `Object`

.parse_document(src) ⇒ `Object`

.parse_document_unordered(src) ⇒ `Object`

.parse_front_matter_only(src) ⇒ `Object`

.parse_lite_document(src) ⇒ `Object`

.parse_lite_document_unordered(src) ⇒ `Object`

#advance_line ⇒ `Object`

#apply_modifiers(s, mods) ⇒ `Object`

#apply_trim(s, chars, where_s, replacement) ⇒ `Object`

#capture_inner_block_comments ⇒ `Object`

#col ⇒ `Object`

#collect_heredoc_body(terminator) ⇒ `Object`

#consume_after_value(allow_eof) ⇒ `Object`

#consume_eol ⇒ `Object`

#days_in_month(y, m) ⇒ `Object`

#eof? ⇒ `Boolean`

#err(msg) ⇒ `Object`

#err_at(line, line_start, byte_pos, msg) ⇒ `Object`

#finalize_table(t) ⇒ `Object`

#flush_pending_as_floating ⇒ `Object`

#flush_pending_as_leading_on_current ⇒ `Object`

#fold_paragraphs(s) ⇒ `Object`

#line_starts_kvpair? ⇒ `Boolean`

#looks_like_date_prefix_at?(p) ⇒ `Boolean`

#looks_like_time_prefix_at?(p) ⇒ `Boolean`

#looks_like_time_str?(s) ⇒ `Boolean`

#measure_line_indent ⇒ `Object`

#new_table ⇒ `Object`

#parse_bare_key ⇒ `Object`

#parse_basic_string_value ⇒ `Object`

#parse_block_value(indent) ⇒ `Object`

#parse_body ⇒ `Object`

#parse_body_as_table ⇒ `Object`

#parse_bool_value ⇒ `Object`

#parse_datetime_value ⇒ `Object`

#parse_dec_float(s) ⇒ `Object`

#parse_float_lit(s) ⇒ `Object`

#parse_flow_array ⇒ `Object`

#parse_flow_table ⇒ `Object`

#parse_front_matter ⇒ `Object`

#parse_heredoc_basic ⇒ `Object`

#parse_heredoc_label ⇒ `Object`

#parse_heredoc_literal ⇒ `Object`

#parse_heredoc_modifiers ⇒ `Object`

#parse_inf_value ⇒ `Object`

#parse_inline_value_in_flow ⇒ `Object`

#parse_inline_value_or_heredoc ⇒ `Object`

#parse_integer_lit(s) ⇒ `Object`

#parse_key ⇒ `Object`

#parse_kvpair(parent_indent) ⇒ `Object`

#parse_list_block(indent) ⇒ `Object`

#parse_list_item_value(list_indent) ⇒ `Object`

#parse_literal_string_value ⇒ `Object`

#parse_local_time_value ⇒ `Object`

#parse_modifier_call_args ⇒ `Object`

#parse_nan_value ⇒ `Object`

#parse_nondec_float(s) ⇒ `Object`

#parse_number_or_datetime ⇒ `Object`

#parse_one_modifier ⇒ `Object`

#parse_table_block(indent) ⇒ `Object`

#peek_after_plus_is_space_or_eol? ⇒ `Boolean`

#peek_byte ⇒ `Object`

#peek_char_byte_safe ⇒ `Object`

#per_line_edges(s, char_set, replacement) ⇒ `Object`

#read_c_block_comment ⇒ `Object`

#read_hash_block_comment ⇒ `Object`

#read_hex_codepoint(n) ⇒ `Object`

#read_line_comment_to_eol ⇒ `Object`

#record_form(lit) ⇒ `Object`

#reject_reserved_sigil_at_line_start! ⇒ `Object`

#replace_all_runs(s, char_set, replacement) ⇒ `Object`

#replace_leading_run(s, char_set, replacement) ⇒ `Object`

#replace_trailing_run(s, char_set, replacement) ⇒ `Object`

#scan_number_token ⇒ `Object`

#skip_flow_ws ⇒ `Object`

#skip_inline_ws ⇒ `Object`

#skip_trivia ⇒ `Object`