Class: LexerKit::IR::Serializer

Inherits:
Object
  • Object
show all
Defined in:
lib/lexer_kit/ir/serializer.rb

Overview

Serializer handles binary encoding/decoding of CompiledProgram. Extracted from CompiledProgram to separate serialization concerns.

Constant Summary collapse

MAGIC =
"LKT1"
FORMAT_VERSION =

v3: added token_meta

3

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.from_binary(bytes) ⇒ CompiledProgram

Decode binary data to a CompiledProgram.

Parameters:

  • bytes (String)

    binary data

Returns:

  • (CompiledProgram)

    the decoded program

21
22
23
# File 'lib/lexer_kit/ir/serializer.rb', line 21

# Build a throwaway serializer instance and have it decode +bytes+.
#
# @param bytes [String] binary data
# @return [CompiledProgram] whatever #decode returns for +bytes+
def self.from_binary(bytes)
  serializer = new
  serializer.decode(bytes)
end

.to_binary(program) ⇒ String

Encode a CompiledProgram to binary format.

Parameters:

  • program (CompiledProgram)

    the program to encode

Returns:

  • (String)

    binary data



14
15
16
# File 'lib/lexer_kit/ir/serializer.rb', line 14

# Build a throwaway serializer instance and have it encode +program+.
#
# @param program [CompiledProgram] program to serialize
# @return [String] whatever #encode returns for +program+
def self.to_binary(program)
  serializer = new
  serializer.encode(program)
end

Instance Method Details

#decode(bytes) ⇒ CompiledProgram

Decode binary data to CompiledProgram.

Parameters:

  • bytes (String)

Returns:

  • (CompiledProgram)

Raises:

  • (ArgumentError)

    if the magic bytes or the format version do not match the expected values


86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/lexer_kit/ir/serializer.rb', line 86

# Decode binary data into a CompiledProgram.
#
# Sections are consumed strictly in this order: magic, format version,
# user version, token names, mode names, mode offset table, constant pool,
# DFA tables, jump tables, keyword tables, token metadata, instructions.
#
# @param bytes [String] serialized program
# @return [CompiledProgram]
# @raise [ArgumentError] if the magic does not equal MAGIC or the format
#   version does not equal FORMAT_VERSION
def decode(bytes)
  # Reader state used by the read_* / decode_* helpers.
  @bytes = bytes
  @pos = 0

  header_magic = read_bytes(4)
  unless header_magic == MAGIC
    raise ArgumentError, "invalid magic: #{header_magic.inspect}"
  end

  wire_version = read_uint16
  unless wire_version == FORMAT_VERSION
    raise ArgumentError, "unsupported format version: #{wire_version}"
  end

  user_version = read_uint32
  token_names = decode_symbol_array
  mode_names = decode_symbol_array

  # Mode table: a u16 entry count, then one (name, u32 offset) pair per entry.
  mode_offsets = read_uint16.times.each_with_object({}) do |_, table|
    mode_name = read_length_prefixed_string.to_sym
    table[mode_name] = read_uint32
  end

  # Constant pool: u32 byte length followed by that many bytes. The result
  # of ConstantPool.from_binary is destructured and only its first element
  # is kept.
  pool_blob = read_bytes(read_uint32)
  constant_pool, = ConstantPool.from_binary(pool_blob)

  dfa_tables = decode_table_array(DFATable)
  jump_tables = decode_table_array(JumpTable)
  keyword_tables = decode_table_array(KeywordTable)

  # Token metadata exists from format v3 onwards.
  token_meta =
    if wire_version >= 3
      decode_token_meta
    else
      {}
    end

  # Instructions: u32 count, then one 4-byte record per instruction.
  instructions = read_uint32.times.map do
    Instruction.from_binary(read_bytes(4))
  end

  CompiledProgram.new(
    version: user_version,
    token_names: token_names,
    mode_names: mode_names,
    modes: mode_offsets,
    constant_pool: constant_pool,
    dfa_tables: dfa_tables,
    jump_tables: jump_tables,
    keyword_tables: keyword_tables,
    token_meta: token_meta,
    instructions: instructions
  )
end

#encode(program) ⇒ String

Encode CompiledProgram to binary.

Parameters:

  • program (CompiledProgram)

    the program to encode

Returns:

  • (String)


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/lexer_kit/ir/serializer.rb', line 28

# Encode a CompiledProgram to binary.
#
# Sections are written in this order: magic, format version (u16), user
# version (u32), token names, mode names, mode offset table, constant pool
# (u32 byte length + payload), DFA tables, jump tables, keyword tables,
# token metadata, instructions (u32 count + each instruction's to_binary).
# Integers written directly by this method are big-endian.
#
# @param program [CompiledProgram] program to serialize
# @return [String] binary (ASCII-8BIT) data
# @raise [ArgumentError] if the user version, the mode count, a mode name
#   length, a mode offset, the constant pool size or the instruction count
#   does not fit the fixed-width field it is written to
def encode(program)
  # Array#pack keeps only the low bits of an out-of-range Integer, which
  # would silently produce a corrupt stream; reject such values up front.
  pack_uint = lambda do |value, directive, max, label|
    if value.is_a?(Integer) && !value.between?(0, max)
      raise ArgumentError, "#{label} out of range (0..#{max}): #{value}"
    end

    [value].pack(directive)
  end
  u16 = ->(value, label) { pack_uint.call(value, "S>", 0xFFFF, label) }
  u32 = ->(value, label) { pack_uint.call(value, "L>", 0xFFFF_FFFF, label) }

  parts = []

  # Magic (4 bytes)
  parts << MAGIC

  # Format version (u16)
  parts << u16.call(FORMAT_VERSION, "format version")

  # User version (u32)
  parts << u32.call(program.version, "version")

  # Token names
  encode_string_array(parts, program.token_names)

  # Mode names
  encode_string_array(parts, program.mode_names)

  # Modes (name → offset mapping): u16 count, then for each entry a
  # u16-length-prefixed name followed by a u32 offset.
  modes = program.mode_offsets
  parts << u16.call(modes.size, "mode count")
  modes.each do |name, offset|
    name_bytes = name.to_s.b
    parts << u16.call(name_bytes.bytesize, "mode name length")
    parts << name_bytes
    parts << u32.call(offset, "mode offset")
  end

  # Constant pool (u32 byte length + payload)
  pool_binary = program.constant_pool.to_binary
  parts << u32.call(pool_binary.bytesize, "constant pool size")
  parts << pool_binary

  # DFA tables
  encode_table_array(parts, program.dfa_tables)

  # Jump tables
  encode_table_array(parts, program.jump_tables)

  # Keyword tables
  encode_table_array(parts, program.keyword_tables)

  # Token metadata
  encode_token_meta(parts, program.token_meta)

  # Instructions (u32 count + records)
  instructions = program.instructions
  parts << u32.call(instructions.size, "instruction count")
  instructions.each do |instr|
    parts << instr.to_binary
  end

  # Array#join keeps the first part's encoding when every byte is 7-bit, so
  # the result's encoding would otherwise depend on the data. Always hand
  # back a binary string; the bytes are unchanged.
  parts.join.b
end