Class: LexerKit::IR::CompiledProgram

Inherits:
Object
  • Object
show all
Defined in:
lib/lexer_kit/ir/compiled_program.rb

Overview

CompiledProgram is the complete compiled lexer, ready for execution. It contains instructions, DFA tables, constants, and metadata. Binary serialization is handled by the Serializer class.

Note: Native methods are included by the Rust extension when it is loaded.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(instructions:, dfa_tables: [], jump_tables: [], constant_pool: nil, modes: {}, token_names: [], mode_names: [], keyword_tables: [], token_meta: {}, version: 1) ⇒ CompiledProgram

Returns a new instance of CompiledProgram.

Parameters:

  • instructions (Array<Instruction>)

    instruction list

  • dfa_tables (Array<DFATable>) (defaults to: [])

    DFA tables

  • jump_tables (Array<JumpTable>) (defaults to: [])

    jump tables

  • constant_pool (ConstantPool) (defaults to: nil)

    string constants

  • modes (Hash<Symbol, Integer>) (defaults to: {})

    mode name → start instruction offset

  • token_names (Array<Symbol>) (defaults to: [])

    token ID → name mapping

  • mode_names (Array<Symbol>) (defaults to: [])

    mode ID → name mapping

  • keyword_tables (Array<KeywordTable>) (defaults to: [])

    keyword tables

  • token_meta (Hash<Integer, Hash>) (defaults to: {})

    token ID → metadata hash

  • version (Integer) (defaults to: 1)

    user-defined version number



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/lexer_kit/ir/compiled_program.rb', line 23

# Build a compiled program from its already-compiled parts.
#
# Every collection argument is frozen in place (no copy is taken), so the
# objects the caller passed in become immutable as well.
#
# @param instructions [Array<Instruction>] instruction list
# @param dfa_tables [Array<DFATable>] DFA tables
# @param jump_tables [Array<JumpTable>] jump tables
# @param constant_pool [ConstantPool, nil] string constants
# @param modes [Hash<Symbol, Integer>] mode name → start instruction offset
# @param token_names [Array<Symbol>] token ID → name mapping
# @param mode_names [Array<Symbol>] mode ID → name mapping
# @param keyword_tables [Array<KeywordTable>] keyword tables
# @param token_meta [Hash<Integer, Hash>] token ID → metadata hash
# @param version [Integer] user-defined version number
def initialize(
  instructions:,
  dfa_tables: [],
  jump_tables: [],
  constant_pool: nil,
  modes: {},
  token_names: [],
  mode_names: [],
  keyword_tables: [],
  token_meta: {},
  version: 1
)
  @instructions = instructions.freeze
  @dfa_tables = dfa_tables.freeze
  @jump_tables = jump_tables.freeze
  # Fall back to a new ConstantPool when none is supplied; the pool is not frozen here.
  @constant_pool = constant_pool || ConstantPool.new
  # The `modes:` keyword is stored as @mode_offsets, not @modes.
  @mode_offsets = modes.freeze
  @token_names = token_names.freeze
  @mode_names = mode_names.freeze
  @keyword_tables = keyword_tables.freeze
  @token_meta = token_meta.freeze
  @version = version
end

Instance Attribute Details

#constant_poolObject (readonly)

Returns the value of attribute constant_pool.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the program's constant pool.
#
# @return [Object] the value held in @constant_pool
def constant_pool = @constant_pool

#dfa_tablesObject (readonly)

Returns the value of attribute dfa_tables.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the DFA tables.
#
# @return [Object] the value held in @dfa_tables
def dfa_tables = @dfa_tables

#instructionsObject (readonly)

Returns the value of attribute instructions.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the instruction list.
#
# @return [Object] the value held in @instructions
def instructions = @instructions

#jump_tablesObject (readonly)

Returns the value of attribute jump_tables.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the jump tables.
#
# @return [Object] the value held in @jump_tables
def jump_tables = @jump_tables

#keyword_tablesObject (readonly)

Returns the value of attribute keyword_tables.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the keyword tables.
#
# @return [Object] the value held in @keyword_tables
def keyword_tables = @keyword_tables

#mode_namesObject (readonly)

Returns the value of attribute mode_names.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the mode ID → name table.
#
# @return [Object] the value held in @mode_names
def mode_names = @mode_names

#mode_offsetsObject (readonly)

Returns the value of attribute mode_offsets.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the mode name → start offset mapping.
#
# @return [Object] the value held in @mode_offsets
def mode_offsets = @mode_offsets

#token_metaObject (readonly)

Returns the value of attribute token_meta.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the token ID → metadata mapping.
#
# @return [Object] the value held in @token_meta
def token_meta = @token_meta

#token_namesObject (readonly)

Returns the value of attribute token_names.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the token ID → name table.
#
# @return [Object] the value held in @token_names
def token_names = @token_names

#versionObject (readonly)

Returns the value of attribute version.



11
12
13
# File 'lib/lexer_kit/ir/compiled_program.rb', line 11

# Read-only accessor for the user-defined version number.
#
# @return [Object] the value held in @version
def version = @version

Class Method Details

.from_binary(bytes) ⇒ CompiledProgram

Decode from binary

Parameters:

  • bytes (String)

Returns:

  • (CompiledProgram)



209
210
211
# File 'lib/lexer_kit/ir/compiled_program.rb', line 209

# Decode a program from its binary form by delegating to Serializer.
#
# @param bytes [String] serialized program
# @return [CompiledProgram] whatever Serializer.from_binary returns for +bytes+
def self.from_binary(bytes) = Serializer.from_binary(bytes)

Instance Method Details

#error_token?(tok) ⇒ Boolean

Check if a token ID is an error token. Use this for fast error detection in the lex loop.

Parameters:

  • tok (Integer)

    token ID

Returns:

  • (Boolean)


109
110
111
# File 'lib/lexer_kit/ir/compiled_program.rb', line 109

# Tell whether a token ID is the error token.
#
# @param tok [Integer] token ID
# @return [Boolean] true when +tok+ equals LexerKit::INVALID_TOKEN_ID
def error_token?(tok) = tok == LexerKit::INVALID_TOKEN_ID

#inspectObject



213
214
215
# File 'lib/lexer_kit/ir/compiled_program.rb', line 213

# Short debugging summary: version, instruction count, token count, and
# whether the native extension is currently available.
#
# @return [String]
def inspect
  instruction_count = @instructions.size
  token_count = @token_names.size
  "#<CompiledProgram v#{@version} instructions=#{instruction_count} tokens=#{token_count} native=#{LexerKit.native?}>"
end

#kindSymbol

Kind of compiled program

Returns:

  • (Symbol)


49
50
51
# File 'lib/lexer_kit/ir/compiled_program.rb', line 49

# Kind of compiled program.
#
# @return [Symbol] always :program
def kind = :program

#load_native!self

Load native representation for fast lexing

Returns:

  • (self)


180
181
182
183
184
185
# File 'lib/lexer_kit/ir/compiled_program.rb', line 180

# Load the native representation for fast lexing.
#
# Does nothing when the native extension is unavailable; otherwise hands
# the result of #to_native_data to load_rust_native.
#
# @return [self]
def load_native!
  load_rust_native(to_native_data) if LexerKit.native?
  self
end

#lowlevel_each(bytes) {|Integer, Integer, Integer| ... } ⇒ Object

Low-level lexing with callback (for performance-critical code)

Parameters:

  • bytes (String)

    input bytes

Yields:

  • (Integer, Integer, Integer)

    token_id, start, length

Raises:

  • (LexerKit::NativeExtensionError)

    if the Rust extension is not loaded



97
98
99
100
101
102
103
# File 'lib/lexer_kit/ir/compiled_program.rb', line 97

# Low-level lexing with a callback (for performance-critical code).
#
# The input is converted to a binary-encoded copy first, then the native
# lexer is invoked with the caller's block forwarded unchanged.
#
# @param bytes [String] input bytes
# @yield [Integer, Integer, Integer] token_id, start, length
# @raise [LexerKit::NativeExtensionError] when the Rust extension is not loaded
def lowlevel_each(bytes, &)
  binary_input = bytes.b
  unless LexerKit.native?
    raise LexerKit::NativeExtensionError, "Rust extension not loaded"
  end

  ensure_rust_native!
  lex_rust_native(binary_input, &)
end

#make_token(tok, start, len, input:, filename: nil) ⇒ Core::Token

Create a Token object on demand. Use this to get rich token info only when needed (e.g., for errors). Source is created internally, so there’s zero overhead if not called.

Parameters:

  • tok (Integer)

    token ID

  • start (Integer)

    start byte offset

  • len (Integer)

    length in bytes

  • input (String)

    original input string

  • filename (String, nil) (defaults to: nil)

    optional filename for diagnostics

Returns:

  • (Core::Token)



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/lexer_kit/ir/compiled_program.rb', line 129

# Build a single Core::Token on demand (e.g. for error reporting).
#
# A new Core::Source is created for every call.
# NOTE(review): unlike #tokenize, +input+ is handed to Core::Source without
# `.b` — confirm byte offsets resolve as intended for multibyte input.
#
# @param tok [Integer] token ID
# @param start [Integer] start byte offset
# @param len [Integer] length in bytes
# @param input [String] original input string
# @param filename [String, nil] optional filename for diagnostics
# @return [Core::Token]
def make_token(tok, start, len, input:, filename: nil)
  source = Core::Source.new(input, filename:)
  Core::Token.new(
    id: tok,
    name: token_name(tok),
    start:,
    len:,
    source:,
    meta: @token_meta[tok]
  )
end

#mode_id(name) ⇒ Integer?

Get mode ID by name

Parameters:

  • name (Symbol)

Returns:

  • (Integer, nil)


82
83
84
# File 'lib/lexer_kit/ir/compiled_program.rb', line 82

# Get mode ID by name: the position of +name+ in the mode name table.
#
# @param name [Symbol]
# @return [Integer, nil] nil when the name is not in the table
def mode_id(name) = @mode_names.index(name)

#mode_offset(name) ⇒ Integer?

Get mode start offset

Parameters:

  • name (Symbol)

Returns:

  • (Integer, nil)


89
90
91
# File 'lib/lexer_kit/ir/compiled_program.rb', line 89

# Get the start offset recorded for a mode.
#
# @param name [Symbol]
# @return [Integer, nil] the value stored under +name+ in the mode offset mapping
def mode_offset(name) = @mode_offsets[name]

#modesArray<Symbol>

Get all mode names

Returns:

  • (Array<Symbol>)


75
76
77
# File 'lib/lexer_kit/ir/compiled_program.rb', line 75

# Get all mode names.
#
# @return [Array<Symbol>] a copy of the mode name table (not the stored array itself)
def modes = @mode_names.dup

#stream(input, filename: nil) ⇒ Runner

Create a stream-based lexer with lookahead support. Returns a Runner that wraps the underlying LexStream.

Parameters:

  • input (String)

    input string

  • filename (String, nil) (defaults to: nil)

    optional filename for diagnostics

Returns:

  • (Runner)

Raises:

  • (LexerKit::NativeExtensionError)

    if the Rust extension is not loaded



147
148
149
150
151
152
153
# File 'lib/lexer_kit/ir/compiled_program.rb', line 147

# Create a stream-based lexer: a Runner wrapping the native stream created
# for +input+.
#
# @param input [String] input string
# @param filename [String, nil] optional filename for diagnostics
# @return [Runner]
# @raise [LexerKit::NativeExtensionError] when the Rust extension is not loaded
def stream(input, filename: nil)
  unless LexerKit.native?
    raise LexerKit::NativeExtensionError, "Rust extension not loaded"
  end

  ensure_rust_native!
  Runner.new(self, create_rust_stream(input), filename:)
end

#to_binaryString

Encode to binary

Returns:

  • (String)


202
203
204
# File 'lib/lexer_kit/ir/compiled_program.rb', line 202

# Encode this program to binary by delegating to Serializer.
#
# @return [String] whatever Serializer.to_binary returns for this program
def to_binary = Serializer.to_binary(self)

#to_native_dataHash

Convert to data format for Rust native loading

Returns:

  • (Hash)

    data for Rust extension



189
190
191
192
193
194
195
196
197
198
# File 'lib/lexer_kit/ir/compiled_program.rb', line 189

# Convert the program into the data hash passed to the Rust extension.
#
# Instructions are concatenated into one string of their binary encodings,
# each table is converted with its own #to_native_format, and modes become
# [name-as-string, offset] pairs.
#
# @return [Hash] data for the Rust extension
def to_native_data
  bytecode = @instructions.map { |instruction| instruction.to_binary }.join
  dfa = @dfa_tables.map { |table| table.to_native_format }
  jumps = @jump_tables.map { |table| table.to_native_format }
  keywords = @keyword_tables.map { |table| table.to_native_format }
  constants = @constant_pool.entries
  mode_pairs = @mode_offsets.map { |mode, start_offset| [mode.to_s, start_offset] }

  {
    instructions: bytecode,
    dfa_tables: dfa,
    jump_tables: jumps,
    keyword_tables: keywords,
    constant_pool: constants,
    modes: mode_pairs
  }
end

#token_id(name) ⇒ Integer?

Get token ID by name

Parameters:

  • name (Symbol)

Returns:

  • (Integer, nil)


56
57
58
# File 'lib/lexer_kit/ir/compiled_program.rb', line 56

# Get token ID by name: the position of +name+ in the token name table.
#
# @param name [Symbol]
# @return [Integer, nil] nil when the name is not in the table
def token_id(name) = @token_names.index(name)

#token_meta_for(tok) ⇒ Hash

Get metadata for a token ID

Parameters:

  • tok (Integer)

    token ID

Returns:

  • (Hash)

    metadata hash (empty hash if no metadata)



116
117
118
# File 'lib/lexer_kit/ir/compiled_program.rb', line 116

# Get metadata for a token ID.
#
# @param tok [Integer] token ID
# @return [Hash] the stored metadata, or a new empty hash when none is stored
def token_meta_for(tok) = @token_meta[tok] || {}

#token_name(id) ⇒ Symbol?

Get token name by ID

Parameters:

  • id (Integer)

Returns:

  • (Symbol, nil)


63
64
65
# File 'lib/lexer_kit/ir/compiled_program.rb', line 63

# Get token name by ID.
#
# Negative IDs never name a token. Without the guard, Array#[] would count
# from the end of the table and return an unrelated token's name.
#
# @param id [Integer] token ID
# @return [Symbol, nil] the token name, or nil when the ID is negative or
#   past the end of the table
def token_name(id)
  # Only intercept negative Integers; every other argument keeps the
  # behavior of a plain Array#[] lookup.
  return nil if id.is_a?(Integer) && id.negative?

  @token_names[id]
end

#tokenize(input, filename: nil) ⇒ Array<Core::Token>

Tokenize input and return an array of Token objects. Source is shared across all tokens for efficient line/col lookup.

Parameters:

  • input (String)

    input string

  • filename (String, nil) (defaults to: nil)

    optional filename for diagnostics

Returns:

  • (Array<Core::Token>)



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/lexer_kit/ir/compiled_program.rb', line 160

# Tokenize +input+ and return every token as a Core::Token.
#
# One Core::Source, built from the binary-encoded copy of the input, is
# shared by all returned tokens.
#
# @param input [String] input string
# @param filename [String, nil] optional filename for diagnostics
# @return [Array<Core::Token>]
def tokenize(input, filename: nil)
  raw = input.b
  shared_source = Core::Source.new(raw, filename:)

  [].tap do |collected|
    lowlevel_each(raw) do |id, offset, length|
      collected << Core::Token.new(
        id:,
        name: token_name(id),
        start: offset,
        len: length,
        source: shared_source,
        meta: @token_meta[id]
      )
    end
  end
end

#tokensArray<Symbol>

Get all token names (excludes reserved placeholder tokens)

Returns:

  • (Array<Symbol>)


69
70
71
# File 'lib/lexer_kit/ir/compiled_program.rb', line 69

# Get all token names, leaving out reserved placeholders (names whose
# string form starts with "__RESERVED_").
#
# @return [Array<Symbol>] a new array; the stored table is not modified
def tokens = @token_names.reject { _1.to_s.start_with?("__RESERVED_") }