Class: LexerKit::IR::KeywordTable
- Inherits:
-
Object
- Object
- LexerKit::IR::KeywordTable
- Defined in:
- lib/lexer_kit/ir/keyword_table.rb
Overview
KeywordTable maps keyword strings to token IDs. It is used for efficient O(1) keyword lookup after an identifier has been matched.
Keywords must be UTF-8 encoded strings. This is consistent with LexerKit’s token specification, which requires UTF-8 encoding.
Instance Attribute Summary collapse
-
#base_token_id ⇒ Object
readonly
Returns the value of attribute base_token_id.
-
#keywords ⇒ Object
readonly
Returns the value of attribute keywords.
Class Method Summary collapse
-
.from_binary(bytes) ⇒ Array(KeywordTable, Integer)
Decode a table from its binary form, returning the table and the number of bytes consumed.
Instance Method Summary collapse
-
#initialize(base_token_id:, keywords:) ⇒ KeywordTable
constructor
A new instance of KeywordTable.
-
#to_binary ⇒ String
Encode to binary format. Format (u16 values are big-endian): base_token_id: u16, keyword_count: u16, then for each keyword: key_len: u16, key: bytes, token_id: u16.
-
#to_native_format ⇒ Hash
Convert to format suitable for C native loading.
Constructor Details
#initialize(base_token_id:, keywords:) ⇒ KeywordTable
Returns a new instance of KeywordTable.
15 16 17 18 19 20 21 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 15

# Builds a keyword table after validating both arguments.
#
# The keyword mapping is shallow-copied before it is frozen, so the hash
# object passed in by the caller is not frozen as a side effect.
#
# @param base_token_id [Integer] base token ID; checked by
#   validate_base_token_id before being stored
# @param keywords [Hash] keyword string => token ID mapping; checked by
#   validate_keywords before being stored
# @return [KeywordTable] a new instance of KeywordTable
def initialize(base_token_id:, keywords:)
  # NOTE(review): what the validators raise is not visible here -- confirm
  # against their definitions elsewhere in this file.
  validate_base_token_id(base_token_id)
  validate_keywords(keywords)

  @base_token_id = base_token_id
  # Freeze a private copy rather than the caller's own object.
  @keywords = keywords.dup.freeze
end
Instance Attribute Details
#base_token_id ⇒ Object (readonly)
Returns the value of attribute base_token_id.
11 12 13 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 11

# Read-only accessor for the base token ID.
#
# @return [Object] the current value of @base_token_id
def base_token_id
  @base_token_id
end
#keywords ⇒ Object (readonly)
Returns the value of attribute keywords.
11 12 13 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 11

# Read-only accessor for the keyword mapping.
#
# @return [Object] the current value of @keywords
def keywords
  @keywords
end
Class Method Details
.from_binary(bytes) ⇒ Array(KeywordTable, Integer)
Decode a table from its binary form, returning the table and the number of bytes consumed.
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 77

# Decodes a keyword table from its binary representation.
#
# Layout (every u16 is read with the big-endian "S>" directive):
#   base_token_id: u16
#   keyword_count: u16
#   keyword_count times:
#     key_len:  u16
#     key:      key_len bytes, tagged as UTF-8 without transcoding
#     token_id: u16
#
# @param bytes [String] buffer that starts with an encoded table; bytes
#   following the table are left unread
# @return [Array(KeywordTable, Integer)] the decoded table and the number of
#   bytes consumed from the start of +bytes+
# @raise [LexerKit::InvalidBinaryError] if the data is truncated or malformed,
#   or if any other StandardError is raised while decoding (that error's
#   message is appended to the new error's message)
def self.from_binary(bytes)
  if bytes.bytesize < 4
    raise LexerKit::InvalidBinaryError,
          "Binary data too short (expected at least 4 bytes, got #{bytes.bytesize})"
  end

  pos = 0

  # Base token ID
  base_token_id = bytes.byteslice(pos, 2)&.unpack1("S>")
  raise LexerKit::InvalidBinaryError, "Invalid header data" if base_token_id.nil?

  pos += 2

  # Keyword count
  keyword_count = bytes.byteslice(pos, 2)&.unpack1("S>")
  raise LexerKit::InvalidBinaryError, "Invalid header data" if keyword_count.nil?

  pos += 2

  # Keywords
  keywords = {}
  keyword_count.times do
    if pos + 2 > bytes.bytesize
      raise LexerKit::InvalidBinaryError, "Unexpected end of data while reading keyword entry"
    end

    key_len = bytes.byteslice(pos, 2)&.unpack1("S>")
    raise LexerKit::InvalidBinaryError, "Invalid key length" if key_len.nil?

    pos += 2

    # The key bytes and the trailing u16 token ID must both be present.
    if pos + key_len + 2 > bytes.bytesize
      raise LexerKit::InvalidBinaryError, "Unexpected end of data while reading keyword"
    end

    key = bytes.byteslice(pos, key_len).force_encoding(Encoding::UTF_8)
    pos += key_len

    token_id = bytes.byteslice(pos, 2)&.unpack1("S>")
    raise LexerKit::InvalidBinaryError, "Invalid token ID" if token_id.nil?

    pos += 2

    keywords[key] = token_id
  end

  [new(base_token_id: base_token_id, keywords: keywords), pos]
rescue LexerKit::InvalidBinaryError
  # Already the right error type: pass it through untouched.
  raise
rescue StandardError => e
  # Anything else (e.g. a constructor validation failure or a non-String
  # argument) is reported as invalid binary data, keeping the cause's message.
  raise LexerKit::InvalidBinaryError, "Failed to parse binary data: #{e.message}"
end
Instance Method Details
#to_binary ⇒ String
Encode to binary format. Format (u16 values are big-endian):
base_token_id: u16
keyword_count: u16
for each keyword:
key_len: u16
key: bytes
token_id: u16
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 33

# Encodes the table to its binary format.
#
# Layout (every u16 is written with the big-endian "S>" directive):
#   base_token_id: u16
#   keyword_count: u16
#   for each keyword:
#     key_len:  u16
#     key:      bytes
#     token_id: u16
#
# @return [String] the encoded table
# @raise [ArgumentError] if there are more than 65535 keywords, a keyword is
#   longer than 65535 bytes, or the base token ID or any keyword's token ID
#   lies outside 0..65535
def to_binary
  # Validate keyword count fits in u16
  if @keywords.size > 0xFFFF
    raise ArgumentError, "Too many keywords (#{@keywords.size}): maximum is 65535"
  end

  # pack("S>") silently wraps values outside 0..65535, which would emit a
  # corrupt table, so out-of-range IDs are rejected explicitly.
  unless (0..0xFFFF).cover?(@base_token_id)
    raise ArgumentError, "Base token ID out of range (#{@base_token_id.inspect}): must be 0..65535"
  end

  # Header: base token ID (u16) followed by keyword count (u16)
  parts = [[@base_token_id, @keywords.size].pack("S>S>")]

  # Keywords
  @keywords.each do |key, token_id|
    key_bytes = key.b

    # Validate keyword length fits in u16
    if key_bytes.bytesize > 0xFFFF
      raise ArgumentError, "Keyword too long (#{key_bytes.bytesize} bytes): maximum is 65535 bytes"
    end

    unless (0..0xFFFF).cover?(token_id)
      raise ArgumentError,
            "Token ID out of range for keyword #{key.inspect} (#{token_id.inspect}): must be 0..65535"
    end

    parts << [key_bytes.bytesize].pack("S>") << key_bytes << [token_id].pack("S>")
  end

  parts.join
end
#to_native_format ⇒ Hash
Convert to format suitable for C native loading
66 67 68 69 70 71 |
# File 'lib/lexer_kit/ir/keyword_table.rb', line 66

# Converts the table to a plain structure for the C native loader.
#
# @return [Hash] a hash with two entries:
#   :base_token_id - the stored base token ID, unchanged
#   :keywords      - an Array of [key, token_id] pairs, where each key is a
#                    binary (ASCII-8BIT) copy of the keyword string
def to_native_format
  # String#b copies the bytes under binary encoding; the stored keys are untouched.
  binary_pairs = @keywords.map { |keyword, id| [keyword.b, id] }

  { base_token_id: @base_token_id, keywords: binary_pairs }
end