Class: Parselly::Lexer
- Inherits: Object
- Inheritance chain: Object → Parselly::Lexer
- Defined in:
- lib/parselly/lexer.rb
Defined Under Namespace
Classes: Identifier, Token, TokenValue
Constant Summary collapse
# Lookup table from an operator/delimiter lexeme to its token type symbol.
TOKENS = {
  # Namespace and column combinators
  '|' => :PIPE,
  '||' => :COLUMN,
  # Combinators
  '>' => :CHILD,
  '+' => :ADJACENT,
  '~' => :SIBLING,
  # Delimiters
  '[' => :LBRACKET,
  ']' => :RBRACKET,
  '(' => :LPAREN,
  ')' => :RPAREN,
  ':' => :COLON,
  ',' => :COMMA,
  '.' => :DOT,
  '#' => :HASH,
  '*' => :STAR,
  '=' => :EQUAL,
  '-' => :MINUS,
  # Attribute operators
  '~=' => :INCLUDES,
  '|=' => :DASHMATCH,
  '^=' => :PREFIXMATCH,
  '$=' => :SUFFIXMATCH,
  '*=' => :SUBSTRINGMATCH
}.freeze

# The two-character entries of TOKENS, kept as their own table.
# Insertion order is deliberate here: attribute operators first, '||' last.
MULTI_CHAR_TOKENS = {
  '~=' => :INCLUDES,
  '|=' => :DASHMATCH,
  '^=' => :PREFIXMATCH,
  '$=' => :SUFFIXMATCH,
  '*=' => :SUBSTRINGMATCH,
  '||' => :COLUMN
}.freeze

# Matches exactly one of the single-character keys of TOKENS.
SINGLE_CHAR_OPERATOR_REGEX = /[|>+~\[\]():,.#*=-]/.freeze

# One or more of: space, tab, line feed, carriage return, form feed.
WHITESPACE_REGEX = /[ \t\n\r\f]+/.freeze

# A complete /* ... */ comment; the body may contain '*' that is not
# immediately followed by '/'.
COMMENT_REGEX = %r{/\*[^*]*\*+(?:[^/*][^*]*\*+)*/}.freeze

# A backslash escape: either 1-6 hex digits with one optional trailing
# whitespace character, or a backslash followed by any single character
# other than LF, CR or FF.
ESCAPE_SEQUENCE = /\\(?:[0-9a-fA-F]{1,6}[ \t\n\r\f]?|[^\n\r\f])/.freeze

# An identifier: starts with "--", or with an optional "-" followed by a
# letter, underscore, non-ASCII character or escape sequence; continues with
# letters, digits, "_", "-", non-ASCII characters or escape sequences.
# Written in extended (/x) mode, so the layout whitespace is not significant.
IDENTIFIER_REGEX = /
  (?:
    --
    |
    -?(?:[a-zA-Z_]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})
  )
  (?:[a-zA-Z0-9_-]|[^\x00-\x7F]|#{ESCAPE_SEQUENCE})*
/x.freeze

# Unsigned digits with an optional fractional part (captured as group 1).
NUMBER_REGEX = /\d+(\.\d+)?/.freeze

# Hex escape with the digits in group 1 and the optional single trailing
# whitespace character in group 2.
HEX_ESCAPE_REGEX = /\\([0-9a-fA-F]{1,6})([ \t\n\r\f])?/.freeze

# A backslash immediately followed by a line break (CRLF, LF, CR or FF).
ESCAPED_NEWLINE_REGEX = /\\(?:\r\n|[\n\r\f])/.freeze

# A backslash followed by any single non-newline character (group 1).
SIMPLE_ESCAPE_REGEX = /\\([^\n\r\f])/.freeze

# U+FFFD REPLACEMENT CHARACTER.
REPLACEMENT_CHARACTER = "\uFFFD"
Instance Attribute Summary collapse
-
#column ⇒ Object
readonly
Returns the value of attribute column.
-
#line ⇒ Object
readonly
Returns the value of attribute line.
Instance Method Summary collapse
-
#initialize(input) ⇒ Lexer
constructor
A new instance of Lexer.
- #tokenize ⇒ Object
Constructor Details
#initialize(input) ⇒ Lexer
Returns a new instance of Lexer.
Lines 133–143 of lib/parselly/lexer.rb:
# File 'lib/parselly/lexer.rb', line 133
#
# Sets up a lexer over +input+.
#
# The input is first checked with String#valid_encoding?; when that check
# fails, raise_lexer_error is called with position line 1 / column 1 /
# offset 0. The input is then passed through preprocess_input, which yields
# the text handed to the StringScanner together with an offset map stored in
# @offset_map. Line and column counters start at 1 and the token list starts
# empty.
#
# @param input [String] the text to tokenize
def initialize(input)
  raise_lexer_error('Invalid input encoding', { line: 1, column: 1, offset: 0 }) unless input.valid_encoding?

  scanner_text, @offset_map = preprocess_input(input)
  @scanner = StringScanner.new(scanner_text)
  @tokens = []
  @line = 1
  @column = 1
end
Instance Attribute Details
#column ⇒ Object (readonly)
Returns the value of attribute column.
Lines 131–133 of lib/parselly/lexer.rb:
# File 'lib/parselly/lexer.rb', line 131
#
# Read-only accessor for the @column instance variable (initialised to 1 by
# the constructor).
#
# @return [Object] the current value of @column
def column
  @column
end
#line ⇒ Object (readonly)
Returns the value of attribute line.
Lines 131–133 of lib/parselly/lexer.rb:
# File 'lib/parselly/lexer.rb', line 131
#
# Read-only accessor for the @line instance variable (initialised to 1 by
# the constructor).
#
# @return [Object] the current value of @line
def line
  @line
end
Instance Method Details
#tokenize ⇒ Object
Lines 145–169 of lib/parselly/lexer.rb:
# File 'lib/parselly/lexer.rb', line 145
#
# Scans the rest of the input into tokens and returns the token list.
#
# On each pass ignorable input is skipped (skip_ignored) and the scanners are
# tried in a fixed order: string, number, operator, identifier. The first one
# that matches contributes a token built with build_token at the position
# recorded before scanning. If none matches, one character is consumed and
# raise_lexer_error is called with "Unexpected character: <char>".
#
# The list is terminated by an end-of-input token whose type is +false+ and
# whose value is +nil+.
#
# The terminator is appended only once: calling this method again after it
# has completed returns the same list unchanged instead of adding a second
# end-of-input token.
#
# @return [Array] the accumulated tokens, ending with the end-of-input token
def tokenize
  # A finished run already holds the full stream including its terminator.
  return @tokens if defined?(@eof_emitted)

  until @scanner.eos?
    skip_ignored
    # Trailing whitespace/comments may have consumed the rest of the input.
    break if @scanner.eos?

    start_position = current_position

    if (token = scan_string(start_position))
      type, value = token
      @tokens << build_token(type, value, start_position)
    elsif (value = scan_number)
      @tokens << build_token(:NUMBER, value, start_position)
    elsif (type = scan_operator)
      # The operator scanner reports only the type; the lexeme is the last match.
      @tokens << build_token(type, @scanner.matched, start_position)
    elsif (value = scan_identifier(start_position))
      @tokens << build_token(:IDENT, value, start_position)
    else
      char = @scanner.getch
      raise_lexer_error("Unexpected character: #{char}", start_position)
    end
  end

  @tokens << Token.new(type: false, value: nil, position: eof_position)
  # Set only after a successful run so a failed run is not mistaken for done.
  @eof_emitted = true
  @tokens
end