Class: CSS::Tokenizer

Inherits:

Object

Object
CSS::Tokenizer

show all

Includes:: CodePoints

Defined in:: lib/css/tokenizer.rb

Overview

Tokenizer based on CSS Syntax Module Level 3/4, §4 (Tokenization). See https://www.w3.org/TR/css-syntax-3/#tokenization

Constant Summary collapse

# Lookup table from a single punctuation character (as a one-character
# String) to a Symbol naming it. Covers parentheses, comma, colon,
# semicolon, square brackets and curly braces. Frozen so the table cannot be
# mutated at runtime.
# NOTE(review): presumably the Symbols are used as token types for these
# characters — confirm against consume_one_token.
PUNCTUATION =

{
  '(' => :lparen,
  ')' => :rparen,
  ',' => :comma,
  ':' => :colon,
  ';' => :semicolon,
  '[' => :lbracket,
  ']' => :rbracket,
  '{' => :lbrace,
  '}' => :rbrace
}.freeze

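PREPROCESS_RE = Matches every code point that input preprocessing rewrites: CR, FF, and CR LF pairs each collapse to a single LF; NUL is replaced with U+FFFD. A single alternation lets the whole input be handled in one pass.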

/\r\n?|\f|\0/.freeze

Constants included from CodePoints

CodePoints::REPLACEMENT

Instance Method Summary collapse

#initialize(input, preserve_comments: false) ⇒ Tokenizer constructor

A new instance of Tokenizer.
#next_token ⇒ Object
#tokenize ⇒ Object

Methods included from CodePoints

digit?, hex_digit?, ident_code_point?, ident_start_code_point?

Constructor Details

#initialize(input, preserve_comments: false) ⇒ `Tokenizer`

Returns a new instance of Tokenizer.

# File 'lib/css/tokenizer.rb', line 23

# Sets up a tokenizer over +input+.
#
# @param input the raw source; it is run through +preprocess+ before being
#   stored, so all later offsets refer to the preprocessed text
# @param preserve_comments [Boolean] stored as-is; when truthy, #next_token
#   does not call +consume_comments+ before reading a token
def initialize(input, preserve_comments: false)
  normalized = preprocess(input)

  @input             = normalized
  @pos               = 0 # scanning starts at the beginning of the input
  @newlines          = collect_newline_offsets(normalized)
  @preserve_comments = preserve_comments
end

Instance Method Details

#next_token ⇒ `Object`

# File 'lib/css/tokenizer.rb', line 43

# Reads one token starting at the current scan position.
#
# Unless comments are being preserved, +consume_comments+ is called first.
# If the scan position is then at or past the end of the input, a bare
# +:eof+ token is returned and no position is assigned to it.
#
# Otherwise one token is consumed and a Position is assigned to it, built
# from the line/column of the offset where the token began, that offset,
# and the scan position after consumption.
#
# @return [Object] the +:eof+ Token, or whatever +assign_position!+ returns
#   (presumably the positioned token itself — confirm against Token)
def next_token
  consume_comments unless @preserve_comments
  return Token.new(:eof) if @pos >= @input.length

  began_at = @pos # remember where the token starts before consuming moves @pos
  token    = consume_one_token
  row, col = line_column_at(began_at)
  span     = Position.new(line: row, column: col, offset: began_at, end_offset: @pos)

  token.assign_position!(span)
end

#tokenize ⇒ `Object`

# File 'lib/css/tokenizer.rb', line 30

# Drains the tokenizer by calling #next_token repeatedly.
#
# Collection stops at the first token whose type is +:eof+; that token is
# not included in the result.
#
# @return [Array] the tokens read before end of input, in order
def tokenize
  [].tap do |collected|
    loop do
      current = next_token
      break if current.type == :eof

      collected << current
    end
  end
end