Class: Crass::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/crass/parser.rb

Overview

Parses a CSS string or list of tokens.

  5. dev.w3.org/csswg/css-syntax/#parsing

Constant Summary collapse

# Maps each block-opening token type to the token type that closes it.
# Frozen so this shared lookup table cannot be mutated by accident.
BLOCK_END_TOKENS =
{
  :'{' => :'}',
  :'[' => :']',
  :'(' => :')'
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input, options = {}) ⇒ Parser

Initializes a parser based on the given input, which may be a CSS string or an array of tokens.

See Tokenizer#initialize for options.



126
127
128
129
130
131
132
# File 'lib/crass/parser.rb', line 126

# Builds a parser around +input+.
#
# input   - CSS source, or an Enumerable of tokens. An Enumerable is used
#           as-is; anything else is run through Tokenizer.tokenize first.
# options - Passed through to Tokenizer.tokenize when tokenizing is needed.
def initialize(input, options = {})
  token_list = input.kind_of?(Enumerable) ? input : Tokenizer.tokenize(input, options)
  @tokens    = TokenScanner.new(token_list)
end

Instance Attribute Details

#tokens ⇒ Object (readonly)

TokenScanner wrapping the tokens generated from this parser's input.



120
121
122
# File 'lib/crass/parser.rb', line 120

# Returns the value of @tokens, which #initialize sets to a TokenScanner
# wrapping this parser's input tokens.
def tokens
  @tokens
end

Class Method Details

.parse_properties(input, options = {}) ⇒ Object

Parses CSS properties (such as the contents of an HTML element's style attribute) and returns a parse tree.

See Tokenizer#initialize for options.

5.3.6. dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations



25
26
27
# File 'lib/crass/parser.rb', line 25

# Convenience entry point: wraps +input+ in a new Parser (forwarding
# +options+) and returns the result of calling #parse_properties on it.
def self.parse_properties(input, options = {})
  parser = Parser.new(input, options)
  parser.parse_properties
end

.parse_rules(input, options = {}) ⇒ Object

Parses CSS rules (such as the content of a @media block) and returns a parse tree. The only difference from parse_stylesheet is that CDO/CDC nodes (<!-- and -->) aren't ignored.

See Tokenizer#initialize for options.

5.3.3. dev.w3.org/csswg/css-syntax/#parse-a-list-of-rules



36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/crass/parser.rb', line 36

# Parses +input+ as a list of rules and returns the resulting nodes.
#
# Rules are consumed without the :top_level flag. Every :qualified_rule node
# is converted with Parser#create_style_rule; all other nodes are returned
# untouched.
def self.parse_rules(input, options = {})
  parser = Parser.new(input, options)

  parser.consume_rules.map do |rule|
    rule[:node] == :qualified_rule ? parser.create_style_rule(rule) : rule
  end
end

.parse_stylesheet(input, options = {}) ⇒ Object

Parses a CSS stylesheet and returns a parse tree.

See Tokenizer#initialize for options.

5.3.2. dev.w3.org/csswg/css-syntax/#parse-a-stylesheet



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/crass/parser.rb', line 54

# Parses +input+ as a complete stylesheet and returns the resulting nodes.
#
# Rules are consumed with :top_level => true. Every :qualified_rule node is
# converted with Parser#create_style_rule; all other nodes are returned
# untouched.
def self.parse_stylesheet(input, options = {})
  parser    = Parser.new(input, options)
  top_level = parser.consume_rules(:top_level => true)

  top_level.map do |rule|
    rule[:node] == :qualified_rule ? parser.create_style_rule(rule) : rule
  end
end

.stringify(nodes, options = {}) ⇒ Object

Converts a node or array of nodes into a CSS string based on their original tokenized input.

Options:

  • :exclude_comments - When true, comments will be excluded.



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/crass/parser.rb', line 74

# Serializes a node, or an array of nodes, back into CSS text.
#
# nodes   - A single node Hash or an Array of them; nil entries are skipped.
# options - :exclude_comments, when truthy, drops :comment nodes.
#
# Returns the resulting String.
def self.stringify(nodes, options = {})
  node_list = nodes.is_a?(Array) ? nodes : [nodes]

  node_list.each_with_object(String.new) do |node, css|
    next if node.nil?

    case node[:node]
    when :at_rule
      css << '@' << node[:name] << self.stringify(node[:prelude], options)

      # An at-rule without a block is terminated by a semicolon instead.
      if node[:block]
        css << '{' << self.stringify(node[:block], options) << '}'
      else
        css << ';'
      end

    when :comment
      css << node[:raw] unless options[:exclude_comments]

    when :simple_block
      css << node[:start] << self.stringify(node[:value], options) << node[:end]

    when :style_rule
      css << self.stringify(node[:selector][:tokens], options)
      css << '{' << self.stringify(node[:children], options) << '}'

    else
      # Any other node: prefer its raw text, otherwise serialize its tokens.
      if node.key?(:raw)
        css << node[:raw]
      elsif node.key?(:tokens)
        css << self.stringify(node[:tokens], options)
      end
    end
  end
end

Instance Method Details

#consume_at_rule(input = @tokens) ⇒ Object

Consumes an at-rule and returns it.

5.4.2. dev.w3.org/csswg/css-syntax-3/#consume-at-rule



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/crass/parser.rb', line 137

# Consumes an at-rule from +input+ and returns an :at_rule node.
#
# The next token supplies the rule's name. Component values are then
# collected into the prelude until a semicolon, a '{' block, or the end of
# the input is reached. Comments are skipped (non-standard).
#
# 5.4.2. dev.w3.org/csswg/css-syntax-3/#consume-at-rule
def consume_at_rule(input = @tokens)
  at_rule = {}

  at_rule[:tokens] = input.collect do
    at_rule[:name]    = input.consume[:value]
    at_rule[:prelude] = []

    while (token = input.consume)
      case token[:node]
      when :comment # Non-standard.
        next

      when :semicolon
        break

      when :'{'
        # Note: The spec says the block should _be_ the consumed simple
        # block, but Simon Sapin's CSS parsing tests and tinycss2 expect
        # only the _value_ of the consumed simple block here, so the
        # tinycss2 behavior is used.
        at_rule[:block] = consume_simple_block(input)[:value]
        break

      when :simple_block
        # An already-parsed '{' block is handled the same way: only its
        # value is kept. Other simple blocks fall through to the prelude.
        if token[:start] == '{'
          at_rule[:block] = token[:value]
          break
        end
      end

      # Anything else belongs to the prelude.
      input.reconsume
      at_rule[:prelude] << consume_component_value(input)
    end
  end

  create_node(:at_rule, at_rule)
end

#consume_component_value(input = @tokens) ⇒ Object

Consumes a component value and returns it, or nil if there are no more tokens.

5.4.6. dev.w3.org/csswg/css-syntax-3/#consume-a-component-value



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/crass/parser.rb', line 184

# Consumes one component value from +input+ and returns it, or nil when
# there are no more tokens.
#
# An opening bracket token is expanded into a simple block and a raw
# function token into a parsed function; every other token is returned
# as-is.
#
# 5.4.6. dev.w3.org/csswg/css-syntax-3/#consume-a-component-value
def consume_component_value(input = @tokens)
  token = input.consume
  return nil unless token

  type = token[:node]

  if type == :'{' || type == :'[' || type == :'('
    consume_simple_block(input)
  elsif type == :function && !token.key?(:name)
    # Only raw function tokens lack :name. A node that already has one is a
    # parsed function and must not be re-parsed (a step the spec doesn't
    # mention).
    consume_function(input)
  else
    token
  end
end

#consume_declaration(input = @tokens) ⇒ Object

Consumes a declaration and returns it, or an :error node (value 'invalid') on parse error.

5.4.5. dev.w3.org/csswg/css-syntax-3/#consume-a-declaration



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/crass/parser.rb', line 209

# Consumes a declaration from +input+ and returns a :declaration node, or an
# :error node (value 'invalid') when the name is not followed by a colon.
#
# The next token supplies the declaration name. Whitespace before the colon
# is skipped, and everything after the colon becomes the value. A trailing
# `!important` sets :important to true and is trimmed from the value, along
# with anything that follows it.
#
# 5.4.5. dev.w3.org/csswg/css-syntax-3/#consume-a-declaration
def consume_declaration(input = @tokens)
  declaration = {}
  value       = []

  declaration[:tokens] = input.collect do
    declaration[:name] = input.consume[:value]

    next_token = input.peek

    while next_token && next_token[:node] == :whitespace
      input.consume
      next_token = input.peek
    end

    unless next_token && next_token[:node] == :colon
      # Parse error.
      #
      # Note: The spec explicitly says to return nothing here, but Simon
      # Sapin's CSS parsing tests expect an error node.
      return create_node(:error, :value => 'invalid')
    end

    input.consume

    until input.peek.nil?
      value << consume_component_value(input)
    end
  end

  # Look for !important: the last two significant tokens of the value.
  important_tokens = value.reject {|token|
    node = token[:node]
    node == :whitespace || node == :comment || node == :semicolon
  }.last(2)

  if important_tokens.size == 2 &&
      important_tokens[0][:node] == :delim &&
      important_tokens[0][:value] == '!' &&
      important_tokens[1][:node] == :ident &&
      important_tokens[1][:value].downcase == 'important'

    declaration[:important] = true

    # Locate the `!` by object identity, searching from the end. Looking it
    # up by equality could match an earlier, equal-looking `!` token (for
    # example in hand-built token lists without :pos) and would then trim
    # too much of the value.
    bang       = important_tokens[0]
    excl_index = value.rindex {|token| token.equal?(bang) }

    # Technically the spec doesn't require us to trim trailing tokens after
    # the !important, but Simon Sapin's CSS parsing tests expect it and
    # tinycss2 does it, so we'll go along with the cool kids.
    value.slice!(excl_index, value.size - excl_index)
  else
    declaration[:important] = false
  end

  declaration[:value] = value
  create_node(:declaration, declaration)
end

#consume_declarations(input = @tokens, options = {}) ⇒ Object

Consumes a list of declarations and returns them.

By default, the returned list may include :comment, :semicolon, and :whitespace nodes, which is non-standard.

Options:

  • :strict - Set to true to exclude non-standard :comment, :semicolon, and :whitespace nodes.

5.4.4. dev.w3.org/csswg/css-syntax/#consume-a-list-of-declarations



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/crass/parser.rb', line 276

# Consumes a list of declarations from +input+ and returns them as an Array.
#
# options - :strict, when truthy, leaves the non-standard :comment,
#           :semicolon and :whitespace nodes out of the result.
#
# At-keywords are consumed as at-rules. Tokens that cannot start a
# declaration produce an :error node, and the rest of that declaration (up
# to the next semicolon) is discarded.
#
# 5.4.4. dev.w3.org/csswg/css-syntax/#consume-a-list-of-declarations
def consume_declarations(input = @tokens, options = {})
  result = []

  while (token = input.consume)
    type = token[:node]

    if type == :comment || type == :semicolon || type == :whitespace
      # Non-standard: Preserve comments, semicolons, and whitespace.
      result << token unless options[:strict]

    elsif type == :at_keyword
      # When parsing a style rule, this is a parse error. Otherwise it's
      # not.
      input.reconsume
      result << consume_at_rule(input)

    elsif type == :ident
      # Gather everything up to (not including) the next semicolon and parse
      # it as a single declaration.
      decl_tokens = [token]

      while (upcoming = input.peek) && upcoming[:node] != :semicolon
        decl_tokens << consume_component_value(input)
      end

      decl = consume_declaration(TokenScanner.new(decl_tokens))
      result << decl if decl

    else
      # Parse error (invalid property name, etc.).
      #
      # Note: The spec doesn't say we should append anything to the list of
      # declarations here, but Simon Sapin's CSS parsing tests expect an
      # error node.
      result << create_node(:error, :value => 'invalid')
      input.reconsume

      while (upcoming = input.peek) && upcoming[:node] != :semicolon
        consume_component_value(input)
      end
    end
  end

  result
end

#consume_function(input = @tokens) ⇒ Object

Consumes a function and returns it.

5.4.8. dev.w3.org/csswg/css-syntax-3/#consume-a-function



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/crass/parser.rb', line 326

# Consumes a function whose opening function token is +input+'s current
# token, and returns a :function node.
#
# Component values up to the closing ')' (or the end of the input) become
# the function's value; comments are skipped (non-standard).
#
# 5.4.8. dev.w3.org/csswg/css-syntax-3/#consume-a-function
def consume_function(input = @tokens)
  opener = input.current
  args   = []

  consumed = input.collect do
    while (token = input.consume)
      type = token[:node]

      break if type == :')'
      next  if type == :comment # Non-standard.

      input.reconsume
      args << consume_component_value(input)
    end
  end

  create_node(:function,
    :name   => opener[:value],
    :value  => args,
    :tokens => [opener].concat(consumed)) # Non-standard, used for serialization.
end

#consume_qualified_rule(input = @tokens) ⇒ Object

Consumes a qualified rule and returns it, or an :error node (value 'invalid') if the input ends before a block is found.

5.4.3. dev.w3.org/csswg/css-syntax-3/#consume-a-qualified-rule



357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# File 'lib/crass/parser.rb', line 357

# Consumes a qualified rule from +input+ and returns a :qualified_rule node.
#
# Component values are collected into the prelude until a '{' block is
# found. If the input runs out first, an :error node (value 'invalid') is
# returned instead.
#
# 5.4.3. dev.w3.org/csswg/css-syntax-3/#consume-a-qualified-rule
def consume_qualified_rule(input = @tokens)
  qualified = {:prelude => []}

  qualified[:tokens] = input.collect do
    # Keep consuming until a block has been recorded.
    until qualified.key?(:block)
      token = input.consume

      # Parse error.
      #
      # Note: The spec explicitly says to return nothing here, but Simon
      # Sapin's CSS parsing tests expect an error node.
      return create_node(:error, :value => 'invalid') unless token

      # Note: The spec says the block should _be_ the (consumed) simple
      # block, but Simon Sapin's CSS parsing tests and tinycss2 expect only
      # its _value_ here, so the tinycss2 behavior is used in both block
      # branches below.
      if token[:node] == :'{'
        qualified[:block] = consume_simple_block(input)[:value]
      elsif token[:node] == :simple_block && token[:start] == '{'
        qualified[:block] = token[:value]
      else
        input.reconsume
        qualified[:prelude] << consume_component_value(input)
      end
    end
  end

  create_node(:qualified_rule, qualified)
end

#consume_rules(flags = {}) ⇒ Object

Consumes a list of rules and returns them.

5.4.1. dev.w3.org/csswg/css-syntax/#consume-a-list-of-rules



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
# File 'lib/crass/parser.rb', line 398

# Consumes a list of rules from @tokens and returns them as an Array.
#
# flags - :top_level, when truthy, causes CDO/CDC tokens to be dropped;
#         otherwise they start a qualified rule like any other token.
#
# 5.4.1. dev.w3.org/csswg/css-syntax/#consume-a-list-of-rules
def consume_rules(flags = {})
  rules = []

  while (token = @tokens.consume)
    type = token[:node]

    # Non-standard. Spec says to discard comments and whitespace, but we
    # keep them so we can serialize faithfully.
    if type == :comment || type == :whitespace
      rules << token
      next
    end

    # At the top level, CDO/CDC tokens are simply skipped.
    next if (type == :cdc || type == :cdo) && flags[:top_level]

    @tokens.reconsume
    rule = type == :at_keyword ? consume_at_rule : consume_qualified_rule
    rules << rule if rule
  end

  rules
end

#consume_simple_block(input = @tokens) ⇒ Object

Consumes and returns a simple block associated with the current input token.

5.4.7. dev.w3.org/csswg/css-syntax/#consume-a-simple-block



434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# File 'lib/crass/parser.rb', line 434

# Consumes the simple block opened by +input+'s current token and returns a
# :simple_block node.
#
# The matching closing token type is looked up in BLOCK_END_TOKENS.
# Component values are collected until that token (or the end of the input)
# is reached.
#
# 5.4.7. dev.w3.org/csswg/css-syntax/#consume-a-simple-block
def consume_simple_block(input = @tokens)
  opener     = input.current
  open_type  = opener[:node]
  close_type = BLOCK_END_TOKENS[open_type]
  contents   = []

  consumed = input.collect do
    while (token = input.consume) && token[:node] != close_type
      input.reconsume
      contents << consume_component_value(input)
    end
  end

  create_node(:simple_block,
    :start  => open_type.to_s,
    :end    => close_type.to_s,
    :value  => contents,
    :tokens => [opener].concat(consumed)) # Non-standard. Used for serialization.
end

#create_node(type, properties = {}) ⇒ Object

Creates and returns a new parse node with the given properties.



458
459
460
# File 'lib/crass/parser.rb', line 458

# Builds a parse node: a new Hash whose :node entry is +type+, with
# +properties+ merged in on top.
def create_node(type, properties = {})
  node = {:node => type}
  node.update(properties)
end

#create_selector(input) ⇒ Object

Parses the given input tokens into a selector node and returns it.

Doesn't bother splitting the selector list into individual selectors or validating them. Feel free to do that yourself! It'll be fun!



466
467
468
469
470
# File 'lib/crass/parser.rb', line 466

# Builds a :selector node from +input+: :value holds the result of
# parse_value(input) and :tokens holds +input+ itself.
#
# The selector list is neither split into individual selectors nor
# validated.
def create_selector(input)
  selector_text = parse_value(input)
  create_node(:selector, :value => selector_text, :tokens => input)
end

#create_style_rule(rule) ⇒ Object

Creates a :style_rule node from the given qualified rule, and returns it.



474
475
476
477
478
# File 'lib/crass/parser.rb', line 474

# Builds a :style_rule node from the qualified rule +rule+: its prelude
# becomes the selector (via #create_selector) and its block is parsed into
# the children (via #parse_properties).
def create_style_rule(rule)
  selector = create_selector(rule[:prelude])
  children = parse_properties(rule[:block])

  create_node(:style_rule, :selector => selector, :children => children)
end

#parse_component_value(input = @tokens) ⇒ Object

Parses a single component value and returns it.

5.3.7. dev.w3.org/csswg/css-syntax-3/#parse-a-component-value



483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
# File 'lib/crass/parser.rb', line 483

# Parses exactly one component value from +input+, which may be a
# TokenScanner or anything TokenScanner.new accepts.
#
# Leading and trailing whitespace is ignored. Returns the component value,
# an :error node with value 'empty' when there is nothing to parse, or an
# :error node with value 'extra-input' when tokens remain afterwards.
#
# 5.3.7. dev.w3.org/csswg/css-syntax-3/#parse-a-component-value
def parse_component_value(input = @tokens)
  scanner = input.is_a?(TokenScanner) ? input : TokenScanner.new(input)

  scanner.consume while scanner.peek && scanner.peek[:node] == :whitespace
  return create_node(:error, :value => 'empty') if scanner.peek.nil?

  value = consume_component_value(scanner)

  scanner.consume while scanner.peek && scanner.peek[:node] == :whitespace
  return create_node(:error, :value => 'extra-input') unless scanner.peek.nil?

  value
end

#parse_component_values(input = @tokens) ⇒ Object

Parses a list of component values and returns an array of parsed tokens.

5.3.8. dev.w3.org/csswg/css-syntax/#parse-a-list-of-component-values



510
511
512
513
514
515
516
517
518
519
# File 'lib/crass/parser.rb', line 510

# Parses +input+ (a TokenScanner, or anything TokenScanner.new accepts) into
# an Array of component values, consuming until no value is left.
#
# 5.3.8. dev.w3.org/csswg/css-syntax/#parse-a-list-of-component-values
def parse_component_values(input = @tokens)
  scanner = input.is_a?(TokenScanner) ? input : TokenScanner.new(input)
  values  = []

  value = consume_component_value(scanner)

  while value
    values << value
    value = consume_component_value(scanner)
  end

  values
end

#parse_declaration(input = @tokens) ⇒ Object

Parses a single declaration and returns it.

5.3.5. dev.w3.org/csswg/css-syntax/#parse-a-declaration



524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
# File 'lib/crass/parser.rb', line 524

# Parses a single declaration from +input+, which may be a TokenScanner or
# anything TokenScanner.new accepts.
#
# Leading whitespace is skipped. Returns the declaration, an :error node
# with value 'empty' when nothing is left, or an :error node with value
# 'invalid' when the first token is not an ident or no declaration could be
# consumed.
#
# 5.3.5. dev.w3.org/csswg/css-syntax/#parse-a-declaration
def parse_declaration(input = @tokens)
  scanner = input.is_a?(TokenScanner) ? input : TokenScanner.new(input)

  scanner.consume while scanner.peek && scanner.peek[:node] == :whitespace

  first = scanner.peek

  # Syntax errors: nothing to parse, or not starting with an ident.
  return create_node(:error, :value => 'empty')   if first.nil?
  return create_node(:error, :value => 'invalid') if first[:node] != :ident

  # A falsy result from consume_declaration is also a syntax error.
  consume_declaration(scanner) || create_node(:error, :value => 'invalid')
end

#parse_declarations(input = @tokens, options = {}) ⇒ Object

Parses a list of declarations and returns them.

See #consume_declarations for options.

5.3.6. dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations



552
553
554
555
# File 'lib/crass/parser.rb', line 552

# Parses a list of declarations from +input+, wrapping it in a TokenScanner
# first unless it already is one. +options+ is handed unchanged to
# #consume_declarations, whose result is returned.
#
# 5.3.6. dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations
def parse_declarations(input = @tokens, options = {})
  scanner = input.is_a?(TokenScanner) ? input : TokenScanner.new(input)
  consume_declarations(scanner, options)
end

#parse_properties(input = @tokens) ⇒ Object

Parses a list of declarations and returns an array of :property nodes (and any non-declaration nodes that were in the input). This is useful for parsing the contents of an HTML element's style attribute.



560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
# File 'lib/crass/parser.rb', line 560

# Parses +input+ as a list of declarations and returns an Array in which
# every :declaration node has been converted into a :property node. Other
# nodes produced by #parse_declarations are passed through unchanged.
#
# Each property's :children is a copy of the declaration's value without a
# trailing semicolon, while :value is the string from #parse_value.
def parse_properties(input = @tokens)
  parse_declarations(input).map do |decl|
    next decl unless decl[:node] == :declaration

    # Work on a copy so the declaration's own value array stays intact.
    children = decl[:value].dup
    trailing = children.last
    children.pop if trailing && trailing[:node] == :semicolon

    create_node(:property,
      :name      => decl[:name],
      :value     => parse_value(decl[:value]),
      :children  => children,
      :important => decl[:important],
      :tokens    => decl[:tokens])
  end
end

#parse_rule(input = @tokens) ⇒ Object

Parses a single rule and returns it.

5.3.4. dev.w3.org/csswg/css-syntax-3/#parse-a-rule



586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
# File 'lib/crass/parser.rb', line 586

# Parses a single rule from +input+, which may be a TokenScanner or anything
# TokenScanner.new accepts.
#
# Whitespace around the rule is ignored. An at-keyword starts an at-rule;
# anything else starts a qualified rule. Returns the rule, an :error node
# with value 'empty' when there is nothing to parse, or an :error node with
# value 'extra-input' when tokens remain after the rule.
#
# 5.3.4. dev.w3.org/csswg/css-syntax-3/#parse-a-rule
def parse_rule(input = @tokens)
  scanner = input.is_a?(TokenScanner) ? input : TokenScanner.new(input)

  scanner.consume while scanner.peek && scanner.peek[:node] == :whitespace

  # Syntax error.
  return create_node(:error, :value => 'empty') if scanner.peek.nil?

  rule =
    if scanner.peek[:node] == :at_keyword
      consume_at_rule(scanner)
    else
      consume_qualified_rule(scanner)
    end

  scanner.consume while scanner.peek && scanner.peek[:node] == :whitespace

  # Anything left over after the rule is a syntax error.
  scanner.peek.nil? ? rule : create_node(:error, :value => 'extra-input')
end

#parse_value(nodes) ⇒ Object

Returns the unescaped value of a selector name or property declaration.



615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
# File 'lib/crass/parser.rb', line 615

# Builds the textual value of a node or an Array of nodes and returns it
# with surrounding whitespace stripped.
#
# Comments and semicolons are left out. Idents and at-keywords contribute
# their :value, function tokens contribute their name followed by '(', and
# everything else contributes its :raw text or, failing that, the value of
# its :tokens.
def parse_value(nodes)
  node_list = nodes.is_a?(Array) ? nodes : [nodes]

  text = node_list.each_with_object(String.new) do |node, out|
    type = node[:node]

    next if type == :comment || type == :semicolon

    if type == :at_keyword || type == :ident
      out << node[:value]
    elsif type == :function && node[:value].is_a?(String)
      # A raw function token: its String value is the function name.
      out << node[:value] << '('
    elsif type == :function
      # A parsed function: rebuild it from its own tokens.
      out << parse_value(node[:tokens])
    elsif node.key?(:raw)
      out << node[:raw]
    elsif node.key?(:tokens)
      out << parse_value(node[:tokens])
    end
  end

  text.strip
end