Class: Itonoko::Parser::HtmlParser

Inherits:
Object
  • Object
show all
Defined in:
lib/itonoko/parser/html_parser.rb

Constant Summary collapse

# Tag names of HTML void elements (elements that take no end tag), kept in a
# frozen Set for constant-time membership checks.
VOID_ELEMENTS = Set.new(
  %w[area base br col embed hr img input link meta param source track wbr]
).freeze
# Tag names of the HTML raw-text elements (+script+ and +style+), kept in a
# frozen Set for constant-time membership checks.
RAW_TEXT_ELEMENTS = Set.new(%w[script style]).freeze
# Implicit end-tag table.
#
# NOTE(review): keys appear to be the *incoming* start tag and values the
# currently open elements that the start tag implicitly closes (the
# "option"/"optgroup" rows only make sense read that way) — confirm against
# the code that consumes this table.
#
# The "rtc" row previously omitted "rt". Per the HTML optional-tag rules an
# open <rt> may have its end tag omitted when followed by <rtc>, so an
# incoming <rtc> has to close an open <rt> as well.
AUTO_CLOSE =
{
  "p"        => %w[p],
  "li"       => %w[li],
  "dt"       => %w[dt dd],
  "dd"       => %w[dd dt],
  "td"       => %w[td th],
  "th"       => %w[td th],
  "tr"       => %w[tr],
  "colgroup" => %w[colgroup],
  "caption"  => %w[caption],
  "option"   => %w[option],
  "optgroup" => %w[optgroup option],
  "rb"       => %w[rb rt rtc rp],
  "rt"       => %w[rb rt rp],        # <rt> may nest inside <rtc>, so "rtc" stays open
  "rp"       => %w[rb rt rtc rp],
  "rtc"      => %w[rb rt rtc rp],
}.freeze

Instance Method Summary collapse

Instance Method Details

#parse(html) ⇒ Object



38
39
40
41
42
43
44
45
# File 'lib/itonoko/parser/html_parser.rb', line 38

# Parses +html+ into a fresh document.
#
# Resets all per-parse state (@doc, @open_stack, @buf) and then hands a
# StringScanner over the input to #tokenize_and_build.
#
# @param html [#to_s] markup to parse; converted with +to_s+, so +nil+ is
#   scanned as an empty string
# @return [Object] the value of @doc once #tokenize_and_build has returned
def parse(html)
  document = @doc = HTML::Document.new
  document.errors = []

  # The document itself is the bottom entry of the open-element stack.
  @open_stack = [document]

  # Unary plus yields an unfrozen string even when string literals are frozen.
  @buf = +""

  scanner = StringScanner.new(html.to_s)
  tokenize_and_build(scanner)
  @doc
end