Class: Meteor::Ml::Html4::ParserImpl

Inherits:
Core::Kernel show all
Defined in:
lib/meteor/ml/html4/parser_impl.rb

Overview

HTML4 parser (HTMLパーサ)

Direct Known Subclasses

Meteor::Ml::Html::ParserImpl

Constant Summary collapse

KAIGYO_CODE =

KAIGYO_CODE = “\r?\n|\r” KAIGYO_CODE = “\r\n|\n|\r”

["\r\n", "\n", "\r"]
BR =
"<br>"
DISABLE_ELEMENT =

DISABLE_ELEMENT = “input|textarea|select|optgroup”

Array

elements with the disabled attribute (disabled属性のある要素)

["input", "textarea", "select", "optgroup"]
READONLY_TYPE =

READONLY_TYPE = “text|password”

Array

the type of an input element with a readonly attribute (readonly属性のあるinput要素のタイプ)

["text", "password"]
SELECTED_M =
"\\sselected\\s|\\sselected$|\\sSELECTED\\s|\\sSELECTED$"
SELECTED_R =

SELECTED_M = [‘ selected ’,‘ selected’,‘ SELECTED ’,‘ SELECTED’]

"selected\\s|selected$|SELECTED\\s|SELECTED$"
CHECKED_M =
"\\schecked\\s|\\schecked$|\\sCHECKED\\s|\\sCHECKED$"
CHECKED_R =

CHECKED_M = [‘ checked ’,‘ checked’,‘ CHECKED ’,‘ CHECKED’]

"checked\\s|checked$|CHECKED\\s|CHECKED$"
DISABLED_M =
"\\sdisabled\\s|\\sdisabled$|\\sDISABLED\\s|\\sDISABLED$"
DISABLED_R =

DISABLED_M = [‘ disabled ’,‘ disabled’,‘ DISABLED ’,‘ DISABLED’]

"disabled\\s|disabled$|DISABLED\\s|DISABLED$"
READONLY_M =
"\\sreadonly\\s|\\sreadonly$|\\sREADONLY\\s|\\sREADONLY$"
READONLY_R =

READONLY_M = [‘ readonly ’,‘ readonly’,‘ READONLY ’,‘ READONLY’]

"readonly\\s|readonly$|READONLY\\s|READONLY$"
MULTIPLE_M =
"\\smultiple\\s|\\smultiple$|\\sMULTIPLE\\s|\\sMULTIPLE$"
MULTIPLE_R =

MULTIPLE_M = [‘ multiple ’,‘ multiple’,‘ MULTIPLE ’,‘ MULTIPLE’]

"multiple\\s|multiple$|MULTIPLE\\s|MULTIPLE$"
PATTERN_UNESCAPE =

@@pattern_true = Regexp.new(“true”) @@pattern_false = Regexp.new(“false”)

"&(amp|quot|apos|gt|lt|nbsp);"
GET_ATTRS_MAP2 =
"\\s(disabled|readonly|checked|selected|multiple)"
TABLE_FOR_ESCAPE_ =

@@pattern_match_tag = Regexp.new(@@match_tag) @@pattern_match_tag2 = Regexp.new(@@match_tag_2)

{
  "&" => "&amp;",
  "\"" => "&quot;",
  "'" => "&apos;",
  "<" => "&lt;",
  ">" => "&gt;",
  " " => "&nbsp;"
}
TABLE_FOR_ESCAPE_CONTENT_ =
{
  "&" => "&amp;",
  "\"" => "&quot;",
  "'" => "&apos;",
  "<" => "&lt;",
  ">" => "&gt;",
  " " => "&nbsp;",
  "\r\n" => "<br>",
  "\r" => "<br>",
  "\n" => "<br>"
}
PATTERN_ESCAPE =
"[&\"'<> ]"
PATTERN_ESCAPE_CONTENT =
"[&\"'<> \\n]"
@@match_tag =

@@match_tag = “br|hr|img|input|meta|base”

Array

void elements (空要素)

["br", "hr", "img", "input", "meta", "base"]
@@match_tag_2 =

@@match_tag_2 = “textarea|option|pre”

Array

elements where line breaks do not need to be converted to <br> (改行を<br>に変換する必要のない要素)

["textarea", "option", "pre"]
@@match_tag_sng =
Array

non-nestable elements (入れ子にできない要素)

["texarea", "select", "option", "form", "fieldset"]
@@attr_logic =
Array

boolean attributes (論理値で指定する属性)

["disabled", "readonly", "checked", "selected", "multiple"]
@@pattern_selected_m =
Regexp.new(SELECTED_M)
@@pattern_selected_r =
Regexp.new(SELECTED_R)
@@pattern_checked_m =
Regexp.new(CHECKED_M)
@@pattern_checked_r =
Regexp.new(CHECKED_R)
@@pattern_disabled_m =
Regexp.new(DISABLED_M)
@@pattern_disabled_r =
Regexp.new(DISABLED_R)
@@pattern_readonly_m =
Regexp.new(READONLY_M)
@@pattern_readonly_r =
Regexp.new(READONLY_R)
@@pattern_multiple_m =
Regexp.new(MULTIPLE_M)
@@pattern_multiple_r =
Regexp.new(MULTIPLE_R)
@@pattern_unescape =
Regexp.new(PATTERN_UNESCAPE)
@@pattern_get_attrs_map2 =
Regexp.new(GET_ATTRS_MAP2)
@@pattern_escape =
Regexp.new(PATTERN_ESCAPE)
@@pattern_escape_content =
Regexp.new(PATTERN_ESCAPE_CONTENT)
@@pattern_br_2 =
Regexp.new(BR)

Constants inherited from Core::Kernel

Core::Kernel::PATTERN_FIND_1, Core::Kernel::PATTERN_FIND_2_1, Core::Kernel::PATTERN_FIND_2_2, Core::Kernel::PATTERN_FIND_2_3, Core::Kernel::PATTERN_FIND_3_1, Core::Kernel::PATTERN_FIND_3_2, Core::Kernel::PATTERN_FIND_3_3, Core::Kernel::PATTERN_FIND_4, Core::Kernel::PATTERN_FIND_5

Constants inherited from Parser

Parser::HTML, Parser::HTML4, Parser::XHTML, Parser::XHTML4, Parser::XML

Instance Attribute Summary

Attributes inherited from Core::Kernel

#doc_type, #document_hook, #element_cache, #element_hook

Instance Method Summary collapse

Methods inherited from Core::Kernel

#attr, #attr_map, #attrs, #character_encoding, #character_encoding=, #content, #cxtag, #document, #document=, #element, #elements, #elements_, #find, #flash, #read, #remove_attr, #remove_element, #root_element, #shadow

Constructor Details

#initialize ⇒ ParserImpl #initialize(ps) ⇒ ParserImpl

initializer (イニシャライザ)

Overloads:



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/meteor/ml/html4/parser_impl.rb', line 109

# Initializer.
#
# Calls +super()+ with no arguments, sets @doc_type to Parser::HTML4, and then
# dispatches on how many arguments were passed.
#
# @overload initialize
#   Performs no further setup.
# @overload initialize(ps)
#   @param ps the single argument, forwarded unchanged to initialize_1
#     (presumably a parser to initialize from; confirm against initialize_1)
# @raise [ArgumentError] if the argument count is neither ZERO nor ONE
def initialize(*args)
  super()
  @doc_type = Parser::HTML4
  case args.length
  when ZERO
    # Nothing further to set up (the initialize_0 call is disabled).
  when ONE
    initialize_1(args.first)
  else
    # Report the actual and accepted counts so the caller can diagnose the call site.
    raise ArgumentError, "wrong number of arguments (given #{args.length}, expected #{ZERO}..#{ONE})"
  end
end

Instance Method Details

#content_type ⇒ String

get content type (コンテントタイプを取得する)

Returns:

  • (String)

    content type (コンテントタイプ)



167
168
169
# File 'lib/meteor/ml/html4/parser_impl.rb', line 167

# Get the content type by asking the object held in @root.
#
# @return [String] content type
def content_type
  root = @root
  root.content_type
end

#parse ⇒ Object

parse document (ドキュメントを解析する)



147
148
149
# File 'lib/meteor/ml/html4/parser_impl.rb', line 147

# Parse the document by delegating to analyze_ml.
#
# @return [Object] whatever analyze_ml returns
def parse
  analyze_ml()
end