Class: Itonoko::HTML::Document

Inherits:
XML::Document show all
Defined in:
lib/itonoko/html/document.rb

Constant Summary

Constants inherited from XML::Node

XML::Node::ATTRIBUTE_NODE, XML::Node::CDATA_SECTION_NODE, XML::Node::COMMENT_NODE, XML::Node::DOCUMENT_FRAGMENT_NODE, XML::Node::DOCUMENT_NODE, XML::Node::DOCUMENT_TYPE_NODE, XML::Node::ELEMENT_NODE, XML::Node::ESCAPE_ATTR, XML::Node::ESCAPE_TEXT, XML::Node::PROCESSING_INSTRUCTION_NODE, XML::Node::TEXT_NODE

Instance Attribute Summary

Attributes inherited from XML::Document

#encoding, #errors, #version

Attributes inherited from XML::Node

#children, #document, #node_name, #node_type, #parent

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from XML::Document

#collect_namespaces, #create_cdata, #create_comment, #create_element, #create_text_node, #css, #doc, #initialize, #root, #root=, #text, #to_xml, #xpath

Methods inherited from XML::Node

#==, #[], #[]=, #add_child, #add_next_sibling, #add_previous_sibling, #ancestors, #at, #at_css, #at_xpath, #attribute, #attribute_nodes, #attributes, #cdata_node?, #child, #comment?, #css, #description, #document?, #element?, #element_children, #fragment?, #get_attribute, #has_attribute?, #initialize, #inner_html, #inner_html=, #inspect, #keys, #matches?, #name, #next_element, #next_sibling, #prepend_child, #previous_element, #previous_sibling, #remove, #remove_attribute, #replace, #root, #search, #set_attribute, #text, #text=, #text?, #to_xml, #xpath

Constructor Details

This class inherits a constructor from Itonoko::XML::Document

Class Method Details

.parse(string, url = nil, encoding = nil, options = nil, &block) ⇒ Object



6
7
8
9
# File 'lib/itonoko/html/document.rb', line 6

# Parses +string+ as HTML and returns whatever Parser::HtmlParser#parse returns.
#
# The input is coerced with #to_s before it is handed to the parser. The
# +url+, +encoding+, +options+ and +block+ arguments are accepted but are not
# used by this method body (presumably kept for signature compatibility with
# other HTML parsing APIs; confirm with callers).
#
# @param string [#to_s] the HTML source to parse
# @param url [Object, nil] unused here
# @param encoding [Object, nil] unused here
# @param options [Object, nil] unused here
# @return [Object] the result of Parser::HtmlParser#parse
def self.parse(string, url = nil, encoding = nil, options = nil, &block)
  # The parser is required at call time rather than at file load time.
  require_relative "../parser/html_parser"

  html_parser = Parser::HtmlParser.new
  markup = string.to_s
  html_parser.parse(markup)
end

Instance Method Details

#meta_encoding ⇒ Object



23
24
25
26
27
28
29
# File 'lib/itonoko/html/document.rb', line 23

# Returns the character encoding declared by the document's <meta> elements.
#
# A <meta charset="..."> element wins when one is present. Otherwise the
# charset parameter is extracted from the content attribute of a
# <meta http-equiv="content-type"> element.
#
# @return [String, nil] the declared encoding label, or nil when the document
#   declares none
def meta_encoding
  charset_node = at_css("meta[charset]")
  return charset_node["charset"] if charset_node

  # The lowercase spelling is tried first, as before. The canonical
  # "Content-Type" spelling is tried as well in case the selector engine
  # compares attribute values case-sensitively.
  pragma_node = at_css("meta[http-equiv='content-type']") ||
                at_css("meta[http-equiv='Content-Type']")
  return unless pragma_node

  # Allow optional whitespace around "=" and an optionally quoted value, so
  # that `charset="utf-8"` yields utf-8 (without the quotes) and
  # `charset = utf-8` is recognised at all.
  declared = pragma_node["content"]&.match(
    /charset\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s;"']+))/i
  )
  declared && (declared[1] || declared[2] || declared[3])
end

#title ⇒ Object



19
20
21
# File 'lib/itonoko/html/document.rb', line 19

# Returns the text of the first node matching the "title" selector.
#
# @return [Object, nil] the value of the node's #text, or nil when no such
#   node is found
def title
  title_node = at_css("title")
  return if title_node.nil?

  title_node.text
end

#to_html ⇒ Object



11
12
13
# File 'lib/itonoko/html/document.rb', line 11

# Serializes the document by concatenating the #to_html output of each child,
# in child order.
#
# @return [String] the joined HTML of all children
def to_html
  fragments = children.map { |child| child.to_html }
  fragments.join
end

#to_s ⇒ Object



15
16
17
# File 'lib/itonoko/html/document.rb', line 15

# String form of the document; delegates to #to_html, so the result is
# whatever #to_html returns.
def to_s
  to_html
end