Class: Aranha::Parsers::Html::Base

Inherits:
Aranha::Parsers::Base
  • Object
show all
Defined in:
lib/aranha/parsers/html/base.rb

Direct Known Subclasses

Item, ItemList

Defined Under Namespace

Classes: Field

Constant Summary

Constants inherited from Base

Base::LOG_DIR_ENVVAR

Instance Attribute Summary

Attributes inherited from Base

#source_address

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#content, #content_encoding, from_content, from_string, #initialize, #source_address_content

Constructor Details

This class inherits a constructor from Aranha::Parsers::Base

Class Method Details

.field(name, type, xpath) ⇒ Object



17
18
19
20
# File 'lib/aranha/parsers/html/base.rb', line 17

# Declares a field by wrapping the arguments in a Field and appending it to
# the receiver's @fields list, which is created on first use.
#
# @param name  passed through to Field.new as its first argument
# @param type  passed through to Field.new as its second argument
# @param xpath passed through to Field.new as its third argument
# @return [Array] the internal @fields list (not a copy), including the new entry
def field(name, type, xpath)
  declared = (@fields ||= [])
  declared << Field.new(name, type, xpath)
end

.fields ⇒ Object



12
13
14
15
# File 'lib/aranha/parsers/html/base.rb', line 12

# Returns a shallow copy of the declared field list, so callers cannot mutate
# the internal @fields array through the returned value. The list is
# initialised to an empty array on first access.
#
# @return [Array] a duplicate of @fields
def fields
  (@fields ||= []).dup
end

.from_node(node) ⇒ Aranha::Parsers::Html::Base

Parameters:

  • node (Nokogiri::XML::Node)

Returns:

  • (Aranha::Parsers::Html::Base)


24
25
26
# File 'lib/aranha/parsers/html/base.rb', line 24

# Builds a parser from a node by serializing it with #to_html and handing the
# resulting markup to from_string.
#
# @param node [Nokogiri::XML::Node] anything responding to #to_html
# @return [Aranha::Parsers::Html::Base] whatever from_string returns
def from_node(node)
  markup = node.to_html
  from_string(markup)
end

.xpath_contains_class(klass, node = '@class') ⇒ String

Parameters:

  • node (String) (defaults to: '@class')
  • klass (String)

Returns:

  • (String)


31
32
33
# File 'lib/aranha/parsers/html/base.rb', line 31

# Builds an XPath boolean expression that is true when +klass+ occurs as a
# whole, space-delimited token in +node+. The attribute value is
# whitespace-normalized and padded with a space on each side so that a token
# at the start or end still matches and partial names do not.
#
# Both arguments are interpolated verbatim (no quoting or escaping).
#
# @param klass [String] class token to look for
# @param node [String] XPath expression yielding the class list
# @return [String] the XPath predicate expression
def xpath_contains_class(klass, node = '@class')
  padded_list = "concat(' ', normalize-space(#{node}), ' ')"
  "contains(#{padded_list}, ' #{klass} ')"
end

.xpath_ends_with(haystack, needle) ⇒ String

Parameters:

  • haystack (String)
  • needle (String)

Returns:

  • (String)


38
39
40
41
# File 'lib/aranha/parsers/html/base.rb', line 38

# Builds an XPath expression testing whether +haystack+ ends with +needle+,
# by comparing +needle+ to the tail of +haystack+ of the same length.
#
# Both arguments are interpolated verbatim as XPath expressions, so a literal
# needle must already carry its own quotes (e.g. "'.pdf'").
#
# @param haystack [String] XPath expression for the string being inspected
# @param needle [String] XPath expression for the expected suffix
# @return [String] the XPath comparison expression
def xpath_ends_with(haystack, needle)
  # 1-based position in haystack where a suffix as long as needle begins.
  tail_start = "string-length(#{haystack}) - string-length(#{needle}) + 1"
  "substring(#{haystack}, #{tail_start}) = #{needle}"
end

Instance Method Details

#nokogiri ⇒ Object



46
47
48
# File 'lib/aranha/parsers/html/base.rb', line 46

# Returns the document produced by Nokogiri::HTML for +content+, parsing on
# the first call and reusing the memoized @nokogiri afterwards. The parse
# options object yielded by Nokogiri::HTML has +noblanks+ invoked on it.
#
# @return [Object] the memoized result of Nokogiri::HTML
def nokogiri
  return @nokogiri if @nokogiri

  @nokogiri = Nokogiri::HTML(content) { |options| options.noblanks }
end