Class: Aranha::Parsers::Html::Base

Inherits:
Base
  • Object
show all
Defined in:
lib/aranha/parsers/html/base.rb

Direct Known Subclasses

Item, ItemList

Defined Under Namespace

Classes: Field

Constant Summary

Constants inherited from Base

Base::LOG_DIR_ENVVAR

Instance Attribute Summary

Attributes inherited from Base

#source_address

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#content, #content_encoding, from_content, from_string, #initialize, #source_address_content

Constructor Details

This class inherits a constructor from Aranha::Parsers::Base

Class Method Details

.field(name, type, xpath) ⇒ Object



15
16
17
18
# File 'lib/aranha/parsers/html/base.rb', line 15

# Declares a field by appending a new Field built from the given arguments
# to the list held in @fields (the list is created on first use).
#
# @param name  forwarded unchanged to Field.new
# @param type  forwarded unchanged to Field.new
# @param xpath forwarded unchanged to Field.new
# @return [Array] the accumulated list itself (not a copy)
def field(name, type, xpath)
  (@fields ||= []).push(Field.new(name, type, xpath))
end

.fields ⇒ Object



10
11
12
13
# File 'lib/aranha/parsers/html/base.rb', line 10

# Returns a shallow copy of the declared fields, so callers cannot alter the
# list stored in @fields. The stored list is initialised to an empty array
# when it has not been set yet.
#
# @return [Array] a duplicate of the stored field list
def fields
  (@fields ||= []).dup
end

.from_node(node) ⇒ Aranha::Parsers::Html::Base

Parameters:

  • node (Nokogiri::XML::Node)

Returns:

  • (Aranha::Parsers::Html::Base)

22
23
24
# File 'lib/aranha/parsers/html/base.rb', line 22

# Builds a parser from a node by serialising the node to HTML and handing
# the resulting markup to from_string.
#
# @param node [Nokogiri::XML::Node] node whose #to_html output is parsed
# @return [Aranha::Parsers::Html::Base] whatever from_string returns
def from_node(node)
  markup = node.to_html
  from_string(markup)
end

.xpath_contains_class(klass, node = '@class') ⇒ String

Parameters:

  • node (String) (defaults to: '@class')
  • klass (String)

Returns:

  • (String)


29
30
31
# File 'lib/aranha/parsers/html/base.rb', line 29

# Builds an XPath predicate that checks whether a whitespace-separated token
# list contains +klass+ as a whole token. Both sides are wrapped in single
# spaces so that e.g. "btn" does not match "btn-primary".
#
# Both arguments are interpolated verbatim; no quoting or escaping is done.
#
# @param klass [String] token to look for
# @param node [String] XPath expression yielding the token list
# @return [String] the XPath boolean expression
def xpath_contains_class(klass, node = '@class')
  padded_tokens = "concat(' ', normalize-space(#{node}), ' ')"
  "contains(#{padded_tokens}, ' #{klass} ')"
end

.xpath_ends_with(haystack, needle) ⇒ String

Parameters:

  • haystack (String)
  • needle (String)

Returns:

  • (String)


36
37
38
39
# File 'lib/aranha/parsers/html/base.rb', line 36

# Builds an XPath expression testing whether +haystack+ ends with +needle+,
# by comparing the trailing substring of +haystack+ (same length as
# +needle+) against +needle+.
#
# Both arguments are interpolated verbatim as XPath expressions, so a
# literal needle must already carry its own quotes (e.g. "'.pdf'").
#
# @param haystack [String] XPath expression for the string being inspected
# @param needle [String] XPath expression for the expected suffix
# @return [String] the XPath boolean expression
def xpath_ends_with(haystack, needle)
  tail_start = "string-length(#{haystack}) - string-length(#{needle}) + 1"
  "substring(#{haystack}, #{tail_start}) = #{needle}"
end

Instance Method Details

#nokogiri ⇒ Object



44
45
46
# File 'lib/aranha/parsers/html/base.rb', line 44

# Returns the result of parsing #content with Nokogiri::HTML, memoised in
# @nokogiri so later calls reuse the value from the first call. The
# `noblanks` parse option is enabled via the options block.
#
# @return [Object] the value returned by Nokogiri::HTML
def nokogiri
  return @nokogiri if @nokogiri

  @nokogiri = Nokogiri::HTML(content, &:noblanks)
end