Class: Aranha::Parsers::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/aranha/parsers/base.rb

Direct Known Subclasses

Html::Base, Json::Base

Constant Summary collapse

LOG_DIR_ENVVAR =
'ARANHA_PARSERS_LOG_DIR'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Base

Returns a new instance of Base.



37
38
39
40
# File 'lib/aranha/parsers/base.rb', line 37

# Builds a parser bound to the source identified by +url+.
#
# The value is wrapped in a SourceAddress and stored; the address's
# serialized form is then handed to +log_content+ together with the
# '-source-address' argument.
#
# @param url value accepted by Aranha::Parsers::SourceAddress.new
#   (.from_string passes a local file path here)
def initialize(url)
  @source_address = ::Aranha::Parsers::SourceAddress.new(url)
  serialized_address = source_address.serialize
  log_content(serialized_address, '-source-address')
end

Instance Attribute Details

#source_address ⇒ Object (readonly)

Returns the value of attribute source_address.



35
36
37
# File 'lib/aranha/parsers/base.rb', line 35

# Read-only accessor for the address object assigned in #initialize.
#
# @return [Aranha::Parsers::SourceAddress]
def source_address
  @source_address
end

Class Method Details

.from_content(content) ⇒ Aranha::Parsers::Base

Deprecated.

Use .from_string instead.

Parameters:

  • content (String)

Returns:

  • (Aranha::Parsers::Base)



15
16
17
# File 'lib/aranha/parsers/base.rb', line 15

# Builds a parser from in-memory text by delegating to .from_string.
#
# @deprecated Use {.from_string} instead.
# @param content [String]
# @return [Aranha::Parsers::Base]
def from_content(content)
  from_string(content)
end

.from_string(string) ⇒ Aranha::Parsers::Base

Parameters:

  • string (String)

Returns:

  • (Aranha::Parsers::Base)



21
22
23
24
25
26
27
28
29
30
# File 'lib/aranha/parsers/base.rb', line 21

# Builds a parser from in-memory text.
#
# A duplicate of +string+, re-tagged as UTF-8, is written to a temporary
# file supplied by EacRubyUtils::Fs::Temp.on_file. The parser is created
# on that file's path and its content is read before the block finishes.
#
# @param string [String] left untouched; only a duplicate is re-tagged
# @return [Aranha::Parsers::Base]
def from_string(string)
  ::EacRubyUtils::Fs::Temp.on_file do |temp_path|
    ::File.write(temp_path.to_s, string.dup.force_encoding('UTF-8'), mode: 'w:UTF-8')
    # Read eagerly: presumably the temp file no longer exists once on_file
    # returns, so the content must be loaded inside the block — confirm.
    new(temp_path.to_path).tap(&:content)
  end
end

Instance Method Details

#content ⇒ Object



44
45
46
# File 'lib/aranha/parsers/base.rb', line 44

# Memoized content of the source.
#
# On the first call the result of #source_address_content is passed
# through +log_content+ and whatever that call returns is cached in
# @content (presumably the content itself — confirm against log_content).
# Later calls return the cached value as long as it is truthy.
def content
  return @content if @content

  @content = log_content(source_address_content)
end

#content_encoding ⇒ String?

Returns:

  • (String, nil)


49
50
51
# File 'lib/aranha/parsers/base.rb', line 49

# Encoding name that #source_address_content applies to the fetched content.
#
# This implementation always returns nil; presumably subclasses override it
# to force a specific encoding — confirm against the subclasses.
#
# @return [String, nil]
def content_encoding
  nil
end

#source_address_content ⇒ String

Returns:

  • (String)


54
55
56
57
58
# File 'lib/aranha/parsers/base.rb', line 54

# Fetches the raw content from the source address and, when
# #content_encoding yields a present value, re-tags that same string with
# the given encoding via String#force_encoding (bytes are not transcoded).
#
# NOTE(review): relies on +if_present+ returning its argument when the
# receiver is blank — confirm against the helper's definition.
#
# @return [String]
def source_address_content
  raw_content = source_address.content
  content_encoding.if_present(raw_content) do |encoding_name|
    raw_content.force_encoding(encoding_name)
  end
end