Module: Scrapetor::RDFa

Defined in:
lib/scrapetor/microdata.rb

Overview

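RDFa extractor — a minimal implementation covering the subset of RDFa typically used on the web. The methods documented below read the typeof, about, resource, property and content attributes; the datatype attribute is not interpreted by them.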

Class Method Summary collapse

Class Method Details

.collect_props(scope) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/scrapetor/microdata.rb', line 115

def self.collect_props(scope)
  props = {}
  scope.css("[property]").each do |el|
    names = (el["property"] || "").split(/\s+/).reject(&:empty?)
    value = el["content"] || el.text.to_s.strip
    names.each do |n|
      if props.key?(n)
        props[n] = [props[n]] unless props[n].is_a?(Array)
        props[n] << value
      else
        props[n] = value
      end
    end
  end
  props
end

.extract(doc) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/scrapetor/microdata.rb', line 102

def self.extract(doc)
  out = []
  doc.css("[typeof]").each do |node|
    item = {
      "type"       => node["typeof"],
      "about"      => node["about"] || node["resource"],
      "properties" => collect_props(node)
    }
    out << item
  end
  out
end