Class: Scrapetor::Dom::Element

Inherits:
Object
  • Object
show all
Includes:
NodeMethods
Defined in:
lib/scrapetor/dom.rb

Instance Attribute Summary collapse

Attributes included from NodeMethods

#parent

Instance Method Summary collapse

Methods included from NodeMethods

#add_next_sibling, #add_previous_sibling, #comment?, #doctype?, #document, #next_element_sibling, #next_sibling, #previous_element_sibling, #previous_sibling, #remove, #replace, #text?

Constructor Details

#initialize(name, attributes = {}, line: nil) ⇒ Element

Returns a new instance of Element.



107
108
109
110
111
112
113
# File 'lib/scrapetor/dom.rb', line 107

# Builds a detached element: no parent and no children yet.
#
# @param name [#to_s] tag name; stored as a lower-cased String
# @param attributes [Object] attribute store, kept by reference (not copied)
# @param line [Object, nil] optional source line for this element
def initialize(name, attributes = {}, line: nil)
  @name, @attributes = name.to_s.downcase, attributes
  @children, @parent = [], nil
  @line = line
end

Instance Attribute Details

#attributes ⇒ Object

Returns the value of attribute attributes.



105
106
107
# File 'lib/scrapetor/dom.rb', line 105

# Reader for the element's attribute store.
#
# @return [Object] the object held in @attributes itself, not a copy, so
#   mutating the result mutates the element
def attributes
  @attributes
end

#children ⇒ Object

Returns the value of attribute children.



105
106
107
# File 'lib/scrapetor/dom.rb', line 105

# Reader for the element's child-node list.
#
# @return [Object] the object held in @children itself, not a copy, so
#   mutating the result changes this element's children
def children
  @children
end

#line ⇒ Object

Returns the value of attribute line.



105
106
107
# File 'lib/scrapetor/dom.rb', line 105

# Reader for the value stored in @line.
# NOTE(review): presumably the source line the element was parsed from — confirm against the parser.
def line
  @line
end

#name ⇒ Object

Returns the value of attribute name.



105
106
107
# File 'lib/scrapetor/dom.rb', line 105

# Reader for the element's tag name as stored in @name.
def name
  @name
end

Instance Method Details

#[](key) ⇒ Object

—– attribute access —–



119
120
121
# File 'lib/scrapetor/dom.rb', line 119

# Reads one attribute value.
#
# The key is converted with to_s before the lookup, so :id and "id" address
# the same entry as long as the stored keys are Strings.
#
# @param key [#to_s] attribute name
# @return [Object] whatever @attributes yields for that key
def [](key)
  @attributes[key.to_s]
end

#[]=(key, value) ⇒ Object



123
124
125
126
127
128
129
130
# File 'lib/scrapetor/dom.rb', line 123

# Writes or clears one attribute.
#
# A nil value deletes the attribute; any other value is stored as value.to_s
# under key.to_s.
#
# @param key [#to_s] attribute name
# @param value [Object, nil] new value, or nil to delete
# @return [Object, nil] the value argument, unchanged
def []=(key, value)
  attr_name = key.to_s
  if value.nil?
    @attributes.delete(attr_name)
    return value
  end
  @attributes[attr_name] = value.to_s
  value
end

#add_child(node_or_html) ⇒ Object Also known as: <<

—– children / traversal —–



225
226
227
228
229
# File 'lib/scrapetor/dom.rb', line 225

# Appends one or more nodes to the end of this element's children.
#
# The argument is first passed through Dom.normalize_replacement, which is
# expected to return a list of nodes; each of them is re-parented to this
# element and pushed onto @children in order.
#
# @param node_or_html [Object] anything Dom.normalize_replacement accepts
# @return [Object, nil] the last node appended (nil for an empty list)
def add_child(node_or_html)
  appended = Dom.normalize_replacement(node_or_html, parent: self)
  appended.each do |node|
    node.parent = self
    @children.push(node)
  end
  appended.last
end

#add_class(klass) ⇒ Object Also known as: append_class



159
160
161
162
163
164
# File 'lib/scrapetor/dom.rb', line 159

# Adds one or more class names to the "class" attribute.
#
# klass is split on whitespace; blank tokens and names already present are
# skipped, so existing order is kept and new names are appended. The
# attribute is always rewritten from the resulting list.
#
# @param klass [#to_s] a class name or a whitespace-separated list of names
# @return [self]
def add_class(klass)
  current = classes
  klass.to_s.split(/\s+/).each do |token|
    next if token.empty? || current.include?(token)
    current << token
  end
  self["class"] = current.join(" ")
  self
end

#at_css(selector) ⇒ Object Also known as: at



251
252
253
# File 'lib/scrapetor/dom.rb', line 251

# Convenience form of #css that returns only the first result.
#
# @param selector [Object] passed unchanged to #css
# @return [Object, nil] css(selector).first
def at_css(selector)
  css(selector).first
end

#at_xpath(expr) ⇒ Object



264
265
266
# File 'lib/scrapetor/dom.rb', line 264

# Convenience form of #xpath that returns only the first result.
#
# @param expr [Object] passed unchanged to #xpath
# @return [Object, nil] xpath(expr).first
def at_xpath(expr)
  xpath(expr).first
end

#attribute(name) ⇒ Object



346
347
348
# File 'lib/scrapetor/dom.rb', line 346

# Finds the attribute node whose name equals name.to_s.
#
# A fresh list is built through attribute_nodes on every call, so the
# returned node is a new object each time.
#
# @param name [#to_s] attribute name to look for
# @return [Object, nil] the first matching node, or nil when none matches
def attribute(name)
  attribute_nodes.find { |a| a.name == name.to_s }
end

#attribute_nodes ⇒ Object



342
343
344
# File 'lib/scrapetor/dom.rb', line 342

# Wraps every attribute in an AttrNode built from (key, value, self).
#
# New AttrNode objects are created on each call; nothing is cached.
#
# @return [Array] one AttrNode per entry of @attributes, in its iteration order
def attribute_nodes
  @attributes.map { |k, v| AttrNode.new(k, v, self) }
end

#attribute_value(key) ⇒ Object



132
133
134
# File 'lib/scrapetor/dom.rb', line 132

# Named alternative to the [] reader; returns self[key].
#
# @param key [Object] attribute name, passed unchanged to []
def attribute_value(key)
  self[key]
end

#classes ⇒ Object

—– class manipulation —–



155
156
157
# File 'lib/scrapetor/dom.rb', line 155

# Lists the class names in the "class" attribute.
#
# A missing attribute is treated as an empty string. The value is split on
# runs of whitespace and blank tokens (e.g. from leading whitespace) are
# dropped.
#
# @return [Array<String>] a new array on every call
def classes
  raw = self["class"] || ""
  raw.split(/\s+/).reject { |token| token.empty? }
end

#css(selector) ⇒ Object Also known as: search

—– selectors —–



247
248
249
# File 'lib/scrapetor/dom.rb', line 247

# Runs a CSS selector with this element as the starting node.
#
# Delegates to Dom::Selectors.css(self, selector) and returns its result as is.
#
# @param selector [Object] passed unchanged to Dom::Selectors.css
def css(selector)
  Dom::Selectors.css(self, selector)
end

#element? ⇒ Boolean

Returns:

  • (Boolean)


115
# File 'lib/scrapetor/dom.rb', line 115

# Type predicate: always true for this class.
def element?; true; end

#element_children ⇒ Object Also known as: elements



232
233
234
# File 'lib/scrapetor/dom.rb', line 232

# Returns only the children that answer true to element?, in document order.
#
# @return [Array] a new array; @children itself is not modified
def element_children
  @children.select { |child| child.element? }
end

#first_element_child ⇒ Object



237
238
239
# File 'lib/scrapetor/dom.rb', line 237

# Returns the first child that answers true to element?.
#
# @return [Object, nil] nil when there is no such child
def first_element_child
  @children.find { |child| child.element? }
end

#has_attribute?(key) ⇒ Boolean

Returns:

  • (Boolean)


141
142
143
# File 'lib/scrapetor/dom.rb', line 141

# Tells whether an attribute is present, regardless of its value.
#
# @param key [#to_s] attribute name; converted with to_s before the check
# @return [Boolean] result of @attributes.key?
def has_attribute?(key)
  @attributes.key?(key.to_s)
end

#has_class?(klass) ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
# File 'lib/scrapetor/dom.rb', line 182

# Tells whether klass.to_s is one of the entries returned by classes.
#
# The comparison is against whole entries, so the argument is treated as a
# single name and is not split on whitespace.
#
# @param klass [#to_s] class name to look for
# @return [Boolean]
def has_class?(klass)
  classes.include?(klass.to_s)
end

#inner_html ⇒ Object



200
201
202
# File 'lib/scrapetor/dom.rb', line 200

# Serializes the children: the to_html of each child, concatenated in order.
#
# @return [String] empty when there are no children
def inner_html
  @children.map(&:to_html).join
end

#inner_html=(html) ⇒ Object



204
205
206
207
208
209
# File 'lib/scrapetor/dom.rb', line 204

# Replaces all children with the nodes parsed from an HTML fragment.
#
# html.to_s is handed to Dom::Parser.fragment; every resulting node is
# re-parented to this element and the resulting list becomes @children.
# The previous children are simply dropped from the list.
#
# @param html [#to_s] markup to parse
# @return [Object] the html argument, unchanged
def inner_html=(html)
  @children = Dom::Parser.fragment(html.to_s).tap do |parsed|
    parsed.each { |node| node.parent = self }
  end
  html
end

#keys ⇒ Object



145
146
147
# File 'lib/scrapetor/dom.rb', line 145

# Lists the attribute names: the result of @attributes.keys.
def keys
  @attributes.keys
end

#last_element_child ⇒ Object



241
242
243
# File 'lib/scrapetor/dom.rb', line 241

# Returns the last child that answers true to element?.
#
# Scans the children from the end so the search stops at the first hit.
#
# @return [Object, nil] nil when there is no such child
def last_element_child
  @children.reverse_each { |child| return child if child.element? }
  nil
end

#matches?(selector) ⇒ Boolean

Returns:

  • (Boolean)


295
296
297
# File 'lib/scrapetor/dom.rb', line 295

# Tells whether this element is among the nodes the owning document returns
# for the selector.
#
# The selector is evaluated against the whole document and the results are
# compared by identity (equal?), not by ==.
#
# NOTE(review): relies on `document` returning an object that responds to
# css; if it can be nil for a detached element this raises NoMethodError —
# confirm against NodeMethods#document.
#
# @param selector [Object] passed unchanged to document.css
# @return [Boolean]
def matches?(selector)
  document.css(selector).any? { |n| n.equal?(self) }
end

#node_name ⇒ Object



273
# File 'lib/scrapetor/dom.rb', line 273

# Returns the tag name stored in @name.
def node_name;   @name; end

#node_type ⇒ Object

—– node type / misc —–



270
# File 'lib/scrapetor/dom.rb', line 270

# Numeric node-type code; always 1 for this class.
# NOTE(review): 1 presumably mirrors the DOM ELEMENT_NODE constant — confirm.
def node_type;   1; end

#outer_html ⇒ Object Also known as: to_html, to_xml, to_s



211
212
213
214
215
216
217
218
# File 'lib/scrapetor/dom.rb', line 211

# Serializes this element including its own tag.
#
# A tag listed in VOID that has no children is written as a lone opening tag.
# Every other case (including a VOID tag that does have children) is written
# as opening tag, inner_html, closing tag.
#
# @return [String]
def outer_html
  attr_text = serialize_attrs
  open_tag = "<#{@name}#{attr_text}>"
  return open_tag if @children.empty? && VOID.include?(@name)

  "#{open_tag}#{inner_html}</#{@name}>"
end

#path ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/scrapetor/dom.rb', line 275

# Builds an XPath-style location string for this element.
#
# Walks from this element up through its Element ancestors. Each step is
# written as "name[n]", where n is the 1-based position among the preceding
# siblings that are Elements with the same name. The walk stops early at the
# first element carrying a non-empty "id" attribute, which is written as
# "name[@id='...']" instead.
#
# Prefix:
# * "/"  when the walk ran out of Element ancestors, i.e. the first step is
#        the topmost element and the path is root-anchored.
# * "//" when the walk stopped at an id anchor. That element may sit at any
#        depth, and a single "/" would claim it is the root element, so the
#        path would not locate it.
#
# XPath 1.0 string literals have no escape syntax. An id containing "'" is
# therefore quoted with double quotes; an id containing both quote characters
# cannot be written as one literal and is skipped as an anchor (that element
# gets a positional step and the walk continues upwards).
#
# @return [String]
def path
  parts = []
  anchored = false
  cur = self
  while cur.is_a?(Element)
    id = cur["id"].to_s
    # Pick a quote character that does not occur in the id (nil if both do).
    quote = ["'", '"'].find { |q| !id.include?(q) }
    if !id.empty? && quote
      parts.unshift("#{cur.name}[@id=#{quote}#{id}#{quote}]")
      anchored = true
      break
    end
    # Position = 1 + number of earlier same-name element siblings.
    idx = 1
    sib = cur.previous_sibling
    while sib
      idx += 1 if sib.is_a?(Element) && sib.name == cur.name
      sib = sib.previous_sibling
    end
    parts.unshift("#{cur.name}[#{idx}]")
    cur = cur.parent
  end
  (anchored ? "//" : "/") + parts.join("/")
end

#remove_attribute(key) ⇒ Object



136
137
138
139
# File 'lib/scrapetor/dom.rb', line 136

# Deletes one attribute; a missing attribute is silently ignored.
#
# @param key [#to_s] attribute name; converted with to_s before the delete
# @return [self] so calls can be chained
def remove_attribute(key)
  tap { @attributes.delete(key.to_s) }
end

#remove_class(klass = nil) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/scrapetor/dom.rb', line 167

# Removes class names from the "class" attribute.
#
# With no argument (or nil) the whole attribute is removed. Otherwise klass
# is split on whitespace and every listed name is dropped from the current
# class list; if nothing is left the attribute is removed, else it is
# rewritten from the remaining names.
#
# @param klass [#to_s, nil] a class name, a whitespace-separated list, or nil
# @return [self]
def remove_class(klass = nil)
  remaining = klass.nil? ? [] : classes - klass.to_s.split(/\s+/)
  if remaining.empty?
    remove_attribute("class")
  else
    self["class"] = remaining.join(" ")
  end
  self
end

#tag_name ⇒ Object



272
# File 'lib/scrapetor/dom.rb', line 272

# Returns the tag name stored in @name.
def tag_name;    @name; end

#text ⇒ Object Also known as: content, inner_text

—– text / inner_html —–



188
189
190
# File 'lib/scrapetor/dom.rb', line 188

# Text content of this element: the text of each child, concatenated in order.
#
# @return [String] empty when there are no children
def text
  @children.map(&:text).join
end

#text=(s) ⇒ Object Also known as: content=



194
195
196
197
# File 'lib/scrapetor/dom.rb', line 194

# Replaces all children with a single Text node holding s.to_s.
#
# The new Text node is created with this element as its parent. The previous
# children are simply dropped from the list.
#
# @param s [#to_s] new text content
# @return [Object] the s argument, unchanged
def text=(s)
  replacement = Text.new(s.to_s, parent: self)
  @children = [replacement]
  s
end

#traverse {|_self| ... } ⇒ Object

Yields:

  • (_self)

Yield Parameters:



329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/scrapetor/dom.rb', line 329

# Visits this element and then its subtree.
#
# The block receives this element first, then each child in order. A child
# that responds to traverse is asked to traverse itself with the same block;
# any other child is passed to the block directly.
#
# @yieldparam node [Object] the node being visited
# @return [self, Enumerator] an Enumerator when called without a block
def traverse(&block)
  return enum_for(:traverse) unless block

  block.call(self)
  @children.each do |child|
    child.respond_to?(:traverse) ? child.traverse(&block) : block.call(child)
  end
  self
end

#type ⇒ Object



271
# File 'lib/scrapetor/dom.rb', line 271

# Numeric node-type code; always 1 for this class.
def type;        1; end

#values ⇒ Object



149
150
151
# File 'lib/scrapetor/dom.rb', line 149

# Lists the attribute values: the result of @attributes.values.
def values
  @attributes.values
end

#wrap(html_or_node) ⇒ Object

Wrap this element in an HTML fragment (string) or another element, placing this element as the deepest descendant of the wrapping tree. Matches Nokogiri's `Node#wrap` semantics.



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/scrapetor/dom.rb', line 302

# Wraps this element in another element, in place.
#
# The wrapper takes this element's slot in the parent's child list, and this
# element is appended under the wrapper's deepest first-element descendant.
#
# The wrapper is either the Element passed in, or the first element found in
# the fragment parsed from html_or_node.to_s. Nothing happens (self is
# returned unchanged) when this element has no parent, when the fragment
# contains no element, or when this element is not found in its parent's
# child list.
#
# A fragment made only of non-element nodes (e.g. plain text) is deliberately
# NOT used as a wrapper: the steps below call first_element_child and
# children on the wrapper, which this file only shows for Element.
#
# NOTE(review): an Element wrapper that already has a parent is not detached
# from it first — confirm whether callers rely on that.
#
# @param html_or_node [Element, #to_s] wrapper element or markup to parse
# @return [self]
def wrap(html_or_node)
  return self unless @parent
  wrapper = case html_or_node
            when Element
              html_or_node
            else
              Dom::Parser.fragment(html_or_node.to_s).find(&:element?)
            end
  return self if wrapper.nil?
  # Drill to the deepest first element.
  deepest = wrapper
  while (next_level = deepest.first_element_child)
    deepest = next_level
  end
  # Replace self with the wrapper, then re-parent self under deepest.
  idx = @parent.children.index(self)
  return self unless idx
  wrapper.parent = @parent
  @parent.children[idx, 1] = [wrapper]
  @parent = deepest
  deepest.children << self
  self
end

#xpath(_expr) ⇒ Object



257
258
259
260
261
262
# File 'lib/scrapetor/dom.rb', line 257

# XPath query stub: the expression is ignored and an empty array is always
# returned.
#
# @param _expr [Object] unused
# @return [Array] always empty
def xpath(_expr)
  # Minimal XPath support is out of scope for the pure-Ruby DOM.
  # Callers that need full XPath can install nokogiri/nokolexbor
  # separately and pass HTML through them.
  []
end