Class: Scrapetor::Dom::Element
- Inherits:
-
Object
- Object
- Scrapetor::Dom::Element
show all
- Includes:
- NodeMethods
- Defined in:
- lib/scrapetor/dom.rb
Instance Attribute Summary collapse
Attributes included from NodeMethods
#parent
Instance Method Summary
collapse
#add_next_sibling, #add_previous_sibling, #comment?, #doctype?, #document, #next_element_sibling, #next_sibling, #previous_element_sibling, #previous_sibling, #remove, #replace, #text?
Constructor Details
#initialize(name, attributes = {}, line: nil) ⇒ Element
Returns a new instance of Element.
107
108
109
110
111
112
113
|
# File 'lib/scrapetor/dom.rb', line 107
# Builds an element with no parent and no children.
#
# @param name [#to_s] tag name; stored as a lower-cased String
# @param attributes [Hash] attribute map; the given object is stored as-is (not copied)
# @param line [Object, nil] stored unchanged in @line (presumably a source line number — confirm)
def initialize(name, attributes = {}, line: nil)
  @parent = nil
  @children = []
  @line = line
  @attributes = attributes
  @name = name.to_s.downcase
end
|
Instance Attribute Details
#attributes ⇒ Object
Returns the value of attribute attributes.
105
106
107
|
# File 'lib/scrapetor/dom.rb', line 105
# Reader for the attribute map held in @attributes (the same object, not a copy).
def attributes; @attributes; end
|
#children ⇒ Object
Returns the value of attribute children.
105
106
107
|
# File 'lib/scrapetor/dom.rb', line 105
# Reader for the child-node list held in @children (the same object, not a copy).
def children; @children; end
|
#line ⇒ Object
Returns the value of attribute line.
105
106
107
|
# File 'lib/scrapetor/dom.rb', line 105
# Reader for @line, the value given as `line:` at construction time.
def line; @line; end
|
#name ⇒ Object
Returns the value of attribute name.
105
106
107
|
# File 'lib/scrapetor/dom.rb', line 105
# Reader for @name, the tag name stored at construction time.
def name; @name; end
|
Instance Method Details
#[](key) ⇒ Object
119
120
121
|
# File 'lib/scrapetor/dom.rb', line 119
# Looks up an attribute value.
#
# @param key [#to_s] attribute name; converted with to_s before the lookup
# @return [Object, nil] the stored value, or nil when the attribute is absent
def [](key)
  attr_name = key.to_s
  @attributes[attr_name]
end
|
#[]=(key, value) ⇒ Object
123
124
125
126
127
128
129
130
|
# File 'lib/scrapetor/dom.rb', line 123
# Sets or removes an attribute.
#
# A nil value deletes the attribute; any other value is stored as a String.
#
# @param key [#to_s] attribute name
# @param value [Object, nil] new value
# @return [Object, nil] the value that was passed in
def []=(key, value)
  attr_name = key.to_s
  unless value.nil?
    @attributes[attr_name] = value.to_s
    return value
  end
  @attributes.delete(attr_name)
  value
end
|
#add_child(node_or_html) ⇒ Object
Also known as:
<<
----- children / traversal -----
225
226
227
228
229
|
# File 'lib/scrapetor/dom.rb', line 225
# Appends one or more nodes to the end of this element's children.
#
# The argument is first passed through Dom.normalize_replacement, which
# yields the list of nodes to append; each one gets this element as parent.
#
# @param node_or_html [Object] whatever Dom.normalize_replacement accepts
# @return [Object, nil] the last node appended (nil if the list was empty)
def add_child(node_or_html)
  incoming = Dom.normalize_replacement(node_or_html, parent: self)
  incoming.each do |node|
    node.parent = self
    @children.push(node)
  end
  incoming.last
end
|
#add_class(klass) ⇒ Object
Also known as:
append_class
159
160
161
162
163
164
|
# File 'lib/scrapetor/dom.rb', line 159
# Adds one or more class names to the "class" attribute.
#
# The argument is split on whitespace; empty tokens and names that are
# already present are skipped, and new names are appended in order.
#
# @param klass [#to_s] whitespace-separated class name(s)
# @return [self]
def add_class(klass)
  current = classes
  klass.to_s.split(/\s+/).each do |token|
    next if token.empty? || current.include?(token)
    current << token
  end
  self["class"] = current.join(" ")
  self
end
|
#at_css(selector) ⇒ Object
Also known as:
at
251
252
253
|
# File 'lib/scrapetor/dom.rb', line 251
# Returns the first entry of the result of #css for the given selector.
#
# @param selector [Object] passed straight through to #css
def at_css(selector)
  found = css(selector)
  found.first
end
|
#at_xpath(expr) ⇒ Object
264
265
266
|
# File 'lib/scrapetor/dom.rb', line 264
# Returns the first entry of the result of #xpath for the given expression.
#
# @param expr [Object] passed straight through to #xpath
def at_xpath(expr)
  found = xpath(expr)
  found.first
end
|
#attribute(name) ⇒ Object
346
347
348
|
# File 'lib/scrapetor/dom.rb', line 346
# Finds the attribute node with the given name.
#
# @param name [#to_s] attribute name to look for
# @return [Object, nil] the first entry of #attribute_nodes whose name
#   equals name.to_s, or nil when none matches
def attribute(name)
  wanted = name.to_s
  attribute_nodes.find { |node| node.name == wanted }
end
|
#attribute_nodes ⇒ Object
342
343
344
|
# File 'lib/scrapetor/dom.rb', line 342
# Builds one AttrNode per entry of the attribute map.
#
# Each node is constructed from the attribute's key, its value, and this
# element. New AttrNode objects are created on every call.
#
# @return [Array] the AttrNode objects, in attribute-map order
def attribute_nodes
  @attributes.map do |attr_name, attr_value|
    AttrNode.new(attr_name, attr_value, self)
  end
end
|
#attribute_value(key) ⇒ Object
132
133
134
|
# File 'lib/scrapetor/dom.rb', line 132
# Named equivalent of `self[key]`: returns the value of the given attribute.
#
# @param key [Object] passed straight through to #[]
def attribute_value(key); self[key]; end
|
#classes ⇒ Object
155
156
157
|
# File 'lib/scrapetor/dom.rb', line 155
# Splits the "class" attribute into its individual class names.
#
# @return [Array<String>] whitespace-separated tokens with empty strings
#   removed; an empty Array when the attribute is missing
def classes
  raw = self["class"] || ""
  tokens = raw.split(/\s+/)
  tokens.reject { |token| token.empty? }
end
|
#css(selector) ⇒ Object
Also known as:
search
247
248
249
|
# File 'lib/scrapetor/dom.rb', line 247
# Runs a CSS selector query by delegating to Dom::Selectors.css, passing
# this element as the first argument.
#
# @param selector [Object] forwarded unchanged
# @return [Object] whatever Dom::Selectors.css returns
def css(selector)
  engine = Dom::Selectors
  engine.css(self, selector)
end
|
#element? ⇒ Boolean
115
|
# File 'lib/scrapetor/dom.rb', line 115
# Always true for an Element.
def element?
  true
end
|
#element_children ⇒ Object
Also known as:
elements
232
233
234
|
# File 'lib/scrapetor/dom.rb', line 232
# Lists the child nodes that report themselves as elements.
#
# @return [Array] children for which element? is truthy, in document order
def element_children
  @children.select { |child| child.element? }
end
|
#first_element_child ⇒ Object
237
238
239
|
# File 'lib/scrapetor/dom.rb', line 237
# Finds the first child that reports itself as an element.
#
# @return [Object, nil] the first child whose element? is truthy, or nil
def first_element_child
  @children.each { |child| return child if child.element? }
  nil
end
|
#has_attribute?(key) ⇒ Boolean
141
142
143
|
# File 'lib/scrapetor/dom.rb', line 141
# Tells whether an attribute is present, whatever its value.
#
# @param key [#to_s] attribute name; converted with to_s before the check
# @return [Boolean]
def has_attribute?(key)
  attr_name = key.to_s
  @attributes.key?(attr_name)
end
|
#has_class?(klass) ⇒ Boolean
182
183
184
|
# File 'lib/scrapetor/dom.rb', line 182
# Tells whether the given name is one of this element's class names.
#
# @param klass [#to_s] a single class name, compared as a whole token
# @return [Boolean]
def has_class?(klass)
  wanted = klass.to_s
  classes.any? { |existing| existing == wanted }
end
|
#inner_html ⇒ Object
200
201
202
|
# File 'lib/scrapetor/dom.rb', line 200
# Serializes the children by concatenating each child's to_html output.
#
# @return [String] empty when there are no children
def inner_html
  rendered = @children.map { |child| child.to_html }
  rendered.join
end
|
#inner_html=(html) ⇒ Object
204
205
206
207
208
209
|
# File 'lib/scrapetor/dom.rb', line 204
# Replaces all children with the nodes parsed from an HTML string.
#
# The markup is parsed with Dom::Parser.fragment before the tree is touched,
# so the existing children stay in place if parsing raises. The outgoing
# children are then detached (their parent is reset to nil) so they no longer
# claim to belong to this element once they are gone from @children.
#
# @param html [#to_s] markup for the new content
# @return [Object] the argument that was passed in
def inner_html=(html)
  nodes = Dom::Parser.fragment(html.to_s)
  # Only clear links that still point here; a child already re-parented
  # elsewhere keeps its current parent.
  @children.each { |old| old.parent = nil if old.parent.equal?(self) }
  nodes.each { |n| n.parent = self }
  @children = nodes
  html
end
|
#keys ⇒ Object
145
146
147
|
# File 'lib/scrapetor/dom.rb', line 145
# Lists the attribute names, i.e. the keys of the attribute map.
def keys; @attributes.keys; end
|
#last_element_child ⇒ Object
241
242
243
|
# File 'lib/scrapetor/dom.rb', line 241
# Finds the last child that reports itself as an element.
#
# @return [Object, nil] the last child whose element? is truthy, or nil
def last_element_child
  @children.reverse_each { |child| return child if child.element? }
  nil
end
|
#matches?(selector) ⇒ Boolean
295
296
297
|
# File 'lib/scrapetor/dom.rb', line 295
# Tells whether this element is among the nodes the selector matches.
#
# The selector is evaluated against the owning document (via #document) and
# the results are checked for this exact object (identity, not equality).
#
# @param selector [Object] forwarded to document.css
# @return [Boolean]
def matches?(selector)
  candidates = document.css(selector)
  candidates.any? { |candidate| candidate.equal?(self) }
end
|
#node_name ⇒ Object
273
|
# File 'lib/scrapetor/dom.rb', line 273
# Returns the tag name stored in @name.
def node_name
  @name
end
|
#node_type ⇒ Object
270
|
# File 'lib/scrapetor/dom.rb', line 270
# Returns the constant 1 as this node's type code.
def node_type
  1
end
|
#outer_html ⇒ Object
Also known as:
to_html, to_xml, to_s
211
212
213
214
215
216
217
218
|
# File 'lib/scrapetor/dom.rb', line 211
# Serializes this element, including its own tag, to markup.
#
# An element whose name is listed in VOID and that has no children is emitted
# as a bare opening tag; everything else gets an opening tag, the serialized
# children, and a closing tag.
#
# @return [String]
def outer_html
  open_tag = "<#{@name}#{serialize_attrs}>"
  return open_tag if @children.empty? && VOID.include?(@name)

  "#{open_tag}#{inner_html}</#{@name}>"
end
|
#path ⇒ Object
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
|
# File 'lib/scrapetor/dom.rb', line 275
# Builds a slash-separated location string for this element.
#
# Walks from this element up through its Element ancestors, prepending one
# segment per level:
# * an element with a non-empty "id" attribute contributes
#   name[@id='...'] and ends the walk;
# * any other element contributes name[N], where N is 1 plus the number of
#   preceding siblings that are Elements with the same name.
#
# @return [String] the segments joined with "/" and prefixed with "/"
def path
  segments = []
  node = self
  while node.is_a?(Element)
    id = node["id"]
    if id && !id.empty?
      # An id anchors the path; nothing above this level is recorded.
      segments.unshift("#{node.name}[@id='#{id}']")
      break
    end

    position = 1
    earlier = node.previous_sibling
    while earlier
      same_tag = earlier.is_a?(Element) && earlier.name == node.name
      position += 1 if same_tag
      earlier = earlier.previous_sibling
    end

    segments.unshift("#{node.name}[#{position}]")
    node = node.parent
  end
  "/" + segments.join("/")
end
|
#remove_attribute(key) ⇒ Object
136
137
138
139
|
# File 'lib/scrapetor/dom.rb', line 136
# Deletes an attribute if it is present.
#
# @param key [#to_s] attribute name; converted with to_s before deletion
# @return [self]
def remove_attribute(key)
  tap { @attributes.delete(key.to_s) }
end
|
#remove_class(klass = nil) ⇒ Object
167
168
169
170
171
172
173
174
175
176
177
178
179
180
|
# File 'lib/scrapetor/dom.rb', line 167
# Removes class names from the "class" attribute.
#
# With no argument (or nil) the whole attribute is removed. Otherwise the
# argument is split on whitespace and every listed name is dropped; if no
# class names remain, the attribute itself is removed.
#
# @param klass [#to_s, nil] whitespace-separated class name(s), or nil
# @return [self]
def remove_class(klass = nil)
  remaining = klass.nil? ? [] : classes - klass.to_s.split(/\s+/)
  if remaining.empty?
    remove_attribute("class")
  else
    self["class"] = remaining.join(" ")
  end
  self
end
|
#tag_name ⇒ Object
272
|
# File 'lib/scrapetor/dom.rb', line 272
# Returns the tag name stored in @name.
def tag_name
  @name
end
|
#text ⇒ Object
Also known as:
content, inner_text
188
189
190
|
# File 'lib/scrapetor/dom.rb', line 188
# Returns the concatenation of each child's text, in child order.
#
# @return [String] empty when there are no children
def text
  pieces = @children.map { |child| child.text }
  pieces.join
end
|
#text=(s) ⇒ Object
Also known as:
content=
194
195
196
197
|
# File 'lib/scrapetor/dom.rb', line 194
# Replaces all children with a single Text node.
#
# The new Text node is built first, from s.to_s with this element as parent.
# The outgoing children are then detached (their parent is reset to nil) so
# they no longer claim to belong to this element once they are gone from
# @children.
#
# @param s [#to_s] the new text content
# @return [Object] the argument that was passed in
def text=(s)
  replacement = [Text.new(s.to_s, parent: self)]
  # Only clear links that still point here; a child already re-parented
  # elsewhere keeps its current parent.
  @children.each { |old| old.parent = nil if old.parent.equal?(self) }
  @children = replacement
  s
end
|
#traverse {|_self| ... } ⇒ Object
329
330
331
332
333
334
335
336
337
338
339
340
|
# File 'lib/scrapetor/dom.rb', line 329
# Visits this element first, then its children in order.
#
# A child that responds to traverse is walked by calling its own traverse
# with the same block; any other child is yielded directly.
#
# @yield [node] each visited node
# @return [self, Enumerator] an Enumerator when no block is given
def traverse(&block)
  return enum_for(:traverse) unless block_given?

  yield self
  @children.each do |child|
    next child.traverse(&block) if child.respond_to?(:traverse)

    yield child
  end
  self
end
|
#type ⇒ Object
271
|
# File 'lib/scrapetor/dom.rb', line 271
# Returns the constant 1 as this node's type code.
def type
  1
end
|
#values ⇒ Object
149
150
151
|
# File 'lib/scrapetor/dom.rb', line 149
# Lists the attribute values, i.e. the values of the attribute map.
def values; @attributes.values; end
|
#wrap(html_or_node) ⇒ Object
Wrap this element in an HTML fragment (string) or another element, placing this element as the deepest descendant of the wrapping tree. Matches Nokogiri's `Node#wrap` semantics.
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
|
# File 'lib/scrapetor/dom.rb', line 302
# Wraps this element in another element, taking this element's place in the
# parent and re-attaching this element as the last child of the wrapper's
# deepest first-element descendant.
#
# The wrapper is either the Element passed in, or the first element node
# parsed from the argument's string form by Dom::Parser.fragment. Markup
# that contains no element (plain text, for example) cannot serve as a
# wrapper, so the tree is left untouched in that case.
#
# Nothing happens when this element has no parent, when no wrapper element
# can be obtained, or when this element is not found among its parent's
# children.
#
# @param html_or_node [Element, #to_s] wrapper element or wrapper markup
# @return [self]
def wrap(html_or_node)
  return self unless @parent

  wrapper = case html_or_node
            when Element
              html_or_node
            else
              # Only an element can hold children; never fall back to a
              # non-element node from the parsed fragment.
              Dom::Parser.fragment(html_or_node.to_s).find(&:element?)
            end
  return self if wrapper.nil?

  # Descend through first element children to find the innermost slot.
  deepest = wrapper
  while (next_level = deepest.first_element_child)
    deepest = next_level
  end

  idx = @parent.children.index(self)
  return self unless idx

  wrapper.parent = @parent
  @parent.children[idx, 1] = [wrapper]
  @parent = deepest
  deepest.children << self
  self
end
|
#xpath(_expr) ⇒ Object
257
258
259
260
261
262
|
# File 'lib/scrapetor/dom.rb', line 257
# Placeholder XPath query: the expression is ignored and an empty Array is
# always returned.
def xpath(_expr); []; end
|