Class: Moxml::Adapter::Nokogiri

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/nokogiri.rb

Defined Under Namespace

Classes: NokogiriSAXBridge

Constant Summary

Constants inherited from Base

Base::ENTITY_MARKER, Base::ENTITY_MARKER_RE, Base::ENTITY_NAME_PATTERN, Base::ENTITY_NAME_RE, Base::SERIALIZED_ENTITY_MARKER_RE, Base::STANDARD_ENTITIES

Class Method Summary collapse

Methods inherited from Base

actual_native, create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, duplicate_node, patch_node, prepare_for_new_document, preprocess_entities, restore_entities, sax_supported?, set_attribute_name, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/moxml/adapter/nokogiri.rb', line 248

# Appends +child+ to +element+, with two special cases.
#
# * An "xml" processing instruction added to a Document also records the
#   declaration's version/encoding/standalone in the attachment store
#   (version falls back to "1.0"; missing values are dropped).
# * A doctype node is not reparented; an equivalent internal subset is
#   created on +element+ instead.
#
# @param element the native parent node
# @param child the native node to append
# @return the result of the native +add_child+ or +create_internal_subset+
def add_child(element, child)
  declaration_on_document =
    element.is_a?(::Nokogiri::XML::Document) &&
    child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    child.name == "xml"

  if declaration_on_document
    # Remember the declaration state alongside the document
    decl_state = {
      version: declaration_attribute(child, "version") || "1.0",
      encoding: declaration_attribute(child, "encoding"),
      standalone: declaration_attribute(child, "standalone"),
    }
    attachments.set(element, :xml_decl, decl_state.compact)
  end

  return element.add_child(child) unless node_type(child) == :doctype

  # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
  element.create_internal_subset(
    child.name, child.external_id, child.system_id
  )
end

.add_next_sibling(node, sibling) ⇒ Object



280
281
282
# File 'lib/moxml/adapter/nokogiri.rb', line 280

# Delegates to the native node's +add_next_sibling+.
#
# @param node the native reference node
# @param sibling the native node to insert as its next sibling
# @return whatever the native +add_next_sibling+ call returns
def add_next_sibling(node, sibling)
  node.add_next_sibling(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



276
277
278
# File 'lib/moxml/adapter/nokogiri.rb', line 276

# Delegates to the native node's +add_previous_sibling+.
#
# @param node the native reference node
# @param sibling the native node to insert as its previous sibling
# @return whatever the native +add_previous_sibling+ call returns
def add_previous_sibling(node, sibling)
  node.add_previous_sibling(sibling)
end

.adjacent_to_entity_reference?(node) ⇒ Boolean

Returns:

  • (Boolean)


192
193
194
195
# File 'lib/moxml/adapter/nokogiri.rb', line 192

# Tells whether either immediate sibling of +node+ is an entity reference.
#
# @param node the native node whose neighbours are inspected
# @return [Boolean] true when the previous or the next sibling is a
#   Nokogiri::XML::EntityReference
def adjacent_to_entity_reference?(node)
  entity_ref = ::Nokogiri::XML::EntityReference
  return true if node.previous_sibling.is_a?(entity_ref)

  node.next_sibling.is_a?(entity_ref)
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



379
380
381
382
383
384
385
386
387
388
# File 'lib/moxml/adapter/nokogiri.rb', line 379

# Runs +expression+ against +node+ through the native +at_xpath+.
#
# @param node the native context node
# @param expression the XPath expression
# @param namespaces optional namespace bindings, passed through unchanged
# @return whatever the native +at_xpath+ call returns
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error;
#   the error carries the expression, the adapter name and the node
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces)
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end

.attachments ⇒ Object



10
11
12
# File 'lib/moxml/adapter/nokogiri.rb', line 10

# Returns the Moxml::NativeAttachment store used by this adapter.
#
# The store is created on first use and memoized in +@attachments+; other
# methods of this adapter keep per-document state in it under the
# +:xml_decl+ key.
#
# @return [Moxml::NativeAttachment]
def attachments
  @attachments ||= Moxml::NativeAttachment.new
end

.attribute_element(attr) ⇒ Object



222
223
224
# File 'lib/moxml/adapter/nokogiri.rb', line 222

# Returns the native parent of an attribute node.
#
# @param attr the native attribute node
# @return the value of +attr.parent+ (presumably the owning element)
def attribute_element(attr)
  attr.parent
end

.attributes(element) ⇒ Object



226
227
228
# File 'lib/moxml/adapter/nokogiri.rb', line 226

# Lists the native attribute objects of +element+.
#
# @param element the native element
# @return the +values+ of the element's native +attributes+ collection
def attributes(element)
  element.attributes.values
end

.cdata_content(node) ⇒ Object



315
316
317
# File 'lib/moxml/adapter/nokogiri.rb', line 315

# Reads the content of a CDATA node.
#
# @param node the native CDATA node
# @return the value of +node.content+
def cdata_content(node)
  node.content
end

.children(node) ⇒ Object



184
185
186
187
188
189
190
# File 'lib/moxml/adapter/nokogiri.rb', line 184

# Returns the children of +node+ with insignificant whitespace filtered out.
#
# A text child is dropped only when all of the following hold: its content
# is blank after stripping, it has at least one sibling, and it is not next
# to an entity reference. Everything else is kept.
#
# @param node the native parent node
# @return [Array] the remaining native child nodes
def children(node)
  node.children.reject do |child|
    next false unless child.text?
    next false unless child.content.strip.empty?
    # A lone whitespace-only text node is the element's actual content
    next false if child.previous_sibling.nil? && child.next_sibling.nil?

    !adjacent_to_entity_reference?(child)
  end
end

.comment_content(node) ⇒ Object



323
324
325
# File 'lib/moxml/adapter/nokogiri.rb', line 323

# Reads the content of a comment node.
#
# @param node the native comment node
# @return the value of +node.content+
def comment_content(node)
  node.content
end

.create_document(_native_doc = nil) ⇒ Object



69
70
71
# File 'lib/moxml/adapter/nokogiri.rb', line 69

# Creates a new, empty native document.
#
# @param _native_doc accepted for interface compatibility; not used here
# @return [::Nokogiri::XML::Document] a fresh document on every call
def create_document(_native_doc = nil)
  ::Nokogiri::XML::Document.new
end

.create_fragment ⇒ Object



73
74
75
76
77
78
79
# File 'lib/moxml/adapter/nokogiri.rb', line 73

# Creates an empty document fragment owned by a brand-new document.
#
# @return [::Nokogiri::XML::DocumentFragment]
def create_fragment
  # document fragments are weird and should be used with caution:
  # https://github.com/sparklemotion/nokogiri/issues/572
  ::Nokogiri::XML::DocumentFragment.new(
    ::Nokogiri::XML::Document.new,
  )
end

.create_native_cdata(content, owner_doc = nil) ⇒ Object



89
90
91
# File 'lib/moxml/adapter/nokogiri.rb', line 89

# Creates a native CDATA node.
#
# @param content the CDATA content
# @param owner_doc the document the node is created for; when nil a fresh
#   document from +create_document+ is used
# @return [::Nokogiri::XML::CDATA]
def create_native_cdata(content, owner_doc = nil)
  ::Nokogiri::XML::CDATA.new(owner_doc || create_document, content)
end

.create_native_comment(content, owner_doc = nil) ⇒ Object



93
94
95
# File 'lib/moxml/adapter/nokogiri.rb', line 93

# Creates a native comment node.
#
# @param content the comment content
# @param owner_doc the document the node is created for; when nil a fresh
#   document from +create_document+ is used
# @return [::Nokogiri::XML::Comment]
def create_native_comment(content, owner_doc = nil)
  ::Nokogiri::XML::Comment.new(owner_doc || create_document, content)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



109
110
111
112
113
114
115
# File 'lib/moxml/adapter/nokogiri.rb', line 109

# Creates the native representation of an XML declaration.
#
# The declaration is modelled as a processing instruction with target
# "xml" on a fresh document; its content comes from
# +build_declaration_attrs+ (defined outside this view).
#
# @param version the declaration's version value
# @param encoding the declaration's encoding value
# @param standalone the declaration's standalone value
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_declaration(version, encoding, standalone)
  ::Nokogiri::XML::ProcessingInstruction.new(
    create_document,
    "xml",
    build_declaration_attrs(version, encoding, standalone),
  )
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



97
98
99
100
101
# File 'lib/moxml/adapter/nokogiri.rb', line 97

# Creates a native doctype by adding an internal subset to a fresh document.
#
# @param name the doctype name
# @param external_id the external (public) identifier
# @param system_id the system identifier
# @return the result of the native +create_internal_subset+ call
def create_native_doctype(name, external_id, system_id)
  create_document.create_internal_subset(
    name, external_id, system_id
  )
end

.create_native_element(name, owner_doc = nil) ⇒ Object



81
82
83
# File 'lib/moxml/adapter/nokogiri.rb', line 81

# Creates a native element node.
#
# @param name the element name
# @param owner_doc the document the node is created for; when nil a fresh
#   document from +create_document+ is used
# @return [::Nokogiri::XML::Element]
def create_native_element(name, owner_doc = nil)
  ::Nokogiri::XML::Element.new(name, owner_doc || create_document)
end

.create_native_entity_reference(name) ⇒ Object



117
118
119
# File 'lib/moxml/adapter/nokogiri.rb', line 117

# Creates a native entity reference node on a fresh document.
#
# @param name the entity name
# @return [::Nokogiri::XML::EntityReference]
def create_native_entity_reference(name)
  ::Nokogiri::XML::EntityReference.new(create_document, name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



156
157
158
# File 'lib/moxml/adapter/nokogiri.rb', line 156

# Declares a namespace on +element+.
#
# @param element the native element receiving the declaration
# @param prefix the namespace prefix
# @param uri the namespace URI
# @return the result of the native +add_namespace_definition+ call
def create_native_namespace(element, prefix, uri)
  element.add_namespace_definition(prefix, uri)
end

.create_native_processing_instruction(target, content) ⇒ Object



103
104
105
106
107
# File 'lib/moxml/adapter/nokogiri.rb', line 103

# Creates a native processing instruction on a fresh document.
#
# The owning document is obtained from +create_document+, like the other
# node factories of this adapter, instead of being instantiated directly.
#
# @param target the processing instruction target
# @param content the processing instruction content
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  ::Nokogiri::XML::ProcessingInstruction.new(create_document, target, content)
end

.create_native_text(content, owner_doc = nil) ⇒ Object



85
86
87
# File 'lib/moxml/adapter/nokogiri.rb', line 85

# Creates a native text node.
#
# @param content the text content
# @param owner_doc the document the node is created for; when nil a fresh
#   document from +create_document+ is used
# @return [::Nokogiri::XML::Text]
def create_native_text(content, owner_doc = nil)
  ::Nokogiri::XML::Text.new(content, owner_doc || create_document)
end

.declaration_attribute(declaration, attr_name) ⇒ Object



125
126
127
128
129
130
# File 'lib/moxml/adapter/nokogiri.rb', line 125

# Extracts one pseudo-attribute (version, encoding, standalone) from the
# content of a declaration node.
#
# The value may be delimited by double or single quotes and whitespace is
# allowed around "=". The name is matched literally and only at a name
# boundary, so looking up "version" does not match "xversion".
#
# @param declaration the native declaration node; only +content+ is read
# @param attr_name [String, Symbol] the pseudo-attribute name
# @return [String, nil] the unquoted value, or nil when the declaration
#   has no content or the pseudo-attribute is absent
def declaration_attribute(declaration, attr_name)
  content = declaration.content
  return nil unless content

  name = Regexp.escape(attr_name.to_s)
  # \1 forces the closing quote to be the same character as the opening one
  match = content.match(/(?<![\w:.-])#{name}\s*=\s*(["'])(.*?)\1/m)
  match && match[2]
end

.doctype_external_id(native) ⇒ Object



360
361
362
# File 'lib/moxml/adapter/nokogiri.rb', line 360

# Reads the external identifier of a doctype.
#
# @param native the native doctype node
# @return the value of +native.external_id+
def doctype_external_id(native)
  native.external_id
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



356
357
358
# File 'lib/moxml/adapter/nokogiri.rb', line 356

# Reads the name of a doctype.
#
# @param native the native doctype node
# @return the value of +native.name+
def doctype_name(native)
  native.name
end

.doctype_system_id(native) ⇒ Object



364
365
366
# File 'lib/moxml/adapter/nokogiri.rb', line 364

# Reads the system identifier of a doctype.
#
# @param native the native doctype node
# @return the value of +native.system_id+
def doctype_system_id(native)
  native.system_id
end

.document(node) ⇒ Object



214
215
216
# File 'lib/moxml/adapter/nokogiri.rb', line 214

# Returns the native document a node belongs to.
#
# @param node the native node
# @return the value of +node.document+
def document(node)
  node.document
end

.entity_reference_name(node) ⇒ Object



121
122
123
# File 'lib/moxml/adapter/nokogiri.rb', line 121

# Reads the entity name of an entity reference node.
#
# @param node the native entity reference node
# @return the value of +node.name+
def entity_reference_name(node)
  node.name
end

.get_attribute(element, name) ⇒ Object



234
235
236
237
# File 'lib/moxml/adapter/nokogiri.rb', line 234

# Looks up the native attribute object called +name+ on +element+.
#
# @param element the native element
# @param name the attribute name; converted with +to_s+ before the lookup
# @return the entry of +element.attributes+ for that name, or nil when the
#   collection has no such key
def get_attribute(element, name)
  # attributes keys don't include attribute namespaces
  element.attributes[name.to_s]
end

.get_attribute_value(element, name) ⇒ Object



239
240
241
242
# File 'lib/moxml/adapter/nokogiri.rb', line 239

# Reads the value of the attribute called +name+ on +element+.
#
# @param element the native element
# @param name the attribute name; converted with +to_s+ before the lookup
# @return the result of the native +element[name]+ lookup
def get_attribute_value(element, name)
  # get the attribute value by its name including a namespace
  element[name.to_s]
end

.has_declaration?(native_doc, wrapper) ⇒ Boolean

Returns:

  • (Boolean)


420
421
422
423
424
425
426
# File 'lib/moxml/adapter/nokogiri.rb', line 420

# Tells whether the document should be treated as having an XML declaration.
#
# State recorded in the attachment store wins: a non-nil +:xml_decl+ entry
# means "has a declaration", a nil entry means it was removed. Without any
# recorded entry the wrapper's own +has_xml_declaration+ value is returned.
#
# @param native_doc the native document used as the attachment key
# @param wrapper the wrapping document object, consulted as fallback
# @return [Boolean] (or whatever +wrapper.has_xml_declaration+ returns)
def has_declaration?(native_doc, wrapper)
  return wrapper.has_xml_declaration unless attachments.key?(native_doc, :xml_decl)

  !attachments.get(native_doc, :xml_decl).nil?
end

.in_scope_namespaces(element) ⇒ Object



351
352
353
# File 'lib/moxml/adapter/nokogiri.rb', line 351

# Returns the namespaces reported by the native +namespace_scopes+ call.
#
# @param element the native element
# @return the value of +element.namespace_scopes+
def in_scope_namespaces(element)
  element.namespace_scopes
end

.inner_text(node) ⇒ Object



304
305
306
307
308
309
# File 'lib/moxml/adapter/nokogiri.rb', line 304

# Concatenates the content of the direct children of +node+, skipping
# element and comment children. Descendants are not visited.
#
# @param node the native parent node
# @return [String] the joined content ("" when nothing qualifies)
def inner_text(node)
  node.children
      .select { |child| !child.element? && !child.comment? }
      .map(&:content)
      .join
end

.namespace(element) ⇒ Object



148
149
150
# File 'lib/moxml/adapter/nokogiri.rb', line 148

# Returns the native namespace of +element+.
#
# @param element the native element
# @return the value of +element.namespace+
def namespace(element)
  element.namespace
end

.namespace_definitions(node) ⇒ Object



347
348
349
# File 'lib/moxml/adapter/nokogiri.rb', line 347

# Returns the namespace definitions reported by the native node.
#
# @param node the native node
# @return the value of +node.namespace_definitions+
def namespace_definitions(node)
  node.namespace_definitions
end

.namespace_prefix(namespace) ⇒ Object



339
340
341
# File 'lib/moxml/adapter/nokogiri.rb', line 339

# Reads the prefix of a native namespace.
#
# @param namespace the native namespace object
# @return the value of +namespace.prefix+
def namespace_prefix(namespace)
  namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



343
344
345
# File 'lib/moxml/adapter/nokogiri.rb', line 343

# Reads the URI of a native namespace.
#
# @param namespace the native namespace object
# @return the value of +namespace.href+
def namespace_uri(namespace)
  namespace.href
end

.next_sibling(node) ⇒ Object



206
207
208
# File 'lib/moxml/adapter/nokogiri.rb', line 206

# Returns the native next sibling of +node+.
#
# Unlike +children+, this does not skip whitespace-only text nodes.
#
# @param node the native node
# @return the value of +node.next_sibling+
def next_sibling(node)
  node.next_sibling
end

.node_name(node) ⇒ Object



176
177
178
# File 'lib/moxml/adapter/nokogiri.rb', line 176

# Reads the name of a native node.
#
# @param node the native node
# @return the value of +node.name+
def node_name(node)
  node.name
end

.node_type(node) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/moxml/adapter/nokogiri.rb', line 160

# Maps a native Nokogiri object to the adapter's symbolic node type.
#
# Both documents and document fragments are reported as +:document+;
# anything that matches none of the listed classes is +:unknown+.
#
# NOTE(review): the first matching +when+ wins, so the branch order is
# significant wherever one listed class inherits from another (CDATA is
# tested before Text here, presumably for that reason — confirm against
# Nokogiri's class hierarchy before reordering).
#
# @param node the native object to classify
# @return [Symbol] one of :element, :cdata, :text, :comment, :attribute,
#   :namespace, :processing_instruction, :document, :doctype,
#   :entity_reference or :unknown
def node_type(node)
  case node
  when ::Nokogiri::XML::Element then :element
  when ::Nokogiri::XML::CDATA then :cdata
  when ::Nokogiri::XML::Text then :text
  when ::Nokogiri::XML::Comment then :comment
  when ::Nokogiri::XML::Attr then :attribute
  when ::Nokogiri::XML::Namespace then :namespace
  when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
  when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
  when ::Nokogiri::XML::DTD then :doctype
  when ::Nokogiri::XML::EntityReference then :entity_reference
  else :unknown
  end
end

.parent(node) ⇒ Object



202
203
204
# File 'lib/moxml/adapter/nokogiri.rb', line 202

# Returns the native parent of +node+.
#
# @param node the native node
# @return the value of +node.parent+
def parent(node)
  node.parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/moxml/adapter/nokogiri.rb', line 18

# Parses +xml+ with Nokogiri and wraps the result in a Document.
#
# The input first goes through +preprocess_entities+. Parsing is always
# configured as strict + nonet; recovery is switched on unless
# +options[:strict]+ is set. With +options[:fragment]+ the input is parsed
# as a document fragment, otherwise as a full document.
#
# @param xml the XML source
# @param options [Hash] recognised keys: +:fragment+, +:strict+
# @param _context an existing context to reuse; when nil a new
#   +Context.new(:nokogiri)+ is created
# @return [Document] the wrapper around the native parse result
# @raise [Moxml::ParseError] when Nokogiri raises a syntax error; carries
#   the error's line and column
def parse(xml, options = {}, _context = nil)
  processed_xml = preprocess_entities(xml)

  # One parser configuration shared by both parsing modes
  configure = proc do |config|
    config.strict.nonet
    config.recover unless options[:strict]
  end

  native_doc =
    begin
      if options[:fragment]
        ::Nokogiri::XML::DocumentFragment.parse(processed_xml, &configure)
      else
        # preprocess_entities always returns UTF-8, so tell Nokogiri to
        # parse as UTF-8 regardless of any original encoding option.
        ::Nokogiri::XML(processed_xml, nil, "UTF-8", &configure)
      end
    rescue ::Nokogiri::XML::SyntaxError => e
      raise Moxml::ParseError.new(e.message, line: e.line,
                                             column: e.column)
    end

  # Reuse the caller's context when one was supplied
  Document.new(native_doc, _context || Context.new(:nokogiri))
end

.previous_sibling(node) ⇒ Object



210
211
212
# File 'lib/moxml/adapter/nokogiri.rb', line 210

# Returns the native previous sibling of +node+.
#
# Unlike +children+, this does not skip whitespace-only text nodes.
#
# @param node the native node
# @return the value of +node.previous_sibling+
def previous_sibling(node)
  node.previous_sibling
end

.processing_instruction_content(node) ⇒ Object



331
332
333
# File 'lib/moxml/adapter/nokogiri.rb', line 331

# Reads the content of a processing instruction.
#
# @param node the native processing instruction node
# @return the value of +node.content+
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



152
153
154
# File 'lib/moxml/adapter/nokogiri.rb', line 152

# Reads the target of a processing instruction, which the native node
# exposes as its name.
#
# @param node the native processing instruction node
# @return the value of +node.name+
def processing_instruction_target(node)
  node.name
end

.remove(node) ⇒ Object



284
285
286
287
288
289
290
291
292
293
294
# File 'lib/moxml/adapter/nokogiri.rb', line 284

# Detaches +node+ from its tree.
#
# When the node is an "xml" processing instruction sitting directly under
# a Document, the document's +:xml_decl+ attachment is set to nil first so
# that later calls see the declaration as removed.
#
# @param node the native node to remove
# @return the result of the native +remove+ call
def remove(node)
  declaration_on_document =
    node.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    node.name == "xml" &&
    node.parent.is_a?(::Nokogiri::XML::Document)

  # Clear document's xml_decl when removing declaration
  attachments.set(node.parent, :xml_decl, nil) if declaration_on_document

  node.remove
end

.remove_attribute(element, name) ⇒ Object



244
245
246
# File 'lib/moxml/adapter/nokogiri.rb', line 244

# Removes the attribute called +name+ from +element+.
#
# @param element the native element
# @param name the attribute name; converted with +to_s+ first
# @return the result of the native +remove_attribute+ call
def remove_attribute(element, name)
  element.remove_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



296
297
298
# File 'lib/moxml/adapter/nokogiri.rb', line 296

# Replaces +node+ with +new_node+ via the native +replace+.
#
# @param node the native node to be replaced
# @param new_node the native node taking its place
# @return the result of the native +replace+ call
def replace(node, new_node)
  node.replace(new_node)
end

.replace_children(node, new_children) ⇒ Object



197
198
199
200
# File 'lib/moxml/adapter/nokogiri.rb', line 197

# Replaces all children of +node+ with +new_children+.
#
# The existing children are unlinked first; every new child is then added
# through this adapter's +add_child+, so its declaration and doctype
# special cases apply here as well.
#
# @param node the native parent node
# @param new_children an enumerable of native nodes
# @return the result of +new_children.each+
def replace_children(node, new_children)
  node.children.unlink
  new_children.each { |child| add_child(node, child) }
end

.root(document) ⇒ Object



218
219
220
# File 'lib/moxml/adapter/nokogiri.rb', line 218

# Returns the root node of a document or document-like container.
#
# @param document a native Document, or another container such as a fragment
# @return +document.root+ for a Nokogiri::XML::Document; otherwise the
#   first entry of +document.children+ (nil when there is none)
def root(document)
  return document.root if document.is_a?(::Nokogiri::XML::Document)

  document.children.first
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for Nokogiri

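Parameters:

  • xml — the XML source; an IO or StringIO is passed to the parser as is, anything else is parsed as xml.to_s
  • handler — the Moxml SAX handler wrapped by NokogiriSAXBridge; its on_error is called with a Moxml::ParseError when Nokogiri raises a syntax error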



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/moxml/adapter/nokogiri.rb', line 50

# SAX-parses +xml+, forwarding events to +handler+.
#
# A NokogiriSAXBridge wraps the handler so that Nokogiri's SAX callbacks
# are translated into Moxml's. Syntax errors raised by Nokogiri are not
# propagated; they are converted to Moxml::ParseError and handed to
# +handler.on_error+.
#
# @param xml an IO/StringIO (passed through as is) or any other object
#   (parsed as +xml.to_s+)
# @param handler the Moxml SAX handler
# @return the parser's result, or the result of +handler.on_error+
def sax_parse(xml, handler)
  # Nokogiri parser driving a bridge that speaks Moxml SAX
  parser = ::Nokogiri::XML::SAX::Parser.new(NokogiriSAXBridge.new(handler))

  # Streams are handed over untouched; everything else becomes a String
  streaming = xml.is_a?(IO) || xml.is_a?(StringIO)
  parser.parse(streaming ? xml : xml.to_s)
rescue ::Nokogiri::XML::SyntaxError => e
  handler.on_error(
    Moxml::ParseError.new(e.message, line: e.line,
                                     column: e.column),
  )
end

.serialize(node, options = {}) ⇒ Object



390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/moxml/adapter/nokogiri.rb', line 390

# Serializes +node+ to XML through the native +to_xml+.
#
# Save flags start at AS_XML and are extended as follows:
# * +options[:expand_empty]+ adds NO_EMPTY_TAGS
# * a positive +options[:indent]+ adds FORMAT
# * NO_DECLARATION is added when +options[:no_declaration]+ is truthy; if
#   that key is absent, it is added when the attachment store holds a nil
#   +:xml_decl+ entry for the node (i.e. the declaration was removed)
#
# @param node the native node to serialize
# @param options [Hash] recognised keys: +:expand_empty+, +:indent+,
#   +:encoding+, +:no_declaration+
# @return the result of +node.to_xml+
def serialize(node, options = {})
  flags = ::Nokogiri::XML::Node::SaveOptions
  save_options = flags::AS_XML

  # Don't force expand empty elements if they're really empty
  save_options |= flags::NO_EMPTY_TAGS if options[:expand_empty]
  save_options |= flags::FORMAT if options[:indent].to_i.positive?

  # An explicit no_declaration option always wins; otherwise fall back to
  # the stored declaration state, where nil means "declaration removed".
  omit_declaration =
    if options.key?(:no_declaration)
      options[:no_declaration]
    elsif attachments.key?(node, :xml_decl)
      attachments.get(node, :xml_decl).nil?
    end
  save_options |= flags::NO_DECLARATION if omit_declaration

  node.to_xml(
    indent: options[:indent],
    encoding: options[:encoding],
    save_with: save_options,
  )
end

.set_attribute(element, name, value) ⇒ Object



230
231
232
# File 'lib/moxml/adapter/nokogiri.rb', line 230

# Sets an attribute on +element+.
#
# Both the name and the value are converted with +to_s+ before being
# handed to the native element.
#
# @param element the native element
# @param name the attribute name
# @param value the attribute value
# @return [String] the stringified value that was assigned
def set_attribute(element, name, value)
  element[name.to_s] = value.to_s
end

.set_cdata_content(node, content) ⇒ Object



319
320
321
# File 'lib/moxml/adapter/nokogiri.rb', line 319

# Replaces the content of a CDATA node through the native +content=+.
#
# @param node the native CDATA node
# @param content the new content
# @return the assigned +content+
def set_cdata_content(node, content)
  node.content = content
end

.set_comment_content(node, content) ⇒ Object



327
328
329
# File 'lib/moxml/adapter/nokogiri.rb', line 327

# Replaces the content of a comment node through +native_content=+.
#
# NOTE(review): this uses +native_content=+ whereas +set_cdata_content+
# uses +content=+ — presumably to bypass escaping done by +content=+;
# confirm against the Nokogiri API.
#
# @param node the native comment node
# @param content the new content
# @return the assigned +content+
def set_comment_content(node, content)
  node.native_content = content
end

.set_declaration_attribute(declaration, attr_name, value) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
# File 'lib/moxml/adapter/nokogiri.rb', line 132

# Sets or removes one pseudo-attribute of a declaration node and rewrites
# the node's content from the resulting attribute list.
#
# The current attributes come from +current_declaration_attributes+
# (defined outside this view). A nil +value+ deletes the attribute; any
# other value is stored under +attr_name+. The content is then rebuilt as
# space-separated name="value" pairs.
#
# @param declaration the native declaration node
# @param attr_name the pseudo-attribute name
# @param value the new value, or nil to remove the attribute
# @return [String] the content string written to the node
def set_declaration_attribute(declaration, attr_name, value)
  attrs = current_declaration_attributes(declaration)
  attrs[attr_name] = value unless value.nil?
  attrs.delete(attr_name) if value.nil?

  pairs = attrs.map { |key, val| %(#{key}="#{val}") }
  declaration.native_content = pairs.join(" ")
end

.set_namespace(element, ns) ⇒ Object



144
145
146
# File 'lib/moxml/adapter/nokogiri.rb', line 144

# Assigns a namespace to +element+.
#
# @param element the native element
# @param ns the native namespace to assign
# @return the assigned +ns+
def set_namespace(element, ns)
  element.namespace = ns
end

.set_node_name(node, name) ⇒ Object



180
181
182
# File 'lib/moxml/adapter/nokogiri.rb', line 180

# Renames a native node.
#
# @param node the native node
# @param name the new name
# @return the assigned +name+
def set_node_name(node, name)
  node.name = name
end

.set_processing_instruction_content(node, content) ⇒ Object



335
336
337
# File 'lib/moxml/adapter/nokogiri.rb', line 335

# Replaces the content of a processing instruction through
# +native_content=+.
#
# @param node the native processing instruction node
# @param content the new content
# @return the assigned +content+
def set_processing_instruction_content(node, content)
  node.native_content = content
end

.set_root(doc, element) ⇒ Object



14
15
16
# File 'lib/moxml/adapter/nokogiri.rb', line 14

# Makes +element+ the root of +doc+.
#
# @param doc the native document
# @param element the native element to install as root
# @return the assigned +element+
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



311
312
313
# File 'lib/moxml/adapter/nokogiri.rb', line 311

# Replaces the content of a text node through +native_content=+.
#
# @param node the native text node
# @param content the new content
# @return the assigned +content+
def set_text_content(node, content)
  node.native_content = content
end

.text_content(node) ⇒ Object



300
301
302
# File 'lib/moxml/adapter/nokogiri.rb', line 300

# Reads the text of a node as a String.
#
# @param node the native node
# @return [String] +node.text+ converted with +to_s+
def text_content(node)
  node.text.to_s
end

.xpath(node, expression, namespaces = nil) ⇒ Object



368
369
370
371
372
373
374
375
376
377
# File 'lib/moxml/adapter/nokogiri.rb', line 368

# Runs +expression+ against +node+ and returns all matches as an Array.
#
# @param node the native context node
# @param expression the XPath expression
# @param namespaces optional namespace bindings, passed through unchanged
# @return [Array] the native result converted with +to_a+
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error;
#   the error carries the expression, the adapter name and the node
def xpath(node, expression, namespaces = nil)
  node.xpath(expression, namespaces).to_a
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end