Class: Moxml::Adapter::Nokogiri

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/nokogiri.rb

Defined Under Namespace

Classes: NokogiriSAXBridge

Constant Summary

Constants inherited from Base

Base::ENTITY_MARKER, Base::ENTITY_MARKER_RE, Base::ENTITY_NAME_PATTERN, Base::ENTITY_NAME_RE, Base::SERIALIZED_ENTITY_MARKER_RE, Base::STANDARD_ENTITIES

Class Method Summary collapse

Methods inherited from Base

actual_native, create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, duplicate_node, patch_node, prepare_for_new_document, preprocess_entities, restore_entities, sax_supported?, set_attribute_name, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/moxml/adapter/nokogiri.rb', line 247

# Attaches +child+ to +element+, with two special cases:
#
# * An "xml" processing instruction added to a document is not inserted
#   into the tree. Its version (defaulting to "1.0"), encoding and
#   standalone values are stored in +attachments+ under +:xml_decl+
#   (nil values dropped) and the method returns +nil+.
# * A doctype child is recreated on +element+ via
#   +create_internal_subset+ using the child's name and identifiers.
#
# Every other child is appended with the native +add_child+ call.
def add_child(element, child)
  declaration_on_document =
    element.is_a?(::Nokogiri::XML::Document) &&
    child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    child.name == "xml"

  if declaration_on_document
    decl = {
      version: declaration_attribute(child, "version") || "1.0",
      encoding: declaration_attribute(child, "encoding"),
      standalone: declaration_attribute(child, "standalone"),
    }
    attachments.set(element, :xml_decl, decl.compact)
    return
  end

  return element.add_child(child) unless node_type(child) == :doctype

  element.create_internal_subset(
    child.name, child.external_id, child.system_id
  )
end

.add_next_sibling(node, sibling) ⇒ Object



276
277
278
# File 'lib/moxml/adapter/nokogiri.rb', line 276

# Inserts +sibling+ relative to +node+ by delegating to the node's own
# +add_next_sibling+ method.
#
# @param node the node that receives the +add_next_sibling+ call
# @param sibling the node passed to that call
# @return whatever the underlying +add_next_sibling+ call returns
def add_next_sibling(node, sibling)
  node.add_next_sibling(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



272
273
274
# File 'lib/moxml/adapter/nokogiri.rb', line 272

# Inserts +sibling+ relative to +node+ by delegating to the node's own
# +add_previous_sibling+ method.
#
# @param node the node that receives the +add_previous_sibling+ call
# @param sibling the node passed to that call
# @return whatever the underlying +add_previous_sibling+ call returns
def add_previous_sibling(node, sibling)
  node.add_previous_sibling(sibling)
end

.adjacent_to_entity_reference?(node) ⇒ Boolean

Returns:

  • (Boolean)


191
192
193
194
# File 'lib/moxml/adapter/nokogiri.rb', line 191

# Tells whether the node directly before or directly after +node+ is a
# Nokogiri entity reference.
#
# @param node the node whose immediate siblings are inspected
# @return [Boolean] true when either neighbour is an EntityReference
def adjacent_to_entity_reference?(node)
  entity_ref = ::Nokogiri::XML::EntityReference
  return true if node.previous_sibling.is_a?(entity_ref)

  node.next_sibling.is_a?(entity_ref)
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



375
376
377
378
379
380
381
382
383
384
# File 'lib/moxml/adapter/nokogiri.rb', line 375

# Evaluates +expression+ through the node's own +at_xpath+ and returns
# its result unchanged.
#
# @param node the node the query is run against
# @param expression the XPath expression
# @param namespaces optional namespace bindings forwarded as-is
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax
#   error; the original message, the expression and the node are
#   attached to the new error
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces)
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end

.attachments ⇒ Object



13
14
15
# File 'lib/moxml/adapter/nokogiri.rb', line 13

# Returns the Moxml::NativeAttachment instance held in +@attachments+,
# creating it on first use. Other methods on this adapter use it to
# store and read the +:xml_decl+ entry for a document.
def attachments
  @attachments ||= Moxml::NativeAttachment.new
end

.attribute_element(attr) ⇒ Object



221
222
223
# File 'lib/moxml/adapter/nokogiri.rb', line 221

# Returns the owner of an attribute, i.e. the value of +attr.parent+.
#
# @param attr the attribute node
# @return the result of +attr.parent+
def attribute_element(attr)
  attr.parent
end

.attributes(element) ⇒ Object



225
226
227
# File 'lib/moxml/adapter/nokogiri.rb', line 225

# Returns the attribute nodes of +element+: the values of the
# collection returned by +element.attributes+.
#
# @param element the element whose attributes are listed
# @return the result of +element.attributes.values+
def attributes(element)
  element.attributes.values
end

.cdata_content(node) ⇒ Object



311
312
313
# File 'lib/moxml/adapter/nokogiri.rb', line 311

# Reads the content of a CDATA node via +node.content+.
#
# @param node the CDATA node
# @return the result of +node.content+
def cdata_content(node)
  node.content
end

.children(node) ⇒ Object



187
188
189
# File 'lib/moxml/adapter/nokogiri.rb', line 187

# Returns the children of +node+ exactly as +node.children+ reports
# them.
#
# @param node the node whose children are requested
# @return the result of +node.children+
def children(node)
  node.children
end

.comment_content(node) ⇒ Object



319
320
321
# File 'lib/moxml/adapter/nokogiri.rb', line 319

# Reads the content of a comment node via +node.content+.
#
# @param node the comment node
# @return the result of +node.content+
def comment_content(node)
  node.content
end

.create_document(_native_doc = nil) ⇒ Object



72
73
74
# File 'lib/moxml/adapter/nokogiri.rb', line 72

# Creates a new, empty Nokogiri XML document.
#
# @param _native_doc accepted but ignored by this implementation
# @return a fresh +Nokogiri::XML::Document+
def create_document(_native_doc = nil)
  ::Nokogiri::XML::Document.new
end

.create_fragment ⇒ Object



76
77
78
79
80
81
82
# File 'lib/moxml/adapter/nokogiri.rb', line 76

# Creates an empty document fragment that belongs to a brand-new
# Nokogiri document.
#
# @return a new +Nokogiri::XML::DocumentFragment+
def create_fragment
  # Document fragments are weird and should be used with caution:
  # https://github.com/sparklemotion/nokogiri/issues/572
  owner = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::DocumentFragment.new(owner)
end

.create_native_cdata(content, owner_doc = nil) ⇒ Object



92
93
94
# File 'lib/moxml/adapter/nokogiri.rb', line 92

# Builds a Nokogiri CDATA node holding +content+. The node is created
# against +owner_doc+ or, when none is given, against a new document
# from +create_document+.
#
# @param content the CDATA content
# @param owner_doc optional document to create the node in
# @return the new +Nokogiri::XML::CDATA+ node
def create_native_cdata(content, owner_doc = nil)
  ::Nokogiri::XML::CDATA.new(owner_doc || create_document, content)
end

.create_native_comment(content, owner_doc = nil) ⇒ Object



96
97
98
# File 'lib/moxml/adapter/nokogiri.rb', line 96

# Builds a Nokogiri comment node holding +content+. The node is created
# against +owner_doc+ or, when none is given, against a new document
# from +create_document+.
#
# @param content the comment content
# @param owner_doc optional document to create the node in
# @return the new +Nokogiri::XML::Comment+ node
def create_native_comment(content, owner_doc = nil)
  ::Nokogiri::XML::Comment.new(owner_doc || create_document, content)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



112
113
114
115
116
117
118
# File 'lib/moxml/adapter/nokogiri.rb', line 112

# Represents an XML declaration as a processing instruction whose target
# is "xml". The instruction is created in a new document and its content
# is whatever +build_declaration_attrs+ produces for the three values.
#
# @param version declaration version value
# @param encoding declaration encoding value
# @param standalone declaration standalone value
# @return the new +Nokogiri::XML::ProcessingInstruction+
def create_native_declaration(version, encoding, standalone)
  owner = create_document
  attrs = build_declaration_attrs(version, encoding, standalone)
  ::Nokogiri::XML::ProcessingInstruction.new(owner, "xml", attrs)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



100
101
102
103
104
# File 'lib/moxml/adapter/nokogiri.rb', line 100

# Creates a doctype by calling +create_internal_subset+ on a new
# document obtained from +create_document+.
#
# @param name passed through as the first argument
# @param external_id passed through as the second argument
# @param system_id passed through as the third argument
# @return whatever +create_internal_subset+ returns
def create_native_doctype(name, external_id, system_id)
  create_document.create_internal_subset(
    name, external_id, system_id
  )
end

.create_native_element(name, owner_doc = nil) ⇒ Object



84
85
86
# File 'lib/moxml/adapter/nokogiri.rb', line 84

# Builds a Nokogiri element called +name+. The element is created
# against +owner_doc+ or, when none is given, against a new document
# from +create_document+. Note the constructor takes the name first and
# the document second.
#
# @param name the element name
# @param owner_doc optional document to create the element in
# @return the new +Nokogiri::XML::Element+
def create_native_element(name, owner_doc = nil)
  ::Nokogiri::XML::Element.new(name, owner_doc || create_document)
end

.create_native_entity_reference(name) ⇒ Object



120
121
122
# File 'lib/moxml/adapter/nokogiri.rb', line 120

# Builds a Nokogiri entity reference node named +name+ inside a new
# document from +create_document+.
#
# @param name the entity name
# @return the new +Nokogiri::XML::EntityReference+
def create_native_entity_reference(name)
  ::Nokogiri::XML::EntityReference.new(create_document, name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



159
160
161
# File 'lib/moxml/adapter/nokogiri.rb', line 159

# Declares a namespace on +element+ by delegating to
# +element.add_namespace_definition+.
#
# @param element the element receiving the declaration
# @param prefix the namespace prefix passed through
# @param uri the namespace URI passed through
# @return whatever +add_namespace_definition+ returns
def create_native_namespace(element, prefix, uri)
  element.add_namespace_definition(prefix, uri)
end

.create_native_processing_instruction(target, content) ⇒ Object



106
107
108
109
110
# File 'lib/moxml/adapter/nokogiri.rb', line 106

# Builds a processing instruction with the given +target+ and +content+
# inside a brand-new Nokogiri document.
#
# @param target the instruction target
# @param content the instruction content
# @return the new +Nokogiri::XML::ProcessingInstruction+
def create_native_processing_instruction(target, content)
  owner = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::ProcessingInstruction.new(owner, target, content)
end

.create_native_text(content, owner_doc = nil) ⇒ Object



88
89
90
# File 'lib/moxml/adapter/nokogiri.rb', line 88

# Builds a Nokogiri text node holding +content+. The node is created
# against +owner_doc+ or, when none is given, against a new document
# from +create_document+. Note the constructor takes the content first
# and the document second.
#
# @param content the text content
# @param owner_doc optional document to create the node in
# @return the new +Nokogiri::XML::Text+ node
def create_native_text(content, owner_doc = nil)
  ::Nokogiri::XML::Text.new(content, owner_doc || create_document)
end

.declaration_attribute(declaration, attr_name) ⇒ Object



128
129
130
131
132
133
# File 'lib/moxml/adapter/nokogiri.rb', line 128

# Extracts one pseudo-attribute (e.g. "version", "encoding",
# "standalone") from the textual content of a declaration node.
#
# The value may be quoted with double or single quotes and whitespace is
# tolerated around "=", as XML allows. The name is matched literally and
# only as a whole name, so looking up "version" does not pick up a value
# belonging to a longer name such as "myversion".
#
# @param declaration object whose +content+ holds the attribute text
# @param attr_name name of the pseudo-attribute to read
# @return [String, nil] the unquoted value, or nil when the content is
#   nil or the attribute is absent
def declaration_attribute(declaration, attr_name)
  content = declaration.content
  return nil unless content

  name = Regexp.escape(attr_name.to_s)
  # The lookbehind rejects matches that start in the middle of a longer
  # name while still accepting a name that directly follows a quote.
  pattern = /(?<![\w.:-])#{name}\s*=\s*(?:"([^"]*)"|'([^']*)')/
  match = content.match(pattern)
  match && (match[1] || match[2])
end

.doctype_external_id(native) ⇒ Object



356
357
358
# File 'lib/moxml/adapter/nokogiri.rb', line 356

# Returns the external identifier of a doctype node via
# +native.external_id+.
#
# @param native the doctype node
# @return the result of +native.external_id+
def doctype_external_id(native)
  native.external_id
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



352
353
354
# File 'lib/moxml/adapter/nokogiri.rb', line 352

# Returns the name of a doctype node via +native.name+.
#
# @param native the doctype node
# @return the result of +native.name+
def doctype_name(native)
  native.name
end

.doctype_system_id(native) ⇒ Object



360
361
362
# File 'lib/moxml/adapter/nokogiri.rb', line 360

# Returns the system identifier of a doctype node via
# +native.system_id+.
#
# @param native the doctype node
# @return the result of +native.system_id+
def doctype_system_id(native)
  native.system_id
end

.document(node) ⇒ Object



213
214
215
# File 'lib/moxml/adapter/nokogiri.rb', line 213

# Returns the document that +node+ reports through +node.document+.
#
# @param node the node whose document is requested
# @return the result of +node.document+
def document(node)
  node.document
end

.entity_reference_name(node) ⇒ Object



124
125
126
# File 'lib/moxml/adapter/nokogiri.rb', line 124

# Returns the name of an entity reference node via +node.name+.
#
# @param node the entity reference node
# @return the result of +node.name+
def entity_reference_name(node)
  node.name
end

.get_attribute(element, name) ⇒ Object



233
234
235
236
# File 'lib/moxml/adapter/nokogiri.rb', line 233

# Looks up an attribute node on +element+ by name. The name is converted
# with +to_s+ and used as a key into +element.attributes+.
#
# @param element the element to inspect
# @param name the attribute name (converted with +to_s+)
# @return the entry stored under that key, or nil when there is none
def get_attribute(element, name)
  # attributes keys don't include attribute namespaces
  element.attributes[name.to_s]
end

.get_attribute_value(element, name) ⇒ Object



238
239
240
241
# File 'lib/moxml/adapter/nokogiri.rb', line 238

# Reads an attribute value from +element+ using the element's +[]+
# accessor, with the name converted via +to_s+.
#
# @param element the element to inspect
# @param name the attribute name (converted with +to_s+)
# @return the result of +element[name.to_s]+
def get_attribute_value(element, name)
  # get the attribute value by its name including a namespace
  element[name.to_s]
end

.has_declaration?(native_doc, wrapper) ⇒ Boolean

Returns:

  • (Boolean)


421
422
423
424
425
426
427
# File 'lib/moxml/adapter/nokogiri.rb', line 421

# Reports whether a document should be treated as having an XML
# declaration.
#
# When +attachments+ holds an +:xml_decl+ entry for +native_doc+, the
# answer is whether that entry is non-nil (an explicit nil entry means
# "no declaration"). Otherwise the answer comes from
# +wrapper.has_xml_declaration+.
#
# @param native_doc the native document used as the attachment key
# @param wrapper object consulted when no attachment entry exists
# @return [Boolean] for the attachment case; otherwise whatever
#   +wrapper.has_xml_declaration+ returns
def has_declaration?(native_doc, wrapper)
  return wrapper.has_xml_declaration unless attachments.key?(native_doc, :xml_decl)

  !attachments.get(native_doc, :xml_decl).nil?
end

.in_scope_namespaces(element) ⇒ Object



347
348
349
# File 'lib/moxml/adapter/nokogiri.rb', line 347

# Returns the namespaces in scope for +element+ as reported by
# +element.namespace_scopes+.
#
# @param element the element to inspect
# @return the result of +element.namespace_scopes+
def in_scope_namespaces(element)
  element.namespace_scopes
end

.inner_text(node) ⇒ Object



300
301
302
303
304
305
# File 'lib/moxml/adapter/nokogiri.rb', line 300

# Concatenates the content of the direct children of +node+, skipping
# any child that reports itself as an element or a comment.
#
# @param node the node whose direct children are read
# @return [String] the joined content, or "" when nothing qualifies
def inner_text(node)
  kept = node.children.select do |child|
    !child.element? && !child.comment?
  end
  kept.map { |child| child.content }.join
end

.namespace(element) ⇒ Object



151
152
153
# File 'lib/moxml/adapter/nokogiri.rb', line 151

# Returns the namespace of +element+ via +element.namespace+.
#
# @param element the element to inspect
# @return the result of +element.namespace+
def namespace(element)
  element.namespace
end

.namespace_definitions(node) ⇒ Object



343
344
345
# File 'lib/moxml/adapter/nokogiri.rb', line 343

# Returns the namespace definitions of +node+ via
# +node.namespace_definitions+.
#
# @param node the node to inspect
# @return the result of +node.namespace_definitions+
def namespace_definitions(node)
  node.namespace_definitions
end

.namespace_prefix(namespace) ⇒ Object



335
336
337
# File 'lib/moxml/adapter/nokogiri.rb', line 335

# Returns the prefix of a namespace via +namespace.prefix+.
#
# @param namespace the namespace object
# @return the result of +namespace.prefix+
def namespace_prefix(namespace)
  namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



339
340
341
# File 'lib/moxml/adapter/nokogiri.rb', line 339

# Returns the URI of a namespace, which the native object exposes as
# +href+.
#
# @param namespace the namespace object
# @return the result of +namespace.href+
def namespace_uri(namespace)
  namespace.href
end

.next_sibling(node) ⇒ Object



205
206
207
# File 'lib/moxml/adapter/nokogiri.rb', line 205

# Returns the sibling that follows +node+ via +node.next_sibling+.
#
# @param node the node to inspect
# @return the result of +node.next_sibling+
def next_sibling(node)
  node.next_sibling
end

.node_name(node) ⇒ Object



179
180
181
# File 'lib/moxml/adapter/nokogiri.rb', line 179

# Returns the name of +node+ via +node.name+.
#
# @param node the node to inspect
# @return the result of +node.name+
def node_name(node)
  node.name
end

.node_type(node) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/moxml/adapter/nokogiri.rb', line 163

# Maps a native Nokogiri object to a symbolic node type.
#
# Classes are tested in a fixed order and the first match wins; CDATA is
# listed ahead of Text so that it is reported as +:cdata+. Documents and
# document fragments both map to +:document+.
#
# @param node the object to classify
# @return [Symbol] one of +:element+, +:cdata+, +:text+, +:comment+,
#   +:attribute+, +:namespace+, +:processing_instruction+, +:document+,
#   +:doctype+, +:entity_reference+, or +:unknown+ when nothing matches
def node_type(node)
  xml = ::Nokogiri::XML
  kinds = [
    [xml::Element, :element],
    [xml::CDATA, :cdata],
    [xml::Text, :text],
    [xml::Comment, :comment],
    [xml::Attr, :attribute],
    [xml::Namespace, :namespace],
    [xml::ProcessingInstruction, :processing_instruction],
    [xml::Document, :document],
    [xml::DocumentFragment, :document],
    [xml::DTD, :doctype],
    [xml::EntityReference, :entity_reference],
  ]

  _klass, kind = kinds.find { |klass, _| klass === node }
  kind || :unknown
end

.parent(node) ⇒ Object



201
202
203
# File 'lib/moxml/adapter/nokogiri.rb', line 201

# Returns the parent of +node+ via +node.parent+.
#
# @param node the node to inspect
# @return the result of +node.parent+
def parent(node)
  node.parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/moxml/adapter/nokogiri.rb', line 21

# Parses +xml+ with Nokogiri and wraps the result in a Document.
#
# The input first goes through +preprocess_entities+. Parsing always
# enables the +strict+ and +nonet+ parse options and additionally
# enables +recover+ unless +options[:strict]+ is set. With
# +options[:fragment]+ the input is parsed as a document fragment;
# otherwise it is parsed as a full document with the encoding argument
# fixed to "UTF-8".
#
# @param xml the XML source
# @param options [Hash] recognised keys: +:fragment+, +:strict+
# @param _context optional context; a new +Context.new(:nokogiri)+ is
#   used when none is supplied
# @return the Document wrapping the native parse result
# @raise [Moxml::ParseError] when Nokogiri raises a syntax error; the
#   message, line and column are carried over
def parse(xml, options = {}, _context = nil)
  processed_xml = preprocess_entities(xml)

  # Shared parser configuration for both the fragment and document paths.
  configure = proc do |config|
    config.strict.nonet
    config.recover unless options[:strict]
  end

  native_doc =
    begin
      if options[:fragment]
        ::Nokogiri::XML::DocumentFragment.parse(processed_xml, &configure)
      else
        # preprocess_entities always returns UTF-8, so tell Nokogiri to
        # parse as UTF-8 regardless of any original encoding option.
        ::Nokogiri::XML(processed_xml, nil, "UTF-8", &configure)
      end
    rescue ::Nokogiri::XML::SyntaxError => e
      raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
    end

  # Use provided context if available, otherwise create new one
  Document.new(native_doc, _context || Context.new(:nokogiri))
end

.previous_sibling(node) ⇒ Object



209
210
211
# File 'lib/moxml/adapter/nokogiri.rb', line 209

# Returns the sibling that precedes +node+ via +node.previous_sibling+.
#
# @param node the node to inspect
# @return the result of +node.previous_sibling+
def previous_sibling(node)
  node.previous_sibling
end

.processing_instruction_content(node) ⇒ Object



327
328
329
# File 'lib/moxml/adapter/nokogiri.rb', line 327

# Reads the content of a processing instruction via +node.content+.
#
# @param node the processing instruction node
# @return the result of +node.content+
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



155
156
157
# File 'lib/moxml/adapter/nokogiri.rb', line 155

# Returns the target of a processing instruction, which the native node
# exposes as +name+.
#
# @param node the processing instruction node
# @return the result of +node.name+
def processing_instruction_target(node)
  node.name
end

.remove(node) ⇒ Object



280
281
282
283
284
285
286
287
288
289
290
# File 'lib/moxml/adapter/nokogiri.rb', line 280

# Detaches +node+ using its own +remove+ method.
#
# When the node is an "xml" processing instruction whose parent is a
# Nokogiri document, the document's +:xml_decl+ attachment is first set
# to nil so the stored declaration is cleared along with the node.
#
# @param node the node to remove
# @return whatever +node.remove+ returns
def remove(node)
  document_declaration =
    node.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    node.name == "xml" &&
    node.parent.is_a?(::Nokogiri::XML::Document)

  # Clear document's xml_decl when removing declaration
  attachments.set(node.parent, :xml_decl, nil) if document_declaration

  node.remove
end

.remove_attribute(element, name) ⇒ Object



243
244
245
# File 'lib/moxml/adapter/nokogiri.rb', line 243

# Removes an attribute from +element+ by delegating to
# +element.remove_attribute+ with the name converted via +to_s+.
#
# @param element the element to modify
# @param name the attribute name (converted with +to_s+)
# @return whatever +element.remove_attribute+ returns
def remove_attribute(element, name)
  element.remove_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



292
293
294
# File 'lib/moxml/adapter/nokogiri.rb', line 292

# Replaces +node+ with +new_node+ by delegating to +node.replace+.
#
# @param node the node being replaced
# @param new_node the replacement passed to +node.replace+
# @return whatever +node.replace+ returns
def replace(node, new_node)
  node.replace(new_node)
end

.replace_children(node, new_children) ⇒ Object



196
197
198
199
# File 'lib/moxml/adapter/nokogiri.rb', line 196

# Swaps out all children of +node+: the current children are unlinked,
# then every entry of +new_children+ is attached through +add_child+ so
# that its special handling applies to each one.
#
# @param node the node whose children are replaced
# @param new_children the replacement children, added in order
# @return the result of iterating +new_children+ with +each+
def replace_children(node, new_children)
  node.children.unlink
  new_children.each do |replacement|
    add_child(node, replacement)
  end
end

.root(document) ⇒ Object



217
218
219
# File 'lib/moxml/adapter/nokogiri.rb', line 217

# Returns the root node of +document+.
#
# For a Nokogiri document this is +document.root+. For anything else
# (for example a fragment) it is the first entry of
# +document.children+.
#
# @param document the document or document-like container
# @return the root node, or the first child for non-documents
def root(document)
  return document.root if document.is_a?(::Nokogiri::XML::Document)

  document.children.first
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for Nokogiri

Parameters:



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/moxml/adapter/nokogiri.rb', line 53

# SAX parsing implementation for Nokogiri.
#
# Events from Nokogiri's SAX parser are routed to +handler+ through a
# NokogiriSAXBridge. A Nokogiri syntax error is not re-raised: it is
# converted to a Moxml::ParseError (message, line, column) and passed to
# +handler.on_error+.
#
# @param xml the XML source; any object responding to both +read+ and
#   +close+ is handed to the parser as a stream, everything else is
#   converted with +to_s+ and parsed as a string
# @param handler the Moxml SAX handler receiving events and errors
# @return [void]
def sax_parse(xml, handler)
  # Create bridge that translates Nokogiri SAX to Moxml SAX
  bridge = NokogiriSAXBridge.new(handler)

  # Create Nokogiri SAX parser
  parser = ::Nokogiri::XML::SAX::Parser.new(bridge)

  # Detect streams by capability rather than by class: IO-like objects
  # that are not IO/StringIO subclasses (e.g. Tempfile) would otherwise
  # be stringified and their inspect text parsed instead of their
  # contents. This also avoids referencing StringIO when that library
  # has not been loaded.
  io_like = xml.respond_to?(:read) && xml.respond_to?(:close)
  parser.parse(io_like ? xml : xml.to_s)
rescue ::Nokogiri::XML::SyntaxError => e
  error = Moxml::ParseError.new(e.message, line: e.line,
                                           column: e.column)
  handler.on_error(error)
end

.serialize(node, options = {}) ⇒ Object



386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/moxml/adapter/nokogiri.rb', line 386

# Serializes +node+ to an XML string via +node.to_xml+.
#
# Save flags start from AS_XML. NO_EMPTY_TAGS is added for
# +options[:expand_empty]+ and FORMAT when +options[:indent]+ converts
# to a positive integer.
#
# Declaration handling: with +options[:no_declaration]+ Nokogiri's own
# declaration is suppressed and nothing is substituted. Otherwise, when
# +attachments+ holds a truthy +:xml_decl+ entry for +node+, Nokogiri's
# declaration is suppressed and one built from that entry (version, then
# optional encoding and standalone) is prepended on its own line.
#
# @param node the native node or document to serialize
# @param options [Hash] recognised keys: +:expand_empty+, +:indent+,
#   +:encoding+, +:no_declaration+
# @return [String] the serialized XML
def serialize(node, options = {})
  flags = ::Nokogiri::XML::Node::SaveOptions
  save_options = flags::AS_XML

  # Don't force expand empty elements if they're really empty
  save_options |= flags::NO_EMPTY_TAGS if options[:expand_empty]
  save_options |= flags::FORMAT if options[:indent].to_i.positive?

  # The stored declaration is only consulted when the caller has not
  # asked for the declaration to be dropped entirely.
  stored_decl =
    if options[:no_declaration]
      nil
    elsif attachments.key?(node, :xml_decl)
      attachments.get(node, :xml_decl)
    end

  if options[:no_declaration] || stored_decl
    save_options |= flags::NO_DECLARATION
  end

  custom_decl = nil
  if stored_decl
    attrs = ["version=\"#{stored_decl[:version]}\""]
    attrs << "encoding=\"#{stored_decl[:encoding]}\"" if stored_decl[:encoding]
    attrs << "standalone=\"#{stored_decl[:standalone]}\"" if stored_decl[:standalone]
    custom_decl = "<?xml #{attrs.join(' ')}?>"
  end

  body = node.to_xml(
    indent: options[:indent],
    encoding: options[:encoding],
    save_with: save_options,
  )

  custom_decl ? "#{custom_decl}\n#{body}" : body
end

.set_attribute(element, name, value) ⇒ Object



229
230
231
# File 'lib/moxml/adapter/nokogiri.rb', line 229

# Sets an attribute on +element+ through its +[]=+ accessor. Both the
# name and the value are converted with +to_s+ first.
#
# @param element the element to modify
# @param name the attribute name (converted with +to_s+)
# @param value the attribute value (converted with +to_s+)
# @return the assigned string value
def set_attribute(element, name, value)
  element[name.to_s] = value.to_s
end

.set_cdata_content(node, content) ⇒ Object



315
316
317
# File 'lib/moxml/adapter/nokogiri.rb', line 315

# Replaces the content of a CDATA node using the node's +content=+
# writer.
#
# @param node the CDATA node
# @param content the new content
# @return the assigned +content+
def set_cdata_content(node, content)
  node.content = content
end

.set_comment_content(node, content) ⇒ Object



323
324
325
# File 'lib/moxml/adapter/nokogiri.rb', line 323

# Replaces the content of a comment node using the node's
# +native_content=+ writer.
#
# @param node the comment node
# @param content the new content
# @return the assigned +content+
def set_comment_content(node, content)
  node.native_content = content
end

.set_declaration_attribute(declaration, attr_name, value) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
# File 'lib/moxml/adapter/nokogiri.rb', line 135

# Sets or removes one pseudo-attribute of a declaration node.
#
# The current attributes come from +current_declaration_attributes+. A
# nil +value+ deletes +attr_name+; any other value is stored under it.
# The declaration's +native_content+ is then rewritten as
# space-separated name="value" pairs.
#
# @param declaration the declaration node to update
# @param attr_name the attribute name to set or delete
# @param value the new value, or nil to delete the attribute
# @return [String] the text assigned to +native_content+
def set_declaration_attribute(declaration, attr_name, value)
  attrs = current_declaration_attributes(declaration)
  value.nil? ? attrs.delete(attr_name) : (attrs[attr_name] = value)

  serialized = attrs.map { |key, val| %(#{key}="#{val}") }.join(" ")
  declaration.native_content = serialized
end

.set_namespace(element, ns) ⇒ Object



147
148
149
# File 'lib/moxml/adapter/nokogiri.rb', line 147

# Assigns +ns+ as the namespace of +element+ using the element's
# +namespace=+ writer.
#
# @param element the element to modify
# @param ns the namespace to assign
# @return the assigned +ns+
def set_namespace(element, ns)
  element.namespace = ns
end

.set_node_name(node, name) ⇒ Object



183
184
185
# File 'lib/moxml/adapter/nokogiri.rb', line 183

# Renames +node+ using the node's +name=+ writer.
#
# @param node the node to rename
# @param name the new name
# @return the assigned +name+
def set_node_name(node, name)
  node.name = name
end

.set_processing_instruction_content(node, content) ⇒ Object



331
332
333
# File 'lib/moxml/adapter/nokogiri.rb', line 331

# Replaces the content of a processing instruction using the node's
# +native_content=+ writer.
#
# @param node the processing instruction node
# @param content the new content
# @return the assigned +content+
def set_processing_instruction_content(node, content)
  node.native_content = content
end

.set_root(doc, element) ⇒ Object



17
18
19
# File 'lib/moxml/adapter/nokogiri.rb', line 17

# Makes +element+ the root of +doc+ using the document's +root=+ writer.
#
# @param doc the document to modify
# @param element the new root
# @return the assigned +element+
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



307
308
309
# File 'lib/moxml/adapter/nokogiri.rb', line 307

# Replaces the content of a text node using the node's
# +native_content=+ writer.
#
# @param node the text node
# @param content the new content
# @return the assigned +content+
def set_text_content(node, content)
  node.native_content = content
end

.text_content(node) ⇒ Object



296
297
298
# File 'lib/moxml/adapter/nokogiri.rb', line 296

# Returns the text of +node+: the result of +node.text+ converted with
# +to_s+, so a nil text becomes an empty string.
#
# @param node the node to read
# @return [String] the node's text
def text_content(node)
  node.text.to_s
end

.xpath(node, expression, namespaces = nil) ⇒ Object



364
365
366
367
368
369
370
371
372
373
# File 'lib/moxml/adapter/nokogiri.rb', line 364

# Evaluates +expression+ through the node's own +xpath+ and returns the
# result converted with +to_a+.
#
# @param node the node the query is run against
# @param expression the XPath expression
# @param namespaces optional namespace bindings forwarded as-is
# @return [Array] the query result as an array
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax
#   error; the original message, the expression and the node are
#   attached to the new error
def xpath(node, expression, namespaces = nil)
  node.xpath(expression, namespaces).to_a
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end