Class: Moxml::Adapter::Nokogiri

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/nokogiri.rb

Defined Under Namespace

Classes: NokogiriSAXBridge

Constant Summary

Constants inherited from Base

Base::ENTITY_MARKER, Base::ENTITY_MARKER_RE, Base::ENTITY_NAME_PATTERN, Base::ENTITY_NAME_RE, Base::SERIALIZED_ENTITY_MARKER_RE, Base::STANDARD_ENTITIES

Class Method Summary collapse

Methods inherited from Base

actual_native, create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, duplicate_node, patch_node, prepare_for_new_document, preprocess_entities, restore_entities, sax_supported?, set_attribute_name, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/moxml/adapter/nokogiri.rb', line 245

# Attaches +child+ to +element+, with special cases for XML declarations
# and doctypes.
#
# * An "xml" processing instruction added to a Nokogiri document is not
#   inserted into the tree; its version/encoding/standalone values are
#   stored in +attachments+ under +:xml_decl+ and nil is returned.
# * A child whose +node_type+ is +:doctype+ is recreated on +element+ via
#   +create_internal_subset+.
# * Any other child is passed to the native +add_child+.
#
# @param element [Object] native parent node or document
# @param child [Object] native node to attach
# @return [Object, nil] nil for declarations, otherwise the native call's result
def add_child(element, child)
  declaration_on_document =
    element.is_a?(::Nokogiri::XML::Document) &&
    child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    child.name == "xml"

  if declaration_on_document
    decl = {
      # The version falls back to "1.0" when the declaration omits it.
      version: declaration_attribute(child, "version") || "1.0",
      encoding: declaration_attribute(child, "encoding"),
      standalone: declaration_attribute(child, "standalone"),
    }
    # nil entries are dropped so only attributes that were present are kept.
    attachments.set(element, :xml_decl, decl.compact)
    return
  end

  return element.add_child(child) unless node_type(child) == :doctype

  element.create_internal_subset(
    child.name, child.external_id, child.system_id
  )
end

.add_next_sibling(node, sibling) ⇒ Object



274
275
276
# File 'lib/moxml/adapter/nokogiri.rb', line 274

# Inserts +sibling+ after +node+ by delegating to the native
# +add_next_sibling+.
#
# @param node [Object] native reference node
# @param sibling [Object] native node to insert
# @return [Object] whatever the native call returns
def add_next_sibling(node, sibling)
  node.add_next_sibling(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



270
271
272
# File 'lib/moxml/adapter/nokogiri.rb', line 270

# Inserts +sibling+ before +node+ by delegating to the native
# +add_previous_sibling+.
#
# @param node [Object] native reference node
# @param sibling [Object] native node to insert
# @return [Object] whatever the native call returns
def add_previous_sibling(node, sibling)
  node.add_previous_sibling(sibling)
end

.adjacent_to_entity_reference?(node) ⇒ Boolean

Returns:

  • (Boolean)


189
190
191
192
# File 'lib/moxml/adapter/nokogiri.rb', line 189

# Tells whether either immediate sibling of +node+ is a Nokogiri entity
# reference. The previous sibling is checked first; the next sibling is only
# inspected when the previous one does not match.
#
# @param node [Object] native node responding to +previous_sibling+ and +next_sibling+
# @return [Boolean]
def adjacent_to_entity_reference?(node)
  %i[previous_sibling next_sibling].any? do |direction|
    node.public_send(direction).is_a?(::Nokogiri::XML::EntityReference)
  end
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



373
374
375
376
377
378
379
380
381
382
# File 'lib/moxml/adapter/nokogiri.rb', line 373

# Evaluates +expression+ against +node+ with the native +at_xpath+ and
# returns its result.
#
# @param node [Object] native node to query
# @param expression [String] XPath expression
# @param namespaces [Object, nil] namespace bindings forwarded unchanged
# @return [Object, nil] whatever the native +at_xpath+ returns
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces)
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  # Re-raise as the library's own error type, carrying the query context.
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end

.attachments ⇒ Object



11
12
13
# File 'lib/moxml/adapter/nokogiri.rb', line 11

# Returns the adapter's Moxml::NativeAttachment store, creating it on first
# use and reusing the same instance afterwards.
#
# @return [Moxml::NativeAttachment]
def attachments
  @attachments ||= Moxml::NativeAttachment.new
end

.attribute_element(attr) ⇒ Object



219
220
221
# File 'lib/moxml/adapter/nokogiri.rb', line 219

# Returns the parent of +attr+, as reported by its +parent+ method.
#
# @param attr [Object] native attribute node
# @return [Object, nil] whatever +attr.parent+ returns
def attribute_element(attr)
  attr.parent
end

.attributes(element) ⇒ Object



223
224
225
# File 'lib/moxml/adapter/nokogiri.rb', line 223

# Returns the values of +element.attributes+, discarding the keys.
#
# @param element [Object] native element whose +attributes+ responds to +values+
# @return [Array] the attribute objects
def attributes(element)
  element.attributes.values
end

.cdata_content(node) ⇒ Object



309
310
311
# File 'lib/moxml/adapter/nokogiri.rb', line 309

# Returns the content of a CDATA node via its +content+ method.
#
# @param node [Object] native CDATA node
# @return [Object] whatever +node.content+ returns (presumably a String)
def cdata_content(node)
  node.content
end

.children(node) ⇒ Object



185
186
187
# File 'lib/moxml/adapter/nokogiri.rb', line 185

# Returns the children of +node+ via its +children+ method.
#
# @param node [Object] native node
# @return [Object] whatever +node.children+ returns
def children(node)
  node.children
end

.comment_content(node) ⇒ Object



317
318
319
# File 'lib/moxml/adapter/nokogiri.rb', line 317

# Returns the content of a comment node via its +content+ method.
#
# @param node [Object] native comment node
# @return [Object] whatever +node.content+ returns (presumably a String)
def comment_content(node)
  node.content
end

.create_document(_native_doc = nil) ⇒ Object



70
71
72
# File 'lib/moxml/adapter/nokogiri.rb', line 70

# Builds a new, empty Nokogiri XML document.
#
# @param _native_doc [Object, nil] accepted but not used by this method
# @return [::Nokogiri::XML::Document]
def create_document(_native_doc = nil)
  ::Nokogiri::XML::Document.new
end

.create_fragment ⇒ Object



74
75
76
77
78
79
80
# File 'lib/moxml/adapter/nokogiri.rb', line 74

# Builds an empty document fragment backed by a brand-new document.
#
# Document fragments are weird and should be used with caution:
# https://github.com/sparklemotion/nokogiri/issues/572
#
# @return [::Nokogiri::XML::DocumentFragment]
def create_fragment
  owner = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::DocumentFragment.new(owner)
end

.create_native_cdata(content, owner_doc = nil) ⇒ Object



90
91
92
# File 'lib/moxml/adapter/nokogiri.rb', line 90

# Builds a native CDATA node holding +content+.
#
# @param content [Object] CDATA content handed to the Nokogiri constructor
# @param owner_doc [Object, nil] owning document; a new one is created when nil
# @return [::Nokogiri::XML::CDATA]
def create_native_cdata(content, owner_doc = nil)
  doc = owner_doc || create_document
  ::Nokogiri::XML::CDATA.new(doc, content)
end

.create_native_comment(content, owner_doc = nil) ⇒ Object



94
95
96
# File 'lib/moxml/adapter/nokogiri.rb', line 94

# Builds a native comment node holding +content+.
#
# @param content [Object] comment text handed to the Nokogiri constructor
# @param owner_doc [Object, nil] owning document; a new one is created when nil
# @return [::Nokogiri::XML::Comment]
def create_native_comment(content, owner_doc = nil)
  doc = owner_doc || create_document
  ::Nokogiri::XML::Comment.new(doc, content)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



110
111
112
113
114
115
116
# File 'lib/moxml/adapter/nokogiri.rb', line 110

# Builds the XML declaration as an "xml" processing instruction on a new
# document. Its content comes from +build_declaration_attrs+.
#
# @param version [Object] forwarded to +build_declaration_attrs+
# @param encoding [Object] forwarded to +build_declaration_attrs+
# @param standalone [Object] forwarded to +build_declaration_attrs+
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_declaration(version, encoding, standalone)
  owner = create_document
  attrs = build_declaration_attrs(version, encoding, standalone)
  ::Nokogiri::XML::ProcessingInstruction.new(owner, "xml", attrs)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



98
99
100
101
102
# File 'lib/moxml/adapter/nokogiri.rb', line 98

# Creates a doctype by calling +create_internal_subset+ on a new document.
#
# @param name [Object] doctype name
# @param external_id [Object] external (public) identifier
# @param system_id [Object] system identifier
# @return [Object] whatever +create_internal_subset+ returns
def create_native_doctype(name, external_id, system_id)
  host = create_document
  host.create_internal_subset(name, external_id, system_id)
end

.create_native_element(name, owner_doc = nil) ⇒ Object



82
83
84
# File 'lib/moxml/adapter/nokogiri.rb', line 82

# Builds a native element named +name+.
#
# @param name [Object] element name handed to the Nokogiri constructor
# @param owner_doc [Object, nil] owning document; a new one is created when nil
# @return [::Nokogiri::XML::Element]
def create_native_element(name, owner_doc = nil)
  doc = owner_doc || create_document
  ::Nokogiri::XML::Element.new(name, doc)
end

.create_native_entity_reference(name) ⇒ Object



118
119
120
# File 'lib/moxml/adapter/nokogiri.rb', line 118

# Builds a native entity reference node named +name+ on a new document.
#
# @param name [Object] entity name handed to the Nokogiri constructor
# @return [::Nokogiri::XML::EntityReference]
def create_native_entity_reference(name)
  owner = create_document
  ::Nokogiri::XML::EntityReference.new(owner, name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



157
158
159
# File 'lib/moxml/adapter/nokogiri.rb', line 157

# Declares a namespace on +element+ by delegating to the native
# +add_namespace_definition+.
#
# @param element [Object] native element receiving the declaration
# @param prefix [Object] namespace prefix, forwarded unchanged
# @param uri [Object] namespace URI, forwarded unchanged
# @return [Object] whatever +add_namespace_definition+ returns
def create_native_namespace(element, prefix, uri)
  element.add_namespace_definition(prefix, uri)
end

.create_native_processing_instruction(target, content) ⇒ Object



104
105
106
107
108
# File 'lib/moxml/adapter/nokogiri.rb', line 104

# Builds a native processing instruction on a new document.
#
# @param target [Object] processing instruction target
# @param content [Object] processing instruction content
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  owner = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::ProcessingInstruction.new(owner, target, content)
end

.create_native_text(content, owner_doc = nil) ⇒ Object



86
87
88
# File 'lib/moxml/adapter/nokogiri.rb', line 86

# Builds a native text node holding +content+.
#
# @param content [Object] text handed to the Nokogiri constructor
# @param owner_doc [Object, nil] owning document; a new one is created when nil
# @return [::Nokogiri::XML::Text]
def create_native_text(content, owner_doc = nil)
  doc = owner_doc || create_document
  ::Nokogiri::XML::Text.new(content, doc)
end

.declaration_attribute(declaration, attr_name) ⇒ Object



126
127
128
129
130
131
# File 'lib/moxml/adapter/nokogiri.rb', line 126

# Extracts one pseudo-attribute (e.g. "version", "encoding", "standalone")
# from the textual content of an XML declaration.
#
# The value may be wrapped in double or single quotes and whitespace is
# allowed around "=", as the XML declaration grammar permits. The name is
# matched literally and only as a whole name, so looking up "version" does
# not match inside "xversion".
#
# @param declaration [Object] native declaration responding to +content+
# @param attr_name [String, Symbol] name of the pseudo-attribute
# @return [String, nil] the unquoted value, or nil when the declaration has
#   no content or does not carry the attribute
def declaration_attribute(declaration, attr_name)
  content = declaration.content
  return nil unless content

  # The lookbehind rejects matches that start in the middle of a longer name;
  # the backreference requires the closing quote to match the opening one.
  name = Regexp.escape(attr_name.to_s)
  match = content.match(/(?<![\w.:-])#{name}\s*=\s*(["'])(.*?)\1/m)
  match && match[2]
end

.doctype_external_id(native) ⇒ Object



354
355
356
# File 'lib/moxml/adapter/nokogiri.rb', line 354

# Returns the external identifier of a doctype via its +external_id+ method.
#
# @param native [Object] native doctype node
# @return [Object, nil] whatever +native.external_id+ returns
def doctype_external_id(native)
  native.external_id
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



350
351
352
# File 'lib/moxml/adapter/nokogiri.rb', line 350

# Returns the name of a doctype via its +name+ method.
#
# @param native [Object] native doctype node
# @return [Object] whatever +native.name+ returns
def doctype_name(native)
  native.name
end

.doctype_system_id(native) ⇒ Object



358
359
360
# File 'lib/moxml/adapter/nokogiri.rb', line 358

# Returns the system identifier of a doctype via its +system_id+ method.
#
# @param native [Object] native doctype node
# @return [Object, nil] whatever +native.system_id+ returns
def doctype_system_id(native)
  native.system_id
end

.document(node) ⇒ Object



211
212
213
# File 'lib/moxml/adapter/nokogiri.rb', line 211

# Returns the document that +node+ reports through its +document+ method.
#
# @param node [Object] native node
# @return [Object] whatever +node.document+ returns
def document(node)
  node.document
end

.entity_reference_name(node) ⇒ Object



122
123
124
# File 'lib/moxml/adapter/nokogiri.rb', line 122

# Returns the name of an entity reference node via its +name+ method.
#
# @param node [Object] native entity reference node
# @return [Object] whatever +node.name+ returns
def entity_reference_name(node)
  node.name
end

.get_attribute(element, name) ⇒ Object



231
232
233
234
# File 'lib/moxml/adapter/nokogiri.rb', line 231

# Looks up an attribute object on +element+ by name. The name is converted
# with +to_s+ before the lookup.
#
# @param element [Object] native element
# @param name [#to_s] attribute name
# @return [Object, nil] the entry of +element.attributes+ for that name
def get_attribute(element, name)
  # attributes keys don't include attribute namespaces
  element.attributes[name.to_s]
end

.get_attribute_value(element, name) ⇒ Object



236
237
238
239
# File 'lib/moxml/adapter/nokogiri.rb', line 236

# Reads an attribute value from +element+ through its +[]+ accessor. The
# name is converted with +to_s+ first.
#
# @param element [Object] native element
# @param name [#to_s] attribute name
# @return [Object, nil] whatever +element[name]+ returns
def get_attribute_value(element, name)
  # get the attribute value by its name including a namespace
  element[name.to_s]
end

.has_declaration?(native_doc, wrapper) ⇒ Boolean

Returns:

  • (Boolean)


419
420
421
422
423
424
425
# File 'lib/moxml/adapter/nokogiri.rb', line 419

# Tells whether +native_doc+ should be treated as having an XML declaration.
#
# When an +:xml_decl+ entry has been recorded in +attachments+ for the
# document, the answer is whether that entry is non-nil (a nil entry marks a
# removed declaration). Otherwise the wrapper's +has_xml_declaration+ value
# is returned as-is.
#
# @param native_doc [Object] native document used as the attachment key
# @param wrapper [Object] object responding to +has_xml_declaration+
# @return [Boolean, Object] true/false, or the wrapper's own value
def has_declaration?(native_doc, wrapper)
  return wrapper.has_xml_declaration unless attachments.key?(native_doc, :xml_decl)

  !attachments.get(native_doc, :xml_decl).nil?
end

.in_scope_namespaces(element) ⇒ Object



345
346
347
# File 'lib/moxml/adapter/nokogiri.rb', line 345

# Returns the namespaces reported by the native +namespace_scopes+ method.
#
# @param element [Object] native element
# @return [Object] whatever +element.namespace_scopes+ returns
def in_scope_namespaces(element)
  element.namespace_scopes
end

.inner_text(node) ⇒ Object



298
299
300
301
302
303
# File 'lib/moxml/adapter/nokogiri.rb', line 298

# Concatenates the content of the direct children of +node+, skipping
# children that are elements or comments. Descendants are not visited.
#
# @param node [Object] native node responding to +children+
# @return [String] the joined content ("" when nothing qualifies)
def inner_text(node)
  pieces = []
  node.children.each do |child|
    next if child.element? || child.comment?

    pieces << child.content
  end
  pieces.join
end

.namespace(element) ⇒ Object



149
150
151
# File 'lib/moxml/adapter/nokogiri.rb', line 149

# Returns the namespace of +element+ via its +namespace+ method.
#
# @param element [Object] native element
# @return [Object, nil] whatever +element.namespace+ returns
def namespace(element)
  element.namespace
end

.namespace_definitions(node) ⇒ Object



341
342
343
# File 'lib/moxml/adapter/nokogiri.rb', line 341

# Returns the namespace definitions reported by the native
# +namespace_definitions+ method.
#
# @param node [Object] native node
# @return [Object] whatever +node.namespace_definitions+ returns
def namespace_definitions(node)
  node.namespace_definitions
end

.namespace_prefix(namespace) ⇒ Object



333
334
335
# File 'lib/moxml/adapter/nokogiri.rb', line 333

# Returns the prefix of a native namespace via its +prefix+ method.
#
# @param namespace [Object] native namespace
# @return [Object, nil] whatever +namespace.prefix+ returns
def namespace_prefix(namespace)
  namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



337
338
339
# File 'lib/moxml/adapter/nokogiri.rb', line 337

# Returns the URI of a native namespace, read from its +href+ method.
#
# @param namespace [Object] native namespace
# @return [Object] whatever +namespace.href+ returns
def namespace_uri(namespace)
  namespace.href
end

.next_sibling(node) ⇒ Object



203
204
205
# File 'lib/moxml/adapter/nokogiri.rb', line 203

# Returns the sibling following +node+ via its +next_sibling+ method.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.next_sibling+ returns
def next_sibling(node)
  node.next_sibling
end

.node_name(node) ⇒ Object



177
178
179
# File 'lib/moxml/adapter/nokogiri.rb', line 177

# Returns the name of +node+ via its +name+ method.
#
# @param node [Object] native node
# @return [Object] whatever +node.name+ returns
def node_name(node)
  node.name
end

.node_type(node) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/moxml/adapter/nokogiri.rb', line 161

# Maps a native Nokogiri object to a type symbol.
#
# The table is scanned top to bottom and the first matching class wins, so
# the position of each entry is significant (CDATA is tested before Text).
# Documents and document fragments both map to +:document+.
#
# @param node [Object] any object
# @return [Symbol] one of :element, :cdata, :text, :comment, :attribute,
#   :namespace, :processing_instruction, :document, :doctype,
#   :entity_reference, or :unknown when no class matches
def node_type(node)
  type_table = [
    [::Nokogiri::XML::Element, :element],
    [::Nokogiri::XML::CDATA, :cdata],
    [::Nokogiri::XML::Text, :text],
    [::Nokogiri::XML::Comment, :comment],
    [::Nokogiri::XML::Attr, :attribute],
    [::Nokogiri::XML::Namespace, :namespace],
    [::Nokogiri::XML::ProcessingInstruction, :processing_instruction],
    [::Nokogiri::XML::Document, :document],
    [::Nokogiri::XML::DocumentFragment, :document],
    [::Nokogiri::XML::DTD, :doctype],
    [::Nokogiri::XML::EntityReference, :entity_reference],
  ]

  type_table.each do |klass, type|
    # Same membership test that `case`/`when` performs.
    return type if klass === node
  end

  :unknown
end

.parent(node) ⇒ Object



199
200
201
# File 'lib/moxml/adapter/nokogiri.rb', line 199

# Returns the parent of +node+ via its +parent+ method.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.parent+ returns
def parent(node)
  node.parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/moxml/adapter/nokogiri.rb', line 19

# Parses +xml+ with Nokogiri and wraps the result in a Document.
#
# The input first goes through +preprocess_entities+. With
# +options[:fragment]+ it is parsed as a document fragment, otherwise as a
# full document with the encoding forced to "UTF-8". The parser always runs
# with +strict.nonet+, and +recover+ is added unless +options[:strict]+ is
# set.
#
# @param xml [Object] XML source handed to +preprocess_entities+
# @param options [Hash] recognised keys: +:fragment+, +:strict+
# @param _context [Object, nil] context to reuse; a new +Context+ for
#   +:nokogiri+ is created when nil
# @return [Document]
# @raise [Moxml::ParseError] when Nokogiri raises a syntax error
def parse(xml, options = {}, _context = nil)
  processed_xml = preprocess_entities(xml)

  # One parser configuration shared by both parsing branches.
  configure = proc do |config|
    config.strict.nonet
    config.recover unless options[:strict]
  end

  # preprocess_entities always returns UTF-8, so tell Nokogiri to
  # parse as UTF-8 regardless of any original encoding option.
  native_doc =
    begin
      if options[:fragment]
        ::Nokogiri::XML::DocumentFragment.parse(processed_xml, &configure)
      else
        ::Nokogiri::XML(processed_xml, nil, "UTF-8", &configure)
      end
    rescue ::Nokogiri::XML::SyntaxError => e
      raise Moxml::ParseError.new(e.message, line: e.line,
                                             column: e.column)
    end

  # Use provided context if available, otherwise create new one
  Document.new(native_doc, _context || Context.new(:nokogiri))
end

.previous_sibling(node) ⇒ Object



207
208
209
# File 'lib/moxml/adapter/nokogiri.rb', line 207

# Returns the sibling preceding +node+ via its +previous_sibling+ method.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.previous_sibling+ returns
def previous_sibling(node)
  node.previous_sibling
end

.processing_instruction_content(node) ⇒ Object



325
326
327
# File 'lib/moxml/adapter/nokogiri.rb', line 325

# Returns the content of a processing instruction via its +content+ method.
#
# @param node [Object] native processing instruction node
# @return [Object] whatever +node.content+ returns
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



153
154
155
# File 'lib/moxml/adapter/nokogiri.rb', line 153

# Returns the target of a processing instruction, read from its +name+
# method.
#
# @param node [Object] native processing instruction node
# @return [Object] whatever +node.name+ returns
def processing_instruction_target(node)
  node.name
end

.remove(node) ⇒ Object



278
279
280
281
282
283
284
285
286
287
288
# File 'lib/moxml/adapter/nokogiri.rb', line 278

# Detaches +node+ through its native +remove+ method.
#
# When +node+ is an "xml" processing instruction whose parent is a Nokogiri
# document, the document's +:xml_decl+ attachment is set to nil first, so the
# removed declaration is no longer recorded for that document.
#
# @param node [Object] native node to remove
# @return [Object] whatever +node.remove+ returns
def remove(node)
  xml_declaration =
    node.is_a?(::Nokogiri::XML::ProcessingInstruction) && node.name == "xml"

  # Special handling for declarations on Nokogiri documents
  if xml_declaration && node.parent.is_a?(::Nokogiri::XML::Document)
    attachments.set(node.parent, :xml_decl, nil)
  end

  node.remove
end

.remove_attribute(element, name) ⇒ Object



241
242
243
# File 'lib/moxml/adapter/nokogiri.rb', line 241

# Removes an attribute from +element+ by delegating to the native
# +remove_attribute+. The name is converted with +to_s+ first.
#
# @param element [Object] native element
# @param name [#to_s] attribute name
# @return [Object] whatever the native call returns
def remove_attribute(element, name)
  element.remove_attribute(name.to_s)
end

.remove_declaration(native_doc) ⇒ Object



427
428
429
# File 'lib/moxml/adapter/nokogiri.rb', line 427

# Sets the +:xml_decl+ attachment of +native_doc+ to nil. The key remains
# present with a nil value.
#
# @param native_doc [Object] native document used as the attachment key
# @return [Object] whatever +attachments.set+ returns
def remove_declaration(native_doc)
  attachments.set(native_doc, :xml_decl, nil)
end

.replace(node, new_node) ⇒ Object



290
291
292
# File 'lib/moxml/adapter/nokogiri.rb', line 290

# Replaces +node+ with +new_node+ by delegating to the native +replace+.
#
# @param node [Object] native node to be replaced
# @param new_node [Object] native replacement node
# @return [Object] whatever the native call returns
def replace(node, new_node)
  node.replace(new_node)
end

.replace_children(node, new_children) ⇒ Object



194
195
196
197
# File 'lib/moxml/adapter/nokogiri.rb', line 194

# Replaces all children of +node+: the current children are unlinked, then
# each entry of +new_children+ is attached with +add_child+, in order.
#
# @param node [Object] native parent node
# @param new_children [#each] native nodes to attach
# @return [Object] +new_children+, as returned by its +each+
def replace_children(node, new_children)
  existing = node.children
  existing.unlink

  new_children.each do |child|
    add_child(node, child)
  end
end

.root(document) ⇒ Object



215
216
217
# File 'lib/moxml/adapter/nokogiri.rb', line 215

# Returns the root node of +document+.
#
# A Nokogiri document reports its own +root+; for anything else (such as a
# fragment) the first entry of +children+ is used.
#
# @param document [Object] native document or other node container
# @return [Object, nil] the root node, or nil when there is none
def root(document)
  if document.is_a?(::Nokogiri::XML::Document)
    document.root
  else
    document.children.first
  end
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for Nokogiri

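Parameters:

  • xml — the XML input; IO and StringIO objects are passed to the parser as-is, anything else is converted with to_s
  • handler — receives the SAX events through NokogiriSAXBridge; its on_error is called with a Moxml::ParseError when Nokogiri raises a syntax error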



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/moxml/adapter/nokogiri.rb', line 51

# SAX-parses +xml+, forwarding events to +handler+ through a
# NokogiriSAXBridge that translates Nokogiri SAX callbacks to Moxml ones.
#
# Stream-like inputs (anything responding to both +read+ and +close+, e.g.
# IO, StringIO or Tempfile) are handed to the parser unchanged; every other
# input is converted with +to_s+ and parsed as a string.
#
# Nokogiri syntax errors are not raised to the caller: they are wrapped in a
# Moxml::ParseError and reported through +handler.on_error+.
#
# @param xml [String, IO, #read] XML source
# @param handler [Object] Moxml SAX handler; must respond to +on_error+
# @return [void]
def sax_parse(xml, handler)
  # Create bridge that translates Nokogiri SAX to Moxml SAX
  bridge = NokogiriSAXBridge.new(handler)
  parser = ::Nokogiri::XML::SAX::Parser.new(bridge)

  # Duck-type the stream check instead of testing for IO/StringIO, so other
  # readable streams are not stringified with to_s (which would parse their
  # inspect text) and no hard reference to StringIO is needed.
  streamable = xml.respond_to?(:read) && xml.respond_to?(:close)
  parser.parse(streamable ? xml : xml.to_s)
rescue ::Nokogiri::XML::SyntaxError => e
  error = Moxml::ParseError.new(e.message, line: e.line,
                                           column: e.column)
  handler.on_error(error)
end

.serialize(node, options = {}) ⇒ Object



384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/moxml/adapter/nokogiri.rb', line 384

# Serializes +node+ to an XML string via the native +to_xml+.
#
# Save flags start from AS_XML and are extended as follows:
# * +options[:expand_empty]+ adds NO_EMPTY_TAGS;
# * a positive +options[:indent]+ adds FORMAT;
# * +options[:no_declaration]+ adds NO_DECLARATION;
# * otherwise, when a non-nil +:xml_decl+ attachment exists for +node+,
#   NO_DECLARATION is added and a declaration built from the attachment is
#   prepended to the output, followed by a newline.
#
# @param node [Object] native node or document
# @param options [Hash] recognised keys: +:expand_empty+, +:indent+,
#   +:encoding+, +:no_declaration+
# @return [String] the serialized XML
def serialize(node, options = {})
  flags = ::Nokogiri::XML::Node::SaveOptions
  save_options = flags::AS_XML

  # Don't force expand empty elements if they're really empty
  save_options |= flags::NO_EMPTY_TAGS if options[:expand_empty]
  save_options |= flags::FORMAT if options[:indent].to_i.positive?

  custom_decl = nil
  if options[:no_declaration]
    save_options |= flags::NO_DECLARATION
  elsif attachments.key?(node, :xml_decl) && (xml_decl = attachments.get(node, :xml_decl))
    # The declaration is emitted by hand below, so suppress Nokogiri's own.
    save_options |= flags::NO_DECLARATION
    pieces = ["version=\"#{xml_decl[:version]}\""]
    pieces << "encoding=\"#{xml_decl[:encoding]}\"" if xml_decl[:encoding]
    pieces << "standalone=\"#{xml_decl[:standalone]}\"" if xml_decl[:standalone]
    custom_decl = "<?xml #{pieces.join(' ')}?>"
  end

  body = node.to_xml(
    indent: options[:indent],
    encoding: options[:encoding],
    save_with: save_options,
  )

  custom_decl ? "#{custom_decl}\n#{body}" : body
end

.set_attribute(element, name, value) ⇒ Object



227
228
229
# File 'lib/moxml/adapter/nokogiri.rb', line 227

# Sets an attribute on +element+ through its +[]=+ accessor. Both the name
# and the value are converted with +to_s+ first.
#
# @param element [Object] native element
# @param name [#to_s] attribute name
# @param value [#to_s] attribute value
# @return [String] the stringified value that was assigned
def set_attribute(element, name, value)
  element[name.to_s] = value.to_s
end

.set_cdata_content(node, content) ⇒ Object



313
314
315
# File 'lib/moxml/adapter/nokogiri.rb', line 313

# Assigns +content+ to a CDATA node through its +content=+ writer.
#
# @param node [Object] native CDATA node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_cdata_content(node, content)
  node.content = content
end

.set_comment_content(node, content) ⇒ Object



321
322
323
# File 'lib/moxml/adapter/nokogiri.rb', line 321

# Assigns +content+ to a comment node through its +native_content=+ writer.
#
# @param node [Object] native comment node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_comment_content(node, content)
  node.native_content = content
end

.set_declaration_attribute(declaration, attr_name, value) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
# File 'lib/moxml/adapter/nokogiri.rb', line 133

# Sets or removes one pseudo-attribute of an XML declaration and rewrites
# the declaration's content from the resulting attribute set.
#
# The current attributes come from +current_declaration_attributes+. A nil
# +value+ deletes the entry; any other value stores it. The content is then
# rebuilt as space-separated name="value" pairs.
#
# @param declaration [Object] native declaration responding to +native_content=+
# @param attr_name [Object] attribute key as used by +current_declaration_attributes+
# @param value [Object, nil] new value, or nil to remove the attribute
# @return [String] the content string that was assigned
def set_declaration_attribute(declaration, attr_name, value)
  attrs = current_declaration_attributes(declaration)
  value.nil? ? attrs.delete(attr_name) : (attrs[attr_name] = value)

  rendered = attrs.map { |key, val| %(#{key}="#{val}") }
  declaration.native_content = rendered.join(" ")
end

.set_namespace(element, ns) ⇒ Object



145
146
147
# File 'lib/moxml/adapter/nokogiri.rb', line 145

# Assigns +ns+ as the namespace of +element+ through its +namespace=+ writer.
#
# @param element [Object] native element
# @param ns [Object] native namespace
# @return [Object] the assigned +ns+
def set_namespace(element, ns)
  element.namespace = ns
end

.set_node_name(node, name) ⇒ Object



181
182
183
# File 'lib/moxml/adapter/nokogiri.rb', line 181

# Renames +node+ through its +name=+ writer.
#
# @param node [Object] native node
# @param name [Object] new name
# @return [Object] the assigned +name+
def set_node_name(node, name)
  node.name = name
end

.set_processing_instruction_content(node, content) ⇒ Object



329
330
331
# File 'lib/moxml/adapter/nokogiri.rb', line 329

# Assigns +content+ to a processing instruction through its
# +native_content=+ writer.
#
# @param node [Object] native processing instruction node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_processing_instruction_content(node, content)
  node.native_content = content
end

.set_root(doc, element) ⇒ Object



15
16
17
# File 'lib/moxml/adapter/nokogiri.rb', line 15

# Makes +element+ the root of +doc+ through its +root=+ writer.
#
# @param doc [Object] native document
# @param element [Object] native element to use as root
# @return [Object] the assigned +element+
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



305
306
307
# File 'lib/moxml/adapter/nokogiri.rb', line 305

# Assigns +content+ to a text node through its +native_content=+ writer.
#
# @param node [Object] native text node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_text_content(node, content)
  node.native_content = content
end

.text_content(node) ⇒ Object



294
295
296
# File 'lib/moxml/adapter/nokogiri.rb', line 294

# Returns the text of +node+ as a String. The result of +node.text+ is
# passed through +to_s+, so a nil text becomes "".
#
# @param node [Object] native node responding to +text+
# @return [String]
def text_content(node)
  node.text.to_s
end

.xpath(node, expression, namespaces = nil) ⇒ Object



362
363
364
365
366
367
368
369
370
371
# File 'lib/moxml/adapter/nokogiri.rb', line 362

# Evaluates +expression+ against +node+ with the native +xpath+ and returns
# the matches as a plain Array.
#
# @param node [Object] native node to query
# @param expression [String] XPath expression
# @param namespaces [Object, nil] namespace bindings forwarded unchanged
# @return [Array] the result of the native +xpath+ converted with +to_a+
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error
def xpath(node, expression, namespaces = nil)
  matches = node.xpath(expression, namespaces)
  matches.to_a
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  # Re-raise as the library's own error type, carrying the query context.
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end