Class: Moxml::Adapter::Nokogiri

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/nokogiri.rb

Defined Under Namespace

Classes: NokogiriSAXBridge

Class Method Summary collapse

Methods inherited from Base

create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, duplicate_node, patch_node, prepare_for_new_document, sax_supported?, set_attribute_name, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/moxml/adapter/nokogiri.rb', line 234

# Adds +child+ to +element+, with two special cases.
#
# * When +element+ is a Nokogiri document and +child+ is a processing
#   instruction named "xml", the version, encoding and standalone values are
#   read from the declaration via +declaration_attribute+ and stored in the
#   document's +@xml_decl+ instance variable. The version falls back to
#   "1.0"; nil values are dropped by +compact+. The child is then still
#   added through the branch below.
# * When +node_type+ reports +child+ as a doctype, a new internal subset is
#   created on +element+ from the child's name, external id and system id
#   instead of reparenting the DTD node.
#
# @param element native parent node or document
# @param child native node to add
# @return the result of +create_internal_subset+ for doctypes, otherwise the
#   result of the native +add_child+
def add_child(element, child)
  # Special handling for declarations on Nokogiri documents
  if element.is_a?(::Nokogiri::XML::Document) &&
      child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
      child.name == "xml"
    # Set document's xml_decl property
    version = declaration_attribute(child, "version") || "1.0"
    encoding = declaration_attribute(child, "encoding")
    standalone = declaration_attribute(child, "standalone")

    # Nokogiri's xml_decl can only be set via instance variable
    element.instance_variable_set(:@xml_decl, {
      version: version,
      encoding: encoding,
      standalone: standalone,
    }.compact)
  end

  if node_type(child) == :doctype
    # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
    element.create_internal_subset(
      child.name, child.external_id, child.system_id
    )
  else
    element.add_child(child)
  end
end

.add_next_sibling(node, sibling) ⇒ Object



266
267
268
# File 'lib/moxml/adapter/nokogiri.rb', line 266

# Delegates to the native node's +add_next_sibling+, passing +sibling+.
#
# @param node native node that receives the call
# @param sibling native node handed to +add_next_sibling+
# @return whatever the native +add_next_sibling+ returns
def add_next_sibling(node, sibling)
  node.add_next_sibling(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



262
263
264
# File 'lib/moxml/adapter/nokogiri.rb', line 262

# Delegates to the native node's +add_previous_sibling+, passing +sibling+.
#
# @param node native node that receives the call
# @param sibling native node handed to +add_previous_sibling+
# @return whatever the native +add_previous_sibling+ returns
def add_previous_sibling(node, sibling)
  node.add_previous_sibling(sibling)
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



361
362
363
364
365
366
367
368
369
370
# File 'lib/moxml/adapter/nokogiri.rb', line 361

# Runs +expression+ through the native node's +at_xpath+.
#
# @param node native node that responds to +at_xpath+
# @param expression XPath expression, passed through unchanged
# @param namespaces optional namespace bindings, passed through unchanged
# @return whatever the native +at_xpath+ returns
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error;
#   the error carries the expression, the adapter name and the node
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces)
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end

.attribute_element(attr) ⇒ Object



208
209
210
# File 'lib/moxml/adapter/nokogiri.rb', line 208

# Returns the owner of an attribute node, read from the native +parent+.
#
# @param attr native attribute node
# @return whatever +attr.parent+ returns
def attribute_element(attr)
  attr.parent
end

.attributes(element) ⇒ Object



212
213
214
# File 'lib/moxml/adapter/nokogiri.rb', line 212

# Returns the values of the element's native +attributes+ collection,
# i.e. the attribute objects without their keys.
#
# @param element native element
# @return the result of +element.attributes.values+
def attributes(element)
  element.attributes.values
end

.cdata_content(node) ⇒ Object



301
302
303
# File 'lib/moxml/adapter/nokogiri.rb', line 301

# Reads a CDATA node's content through the native +content+ accessor.
#
# @param node native CDATA node
# @return whatever +node.content+ returns
def cdata_content(node)
  node.content
end

.children(node) ⇒ Object



176
177
178
179
180
181
# File 'lib/moxml/adapter/nokogiri.rb', line 176

# Lists the node's children, leaving out whitespace-only text nodes.
#
# A whitespace-only text node is still kept when it has neither a previous
# nor a next sibling, i.e. when it is the only child.
#
# @param node native node that responds to +children+
# @return [Array] the children that passed the filter
def children(node)
  node.children.select do |child|
    # Anything that is not blank text is always kept.
    next true unless child.text? && child.content.strip.empty?

    # Blank text survives only when it stands alone.
    child.previous_sibling.nil? && child.next_sibling.nil?
  end
end

.comment_content(node) ⇒ Object



309
310
311
# File 'lib/moxml/adapter/nokogiri.rb', line 309

# Reads a comment node's content through the native +content+ accessor.
#
# @param node native comment node
# @return whatever +node.content+ returns
def comment_content(node)
  node.content
end

.create_document(_native_doc = nil) ⇒ Object



61
62
63
# File 'lib/moxml/adapter/nokogiri.rb', line 61

# Creates a new ::Nokogiri::XML::Document.
#
# @param _native_doc accepted for interface compatibility; not used here
# @return [::Nokogiri::XML::Document] a newly constructed document
def create_document(_native_doc = nil)
  ::Nokogiri::XML::Document.new
end

.create_fragment ⇒ Object



65
66
67
68
69
70
71
# File 'lib/moxml/adapter/nokogiri.rb', line 65

# Creates a ::Nokogiri::XML::DocumentFragment owned by a newly constructed
# ::Nokogiri::XML::Document.
#
# @return [::Nokogiri::XML::DocumentFragment]
def create_fragment
  # document fragments are weird and should be used with caution:
  # https://github.com/sparklemotion/nokogiri/issues/572
  ::Nokogiri::XML::DocumentFragment.new(
    ::Nokogiri::XML::Document.new,
  )
end

.create_native_cdata(content) ⇒ Object



81
82
83
# File 'lib/moxml/adapter/nokogiri.rb', line 81

# Builds a ::Nokogiri::XML::CDATA node for +content+, owned by a document
# obtained from +create_document+.
#
# @param content value passed to the CDATA constructor
# @return [::Nokogiri::XML::CDATA]
def create_native_cdata(content)
  ::Nokogiri::XML::CDATA.new(create_document, content)
end

.create_native_comment(content) ⇒ Object



85
86
87
# File 'lib/moxml/adapter/nokogiri.rb', line 85

# Builds a ::Nokogiri::XML::Comment node for +content+, owned by a document
# obtained from +create_document+.
#
# @param content value passed to the Comment constructor
# @return [::Nokogiri::XML::Comment]
def create_native_comment(content)
  ::Nokogiri::XML::Comment.new(create_document, content)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



101
102
103
104
105
106
107
# File 'lib/moxml/adapter/nokogiri.rb', line 101

# Builds the XML declaration as a ::Nokogiri::XML::ProcessingInstruction
# whose target is "xml". Its content is whatever +build_declaration_attrs+
# (defined elsewhere) produces from the three arguments; the owner document
# comes from +create_document+.
#
# @param version forwarded to +build_declaration_attrs+
# @param encoding forwarded to +build_declaration_attrs+
# @param standalone forwarded to +build_declaration_attrs+
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_declaration(version, encoding, standalone)
  ::Nokogiri::XML::ProcessingInstruction.new(
    create_document,
    "xml",
    build_declaration_attrs(version, encoding, standalone),
  )
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



89
90
91
92
93
# File 'lib/moxml/adapter/nokogiri.rb', line 89

# Creates a doctype by calling +create_internal_subset+ on a document
# obtained from +create_document+.
#
# @param name forwarded as the first argument of +create_internal_subset+
# @param external_id forwarded as the second argument
# @param system_id forwarded as the third argument
# @return whatever +create_internal_subset+ returns
def create_native_doctype(name, external_id, system_id)
  create_document.create_internal_subset(
    name, external_id, system_id
  )
end

.create_native_element(name) ⇒ Object



73
74
75
# File 'lib/moxml/adapter/nokogiri.rb', line 73

# Builds a ::Nokogiri::XML::Element called +name+, owned by a document
# obtained from +create_document+.
#
# @param name element name passed to the Element constructor
# @return [::Nokogiri::XML::Element]
def create_native_element(name)
  ::Nokogiri::XML::Element.new(name, create_document)
end

.create_native_entity_reference(name) ⇒ Object



109
110
111
# File 'lib/moxml/adapter/nokogiri.rb', line 109

# Builds a ::Nokogiri::XML::EntityReference for +name+, owned by a document
# obtained from +create_document+.
#
# @param name entity name passed to the EntityReference constructor
# @return [::Nokogiri::XML::EntityReference]
def create_native_entity_reference(name)
  ::Nokogiri::XML::EntityReference.new(create_document, name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



148
149
150
# File 'lib/moxml/adapter/nokogiri.rb', line 148

# Declares a namespace on +element+ by delegating to the native
# +add_namespace_definition+.
#
# @param element native element that receives the definition
# @param prefix namespace prefix, passed through unchanged
# @param uri namespace URI, passed through unchanged
# @return whatever +add_namespace_definition+ returns
def create_native_namespace(element, prefix, uri)
  element.add_namespace_definition(prefix, uri)
end

.create_native_processing_instruction(target, content) ⇒ Object



95
96
97
98
99
# File 'lib/moxml/adapter/nokogiri.rb', line 95

# Builds a ::Nokogiri::XML::ProcessingInstruction with the given target and
# content.
#
# The owner document is obtained through +create_document+, the factory the
# other +create_native_*+ builders use, rather than by instantiating
# ::Nokogiri::XML::Document directly. This keeps document creation in one
# place.
#
# @param target processing instruction target
# @param content processing instruction content
# @return [::Nokogiri::XML::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  ::Nokogiri::XML::ProcessingInstruction.new(create_document, target, content)
end

.create_native_text(content) ⇒ Object



77
78
79
# File 'lib/moxml/adapter/nokogiri.rb', line 77

# Builds a ::Nokogiri::XML::Text node for +content+, owned by a document
# obtained from +create_document+.
#
# @param content value passed to the Text constructor
# @return [::Nokogiri::XML::Text]
def create_native_text(content)
  ::Nokogiri::XML::Text.new(content, create_document)
end

.declaration_attribute(declaration, attr_name) ⇒ Object



117
118
119
120
121
122
# File 'lib/moxml/adapter/nokogiri.rb', line 117

# Extracts the value of a pseudo-attribute (for example "version",
# "encoding" or "standalone") from the textual content of an XML
# declaration node.
#
# The attribute name is regexp-escaped and must either start the content or
# follow whitespace, so "version" never matches inside "xversion".
# Whitespace around "=" is tolerated and the value may be wrapped in double
# or single quotes, as the XML declaration grammar allows.
#
# @param declaration [#content] node whose content holds the pseudo-attributes
# @param attr_name [String, Symbol] name of the pseudo-attribute to look up
# @return [String, nil] the unquoted value, or nil when the content is nil
#   or the attribute is absent
def declaration_attribute(declaration, attr_name)
  content = declaration.content
  return nil unless content

  name = Regexp.escape(attr_name.to_s)
  # Group 1 is the opening quote, group 2 the value up to the matching quote.
  content[/(?:\A|\s)#{name}\s*=\s*(["'])(.*?)\1/m, 2]
end

.doctype_external_id(native) ⇒ Object



342
343
344
# File 'lib/moxml/adapter/nokogiri.rb', line 342

# Reads the external id of a native doctype node.
#
# @param native native doctype node
# @return whatever +native.external_id+ returns
def doctype_external_id(native)
  native.external_id
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



338
339
340
# File 'lib/moxml/adapter/nokogiri.rb', line 338

# Reads the name of a native doctype node.
#
# @param native native doctype node
# @return whatever +native.name+ returns
def doctype_name(native)
  native.name
end

.doctype_system_id(native) ⇒ Object



346
347
348
# File 'lib/moxml/adapter/nokogiri.rb', line 346

# Reads the system id of a native doctype node.
#
# @param native native doctype node
# @return whatever +native.system_id+ returns
def doctype_system_id(native)
  native.system_id
end

.document(node) ⇒ Object



200
201
202
# File 'lib/moxml/adapter/nokogiri.rb', line 200

# Returns the document a native node belongs to, as reported by the node.
#
# @param node native node
# @return whatever +node.document+ returns
def document(node)
  node.document
end

.entity_reference_name(node) ⇒ Object



113
114
115
# File 'lib/moxml/adapter/nokogiri.rb', line 113

# Reads the name of a native entity reference node.
#
# @param node native entity reference node
# @return whatever +node.name+ returns
def entity_reference_name(node)
  node.name
end

.get_attribute(element, name) ⇒ Object



220
221
222
223
# File 'lib/moxml/adapter/nokogiri.rb', line 220

# Looks up an attribute object in the element's native +attributes+
# collection, using the stringified +name+ as the key.
#
# @param element native element
# @param name attribute name; converted with +to_s+
# @return the entry stored under that key, as returned by +attributes[]+
def get_attribute(element, name)
  # attributes keys don't include attribute namespaces
  element.attributes[name.to_s]
end

.get_attribute_value(element, name) ⇒ Object



225
226
227
228
# File 'lib/moxml/adapter/nokogiri.rb', line 225

# Reads an attribute value through the element's native +[]+ accessor,
# using the stringified +name+.
#
# @param element native element
# @param name attribute name; converted with +to_s+
# @return whatever +element[name.to_s]+ returns
def get_attribute_value(element, name)
  # get the attribute value by its name including a namespace
  element[name.to_s]
end

.inner_text(node) ⇒ Object



290
291
292
293
294
295
# File 'lib/moxml/adapter/nokogiri.rb', line 290

# Concatenates the content of the node's direct children, skipping children
# that report themselves as elements or comments.
#
# @param node native node that responds to +children+
# @return [String] the joined content of the remaining children
def inner_text(node)
  pieces = node.children.each_with_object([]) do |child, collected|
    next if child.element? || child.comment?

    collected << child.content
  end
  pieces.join
end

.namespace(element) ⇒ Object



140
141
142
# File 'lib/moxml/adapter/nokogiri.rb', line 140

# Returns the namespace reported by the native element.
#
# @param element native element
# @return whatever +element.namespace+ returns
def namespace(element)
  element.namespace
end

.namespace_definitions(node) ⇒ Object



333
334
335
# File 'lib/moxml/adapter/nokogiri.rb', line 333

# Returns the namespace definitions reported by the native node.
#
# @param node native node
# @return whatever +node.namespace_definitions+ returns
def namespace_definitions(node)
  node.namespace_definitions
end

.namespace_prefix(namespace) ⇒ Object



325
326
327
# File 'lib/moxml/adapter/nokogiri.rb', line 325

# Reads the prefix of a native namespace object.
#
# @param namespace native namespace
# @return whatever +namespace.prefix+ returns
def namespace_prefix(namespace)
  namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



329
330
331
# File 'lib/moxml/adapter/nokogiri.rb', line 329

# Reads the URI of a native namespace object from its +href+ accessor.
#
# @param namespace native namespace
# @return whatever +namespace.href+ returns
def namespace_uri(namespace)
  namespace.href
end

.next_sibling(node) ⇒ Object



192
193
194
# File 'lib/moxml/adapter/nokogiri.rb', line 192

# Returns the next sibling reported by the native node.
#
# @param node native node
# @return whatever +node.next_sibling+ returns
def next_sibling(node)
  node.next_sibling
end

.node_name(node) ⇒ Object



168
169
170
# File 'lib/moxml/adapter/nokogiri.rb', line 168

# Reads the name of a native node.
#
# @param node native node
# @return whatever +node.name+ returns
def node_name(node)
  node.name
end

.node_type(node) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/moxml/adapter/nokogiri.rb', line 152

# Maps a native Nokogiri node to a type symbol based on its class.
#
# The checks run top to bottom and the first match wins, so CDATA is tested
# before Text. Documents and document fragments both map to :document.
#
# @param node native node to classify
# @return [Symbol] one of :element, :cdata, :text, :comment, :attribute,
#   :namespace, :processing_instruction, :document, :doctype,
#   :entity_reference, or :unknown when no class matches
def node_type(node)
  return :element if ::Nokogiri::XML::Element === node
  return :cdata if ::Nokogiri::XML::CDATA === node
  return :text if ::Nokogiri::XML::Text === node
  return :comment if ::Nokogiri::XML::Comment === node
  return :attribute if ::Nokogiri::XML::Attr === node
  return :namespace if ::Nokogiri::XML::Namespace === node
  return :processing_instruction if ::Nokogiri::XML::ProcessingInstruction === node
  return :document if ::Nokogiri::XML::Document === node
  return :document if ::Nokogiri::XML::DocumentFragment === node
  return :doctype if ::Nokogiri::XML::DTD === node
  return :entity_reference if ::Nokogiri::XML::EntityReference === node

  :unknown
end

.parent(node) ⇒ Object



188
189
190
# File 'lib/moxml/adapter/nokogiri.rb', line 188

# Returns the parent reported by the native node.
#
# @param node native node
# @return whatever +node.parent+ returns
def parent(node)
  node.parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/moxml/adapter/nokogiri.rb', line 14

# Parses +xml+ with Nokogiri and wraps the result via DocumentBuilder.
#
# Recognised option keys, as read by this method:
# * +:fragment+ - parse with ::Nokogiri::XML::DocumentFragment instead of
#   as a whole document
# * +:encoding+ - forwarded to ::Nokogiri::XML (document parsing only)
# * +:strict+   - when falsy, +recover+ is additionally enabled on the
#   parser configuration
#
# @param xml input handed to Nokogiri
# @param options [Hash] see the keys above
# @param _context context handed to DocumentBuilder; when nil, a
#   Context.new(:nokogiri) is created
# @return the result of DocumentBuilder#build for the parsed native document
# @raise [Moxml::ParseError] when Nokogiri raises a syntax error; carries
#   the original message, line and column
def parse(xml, options = {}, _context = nil)
  # Both parsing modes share the same parser configuration.
  configure = proc do |config|
    config.strict.nonet
    config.recover unless options[:strict]
  end

  native_doc =
    begin
      if options[:fragment]
        ::Nokogiri::XML::DocumentFragment.parse(xml, &configure)
      else
        ::Nokogiri::XML(xml, nil, options[:encoding], &configure)
      end
    rescue ::Nokogiri::XML::SyntaxError => e
      raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
    end

  # Use provided context if available, otherwise create new one
  builder_context = _context || Context.new(:nokogiri)
  DocumentBuilder.new(builder_context).build(native_doc)
end

.previous_sibling(node) ⇒ Object



196
197
198
# File 'lib/moxml/adapter/nokogiri.rb', line 196

# Returns the previous sibling reported by the native node.
#
# @param node native node
# @return whatever +node.previous_sibling+ returns
def previous_sibling(node)
  node.previous_sibling
end

.processing_instruction_content(node) ⇒ Object



317
318
319
# File 'lib/moxml/adapter/nokogiri.rb', line 317

# Reads a processing instruction's content through the native +content+
# accessor.
#
# @param node native processing instruction node
# @return whatever +node.content+ returns
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



144
145
146
# File 'lib/moxml/adapter/nokogiri.rb', line 144

# Reads a processing instruction's target, which this adapter takes from
# the native node's +name+.
#
# @param node native processing instruction node
# @return whatever +node.name+ returns
def processing_instruction_target(node)
  node.name
end

.remove(node) ⇒ Object



270
271
272
273
274
275
276
277
278
279
280
# File 'lib/moxml/adapter/nokogiri.rb', line 270

# Removes +node+ via the native +remove+.
#
# When the node is a processing instruction named "xml" whose parent is a
# Nokogiri document, the document's +@xml_decl+ instance variable is set to
# nil first.
#
# @param node native node to remove
# @return whatever the native +remove+ returns
def remove(node)
  is_declaration = node.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    node.name == "xml"
  owner = node.parent if is_declaration

  # Clear document's xml_decl when removing declaration
  owner.instance_variable_set(:@xml_decl, nil) if owner.is_a?(::Nokogiri::XML::Document)

  node.remove
end

.remove_attribute(element, name) ⇒ Object



230
231
232
# File 'lib/moxml/adapter/nokogiri.rb', line 230

# Removes an attribute by delegating to the native +remove_attribute+ with
# the stringified +name+.
#
# @param element native element
# @param name attribute name; converted with +to_s+
# @return whatever the native +remove_attribute+ returns
def remove_attribute(element, name)
  element.remove_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



282
283
284
# File 'lib/moxml/adapter/nokogiri.rb', line 282

# Replaces +node+ with +new_node+ by delegating to the native +replace+.
#
# @param node native node that receives the call
# @param new_node native node passed to +replace+
# @return whatever the native +replace+ returns
def replace(node, new_node)
  node.replace(new_node)
end

.replace_children(node, new_children) ⇒ Object



183
184
185
186
# File 'lib/moxml/adapter/nokogiri.rb', line 183

# Swaps out all children of +node+.
#
# First unlinks the node's current children, then adds every entry of
# +new_children+ through this adapter's +add_child+ (not the native one),
# so that method's special handling applies to each new child.
#
# @param node native node whose children are replaced
# @param new_children collection of native nodes; must respond to +each+
# @return the value of +new_children.each+
def replace_children(node, new_children)
  node.children.unlink
  new_children.each { |child| add_child(node, child) }
end

.root(document) ⇒ Object



204
205
206
# File 'lib/moxml/adapter/nokogiri.rb', line 204

# Returns the root node of +document+.
#
# Objects that respond to +root+ are asked directly; anything else falls
# back to the first entry of its +children+.
#
# @param document native document or other container
# @return the root node, or whatever +children.first+ yields for containers
#   without a +root+ method
def root(document)
  return document.root if document.respond_to?(:root)

  document.children.first
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for Nokogiri

Parameters:



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/moxml/adapter/nokogiri.rb', line 42

# SAX-parses +xml+ with Nokogiri, routing events through a
# NokogiriSAXBridge wrapped around +handler+.
#
# Input that responds to +read+ is handed to the parser as is; anything
# else is converted with +to_s+ first. A Nokogiri syntax error is not
# re-raised: it is converted to a Moxml::ParseError (message, line, column)
# and passed to +handler.on_error+.
#
# @param xml IO-like object (responds to +read+) or a value convertible
#   with +to_s+
# @param handler object given to the bridge; must respond to +on_error+
# @return the parser's result, or the result of +handler.on_error+ after a
#   syntax error
def sax_parse(xml, handler)
  sax_parser = ::Nokogiri::XML::SAX::Parser.new(NokogiriSAXBridge.new(handler))
  source = xml.respond_to?(:read) ? xml : xml.to_s
  sax_parser.parse(source)
rescue ::Nokogiri::XML::SyntaxError => e
  handler.on_error(
    Moxml::ParseError.new(e.message, line: e.line, column: e.column),
  )
end

.serialize(node, options = {}) ⇒ Object



372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/moxml/adapter/nokogiri.rb', line 372

# Serializes +node+ with the native +to_xml+, translating Moxml options
# into Nokogiri save flags.
#
# The flags always start from AS_XML. Option handling, as read here:
# * +:expand_empty+ truthy   - adds NO_EMPTY_TAGS
# * +:indent+ positive       - adds FORMAT; the raw value is also passed on
# * +:encoding+              - passed on unchanged
# * +:no_declaration+        - when the key is present its value alone
#   decides whether NO_DECLARATION is added
# * otherwise, if the node has an +@xml_decl+ instance variable set to nil,
#   NO_DECLARATION is added
#
# @param node native node that responds to +to_xml+
# @param options [Hash] see the keys above
# @return whatever the native +to_xml+ returns
def serialize(node, options = {})
  flags = ::Nokogiri::XML::Node::SaveOptions

  save_with = flags::AS_XML
  # Don't force expand empty elements if they're really empty
  save_with |= flags::NO_EMPTY_TAGS if options[:expand_empty]
  save_with |= flags::FORMAT if options[:indent].to_i.positive?

  # An explicit no_declaration option wins; otherwise fall back to the
  # @xml_decl state on the node (nil once the declaration was removed).
  suppress_declaration =
    if options.key?(:no_declaration)
      options[:no_declaration]
    elsif node.respond_to?(:instance_variable_get) &&
        node.instance_variable_defined?(:@xml_decl)
      node.instance_variable_get(:@xml_decl).nil?
    end
  save_with |= flags::NO_DECLARATION if suppress_declaration

  node.to_xml(
    indent: options[:indent],
    encoding: options[:encoding],
    save_with: save_with,
  )
end

.set_attribute(element, name, value) ⇒ Object



216
217
218
# File 'lib/moxml/adapter/nokogiri.rb', line 216

# Sets an attribute through the element's native +[]=+ accessor. Both the
# name and the value are converted with +to_s+ first.
#
# @param element native element
# @param name attribute name; converted with +to_s+
# @param value attribute value; converted with +to_s+
# @return the stringified value (the result of the assignment expression)
def set_attribute(element, name, value)
  element[name.to_s] = value.to_s
end

.set_cdata_content(node, content) ⇒ Object



305
306
307
# File 'lib/moxml/adapter/nokogiri.rb', line 305

# Sets a CDATA node's content through the native +content=+ writer.
#
# @param node native CDATA node
# @param content new content, passed through unchanged
# @return +content+ (the result of the assignment expression)
def set_cdata_content(node, content)
  node.content = content
end

.set_comment_content(node, content) ⇒ Object



313
314
315
# File 'lib/moxml/adapter/nokogiri.rb', line 313

# Sets a comment node's content through the native +native_content=+
# writer.
#
# @param node native comment node
# @param content new content, passed through unchanged
# @return +content+ (the result of the assignment expression)
def set_comment_content(node, content)
  node.native_content = content
end

.set_declaration_attribute(declaration, attr_name, value) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
# File 'lib/moxml/adapter/nokogiri.rb', line 124

# Updates one pseudo-attribute of an XML declaration and rewrites the
# declaration's content.
#
# The current attributes come from +current_declaration_attributes+
# (defined elsewhere). A nil +value+ removes +attr_name+; any other value is
# stored under it. The attributes are then written back to
# +native_content+ as space-separated name="value" pairs.
#
# @param declaration native declaration node
# @param attr_name key of the pseudo-attribute to change
# @param value new value, or nil to remove the attribute
# @return [String] the content string that was assigned
def set_declaration_attribute(declaration, attr_name, value)
  pairs = current_declaration_attributes(declaration)
  pairs.delete(attr_name) if value.nil?
  pairs[attr_name] = value unless value.nil?

  rendered = pairs.map { |key, val| %(#{key}="#{val}") }
  declaration.native_content = rendered.join(" ")
end

.set_namespace(element, ns) ⇒ Object



136
137
138
# File 'lib/moxml/adapter/nokogiri.rb', line 136

# Assigns +ns+ to the element through the native +namespace=+ writer.
#
# @param element native element
# @param ns native namespace, passed through unchanged
# @return +ns+ (the result of the assignment expression)
def set_namespace(element, ns)
  element.namespace = ns
end

.set_node_name(node, name) ⇒ Object



172
173
174
# File 'lib/moxml/adapter/nokogiri.rb', line 172

# Renames a native node through its +name=+ writer.
#
# @param node native node
# @param name new name, passed through unchanged
# @return +name+ (the result of the assignment expression)
def set_node_name(node, name)
  node.name = name
end

.set_processing_instruction_content(node, content) ⇒ Object



321
322
323
# File 'lib/moxml/adapter/nokogiri.rb', line 321

# Sets a processing instruction's content through the native
# +native_content=+ writer.
#
# @param node native processing instruction node
# @param content new content, passed through unchanged
# @return +content+ (the result of the assignment expression)
def set_processing_instruction_content(node, content)
  node.native_content = content
end

.set_root(doc, element) ⇒ Object



10
11
12
# File 'lib/moxml/adapter/nokogiri.rb', line 10

# Makes +element+ the root of +doc+ through the native +root=+ writer.
#
# @param doc native document
# @param element native element, passed through unchanged
# @return +element+ (the result of the assignment expression)
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



297
298
299
# File 'lib/moxml/adapter/nokogiri.rb', line 297

# Sets a text node's content through the native +native_content=+ writer.
#
# @param node native text node
# @param content new content, passed through unchanged
# @return +content+ (the result of the assignment expression)
def set_text_content(node, content)
  node.native_content = content
end

.text_content(node) ⇒ Object



286
287
288
# File 'lib/moxml/adapter/nokogiri.rb', line 286

# Reads a node's text through the native +text+ accessor.
#
# @param node native node
# @return whatever +node.text+ returns
def text_content(node)
  node.text
end

.xpath(node, expression, namespaces = nil) ⇒ Object



350
351
352
353
354
355
356
357
358
359
# File 'lib/moxml/adapter/nokogiri.rb', line 350

# Runs +expression+ through the native node's +xpath+ and returns the
# matches as an array.
#
# @param node native node that responds to +xpath+
# @param expression XPath expression, passed through unchanged
# @param namespaces optional namespace bindings, passed through unchanged
# @return [Array] the native result converted with +to_a+
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error;
#   the error carries the expression, the adapter name and the node
def xpath(node, expression, namespaces = nil)
  matches = node.xpath(expression, namespaces)
  matches.to_a
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end