Class: Moxml::Adapter::Nokogiri

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/nokogiri.rb

Defined Under Namespace

Classes: NokogiriSAXBridge

Class Method Summary collapse

Methods inherited from Base

create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, duplicate_node, patch_node, prepare_for_new_document, sax_supported?, set_attribute_name, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.add_child(element, child) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/moxml/adapter/nokogiri.rb', line 234

# Appends +child+ to +element+, with two special cases.
#
# * When an "xml" processing instruction is added to a Nokogiri document, the
#   document's +@xml_decl+ instance variable is populated from the
#   declaration's version / encoding / standalone pseudo-attributes (version
#   falls back to "1.0"; absent values are dropped).
# * When +child+ is a doctype, an internal subset is created on +element+
#   instead of reparenting the node.
#
# @param element [Object] native parent node or document
# @param child [Object] native node to attach
# @return [Object] result of +create_internal_subset+ for doctypes, otherwise
#   the result of the native +add_child+ call
def add_child(element, child)
  declaration_on_document =
    element.is_a?(::Nokogiri::XML::Document) &&
    child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
    child.name == "xml"

  if declaration_on_document
    decl = {
      version: declaration_attribute(child, "version") || "1.0",
      encoding: declaration_attribute(child, "encoding"),
      standalone: declaration_attribute(child, "standalone"),
    }.compact

    # Nokogiri's xml_decl can only be set via instance variable
    element.instance_variable_set(:@xml_decl, decl)
  end

  return element.add_child(child) unless node_type(child) == :doctype

  # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
  element.create_internal_subset(
    child.name, child.external_id, child.system_id
  )
end

.add_next_sibling(node, sibling) ⇒ Object



266
267
268
# File 'lib/moxml/adapter/nokogiri.rb', line 266

# Inserts +sibling+ immediately after +node+ by delegating to the node's own
# +add_next_sibling+.
#
# @param node [Object] native node that gains a following sibling
# @param sibling [Object] native node to insert
# @return [Object] whatever the native +add_next_sibling+ call returns
def add_next_sibling(node, sibling)
  node.add_next_sibling(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



262
263
264
# File 'lib/moxml/adapter/nokogiri.rb', line 262

# Inserts +sibling+ immediately before +node+ by delegating to the node's own
# +add_previous_sibling+.
#
# @param node [Object] native node that gains a preceding sibling
# @param sibling [Object] native node to insert
# @return [Object] whatever the native +add_previous_sibling+ call returns
def add_previous_sibling(node, sibling)
  node.add_previous_sibling(sibling)
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



359
360
361
362
363
364
365
366
367
368
# File 'lib/moxml/adapter/nokogiri.rb', line 359

# Evaluates +expression+ against +node+ via the node's +at_xpath+ and returns
# its result.
#
# @param node [Object] native node to query
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] namespace bindings forwarded unchanged
# @return [Object, nil] whatever the native +at_xpath+ call returns
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error; the
#   error carries the original message plus expression, adapter and node
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces)
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end

.attribute_element(attr) ⇒ Object



208
209
210
# File 'lib/moxml/adapter/nokogiri.rb', line 208

# Returns the owner of an attribute, i.e. the attribute's +parent+.
#
# @param attr [Object] native attribute node
# @return [Object, nil] whatever +attr.parent+ returns
def attribute_element(attr)
  attr.parent
end

.attributes(element) ⇒ Object



212
213
214
# File 'lib/moxml/adapter/nokogiri.rb', line 212

# Lists the attribute objects of +element+ (the values of its +attributes+
# mapping, without the keys).
#
# @param element [Object] native element exposing +attributes+
# @return [Array] the values of +element.attributes+
def attributes(element)
  element.attributes.values
end

.cdata_content(node) ⇒ Object



299
300
301
# File 'lib/moxml/adapter/nokogiri.rb', line 299

# Reads the content of a CDATA node.
#
# @param node [Object] native CDATA node
# @return [Object] whatever +node.content+ returns
def cdata_content(node)
  node.content
end

.children(node) ⇒ Object



176
177
178
179
180
181
# File 'lib/moxml/adapter/nokogiri.rb', line 176

# Returns the children of +node+ with insignificant whitespace removed.
#
# A child is dropped only when it is a text node whose stripped content is
# empty AND it has at least one sibling; a whitespace-only text node that is
# the sole child is kept.
#
# @param node [Object] native node exposing +children+
# @return [Array] the remaining child nodes, in their original order
def children(node)
  node.children.reject do |child|
    next false unless child.text?
    next false unless child.content.strip.empty?

    sole_child = child.previous_sibling.nil? && child.next_sibling.nil?
    !sole_child
  end
end

.comment_content(node) ⇒ Object



307
308
309
# File 'lib/moxml/adapter/nokogiri.rb', line 307

# Reads the content of a comment node.
#
# @param node [Object] native comment node
# @return [Object] whatever +node.content+ returns
def comment_content(node)
  node.content
end

.create_document(_native_doc = nil) ⇒ Object



61
62
63
# File 'lib/moxml/adapter/nokogiri.rb', line 61

# Builds a new, empty Nokogiri XML document.
#
# @param _native_doc [Object, nil] accepted but not used by this method
# @return [::Nokogiri::XML::Document] a freshly constructed document
def create_document(_native_doc = nil)
  ::Nokogiri::XML::Document.new
end

.create_fragment ⇒ Object



65
66
67
68
69
70
71
# File 'lib/moxml/adapter/nokogiri.rb', line 65

# Builds an empty document fragment owned by a brand-new document.
#
# @return [::Nokogiri::XML::DocumentFragment] the new fragment
def create_fragment
  # document fragments are weird and should be used with caution:
  # https://github.com/sparklemotion/nokogiri/issues/572
  owner_document = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::DocumentFragment.new(owner_document)
end

.create_native_cdata(content) ⇒ Object



81
82
83
# File 'lib/moxml/adapter/nokogiri.rb', line 81

# Builds a CDATA node holding +content+, owned by a document obtained from
# +create_document+.
#
# @param content [Object] value passed as the CDATA content
# @return [::Nokogiri::XML::CDATA] the new node
def create_native_cdata(content)
  owner_document = create_document
  ::Nokogiri::XML::CDATA.new(owner_document, content)
end

.create_native_comment(content) ⇒ Object



85
86
87
# File 'lib/moxml/adapter/nokogiri.rb', line 85

# Builds a comment node holding +content+, owned by a document obtained from
# +create_document+.
#
# @param content [Object] value passed as the comment content
# @return [::Nokogiri::XML::Comment] the new node
def create_native_comment(content)
  owner_document = create_document
  ::Nokogiri::XML::Comment.new(owner_document, content)
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



101
102
103
104
105
106
107
# File 'lib/moxml/adapter/nokogiri.rb', line 101

# Builds the native representation of an XML declaration: a processing
# instruction whose target is "xml" and whose content comes from
# +build_declaration_attrs+.
#
# @param version [Object] forwarded to +build_declaration_attrs+
# @param encoding [Object] forwarded to +build_declaration_attrs+
# @param standalone [Object] forwarded to +build_declaration_attrs+
# @return [::Nokogiri::XML::ProcessingInstruction] the new declaration node
def create_native_declaration(version, encoding, standalone)
  owner_document = create_document
  pseudo_attrs = build_declaration_attrs(version, encoding, standalone)
  ::Nokogiri::XML::ProcessingInstruction.new(owner_document, "xml", pseudo_attrs)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



89
90
91
92
93
# File 'lib/moxml/adapter/nokogiri.rb', line 89

# Builds a doctype by creating an internal subset on a document obtained from
# +create_document+.
#
# @param name [Object] forwarded as the first argument
# @param external_id [Object] forwarded as the second argument
# @param system_id [Object] forwarded as the third argument
# @return [Object] whatever +create_internal_subset+ returns
def create_native_doctype(name, external_id, system_id)
  owner_document = create_document
  owner_document.create_internal_subset(name, external_id, system_id)
end

.create_native_element(name) ⇒ Object



73
74
75
# File 'lib/moxml/adapter/nokogiri.rb', line 73

# Builds an element called +name+, owned by a document obtained from
# +create_document+.
#
# @param name [Object] element name passed to the Nokogiri constructor
# @return [::Nokogiri::XML::Element] the new element
def create_native_element(name)
  owner_document = create_document
  ::Nokogiri::XML::Element.new(name, owner_document)
end

.create_native_entity_reference(name) ⇒ Object



109
110
111
# File 'lib/moxml/adapter/nokogiri.rb', line 109

# Builds an entity reference called +name+, owned by a document obtained from
# +create_document+.
#
# @param name [Object] entity name passed to the Nokogiri constructor
# @return [::Nokogiri::XML::EntityReference] the new node
def create_native_entity_reference(name)
  owner_document = create_document
  ::Nokogiri::XML::EntityReference.new(owner_document, name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



148
149
150
# File 'lib/moxml/adapter/nokogiri.rb', line 148

# Declares a namespace on +element+ by delegating to its
# +add_namespace_definition+.
#
# @param element [Object] native element receiving the declaration
# @param prefix [Object] namespace prefix, forwarded unchanged
# @param uri [Object] namespace URI, forwarded unchanged
# @return [Object] whatever +add_namespace_definition+ returns
def create_native_namespace(element, prefix, uri)
  element.add_namespace_definition(prefix, uri)
end

.create_native_processing_instruction(target, content) ⇒ Object



95
96
97
98
99
# File 'lib/moxml/adapter/nokogiri.rb', line 95

# Builds a processing instruction owned by a brand-new Nokogiri document.
#
# @param target [Object] PI target, forwarded unchanged
# @param content [Object] PI content, forwarded unchanged
# @return [::Nokogiri::XML::ProcessingInstruction] the new node
def create_native_processing_instruction(target, content)
  owner_document = ::Nokogiri::XML::Document.new
  ::Nokogiri::XML::ProcessingInstruction.new(owner_document, target, content)
end

.create_native_text(content) ⇒ Object



77
78
79
# File 'lib/moxml/adapter/nokogiri.rb', line 77

# Builds a text node holding +content+, owned by a document obtained from
# +create_document+.
#
# @param content [Object] value passed as the text content
# @return [::Nokogiri::XML::Text] the new node
def create_native_text(content)
  owner_document = create_document
  ::Nokogiri::XML::Text.new(content, owner_document)
end

.declaration_attribute(declaration, attr_name) ⇒ Object



117
118
119
120
121
122
# File 'lib/moxml/adapter/nokogiri.rb', line 117

# Extracts the value of one pseudo-attribute (e.g. "version") from the textual
# content of an XML declaration node.
#
# The lookup is done with a regular expression over +declaration.content+:
# * +attr_name+ is regex-escaped, so metacharacters in it match literally;
# * the name must not be the tail of a longer name ("xversion" does not
#   satisfy a lookup for "version");
# * the value may be wrapped in double or single quotes, with optional
#   whitespace around "=".
#
# @param declaration [#content] node whose +content+ holds the pseudo-attributes
# @param attr_name [String, Symbol] name of the pseudo-attribute to read
# @return [String, nil] the unquoted value, or nil when the content is nil or
#   the pseudo-attribute is absent
def declaration_attribute(declaration, attr_name)
  content = declaration.content
  return nil unless content

  name = Regexp.escape(attr_name.to_s)
  # The lookbehind rejects matches preceded by a name character.
  match = content.match(/(?<![\w:.-])#{name}\s*=\s*(?:"([^"]*)"|'([^']*)')/)
  match && (match[1] || match[2])
end

.doctype_external_id(native) ⇒ Object



340
341
342
# File 'lib/moxml/adapter/nokogiri.rb', line 340

# Reads the external identifier of a doctype node.
#
# @param native [Object] native doctype node
# @return [Object, nil] whatever +native.external_id+ returns
def doctype_external_id(native)
  native.external_id
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



336
337
338
# File 'lib/moxml/adapter/nokogiri.rb', line 336

# Reads the name of a doctype node.
#
# @param native [Object] native doctype node
# @return [Object] whatever +native.name+ returns
def doctype_name(native)
  native.name
end

.doctype_system_id(native) ⇒ Object



344
345
346
# File 'lib/moxml/adapter/nokogiri.rb', line 344

# Reads the system identifier of a doctype node.
#
# @param native [Object] native doctype node
# @return [Object, nil] whatever +native.system_id+ returns
def doctype_system_id(native)
  native.system_id
end

.document(node) ⇒ Object



200
201
202
# File 'lib/moxml/adapter/nokogiri.rb', line 200

# Returns the document that +node+ reports as its owner.
#
# @param node [Object] native node
# @return [Object] whatever +node.document+ returns
def document(node)
  node.document
end

.entity_reference_name(node) ⇒ Object



113
114
115
# File 'lib/moxml/adapter/nokogiri.rb', line 113

# Reads the name of an entity reference node.
#
# @param node [Object] native entity reference node
# @return [Object] whatever +node.name+ returns
def entity_reference_name(node)
  node.name
end

.get_attribute(element, name) ⇒ Object



220
221
222
223
# File 'lib/moxml/adapter/nokogiri.rb', line 220

# Looks up an attribute object on +element+ by name.
#
# The lookup goes through +element.attributes+, whose keys don't include
# attribute namespaces. +name+ is converted with +to_s+ first.
#
# @param element [Object] native element exposing +attributes+
# @param name [#to_s] attribute name
# @return [Object, nil] the entry stored under that name, if any
def get_attribute(element, name)
  element.attributes[name.to_s]
end

.get_attribute_value(element, name) ⇒ Object



225
226
227
228
# File 'lib/moxml/adapter/nokogiri.rb', line 225

# Reads an attribute value through the element's +[]+ accessor.
#
# Per the original note, this accessor resolves the name including a
# namespace. +name+ is converted with +to_s+ first.
#
# @param element [Object] native element supporting +[]+
# @param name [#to_s] attribute name
# @return [Object, nil] whatever +element[name]+ returns
def get_attribute_value(element, name)
  element[name.to_s]
end

.inner_text(node) ⇒ Object



290
291
292
293
# File 'lib/moxml/adapter/nokogiri.rb', line 290

# Concatenates the content of the direct children of +node+ that are not
# elements (descendants of child elements are not included).
#
# @param node [Object] native node exposing +children+
# @return [String] joined content of the non-element children
def inner_text(node)
  non_elements = node.children.reject(&:element?)
  non_elements.map(&:content).join
end

.namespace(element) ⇒ Object



140
141
142
# File 'lib/moxml/adapter/nokogiri.rb', line 140

# Returns the namespace that +element+ reports for itself.
#
# @param element [Object] native element
# @return [Object, nil] whatever +element.namespace+ returns
def namespace(element)
  element.namespace
end

.namespace_definitions(node) ⇒ Object



331
332
333
# File 'lib/moxml/adapter/nokogiri.rb', line 331

# Returns the namespace definitions reported by +node+.
#
# @param node [Object] native node
# @return [Object] whatever +node.namespace_definitions+ returns
def namespace_definitions(node)
  node.namespace_definitions
end

.namespace_prefix(namespace) ⇒ Object



323
324
325
# File 'lib/moxml/adapter/nokogiri.rb', line 323

# Reads the prefix of a namespace object.
#
# @param namespace [Object] native namespace
# @return [Object, nil] whatever +namespace.prefix+ returns
def namespace_prefix(namespace)
  namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



327
328
329
# File 'lib/moxml/adapter/nokogiri.rb', line 327

# Reads the URI of a namespace object (exposed by the native object as +href+).
#
# @param namespace [Object] native namespace
# @return [Object] whatever +namespace.href+ returns
def namespace_uri(namespace)
  namespace.href
end

.next_sibling(node) ⇒ Object



192
193
194
# File 'lib/moxml/adapter/nokogiri.rb', line 192

# Returns the sibling that follows +node+.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.next_sibling+ returns
def next_sibling(node)
  node.next_sibling
end

.node_name(node) ⇒ Object



168
169
170
# File 'lib/moxml/adapter/nokogiri.rb', line 168

# Reads the name of +node+.
#
# @param node [Object] native node
# @return [Object] whatever +node.name+ returns
def node_name(node)
  node.name
end

.node_type(node) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/moxml/adapter/nokogiri.rb', line 152

# Maps a native Nokogiri object to the symbol naming its node type.
#
# Both documents and document fragments are reported as +:document+.
# Objects matching none of the known classes yield +:unknown+.
#
# @param node [Object] native object to classify
# @return [Symbol] one of :element, :cdata, :text, :comment, :attribute,
#   :namespace, :processing_instruction, :document, :doctype,
#   :entity_reference or :unknown
def node_type(node)
  xml = ::Nokogiri::XML

  # First match wins, so the order of this table is significant
  # (CDATA is tested before Text, exactly as in a top-down case/when).
  type_table = [
    [xml::Element, :element],
    [xml::CDATA, :cdata],
    [xml::Text, :text],
    [xml::Comment, :comment],
    [xml::Attr, :attribute],
    [xml::Namespace, :namespace],
    [xml::ProcessingInstruction, :processing_instruction],
    [xml::Document, :document],
    [xml::DocumentFragment, :document],
    [xml::DTD, :doctype],
    [xml::EntityReference, :entity_reference],
  ]

  _klass, type = type_table.find { |klass, _type| klass === node }
  type || :unknown
end

.parent(node) ⇒ Object



188
189
190
# File 'lib/moxml/adapter/nokogiri.rb', line 188

# Returns the parent that +node+ reports.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.parent+ returns
def parent(node)
  node.parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/moxml/adapter/nokogiri.rb', line 14

# Parses +xml+ with Nokogiri and wraps the result via DocumentBuilder.
#
# Options read by this method:
# * +:fragment+ - when truthy, parse as a document fragment instead of a
#   document (the +:encoding+ option is not passed on this path);
# * +:encoding+ - forwarded to the document parser;
# * +:strict+   - when falsy, +recover+ is additionally enabled on the parse
#   configuration.
# In every case the configuration first has +strict+ and +nonet+ applied.
#
# @param xml [Object] input handed to Nokogiri
# @param options [Hash] parse options, see above
# @param _context [Object, nil] context to build with; when nil a new
#   +Context.new(:nokogiri)+ is created
# @return [Object] whatever +DocumentBuilder#build+ returns
# @raise [Moxml::ParseError] when Nokogiri raises a syntax error; carries the
#   original message, line and column
def parse(xml, options = {}, _context = nil)
  configure = proc do |config|
    config.strict.nonet
    config.recover unless options[:strict]
  end

  native_doc =
    begin
      if options[:fragment]
        ::Nokogiri::XML::DocumentFragment.parse(xml, &configure)
      else
        ::Nokogiri::XML(xml, nil, options[:encoding], &configure)
      end
    rescue ::Nokogiri::XML::SyntaxError => e
      raise Moxml::ParseError.new(e.message, line: e.line,
                                             column: e.column)
    end

  # Use provided context if available, otherwise create new one
  ctx = _context || Context.new(:nokogiri)
  DocumentBuilder.new(ctx).build(native_doc)
end

.previous_sibling(node) ⇒ Object



196
197
198
# File 'lib/moxml/adapter/nokogiri.rb', line 196

# Returns the sibling that precedes +node+.
#
# @param node [Object] native node
# @return [Object, nil] whatever +node.previous_sibling+ returns
def previous_sibling(node)
  node.previous_sibling
end

.processing_instruction_content(node) ⇒ Object



315
316
317
# File 'lib/moxml/adapter/nokogiri.rb', line 315

# Reads the content of a processing instruction.
#
# @param node [Object] native processing instruction node
# @return [Object] whatever +node.content+ returns
def processing_instruction_content(node)
  node.content
end

.processing_instruction_target(node) ⇒ Object



144
145
146
# File 'lib/moxml/adapter/nokogiri.rb', line 144

# Reads the target of a processing instruction (exposed by the native node as
# its +name+).
#
# @param node [Object] native processing instruction node
# @return [Object] whatever +node.name+ returns
def processing_instruction_target(node)
  node.name
end

.remove(node) ⇒ Object



270
271
272
273
274
275
276
277
278
279
280
# File 'lib/moxml/adapter/nokogiri.rb', line 270

# Detaches +node+ via its own +remove+.
#
# When the node is an "xml" processing instruction whose parent is a Nokogiri
# document, the document's +@xml_decl+ instance variable is set to nil first,
# so the removal of the declaration is recorded on the document.
#
# @param node [Object] native node to remove
# @return [Object] whatever +node.remove+ returns
def remove(node)
  xml_declaration =
    node.is_a?(::Nokogiri::XML::ProcessingInstruction) && node.name == "xml"

  if xml_declaration && node.parent.is_a?(::Nokogiri::XML::Document)
    # Clear document's xml_decl when removing declaration
    node.parent.instance_variable_set(:@xml_decl, nil)
  end

  node.remove
end

.remove_attribute(element, name) ⇒ Object



230
231
232
# File 'lib/moxml/adapter/nokogiri.rb', line 230

# Removes an attribute from +element+ by name (converted with +to_s+).
#
# @param element [Object] native element
# @param name [#to_s] attribute name
# @return [Object] whatever the native +remove_attribute+ call returns
def remove_attribute(element, name)
  element.remove_attribute(name.to_s)
end

.replace(node, new_node) ⇒ Object



282
283
284
# File 'lib/moxml/adapter/nokogiri.rb', line 282

# Swaps +node+ for +new_node+ by delegating to the node's own +replace+.
#
# @param node [Object] native node to be replaced
# @param new_node [Object] native replacement node
# @return [Object] whatever the native +replace+ call returns
def replace(node, new_node)
  node.replace(new_node)
end

.replace_children(node, new_children) ⇒ Object



183
184
185
186
# File 'lib/moxml/adapter/nokogiri.rb', line 183

# Replaces all children of +node+: the current children are unlinked, then
# every entry of +new_children+ is attached through +add_child+, in order.
#
# @param node [Object] native parent node
# @param new_children [#each] native nodes to attach
# @return [Object] whatever +new_children.each+ returns
def replace_children(node, new_children)
  node.children.unlink

  new_children.each do |child|
    add_child(node, child)
  end
end

.root(document) ⇒ Object



204
205
206
# File 'lib/moxml/adapter/nokogiri.rb', line 204

# Returns the root of +document+.
#
# Objects that respond to +root+ are asked directly; anything else falls back
# to the first entry of its +children+.
#
# @param document [Object] native document or document-like object
# @return [Object, nil] the root node, or the first child as a fallback
def root(document)
  return document.root if document.respond_to?(:root)

  document.children.first
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for Nokogiri

Parameters:



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/moxml/adapter/nokogiri.rb', line 42

# Runs a Nokogiri SAX parse over +xml+, routing events to +handler+ through a
# NokogiriSAXBridge.
#
# Inputs that respond to +read+ are handed to the parser as-is; anything else
# is converted with +to_s+ first. A Nokogiri syntax error is not re-raised:
# it is wrapped in a Moxml::ParseError (message, line, column) and passed to
# +handler.on_error+.
#
# @param xml [#read, #to_s] XML source
# @param handler [Object] receiver of SAX events; must respond to +on_error+
# @return [Object] result of the parser's +parse+, or of +handler.on_error+
#   when a syntax error was reported
def sax_parse(xml, handler)
  # Bridge translates Nokogiri SAX callbacks into Moxml SAX callbacks
  bridge = NokogiriSAXBridge.new(handler)
  parser = ::Nokogiri::XML::SAX::Parser.new(bridge)

  input = xml.respond_to?(:read) ? xml : xml.to_s
  parser.parse(input)
rescue ::Nokogiri::XML::SyntaxError => e
  handler.on_error(
    Moxml::ParseError.new(e.message, line: e.line, column: e.column),
  )
end

.serialize(node, options = {}) ⇒ Object



370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# File 'lib/moxml/adapter/nokogiri.rb', line 370

# Serializes +node+ to XML through its +to_xml+, translating Moxml options
# into Nokogiri save flags.
#
# Flags always start from AS_XML, then:
# * +:expand_empty+ truthy     -> NO_EMPTY_TAGS is added;
# * +:indent+ positive integer -> FORMAT is added;
# * declaration handling, in priority order:
#   1. if the +:no_declaration+ key is present, its value decides;
#   2. otherwise, if the node has an +@xml_decl+ instance variable that is
#      nil (declaration was removed), NO_DECLARATION is added.
#
# @param node [Object] native node or document
# @param options [Hash] :expand_empty, :indent, :no_declaration, :encoding
# @return [Object] whatever +node.to_xml+ returns
def serialize(node, options = {})
  flags = ::Nokogiri::XML::Node::SaveOptions

  save_options = flags::AS_XML
  # Don't force expand empty elements if they're really empty
  save_options |= flags::NO_EMPTY_TAGS if options[:expand_empty]
  save_options |= flags::FORMAT if options[:indent].to_i.positive?

  suppress_declaration =
    if options.key?(:no_declaration)
      options[:no_declaration]
    elsif node.respond_to?(:instance_variable_get) &&
        node.instance_variable_defined?(:@xml_decl)
      # Nokogiri's internal state - if nil, declaration was removed
      node.instance_variable_get(:@xml_decl).nil?
    end
  save_options |= flags::NO_DECLARATION if suppress_declaration

  node.to_xml(
    indent: options[:indent],
    encoding: options[:encoding],
    save_with: save_options,
  )
end

.set_attribute(element, name, value) ⇒ Object



216
217
218
# File 'lib/moxml/adapter/nokogiri.rb', line 216

# Writes an attribute through the element's +[]=+ accessor. Both the name and
# the value are converted with +to_s+ before being stored.
#
# @param element [Object] native element supporting +[]=+
# @param name [#to_s] attribute name
# @param value [#to_s] attribute value
# @return [String] the stringified value that was assigned
def set_attribute(element, name, value)
  element[name.to_s] = value.to_s
end

.set_cdata_content(node, content) ⇒ Object



303
304
305
# File 'lib/moxml/adapter/nokogiri.rb', line 303

# Overwrites the content of a CDATA node through its +content=+ writer.
#
# @param node [Object] native CDATA node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_cdata_content(node, content)
  node.content = content
end

.set_comment_content(node, content) ⇒ Object



311
312
313
# File 'lib/moxml/adapter/nokogiri.rb', line 311

# Overwrites the content of a comment node through its +native_content=+
# writer.
#
# @param node [Object] native comment node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_comment_content(node, content)
  node.native_content = content
end

.set_declaration_attribute(declaration, attr_name, value) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
# File 'lib/moxml/adapter/nokogiri.rb', line 124

# Sets or removes one pseudo-attribute of an XML declaration and rewrites the
# declaration's native content accordingly.
#
# The current attributes come from +current_declaration_attributes+; a nil
# +value+ deletes the entry, any other value stores it. The content is then
# rebuilt as space-separated name="value" pairs.
#
# @param declaration [Object] native declaration node
# @param attr_name [Object] key of the pseudo-attribute
# @param value [Object, nil] new value, or nil to remove the attribute
# @return [String] the content string that was assigned
def set_declaration_attribute(declaration, attr_name, value)
  attrs = current_declaration_attributes(declaration)
  attrs.delete(attr_name) if value.nil?
  attrs[attr_name] = value unless value.nil?

  pairs = attrs.map { |key, val| %(#{key}="#{val}") }
  declaration.native_content = pairs.join(" ")
end

.set_namespace(element, ns) ⇒ Object



136
137
138
# File 'lib/moxml/adapter/nokogiri.rb', line 136

# Assigns +ns+ as the namespace of +element+ through its +namespace=+ writer.
#
# @param element [Object] native element
# @param ns [Object, nil] native namespace to assign
# @return [Object, nil] the assigned +ns+
def set_namespace(element, ns)
  element.namespace = ns
end

.set_node_name(node, name) ⇒ Object



172
173
174
# File 'lib/moxml/adapter/nokogiri.rb', line 172

# Renames +node+ through its +name=+ writer.
#
# @param node [Object] native node
# @param name [Object] new name
# @return [Object] the assigned +name+
def set_node_name(node, name)
  node.name = name
end

.set_processing_instruction_content(node, content) ⇒ Object



319
320
321
# File 'lib/moxml/adapter/nokogiri.rb', line 319

# Overwrites the content of a processing instruction through its
# +native_content=+ writer.
#
# @param node [Object] native processing instruction node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_processing_instruction_content(node, content)
  node.native_content = content
end

.set_root(doc, element) ⇒ Object



10
11
12
# File 'lib/moxml/adapter/nokogiri.rb', line 10

# Makes +element+ the root of +doc+ through the document's +root=+ writer.
#
# @param doc [Object] native document
# @param element [Object] native element to install as root
# @return [Object] the assigned +element+
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



295
296
297
# File 'lib/moxml/adapter/nokogiri.rb', line 295

# Overwrites the content of a text node through its +native_content=+ writer.
#
# @param node [Object] native text node
# @param content [Object] new content
# @return [Object] the assigned +content+
def set_text_content(node, content)
  node.native_content = content
end

.text_content(node) ⇒ Object



286
287
288
# File 'lib/moxml/adapter/nokogiri.rb', line 286

# Reads the text of +node+ via its +text+ method.
#
# @param node [Object] native node
# @return [Object] whatever +node.text+ returns
def text_content(node)
  node.text
end

.xpath(node, expression, namespaces = nil) ⇒ Object



348
349
350
351
352
353
354
355
356
357
# File 'lib/moxml/adapter/nokogiri.rb', line 348

# Evaluates +expression+ against +node+ via the node's +xpath+ and returns the
# matches as a plain array.
#
# @param node [Object] native node to query
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] namespace bindings forwarded unchanged
# @return [Array] the native result converted with +to_a+
# @raise [Moxml::XPathError] when Nokogiri reports an XPath syntax error; the
#   error carries the original message plus expression, adapter and node
def xpath(node, expression, namespaces = nil)
  matches = node.xpath(expression, namespaces)
  matches.to_a
rescue ::Nokogiri::XML::XPath::SyntaxError => syntax_error
  details = { expression: expression, adapter: "Nokogiri", node: node }
  raise Moxml::XPathError.new(syntax_error.message, **details)
end