Class: Moxml::Adapter::Libxml

Inherits:
Base
  • Object
show all
Defined in:
lib/moxml/adapter/libxml.rb

Defined Under Namespace

Classes: DoctypeWrapper, LibXMLSAXBridge

Constant Summary

Constants inherited from Base

Base::ENTITY_MARKER, Base::ENTITY_MARKER_RE, Base::ENTITY_NAME_PATTERN, Base::ENTITY_NAME_RE, Base::SERIALIZED_ENTITY_MARKER_RE, Base::STANDARD_ENTITIES

Class Method Summary collapse

Methods inherited from Base

create_cdata, create_comment, create_declaration, create_doctype, create_element, create_entity_reference, create_namespace, create_processing_instruction, create_text, in_scope_namespaces, preprocess_entities, restore_entities, sax_supported?, set_attribute_value

Methods included from XmlUtils

#encode_entities, #normalize_xml_value, #validate_comment_content, #validate_declaration_encoding, #validate_declaration_standalone, #validate_declaration_version, #validate_element_name, #validate_entity_reference_name, #validate_pi_target, #validate_prefix, #validate_uri

Class Method Details

.actual_native(child_native, parent_native) ⇒ Object

LibXML’s doc.root= creates a new Ruby wrapper with different object_id. Return the actual root node so attachments are stored on the correct object.



1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
# File 'lib/moxml/adapter/libxml.rb', line 1231

# Picks the native node that should be used after +child_native+ was attached
# to +parent_native+.
#
# When the parent is a LibXML document that carries a
# +:_pending_root_refresh+ marker equal to the child's +object_id+, the marker
# is deleted and the document's current root is returned. In every other case
# the child is handed back unchanged.
#
# @param child_native [Object] native node that was attached
# @param parent_native [Object] native parent it was attached to
# @return [Object] +parent_native.root+ or +child_native+
def actual_native(child_native, parent_native)
  return child_native unless parent_native.is_a?(::LibXML::XML::Document)

  marker = attachments.get(parent_native, :_pending_root_refresh)
  return child_native unless marker && marker == child_native.object_id

  attachments.delete(parent_native, :_pending_root_refresh)
  parent_native.root
end

.add_child(element, child) ⇒ Object



502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
# File 'lib/moxml/adapter/libxml.rb', line 502

# Appends +child+ to +element+, routing node kinds that cannot live in the
# native LibXML tree into the attachment store instead.
#
# Summary of the branches below:
# * EntityReference wrappers are recorded on the owning document and the
#   child order is tracked with an +:eref+ marker.
# * When the parent is a document: Declaration, DoctypeWrapper,
#   ProcessingInstruction and Text wrappers are stored as attachments; the
#   first element becomes the root; anything else is added under the
#   existing root.
# * For any other parent the child is imported/added natively and a
#   +:native+ marker is appended to the tracked child order.
#
# @param element [Object] parent (wrapper or native node/document)
# @param child [Object] node to append (wrapper or native)
# @return [Object, nil] nil on the early-return paths; otherwise the value of
#   the last expression evaluated in the taken branch
def add_child(element, child)
  # Nothing to do when either side is missing
  return unless element && child

  # Unwrap both element and child
  native_elem = unpatch_node(element)
  native_child = unpatch_node(child)

  # EntityReference wrappers can't go in LibXML's native tree.
  # Store on the document (stable identity) keyed by element.
  # LibXML creates new Ruby wrappers on each access, so element
  # object_id is unstable — we look up via == comparison.
  if child.is_a?(CustomizedLibxml::EntityReference)
    doc = native_elem.is_a?(::LibXML::XML::Document) ? native_elem : native_elem.doc
    store_entity_ref_on_doc(doc, native_elem, child)
    append_child_sequence_on_doc(doc, native_elem, :eref)
    return
  end

  # For LibXML: if parent has a DEFAULT namespace (nil/empty prefix) and child is an element without a namespace,
  # explicitly set the child's namespace to match the parent's for XPath compatibility
  # NOTE: Prefixed namespaces are NOT inherited, only default namespaces
  if native_elem.is_a?(::LibXML::XML::Node) && native_elem.namespaces&.namespace &&
      native_child.is_a?(::LibXML::XML::Node) && native_child.element? &&
      (!native_child.namespaces.namespace || native_child.namespaces.namespace.href.to_s.empty?)

    parent_ns = native_elem.namespaces.namespace
    # Only set child's namespace if parent's namespace is DEFAULT (nil or empty prefix)
    if parent_ns.prefix.nil? || parent_ns.prefix.to_s.empty?
      native_child.namespaces.namespace = parent_ns
    end
  end

  if native_elem.is_a?(::LibXML::XML::Document)
    # For Declaration wrappers, store them for serialization
    if child.is_a?(CustomizedLibxml::Declaration)
      attachments.set(native_elem, :declaration, child)
      # Also store reference to parent document in the declaration
      child.parent_doc = native_elem
      return
    end

    # For DOCTYPE wrappers, store them for serialization
    if child.is_a?(DoctypeWrapper)
      attachments.set(native_elem, :doctype, child)
      return
    end

    # For document-level PIs, store them for serialization
    # (appended to the list kept under :pis, so insertion order is retained)
    if child.is_a?(CustomizedLibxml::ProcessingInstruction)
      pis = attachments.get(native_elem, :pis) || []
      pis << child
      attachments.set(native_elem, :pis, pis)
      return
    end

    # For text nodes added to document, store them for serialization
    # Documents can't have text children in LibXML
    if child.is_a?(CustomizedLibxml::Text)
      texts = attachments.get(native_elem, :texts) || []
      texts << child
      attachments.set(native_elem, :texts, texts)
      return
    end

    # For documents, check if adding the first root element
    # NOTE(review): when the document has no root and the child is not an
    # element, neither branch below runs and the child is not attached —
    # confirm this is intended.
    if native_elem.root.nil? && node_type(native_child) == :element
      # Set as root element
      native_elem.root = native_child
      # Flag for actual_native to refresh the wrapper's native reference
      attachments.set(native_elem, :_pending_root_refresh, native_child.object_id)
    elsif native_elem.root
      # Document has root, add to it instead
      import_and_add(native_elem.doc, native_elem.root, native_child)
    end
  else
    # Regular (non-document) parent: attach natively, then record the order
    import_and_add(native_elem.doc, native_elem, native_child)
    doc = native_elem.doc || native_elem
    append_child_sequence_on_doc(doc, native_elem, :native)
  end
end

.add_newlines_to_xml(xml_string) ⇒ Object



1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
# File 'lib/moxml/adapter/libxml.rb', line 1002

# Inserts a newline after every tag that is directly followed by another
# opening tag, so the result can be indented line by line.
#
# A tag followed by a closing tag (e.g. +<tag></tag>+) is left alone, and
# CDATA sections are never modified: they are swapped for placeholders
# before the rewrite and restored verbatim afterwards. An unterminated
# CDATA section is copied through as-is.
#
# @param xml_string [String] serialized XML
# @return [String] a new string with newlines inserted between elements
def add_newlines_to_xml(xml_string)
  # Protect CDATA sections with placeholders. Plain index scanning keeps
  # this linear with no regex backtracking.
  cdata_sections = []
  protected_xml = +""
  pos = 0

  loop do
    cdata_start = xml_string.index("<![CDATA[", pos)
    unless cdata_start
      # No more CDATA sections - copy the rest
      protected_xml << xml_string[pos..]
      break
    end

    # Copy everything before the CDATA section
    protected_xml << xml_string[pos...cdata_start]

    # 9 == length of "<![CDATA["
    cdata_end = xml_string.index("]]>", cdata_start + 9)
    unless cdata_end
      # Malformed CDATA (no closing "]]>") - copy as-is
      protected_xml << xml_string[cdata_start..]
      break
    end

    section_end = cdata_end + 3 # include "]]>"
    cdata_sections << xml_string[cdata_start...section_end]
    protected_xml << "__CDATA_PLACEHOLDER_#{cdata_sections.length - 1}__"
    pos = section_end
  end

  # Add newlines between elements (CDATA is already protected)
  with_newlines = protected_xml.gsub(%r{(<[^>]+)>(?=<(?!/))}, "\\1>\n")

  # Restore CDATA sections. The block form is required: with a replacement
  # *string*, sub! would interpret backslash sequences in the CDATA content
  # (\\, \0, \1, ...) and corrupt it.
  cdata_sections.each_with_index do |cdata, index|
    with_newlines.sub!("__CDATA_PLACEHOLDER_#{index}__") { cdata }
  end

  with_newlines
end

.add_next_sibling(node, sibling) ⇒ Object



650
651
652
653
654
655
656
# File 'lib/moxml/adapter/libxml.rb', line 650

# Places +sibling+ directly after +node+ by assigning the unwrapped sibling
# to the unwrapped node's +next+.
#
# @param node [Object] reference node (wrapper or native)
# @param sibling [Object] node to insert (wrapper or native)
# @return [Object, nil] the unwrapped sibling, or nil when either argument
#   is missing
def add_next_sibling(node, sibling)
  return if !node || !sibling

  unpatch_node(node).next = unpatch_node(sibling)
end

.add_previous_sibling(node, sibling) ⇒ Object



630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
# File 'lib/moxml/adapter/libxml.rb', line 630

# Places +sibling+ directly before +node+.
#
# A ProcessingInstruction wrapper whose reference node is a native node
# belonging to a document is not inserted into the tree; it is appended to
# the document's +:pis+ attachment list instead.
#
# @param node [Object] reference node (wrapper or native)
# @param sibling [Object] node to insert (wrapper or native)
# @return [Object, nil] the unwrapped sibling after a native insert; nil when
#   an argument is missing or the PI was stored on the document
def add_previous_sibling(node, sibling)
  return if !node || !sibling

  target = unpatch_node(node)
  incoming = unpatch_node(sibling)

  document_level_pi = sibling.is_a?(CustomizedLibxml::ProcessingInstruction) &&
    target.is_a?(::LibXML::XML::Node) && target.doc

  if document_level_pi
    # Keep document-level PIs on the document for serialization
    owner = target.doc
    stored = attachments.get(owner, :pis) || []
    stored << sibling
    attachments.set(owner, :pis, stored)
    nil
  else
    target.prev = incoming
  end
end

.append_child_sequence(element, type) ⇒ Object



624
625
626
627
628
# File 'lib/moxml/adapter/libxml.rb', line 624

# Appends +type+ to the +:child_sequence+ list attached to +element+,
# creating the list on first use.
#
# @param element [Object] object the sequence is attached to
# @param type [Object] marker to record
# @return [Object] whatever +attachments.set+ returns
def append_child_sequence(element, type)
  store = attachments
  sequence = store.get(element, :child_sequence) || []
  store.set(element, :child_sequence, sequence.push(type))
end

.append_child_sequence_on_doc(doc, element, type) ⇒ Object

Track child order on the document (stable identity)



605
606
607
608
609
610
611
612
613
614
# File 'lib/moxml/adapter/libxml.rb', line 605

# Records +type+ in the child-order list kept for +element+ on +doc+.
#
# The lists live under the document's +:_child_seq_pairs+ attachment as
# +[element, [types...]]+ pairs; the pair for +element+ is located with +==+
# and created when none exists yet.
#
# @param doc [Object] document that owns the bookkeeping
# @param element [Object] element whose child order is tracked
# @param type [Object] marker to append
# @return [Object] whatever +attachments.set+ returns
def append_child_sequence_on_doc(doc, element, type)
  pairs = attachments.get(doc, :_child_seq_pairs) || []
  entry = pairs.find { |(owner, _types)| owner == element }

  if entry.nil?
    pairs << [element, [type]]
  else
    entry[1] << type
  end

  attachments.set(doc, :_child_seq_pairs, pairs)
end

.at_xpath(node, expression, namespaces = nil) ⇒ Object



902
903
904
905
# File 'lib/moxml/adapter/libxml.rb', line 902

# Evaluates +expression+ through +xpath+ and returns only the first match.
#
# @param node [Object] context node
# @param expression [String] XPath expression
# @param namespaces [Object, nil] namespace bindings forwarded to +xpath+
# @return [Object, nil] first result, or nil when +xpath+ returns nil or
#   nothing matched
def at_xpath(node, expression, namespaces = nil)
  xpath(node, expression, namespaces)&.first
end

.attachments ⇒ Object



41
42
43
# File 'lib/moxml/adapter/libxml.rb', line 41

# Lazily created store used to attach extra data to native objects.
#
# @return [Moxml::NativeAttachment] the same instance on every call
def attachments
  return @attachments if @attachments

  @attachments = Moxml::NativeAttachment.new
end

.attribute_element(attr) ⇒ Object



373
374
375
# File 'lib/moxml/adapter/libxml.rb', line 373

# Returns the parent of an attribute.
#
# @param attr [Object, nil] attribute responding to +parent+
# @return [Object, nil] +attr.parent+, or nil when +attr+ is nil
def attribute_element(attr)
  attr.nil? ? nil : attr.parent
end

.attribute_namespace(attr) ⇒ Object



377
378
379
380
381
382
# File 'lib/moxml/adapter/libxml.rb', line 377

# Returns the namespace of a native LibXML attribute.
#
# @param attr [Object, nil] candidate attribute
# @return [Object, nil] +attr.ns+ for a +::LibXML::XML::Attr+; nil for
#   anything else
def attribute_namespace(attr)
  return nil unless attr

  attr.is_a?(::LibXML::XML::Attr) ? attr.ns : nil
end

.attributes(element) ⇒ Object



358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/moxml/adapter/libxml.rb', line 358

# Lists the attributes of an element, leaving out anything whose name starts
# with "xmlns".
#
# @param element [Object] element (wrapper or native)
# @return [Array] attributes in +each_attr+ order; empty when the unwrapped
#   value is missing, is not a native element node, or has no attributes
def attributes(element)
  native = unpatch_node(element)
  is_element = native && native.is_a?(::LibXML::XML::Node) && native.element?
  return [] unless is_element && native.attributes?

  collected = []
  native.each_attr do |candidate|
    next if candidate.name.to_s.start_with?("xmlns")

    collected << candidate
  end
  collected
end

.cdata_content(node) ⇒ Object



751
752
753
754
755
756
757
758
759
760
761
762
# File 'lib/moxml/adapter/libxml.rb', line 751

# Reads the content of a CDATA node and reverses the five predefined XML
# entity escapes, in case LibXML returned the text escaped.
#
# @param node [Object, nil] CDATA node (wrapper or native)
# @return [String, nil] unescaped content, or nil when there is no content
def cdata_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  # "&amp;" is handled last so an escaped ampersand is not unescaped twice
  [["&quot;", '"'], ["&apos;", "'"], ["&lt;", "<"], ["&gt;", ">"], ["&amp;", "&"]]
    .reduce(raw) { |text, (entity, char)| text.gsub(entity, char) }
end

.children(node) ⇒ Object



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/moxml/adapter/libxml.rb', line 276

# Lists the children of +node+.
#
# * For a document: the attached doctype wrapper (if any) followed by the
#   wrapped root element (if any).
# * For any other node: the wrapped native children, skipping text nodes
#   that contain only whitespace, followed by the EntityReference wrappers
#   recorded for this node on its document.
#
# @param node [Object] node or document (wrapper or native)
# @return [Array] children; empty when the unwrapped value is missing
def children(node)
  native = unpatch_node(node)
  return [] unless native

  # Documents are handled separately - they have no children? method
  if native.is_a?(::LibXML::XML::Document)
    doc_children = []
    doctype = attachments.get(native, :doctype)
    doc_children << doctype if doctype
    doc_children << patch_node(native.root) if native.root
    return doc_children
  end

  kids = []
  if native.children?
    native.each_child do |child|
      blank_text = child.text? && child.content.to_s.strip.empty?
      kids << patch_node(child) unless blank_text
    end
  end

  # Entity references are kept on the document, not in the native tree
  owner = native.doc
  refs = lookup_entity_refs(owner, native) if owner
  kids.concat(refs) if refs

  kids
end

.comment_content(node) ⇒ Object



770
771
772
773
# File 'lib/moxml/adapter/libxml.rb', line 770

# Returns the text of a comment node.
#
# @param node [Object, nil] comment node (wrapper or native)
# @return [String, nil] the native node's content, or nil when unwrapping
#   yields nil
def comment_content(node)
  unpatch_node(node)&.content
end

.create_document(_native_doc = nil) ⇒ Object



132
133
134
# File 'lib/moxml/adapter/libxml.rb', line 132

# Builds a new, empty native LibXML document.
#
# @param _native_doc [Object, nil] accepted but not used by this method
# @return [::LibXML::XML::Document]
def create_document(_native_doc = nil)
  document_class = ::LibXML::XML::Document
  document_class.new
end

.create_native_cdata(content, _owner_doc = nil) ⇒ Object



153
154
155
156
# File 'lib/moxml/adapter/libxml.rb', line 153

# Creates a native CDATA node from +content+ and wraps it.
#
# @param content [#to_s] CDATA text
# @param _owner_doc [Object, nil] accepted but not used by this method
# @return [CustomizedLibxml::Cdata] wrapper around the new native node
def create_native_cdata(content, _owner_doc = nil)
  CustomizedLibxml::Cdata.new(::LibXML::XML::Node.new_cdata(content.to_s))
end

.create_native_comment(content, _owner_doc = nil) ⇒ Object



158
159
160
161
# File 'lib/moxml/adapter/libxml.rb', line 158

# Creates a native comment node from +content+ and wraps it.
#
# @param content [#to_s] comment text
# @param _owner_doc [Object, nil] accepted but not used by this method
# @return [CustomizedLibxml::Comment] wrapper around the new native node
def create_native_comment(content, _owner_doc = nil)
  CustomizedLibxml::Comment.new(::LibXML::XML::Node.new_comment(content.to_s))
end

.create_native_declaration(version, encoding, standalone) ⇒ Object



168
169
170
171
172
# File 'lib/moxml/adapter/libxml.rb', line 168

# Builds a Declaration wrapper holding the given values explicitly, backed
# by a freshly created document.
#
# @param version [Object] XML version value
# @param encoding [Object] encoding value
# @param standalone [Object] standalone value
# @return [CustomizedLibxml::Declaration]
def create_native_declaration(version, encoding, standalone)
  backing_doc = create_document
  CustomizedLibxml::Declaration.new(backing_doc, version, encoding, standalone)
end

.create_native_doctype(name, external_id, system_id) ⇒ Object



174
175
176
177
178
179
# File 'lib/moxml/adapter/libxml.rb', line 174

# Builds a DoctypeWrapper that simply stores the given values (stringified)
# next to a freshly created document, instead of constructing a native
# LibXML DTD object.
#
# @param name [#to_s] doctype name
# @param external_id [#to_s, nil] external identifier; nil stays nil
# @param system_id [#to_s, nil] system identifier; nil stays nil
# @return [DoctypeWrapper]
def create_native_doctype(name, external_id, system_id)
  external_text = external_id&.to_s
  system_text = system_id&.to_s
  DoctypeWrapper.new(create_document, name.to_s, external_text, system_text)
end

.create_native_element(name, _owner_doc = nil) ⇒ Object



136
137
138
# File 'lib/moxml/adapter/libxml.rb', line 136

# Creates a bare native element node.
#
# @param name [#to_s] element name
# @param _owner_doc [Object, nil] accepted but not used by this method
# @return [::LibXML::XML::Node]
def create_native_element(name, _owner_doc = nil)
  element_name = name.to_s
  ::LibXML::XML::Node.new(element_name)
end

.create_native_entity_reference(name) ⇒ Object



145
146
147
# File 'lib/moxml/adapter/libxml.rb', line 145

# Creates an EntityReference wrapper for +name+.
#
# @param name [Object] entity name, passed through unchanged
# @return [CustomizedLibxml::EntityReference]
def create_native_entity_reference(name)
  reference_class = CustomizedLibxml::EntityReference
  reference_class.new(name)
end

.create_native_namespace(element, prefix, uri) ⇒ Object



804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
# File 'lib/moxml/adapter/libxml.rb', line 804

# Declares a namespace on +element+.
#
# An empty or nil +prefix+ declares a default namespace (nil prefix on the
# native object) and additionally makes it the element's own namespace.
#
# @param element [Object] element (wrapper or native)
# @param prefix [#to_s, nil] namespace prefix
# @param uri [#to_s] namespace URI
# @return [::LibXML::XML::Namespace, nil] the new namespace, or nil when the
#   element unwraps to nothing
def create_native_namespace(element, prefix, uri)
  native = unpatch_node(element)
  return nil unless native

  prefix_text = prefix.to_s
  is_default = prefix_text.empty?

  created = ::LibXML::XML::Namespace.new(native, is_default ? nil : prefix_text, uri.to_s)
  native.namespaces.namespace = created if is_default
  created
end

.create_native_processing_instruction(target, content) ⇒ Object



163
164
165
166
# File 'lib/moxml/adapter/libxml.rb', line 163

# Creates a native processing-instruction node and wraps it.
#
# @param target [#to_s] PI target
# @param content [#to_s] PI content
# @return [CustomizedLibxml::ProcessingInstruction]
def create_native_processing_instruction(target, content)
  CustomizedLibxml::ProcessingInstruction.new(
    ::LibXML::XML::Node.new_pi(target.to_s, content.to_s),
  )
end

.create_native_text(content, _owner_doc = nil) ⇒ Object



140
141
142
143
# File 'lib/moxml/adapter/libxml.rb', line 140

# Creates a native text node from +content+ and wraps it.
#
# @param content [#to_s] text
# @param _owner_doc [Object, nil] accepted but not used by this method
# @return [CustomizedLibxml::Text] wrapper around the new native node
def create_native_text(content, _owner_doc = nil)
  CustomizedLibxml::Text.new(::LibXML::XML::Node.new_text(content.to_s))
end

.declaration_attribute(node, name) ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/moxml/adapter/libxml.rb', line 230

# Reads one attribute of an XML declaration.
#
# Declaration wrappers answer directly from their stored values. Any other
# node is treated as a native document: its encoding is converted with
# +encoding_to_string+ and its standalone flag is reported as "yes" or nil.
#
# @param node [Object, nil] Declaration wrapper or native document
# @param name [String] "version", "encoding" or "standalone"
# @return [Object, nil] the attribute value; nil for a missing node or an
#   unrecognised name
def declaration_attribute(node, name)
  return nil unless node

  from_wrapper = node.is_a?(CustomizedLibxml::Declaration)

  case name
  when "version"
    node.version
  when "encoding"
    encoding = node.encoding
    if from_wrapper
      encoding
    else
      encoding ? encoding_to_string(encoding) : nil
    end
  when "standalone"
    if from_wrapper
      node.standalone # "yes", "no", or nil
    elsif node.standalone?
      "yes"
    end
  end
end

.doctype_external_id(native) ⇒ Object



869
870
871
# File 'lib/moxml/adapter/libxml.rb', line 869

# Reads the external identifier of a doctype.
#
# @param native [#external_id] doctype object
# @return [Object] whatever +native.external_id+ returns
def doctype_external_id(native)
  identifier = native.external_id
  identifier
end

.doctype_name(native) ⇒ Object

Doctype accessor methods



864
865
866
867
# File 'lib/moxml/adapter/libxml.rb', line 864

# Reads the name of a doctype.
#
# @param native [#name] doctype object (the LibXML adapter uses
#   DoctypeWrapper, which stores the values)
# @return [Object] whatever +native.name+ returns
def doctype_name(native)
  stored_name = native.name
  stored_name
end

.doctype_system_id(native) ⇒ Object



873
874
875
# File 'lib/moxml/adapter/libxml.rb', line 873

# Reads the system identifier of a doctype.
#
# @param native [#system_id] doctype object
# @return [Object] whatever +native.system_id+ returns
def doctype_system_id(native)
  identifier = native.system_id
  identifier
end

.document(node) ⇒ Object



342
343
344
345
346
347
348
349
350
351
# File 'lib/moxml/adapter/libxml.rb', line 342

# Finds the document a node belongs to.
#
# @param node [Object, nil] node or document (wrapper or native)
# @return [Object, nil] the document itself when given one, otherwise the
#   node's +doc+; nil when the node unwraps to nothing
def document(node)
  native = unpatch_node(node)
  return nil unless native

  native.is_a?(::LibXML::XML::Document) ? native : native.doc
end

.duplicate_node(node) ⇒ Object



1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
# File 'lib/moxml/adapter/libxml.rb', line 1088

# Builds a document-independent copy of +node+.
#
# The copy is assembled from brand-new native LibXML nodes (or a new
# DoctypeWrapper), chosen by +node_type(node)+:
# * +:doctype+ - a new DoctypeWrapper with the same name and ids, backed by a
#   fresh document (a non-wrapper doctype is returned unchanged)
# * +:element+ - a new node with the same name, namespaces, attributes and a
#   recursive copy of its children; whitespace-only text children are dropped
# * +:text+, +:cdata+, +:comment+, +:processing_instruction+ - a new native
#   node carrying the same content
# * anything else - +native_node.dup+
#
# @param node [Object, nil] node to copy (wrapper or native)
# @return [Object, nil] the copy (native node or DoctypeWrapper); nil when
#   +node+ is missing
def duplicate_node(node)
  return nil unless node

  # Unwrap if wrapped
  native_node = unpatch_node(node)

  # LibXML is strict about document ownership
  # Create brand new NATIVE nodes that are document-independent
  # Wrappers are only used via patch_node when reading children
  case node_type(node)
  when :doctype
    # DoctypeWrapper - create a new one with same properties
    if node.is_a?(DoctypeWrapper)
      DoctypeWrapper.new(
        create_document,
        node.name,
        node.external_id,
        node.system_id,
      )
    else
      # Should not happen, but handle gracefully
      node
    end
  when :element
    new_node = ::LibXML::XML::Node.new(native_node.name)
    # new_node.line = node.line

    # Copy and set namespace definitions FIRST
    if native_node.is_a?(::LibXML::XML::Node)
      # First, copy all namespace definitions
      # (each Namespace.new call declares the namespace on new_node)
      native_node.namespaces.each do |ns|
        ::LibXML::XML::Namespace.new(
          new_node,
          ns.prefix,
          ns.href,
        )
      end

      # Then, set this element's own namespace if it has one
      if native_node.namespaces.namespace
        orig_ns = native_node.namespaces.namespace
        # Find the matching namespace we just created
        # (matched on prefix and href; first match wins)
        new_node.namespaces.each do |ns|
          if ns.prefix == orig_ns.prefix && ns.href == orig_ns.href
            new_node.namespaces.namespace = ns
            break
          end
        end
      end
    end

    # Copy attributes AFTER namespaces are set up
    # LibXML handles namespaced attributes through their full names
    if native_node.attributes?
      native_node.each_attr do |attr|
        # Get the full attribute name (may include namespace prefix)
        attr_name = if attr.ns&.prefix
                      "#{attr.ns.prefix}:#{attr.name}"
                    else
                      attr.name
                    end
        new_node[attr_name] = attr.value
      end
    end

    # Recursively copy children
    if native_node.children?
      native_node.each_child do |child|
        # Skip whitespace-only text nodes
        next if child.text? && child.content.to_s.strip.empty?

        # Recursively duplicate the child
        child_copy = duplicate_node(child)
        new_node << child_copy
      end
    end

    new_node
  when :text
    ::LibXML::XML::Node.new_text(native_node.content)
  when :cdata
    ::LibXML::XML::Node.new_cdata(native_node.content)
  when :comment
    ::LibXML::XML::Node.new_comment(native_node.content)
  when :processing_instruction
    ::LibXML::XML::Node.new_pi(native_node.name, native_node.content)
  else
    # For other types, try dup as fallback
    native_node.dup
  end
end

.entity_reference_name(node) ⇒ Object



149
150
151
# File 'lib/moxml/adapter/libxml.rb', line 149

# Returns the entity name carried by an EntityReference wrapper.
#
# @param node [Object] candidate node
# @return [Object, nil] +node.name+ for an EntityReference wrapper; nil for
#   anything else
def entity_reference_name(node)
  node.is_a?(CustomizedLibxml::EntityReference) ? node.name : nil
end

.get_attribute(element, name) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'lib/moxml/adapter/libxml.rb', line 422

# Looks up an attribute by name and equips the returned object with a
# +to_xml+ method that serializes it as +name="value"+.
#
# In the serialized form +&+, +<+, +>+ and +"+ in the value are escaped and
# the value is wrapped in double quotes. The serialized name is the +name+
# argument of this call (captured by the block), not +attr.name+.
#
# @param element [Object] element (wrapper or native)
# @param name [#to_s] attribute name to look up
# @return [Object, nil] the native attribute with +to_xml+ defined on it; nil
#   when the element is missing, has no attributes, or has no such attribute
def get_attribute(element, name)
  native_elem = unpatch_node(element)
  return nil unless native_elem && native_elem.attributes?

  attr = native_elem.attributes.get_attribute(name.to_s)
  return nil unless attr

  # Extend the attribute with to_xml method for proper escaping.
  # `value` resolves to the attribute's own method; `name` is the local
  # captured from this method's arguments.
  attr.define_singleton_method(:to_xml) do
    escaped = value.to_s
      .gsub("&", "&amp;")
      .gsub("<", "&lt;")
      .gsub(">", "&gt;")
      .gsub("\"", "&quot;")
    # The value must be quoted: escaping '"' only makes sense inside a
    # double-quoted attribute value.
    "#{name}=\"#{escaped}\""
  end
  attr
end

.get_attribute_value(element, name) ⇒ Object



442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/moxml/adapter/libxml.rb', line 442

# Reads the value of an attribute by (possibly prefixed) name.
#
# A direct +[]+ lookup is tried first. When that yields nothing and the name
# contains ":", the attributes are scanned for one whose local name matches
# and whose namespace prefix equals the requested prefix, or whose name
# equals the full requested name.
#
# @param element [Object] element (wrapper or native)
# @param name [#to_s] attribute name, optionally "prefix:local"
# @return [Object, nil] the attribute value, or nil when not found
def get_attribute_value(element, name)
  native = unpatch_node(element)
  return nil unless native

  key = name.to_s
  direct = native[key]
  return direct if direct

  # Only prefixed names get the namespace-aware fallback scan
  return nil unless key.include?(":") && native.attributes?

  prefix, local = key.split(":", 2)
  native.each_attr do |attr|
    full_match = attr.name == key
    next unless attr.name == local || full_match

    prefix_match = attr.ns && attr.ns.prefix == prefix
    return attr.value if prefix_match || full_match
  end

  nil
end

.has_declaration?(native_doc, wrapper) ⇒ Boolean

Returns:

  • (Boolean)


1220
1221
1222
1223
1224
1225
1226
1227
# File 'lib/moxml/adapter/libxml.rb', line 1220

# Tells whether the document should be treated as having an XML declaration.
#
# A Declaration attached to the native document decides (present unless
# flagged as removed); without one, the wrapper's own
# +has_xml_declaration+ value is used.
#
# @param native_doc [Object] native document
# @param wrapper [#has_xml_declaration] document wrapper
# @return [Boolean, Object] negated +removed+ flag, or the wrapper's value
def has_declaration?(native_doc, wrapper)
  declaration = attachments.get(native_doc, :declaration)
  return wrapper.has_xml_declaration unless declaration

  !declaration.removed
end

.indent_xml(xml_string, indent_size) ⇒ Object



1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
# File 'lib/moxml/adapter/libxml.rb', line 1061

# Indents XML that already has one tag (or one simple element) per line.
#
# Each non-blank line is stripped and prefixed with +indent_size+ spaces per
# nesting level. A line starting with "</" lowers the level before it is
# emitted (never below zero). A line raises the level afterwards when it
# starts with "<" and is not a closing tag, self-closing tag, "<?" / "<!"
# construct, and contains no "</".
#
# @param xml_string [String] XML with line breaks between tags
# @param indent_size [Integer] spaces per nesting level
# @return [String] indented lines joined with "\n" (no trailing newline)
def indent_xml(xml_string, indent_size)
  depth = 0

  rendered = xml_string.each_line.map(&:strip).reject(&:empty?).map do |text|
    closing = text.start_with?("</")
    depth = [depth - 1, 0].max if closing

    padded = (" " * (indent_size * depth)) + text

    opens_scope = text.start_with?("<") && !closing &&
      !text.end_with?("/>") && !text.start_with?("<?", "<!") &&
      !text.include?("</")
    depth += 1 if opens_scope

    padded
  end

  rendered.join("\n")
end

.inner_text(node) ⇒ Object



734
735
736
737
738
739
740
741
742
743
744
# File 'lib/moxml/adapter/libxml.rb', line 734

# Concatenates the content of the direct text children of +node+.
# Non-text children are skipped and nested elements are not descended into.
#
# @param node [Object, nil] node (wrapper or native)
# @return [String] joined text; "" when the node is missing or childless
def inner_text(node)
  native = unpatch_node(node)
  return "" unless native && native.children?

  pieces = []
  native.each_child { |child| pieces.push(child.content) if child.text? }
  pieces.join
end

.lookup_child_sequence(doc, element) ⇒ Object

Look up child sequence for an element from the document



617
618
619
620
621
622
# File 'lib/moxml/adapter/libxml.rb', line 617

# Fetches the child-order list recorded for +element+ on +doc+.
#
# @param doc [Object] document holding the +:_child_seq_pairs+ attachment
# @param element [Object] element to look up (matched with +==+)
# @return [Object, nil] last item of the matching pair, or nil when nothing
#   is recorded
def lookup_child_sequence(doc, element)
  pairs = attachments.get(doc, :_child_seq_pairs)
  return nil unless pairs

  match = pairs.find { |(owner, _sequence)| owner == element }
  match.nil? ? nil : match.last
end

.lookup_entity_refs(doc, element) ⇒ Object

Look up entity refs for an element from the document



597
598
599
600
601
602
# File 'lib/moxml/adapter/libxml.rb', line 597

# Fetches the entity references recorded for +element+ on +doc+.
#
# @param doc [Object] document holding the +:_entity_ref_pairs+ attachment
# @param element [Object] element to look up (matched with +==+)
# @return [Object, nil] last item of the matching pair, or nil when nothing
#   is recorded
def lookup_entity_refs(doc, element)
  pairs = attachments.get(doc, :_entity_ref_pairs)
  return nil unless pairs

  match = pairs.find { |(owner, _refs)| owner == element }
  match.nil? ? nil : match.last
end

.namespace(element) ⇒ Object



827
828
829
830
831
832
833
834
835
# File 'lib/moxml/adapter/libxml.rb', line 827

# Returns the element's own namespace only; nothing is looked up on
# ancestors here.
#
# @param element [Object, nil] element (wrapper or native)
# @return [Object, nil] the namespace, or nil when the element is missing or
#   its +namespaces+ is nil
def namespace(element)
  native = unpatch_node(element)
  return nil unless native

  scope = native.namespaces
  scope.nil? ? nil : scope.namespace
end

.namespace_definitions(node) ⇒ Object



845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
# File 'lib/moxml/adapter/libxml.rb', line 845

# Lists the namespaces declared on a native node.
#
# Uses +namespaces.definitions+ when that method is available and falls back
# to enumerating +namespaces+ itself otherwise.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Array] namespaces; empty when the node is missing, is not a
#   +::LibXML::XML::Node+, or has no +namespaces+ object
def namespace_definitions(node)
  native = unpatch_node(node)
  return [] unless native && native.is_a?(::LibXML::XML::Node)

  scope = native.namespaces
  return [] unless scope

  source = scope.respond_to?(:definitions) ? scope.definitions : scope
  source.to_a
end

.namespace_prefix(namespace) ⇒ Object



837
838
839
# File 'lib/moxml/adapter/libxml.rb', line 837

# Returns the prefix of a namespace.
#
# @param namespace [#prefix, nil] namespace object
# @return [Object, nil] +namespace.prefix+, or nil for a nil namespace
def namespace_prefix(namespace)
  namespace.nil? ? nil : namespace.prefix
end

.namespace_uri(namespace) ⇒ Object



841
842
843
# File 'lib/moxml/adapter/libxml.rb', line 841

# Returns the URI of a namespace.
#
# @param namespace [#href, nil] namespace object
# @return [Object, nil] +namespace.href+, or nil for a nil namespace
def namespace_uri(namespace)
  namespace.nil? ? nil : namespace.href
end

.next_sibling(node) ⇒ Object



318
319
320
321
322
323
324
325
326
327
328
# File 'lib/moxml/adapter/libxml.rb', line 318

# Returns the next sibling of +node+, skipping text nodes that contain only
# whitespace.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Object, nil] the sibling passed through +patch_node+, or nil when
#   there is none
def next_sibling(node)
  candidate = unpatch_node(node)&.next
  # Walk forward past whitespace-only text nodes
  candidate = candidate.next while candidate && candidate.text? &&
    candidate.content.to_s.strip.empty?

  patch_node(candidate) if candidate
end

.node_name(node) ⇒ Object



220
221
222
223
# File 'lib/moxml/adapter/libxml.rb', line 220

# Returns the name of a node.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Object, nil] the native node's +name+, or nil when unwrapping
#   yields nil
def node_name(node)
  unpatch_node(node)&.name
end

.node_type(node) ⇒ Object



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/moxml/adapter/libxml.rb', line 181

# Classifies a node as one of the adapter's type symbols.
#
# Wrapper classes are recognised first, in a fixed order; anything else is
# unwrapped and classified by its native LibXML +node_type+ constant.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Symbol] :element, :text, :cdata, :comment,
#   :processing_instruction, :entity_reference, :doctype, :document,
#   :attribute, or :unknown
def node_type(node)
  return :unknown unless node

  # Wrapper classes - checked in this order, first match wins
  wrapper_kind =
    case node
    when CustomizedLibxml::Element then :element
    when CustomizedLibxml::Text then :text
    when CustomizedLibxml::Cdata then :cdata
    when CustomizedLibxml::Comment then :comment
    when CustomizedLibxml::ProcessingInstruction then :processing_instruction
    when CustomizedLibxml::EntityReference then :entity_reference
    when DoctypeWrapper then :doctype
    end
  return wrapper_kind if wrapper_kind

  # Native nodes - map LibXML's numeric node type
  case unpatch_node(node).node_type
  when ::LibXML::XML::Node::DOCUMENT_NODE then :document
  when ::LibXML::XML::Node::ELEMENT_NODE then :element
  when ::LibXML::XML::Node::TEXT_NODE then :text
  when ::LibXML::XML::Node::CDATA_SECTION_NODE then :cdata
  when ::LibXML::XML::Node::COMMENT_NODE then :comment
  when ::LibXML::XML::Node::ATTRIBUTE_NODE then :attribute
  when ::LibXML::XML::Node::PI_NODE then :processing_instruction
  when ::LibXML::XML::Node::DTD_NODE then :doctype
  else :unknown
  end
end

.parent(node) ⇒ Object



312
313
314
315
316
# File 'lib/moxml/adapter/libxml.rb', line 312

# Returns the parent of +node+.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Object, nil] the parent passed through +patch_node+, or nil when
#   the node is missing or has no parent
def parent(node)
  native_parent = unpatch_node(node)&.parent
  patch_node(native_parent) if native_parent
end

.parse(xml, options = {}, _context = nil) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/moxml/adapter/libxml.rb', line 49

def parse(xml, options = {}, _context = nil)
  # LibXML doesn't preserve DOCTYPE during parsing, so we need to extract it manually
  xml_string = if xml.is_a?(String)
                 xml
               elsif xml.is_a?(IO) || xml.is_a?(StringIO)
                 xml.read
               else
                 xml.to_s
               end

  # Preprocess entities before parsing.
  # This converts the string to UTF-8; LibXML will use the encoding
  # parameter or XML declaration for byte interpretation.
  xml_string = preprocess_entities(xml_string)

  # Extract DOCTYPE before parsing
  doctype_match = xml_string.match(/<!DOCTYPE\s+(\S+)(?:\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)"|  \s+SYSTEM\s+"([^"]+)")?\s*>/i)

  native_doc = begin
    # Handle both string and file inputs
    parser = ::LibXML::XML::Parser.string(xml_string)
    parser.parse
  rescue ::LibXML::XML::Error => e
    if options[:strict]
      line = e.line
      raise Moxml::ParseError.new(
        e.message,
        line: line,
        column: nil,
        source: xml_string[0..100],
      )
    end
    # Return empty document for non-strict mode
    create_document
  end

  # Store DOCTYPE if found
  if doctype_match
    name = doctype_match[1]
    external_id = doctype_match[2]
    system_id = doctype_match[3] || doctype_match[4]

    doctype_wrapper = DoctypeWrapper.new(
      native_doc,
      name,
      external_id,
      system_id,
    )
    attachments.set(native_doc, :doctype, doctype_wrapper)
  end

  ctx = _context || Context.new(:libxml)
  DocumentBuilder.new(ctx).build(native_doc)
end

.patch_node(node, _parent = nil) ⇒ Object



1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
# File 'lib/moxml/adapter/libxml.rb', line 1180

# Wraps a native LibXML node in the matching CustomizedLibxml wrapper.
#
# nil and objects that already are CustomizedLibxml::Node instances are
# returned untouched, as is any node whose type has no wrapper class here.
#
# @param node [Object, nil] native node or wrapper
# @param _parent [Object, nil] accepted but not used by this method
# @return [Object, nil] wrapper, or +node+ itself
def patch_node(node, _parent = nil)
  return node if node.nil? || node.is_a?(CustomizedLibxml::Node)

  wrapper_class =
    case node_type(node)
    when :element then CustomizedLibxml::Element
    when :text then CustomizedLibxml::Text
    when :cdata then CustomizedLibxml::Cdata
    when :comment then CustomizedLibxml::Comment
    when :processing_instruction then CustomizedLibxml::ProcessingInstruction
    end

  wrapper_class ? wrapper_class.new(node) : node
end

.prepare_for_new_document(node, target_doc) ⇒ Object



1211
1212
1213
1214
1215
1216
1217
1218
# File 'lib/moxml/adapter/libxml.rb', line 1211

# Prepares +node+ for insertion into +target_doc+ by duplicating it, which
# sidesteps LibXML's document-ownership restrictions.
#
# @param node [Object, nil] node to move
# @param target_doc [Object, nil] destination document; only its presence
#   is checked here
# @return [Object, nil] the duplicate, or +node+ unchanged when either
#   argument is missing
def prepare_for_new_document(node, target_doc)
  node && target_doc ? duplicate_node(node) : node
end

.previous_sibling(node) ⇒ Object



330
331
332
333
334
335
336
337
338
339
340
# File 'lib/moxml/adapter/libxml.rb', line 330

# Returns the previous sibling of +node+, skipping text nodes that contain
# only whitespace.
#
# @param node [Object, nil] node (wrapper or native)
# @return [Object, nil] the sibling passed through +patch_node+, or nil when
#   there is none
def previous_sibling(node)
  candidate = unpatch_node(node)&.prev
  # Walk backward past whitespace-only text nodes
  candidate = candidate.prev while candidate && candidate.text? &&
    candidate.content.to_s.strip.empty?

  patch_node(candidate) if candidate
end

.processing_instruction_content(node) ⇒ Object



785
786
787
788
789
790
791
792
793
794
795
796
# File 'lib/moxml/adapter/libxml.rb', line 785

# Reads the content of a processing instruction and reverses the five
# predefined XML entity escapes, in case LibXML returned the text escaped.
#
# @param node [Object, nil] PI node (wrapper or native)
# @return [String, nil] unescaped content, or nil when there is no content
def processing_instruction_content(node)
  raw = unpatch_node(node)&.content
  return nil unless raw

  # "&amp;" is handled last so an escaped ampersand is not unescaped twice
  [["&quot;", '"'], ["&apos;", "'"], ["&lt;", "<"], ["&gt;", ">"], ["&amp;", "&"]]
    .reduce(raw) { |text, (entity, char)| text.gsub(entity, char) }
end

.processing_instruction_target(node) ⇒ Object



780
781
782
783
# File 'lib/moxml/adapter/libxml.rb', line 780

# Returns the target of a processing instruction, read from the unwrapped
# node's +name+.
#
# @param node [Object, nil] wrapper or native processing instruction
# @return [String, nil] the target, or nil when +node+ unwraps to nil
def processing_instruction_target(node)
  unpatch_node(node)&.name
end

.remove(node) ⇒ Object



658
659
660
661
662
663
664
665
666
667
# File 'lib/moxml/adapter/libxml.rb', line 658

# Removes +node+.
#
# A CustomizedLibxml::Declaration is only flagged as removed (its +removed+
# attribute is set to true); any other node is unwrapped and detached with
# +remove!+.
#
# @param node [Object, nil] wrapper or native node
# @return [Object, nil] nil for declarations, otherwise the result of +remove!+
def remove(node)
  unless node.is_a?(CustomizedLibxml::Declaration)
    return unpatch_node(node)&.remove!
  end

  node.removed = true
  nil
end

.remove_attribute(element, name) ⇒ Object



472
473
474
475
476
477
478
479
# File 'lib/moxml/adapter/libxml.rb', line 472

# Removes the attribute called +name+ from +element+, if present.
#
# The attribute is first looked up by the name exactly as given. When that
# finds nothing and the name is qualified ("prefix:local"), the prefix is
# resolved with find_namespace_by_prefix and the attribute is looked up by
# namespace URI plus local name. This matters because get_attribute matches
# on the stored attribute name, and a namespaced attribute is stored under
# its local name, so the qualified name alone does not find it.
#
# @param element [Object, nil] wrapper or native element
# @param name [#to_s] attribute name, optionally prefixed
# @return [Object, nil] result of +remove!+, or nil when nothing was removed
def remove_attribute(element, name)
  native_elem = unpatch_node(element)
  return unless native_elem
  return unless native_elem.attributes?

  name_str = name.to_s
  attrs = native_elem.attributes
  attr = attrs.get_attribute(name_str)

  # Namespace-aware fallback; only reached when the plain lookup missed.
  if attr.nil? && name_str.include?(":")
    prefix, local_name = name_str.split(":", 2)
    ns = find_namespace_by_prefix(native_elem, prefix)
    attr = attrs.get_attribute_ns(ns.href, local_name) if ns
  end

  attr&.remove!
end

.replace(node, new_node) ⇒ Object



669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
# File 'lib/moxml/adapter/libxml.rb', line 669

# Replaces +node+ with +new_node+ under node's current parent.
#
# Both arguments are unwrapped first. Nothing happens (nil is returned) when
# the old node has no parent or the new node unwraps to nil.
#
# @param node [Object, nil] wrapper or native node to be replaced
# @param new_node [Object, nil] wrapper or native replacement
# @return [Object, nil] nil on the early-exit paths, otherwise the result of
#   +remove!+ on the old node
def replace(node, new_node)
  native_node = unpatch_node(node)
  native_new = unpatch_node(new_node)
  parent = native_node&.parent
  return unless parent && native_new

  # Special handling for text nodes - LibXML's sibling manipulation
  # doesn't work reliably for text nodes. Instead, use parent.content
  # for text-to-text replacement
  # NOTE(review): this assigns the parent's entire content, so any other
  # children of the parent are presumably discarded as well - confirm this
  # is only reached for parents whose sole child is the text node.
  if native_node.text? && native_new.text?
    parent.content = native_new.content
    return
  end

  # Save the prev/next siblings before removing
  prev_sibling = native_node.prev
  next_sibling = native_node.next

  # Import if needed for cross-document operations
  # (parent_doc stays nil when the parent is not a LibXML::XML::Node)
  parent_doc = parent.is_a?(::LibXML::XML::Node) ? parent.doc : nil

  # Use import_and_add to properly handle document adoption
  # NOTE(review): the code below keeps using native_new afterwards, so it
  # assumes import_and_add attaches that same object rather than a copy -
  # confirm against import_and_add.
  import_and_add(parent_doc, parent, native_new)

  # Now adjust the position - move new node to where old node was
  if prev_sibling
    # Insert after the previous sibling
    prev_sibling.next = native_new
  end
  if next_sibling
    # Insert before the next sibling
    next_sibling.prev = native_new
  end

  # Finally remove the old node
  native_node.remove!
end

.replace_children(element, children) ⇒ Object



707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
# File 'lib/moxml/adapter/libxml.rb', line 707

# Replaces every child of +element+ with the given +children+.
#
# Existing children are detached with +remove!+, then each new child is
# unwrapped and attached through import_and_add, which is given the
# element's document (or nil when the element is not a LibXML::XML::Node).
#
# @param element [Object, nil] wrapper or native element
# @param children [Enumerable] wrappers or native nodes to attach, in order
# @return [Enumerable, nil] +children+, or nil when +element+ unwraps to nil
def replace_children(element, children)
  target = unpatch_node(element)
  return unless target

  # Clear out the current children before attaching the new ones.
  target.each_child(&:remove!)

  owner_doc = target.doc if target.is_a?(::LibXML::XML::Node)

  children.each { |child| import_and_add(owner_doc, target, unpatch_node(child)) }
end

.root(document) ⇒ Object



353
354
355
356
# File 'lib/moxml/adapter/libxml.rb', line 353

# Returns the root node of +document+ after unwrapping it.
#
# @param document [Object, nil] wrapper or native document
# @return [Object, nil] the document's +root+, or nil when +document+ unwraps to nil
def root(document)
  unpatch_node(document)&.root
end

.sax_parse(xml, handler) ⇒ void

This method returns an undefined value.

SAX parsing implementation for LibXML

Parameters:



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/moxml/adapter/libxml.rb', line 109

# Parses +xml+ with LibXML's SAX parser, using a LibXMLSAXBridge built around
# +handler+ as the parser's callbacks object.
#
# A LibXML::XML::Error raised during setup or parsing is not propagated: it
# is converted into a Moxml::ParseError and handed to +handler.on_error+.
#
# @param xml [#to_s] XML source
# @param handler [Object] receives events through the bridge and must
#   respond to +on_error+
# @return [void]
def sax_parse(xml, handler)
  bridge = LibXMLSAXBridge.new(handler)
  ::LibXML::XML::SaxParser
    .string(xml.to_s)
    .tap { |parser| parser.callbacks = bridge }
    .parse
rescue ::LibXML::XML::Error => e
  location = { line: e.line }
  # Reading the column may fail; fall back to nil in that case.
  location[:column] =
    begin
      e.column
    rescue StandardError
      nil
    end
  handler.on_error(Moxml::ParseError.new(e.message, **location))
end

.serialize(node, options = {}) ⇒ Object



907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# File 'lib/moxml/adapter/libxml.rb', line 907

# Serializes +node+ to an XML string.
#
# Three paths exist:
# * wrappers (CustomizedLibxml::Node, DoctypeWrapper) serialize themselves
#   through their own +to_xml+; +options+ is not consulted on this path;
# * a native document is assembled piece by piece: declaration, DOCTYPE,
#   document-level processing instructions, stored text nodes, then the root
#   element, with pieces separated by newlines;
# * any other native node goes through serialize_element_with_namespaces.
#
# Options read here: +:no_declaration+, +:encoding+ and +:indent+.
#
# Side effect: when a document has no stored declaration and one is needed,
# a default declaration is created and stored in the document's attachments.
#
# @param node [Object, nil] wrapper or native node/document
# @param options [Hash] serialization options (see above)
# @return [String] the serialized XML; "" when +node+ unwraps to nil
def serialize(node, options = {})
  # FIRST: Check if node is any kind of wrapper with custom to_xml
  if node.is_a?(CustomizedLibxml::Node) || node.is_a?(DoctypeWrapper)
    return node.to_xml
  end

  native_node = unpatch_node(node)
  return "" unless native_node

  if native_node.is_a?(::LibXML::XML::Document)
    # Unfrozen buffer that the pieces below are appended to.
    output = +""

    # Check if we should include declaration
    # Priority: explicit no_declaration option > default (include)
    should_include_decl = if options.key?(:no_declaration)
                            !options[:no_declaration]
                          else
                            # Default: include declaration
                            true
                          end

    if should_include_decl
      # Check if declaration was explicitly managed
      decl = attachments.get(native_node, :declaration)
      if decl
        # Only output declaration if it exists and wasn't removed
        output << decl.to_xml unless decl.removed
      else
        # No declaration stored - create default
        version = native_node.version || "1.0"
        encoding_val = options[:encoding] ||
          encoding_to_string(native_node.encoding) ||
          "UTF-8"

        # Don't add standalone="yes" by default - only if explicitly set
        decl = CustomizedLibxml::Declaration.new(
          native_node,
          version,
          encoding_val,
          nil, # No standalone by default
        )
        # The default is stored, so later calls find it via attachments.get
        # above (including an encoding taken from options[:encoding]).
        attachments.set(native_node, :declaration, decl)
        output << decl.to_xml
      end
    end

    # Add DOCTYPE if stored on document
    doctype_wrapper = attachments.get(native_node, :doctype)
    if doctype_wrapper
      output << "\n" unless output.empty?
      output << doctype_wrapper.to_xml
    end

    # Add document-level processing instructions if stored
    pis = attachments.get(native_node, :pis)
    if pis && !pis.empty?
      pis.each do |pi|
        output << "\n" unless output.empty?
        output << pi.to_xml
      end
    end

    # Add text nodes if stored (for documents without root)
    texts = attachments.get(native_node, :texts)
    if texts && !texts.empty?
      texts.each do |text|
        output << "\n" unless output.empty?
        output << text.to_xml
      end
    end

    if native_node.root
      # Use our custom serializer to control namespace output
      root_output = serialize_element_with_namespaces(
        native_node.root,
        true,
      )

      # Apply indentation if requested
      if options[:indent]&.positive?
        # First add newlines between elements
        formatted = add_newlines_to_xml(root_output)
        # NOTE(review): unlike the branch below, the newline here is appended
        # even when output is still empty (e.g. no_declaration: true and no
        # DOCTYPE), so the result starts with "\n" - confirm this is intended.
        output << "\n" << indent_xml(formatted, options[:indent])
      else
        # Separator newline only when something precedes the root.
        output << "\n" << root_output unless output.empty?
        output << root_output if output.empty?
      end
    end

    output
  else
    serialize_element_with_namespaces(native_node, true)
  end
end

.set_attribute(element, name, value) ⇒ Object



384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/moxml/adapter/libxml.rb', line 384

# Sets attribute +name+ on +element+ to +value+ and returns the attribute.
#
# A qualified name ("prefix:local") whose prefix resolves through
# find_namespace_by_prefix produces a namespaced attribute built with
# LibXML::XML::Attr.new(node, name, value, ns). Any previous attribute is
# removed first so the element never ends up with the same attribute twice.
# The previous attribute is looked up by namespace URI + local name, because
# get_attribute matches on the stored attribute name and a namespaced
# attribute is stored under its local name; the lookup by the qualified name
# is kept as a fallback for attributes stored literally under that name.
#
# Unprefixed names, and prefixed names whose prefix cannot be resolved, are
# set as regular attributes through +[]=+.
#
# @param element [Object, nil] wrapper or native element
# @param name [#to_s] attribute name, optionally prefixed
# @param value [#to_s] attribute value
# @return [Object, nil] the attribute, or nil when +element+ unwraps to nil
def set_attribute(element, name, value)
  native_elem = unpatch_node(element)
  return unless native_elem

  name_str = name.to_s
  value_str = value.to_s

  if name_str.include?(":")
    prefix, local_name = name_str.split(":", 2)
    ns = find_namespace_by_prefix(native_elem, prefix)

    if ns
      attrs = native_elem.attributes
      stale = attrs.get_attribute_ns(ns.href, local_name) ||
        attrs.get_attribute(name_str)
      stale&.remove!

      # Attr.new attaches the attribute to native_elem; return it directly.
      return ::LibXML::XML::Attr.new(native_elem, local_name, value_str, ns)
    end
  end

  # Regular attribute (no prefix, or prefix without a matching namespace).
  native_elem[name_str] = value_str
  native_elem.attributes.get_attribute(name_str)
end

.set_attribute_name(attribute, new_name) ⇒ Object



481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
# File 'lib/moxml/adapter/libxml.rb', line 481

# Renames an attribute by replacing it on its parent element, since LibXML
# attributes cannot be renamed directly.
#
# @param attribute [Object, nil] attribute exposing +name+, +value+ and +parent+
# @param new_name [#to_s] the new attribute name
# @return [Object, nil] the attribute found under the new name, or nil when
#   +attribute+ is nil or has no parent
def set_attribute_name(attribute, new_name)
  return unless attribute

  # Snapshot everything needed before the old attribute is detached.
  previous_name, current_value, owner = attribute.name, attribute.value, attribute.parent
  return unless owner

  target_name = new_name.to_s

  stale = owner.attributes.get_attribute(previous_name)
  stale.remove! if stale

  # Re-create the attribute under the new name with the saved value.
  owner[target_name] = current_value
  owner.attributes.get_attribute(target_name)
end

.set_cdata_content(node, content) ⇒ Object



764
765
766
767
768
# File 'lib/moxml/adapter/libxml.rb', line 764

# Assigns the content of a CDATA node. The value is stringified and passed
# through as-is; CDATA content should NOT be escaped.
#
# @param node [Object, nil] wrapper or native CDATA node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when +node+ unwraps to nil
def set_cdata_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end

.set_comment_content(node, content) ⇒ Object



775
776
777
778
# File 'lib/moxml/adapter/libxml.rb', line 775

# Assigns the content of a comment node after stringifying the value.
#
# @param node [Object, nil] wrapper or native comment node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when +node+ unwraps to nil
def set_comment_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end

.set_declaration_attribute(node, name, value) ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/moxml/adapter/libxml.rb', line 257

# Updates one field of an XML declaration wrapper.
#
# Only CustomizedLibxml::Declaration instances are modified; anything else
# (including nil and native documents, which are treated as read-only) is
# ignored. The attribute name may be given as a String or a Symbol; names
# other than "version", "encoding" and "standalone" are ignored.
#
# @param node [Object, nil] declaration wrapper
# @param name [#to_s] "version", "encoding" or "standalone"
# @param value [Object] new value; for "standalone" it is passed through
#   unchanged and the Declaration handles the conversion
# @return [Object, nil] the assigned value, or nil when nothing was set
def set_declaration_attribute(node, name, value)
  return unless node.is_a?(CustomizedLibxml::Declaration)

  # to_s lets Symbol names hit the same branches as String names.
  case name.to_s
  when "version" then node.version = value
  when "encoding" then node.encoding = value
  when "standalone" then node.standalone = value
  end
end

.set_namespace(element, ns) ⇒ Object



820
821
822
823
824
825
# File 'lib/moxml/adapter/libxml.rb', line 820

# Makes +ns+ the namespace of +element+ by assigning it through the
# element's +namespaces+ object. Does nothing when either side is missing.
#
# @param element [Object, nil] wrapper or native element
# @param ns [Object, nil] namespace to assign
# @return [Object, nil] +ns+, or nil when nothing was assigned
def set_namespace(element, ns)
  target = unpatch_node(element)
  target.namespaces.namespace = ns if target && ns
end

.set_node_name(node, name) ⇒ Object



225
226
227
228
# File 'lib/moxml/adapter/libxml.rb', line 225

# Renames a node by assigning the stringified +name+ to the unwrapped node.
#
# @param node [Object, nil] wrapper or native node
# @param name [#to_s] new node name
# @return [String, nil] the assigned name, or nil when +node+ unwraps to nil
def set_node_name(node, name)
  target = unpatch_node(node)
  return unless target

  target.name = name.to_s
end

.set_processing_instruction_content(node, content) ⇒ Object



798
799
800
801
802
# File 'lib/moxml/adapter/libxml.rb', line 798

# Assigns the content of a processing instruction. The raw, stringified
# value is stored; LibXML will escape it.
#
# @param node [Object, nil] wrapper or native processing instruction
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when +node+ unwraps to nil
def set_processing_instruction_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end

.set_root(doc, element) ⇒ Object



45
46
47
# File 'lib/moxml/adapter/libxml.rb', line 45

# Assigns +element+ as the root of +doc+ through the document's +root=+
# writer. Neither argument is unwrapped here.
# NOTE(review): presumably callers pass native LibXML objects already -
# confirm, since most other adapter methods call unpatch_node first.
#
# @param doc [Object] document receiving the root
# @param element [Object] element to install as root
# @return [Object] +element+ (the value of the assignment expression)
def set_root(doc, element)
  doc.root = element
end

.set_text_content(node, content) ⇒ Object



746
747
748
749
# File 'lib/moxml/adapter/libxml.rb', line 746

# Assigns the content of a node after stringifying the value.
#
# @param node [Object, nil] wrapper or native node
# @param content [#to_s] new content
# @return [String, nil] the assigned string, or nil when +node+ unwraps to nil
def set_text_content(node, content)
  target = unpatch_node(node)
  return unless target

  target.content = content.to_s
end

.store_entity_ref_on_doc(doc, element, ref) ⇒ Object

Store entity ref on the document (stable identity). LibXML element wrappers are ephemeral, so we use == to find matching elements.



585
586
587
588
589
590
591
592
593
594
# File 'lib/moxml/adapter/libxml.rb', line 585

# Records +ref+ against +element+ in the document's :_entity_ref_pairs
# attachment, a list of [element, refs] pairs.
#
# Elements are matched with == rather than by identity, because LibXML
# element wrappers are ephemeral while the document is stable.
#
# @param doc [Object] document owning the attachment
# @param element [Object] element the reference belongs to
# @param ref [Object] entity reference to record
# @return [Object] whatever +attachments.set+ returns
def store_entity_ref_on_doc(doc, element, ref)
  pairs = attachments.get(doc, :_entity_ref_pairs) || []

  match = pairs.find { |candidate, _| candidate == element }
  if match.nil?
    # First reference for this element: start a new pair.
    pairs << [element, [ref]]
  else
    match[1] << ref
  end

  attachments.set(doc, :_entity_ref_pairs, pairs)
end

.text_content(node) ⇒ Object



725
726
727
728
729
730
731
732
# File 'lib/moxml/adapter/libxml.rb', line 725

# Returns the textual content of +node+.
#
# CustomizedLibxml::EntityReference wrappers always yield an empty string;
# every other node is unwrapped and its +content+ returned.
#
# @param node [Object, nil] wrapper or native node
# @return [String, nil] the content, or nil when +node+ unwraps to nil
def text_content(node)
  return "" if node.is_a?(CustomizedLibxml::EntityReference)

  native = unpatch_node(node)
  native ? native.content : nil
end

.unpatch_node(node) ⇒ Object



1201
1202
1203
1204
1205
1206
1207
1208
1209
# File 'lib/moxml/adapter/libxml.rb', line 1201

# Unwraps +node+ to the underlying native LibXML object.
#
# Instances of CustomizedLibxml::Node, CustomizedLibxml::Declaration and
# DoctypeWrapper are replaced by their +native+ value; anything else,
# including nil, is returned as-is.
#
# @param node [Object, nil] wrapper or native object
# @return [Object, nil] the native object
def unpatch_node(node)
  wrapped = node.is_a?(CustomizedLibxml::Node) ||
    node.is_a?(CustomizedLibxml::Declaration) ||
    node.is_a?(DoctypeWrapper)

  wrapped ? node.native : node
end

.xpath(node, expression, namespaces = nil) ⇒ Object



877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
# File 'lib/moxml/adapter/libxml.rb', line 877

# Evaluates an XPath +expression+ against +node+ and returns the matches
# wrapped with patch_node.
#
# The namespace context comes from build_xpath_namespaces, since LibXML
# requires ALL prefixes used in the expression to be registered; it is only
# passed to +find+ when it is not empty.
#
# @param node [Object, nil] wrapper or native context node
# @param expression [String] XPath expression
# @param namespaces [Object, nil] extra namespace bindings handed to
#   build_xpath_namespaces
# @return [Array] wrapped matches; [] when +node+ unwraps to nil
# @raise [Moxml::XPathError] when LibXML raises LibXML::XML::Error
def xpath(node, expression, namespaces = nil)
  context_node = unpatch_node(node)
  return [] unless context_node

  ns_context = build_xpath_namespaces(context_node, namespaces)

  matches =
    if ns_context.empty?
      context_node.find(expression)
    else
      context_node.find(expression, ns_context)
    end

  matches.to_a.map { |found| patch_node(found) }
rescue ::LibXML::XML::Error => e
  raise Moxml::XPathError.new(
    e.message,
    expression: expression,
    adapter: "LibXML",
    node: node,
  )
end