Module: Uniword::Infrastructure::XmlNamespaceNormalizer
- Defined in:
- lib/uniword/infrastructure/xml_namespace_normalizer.rb
Overview
XML normalization utilities for round-trip fidelity
Ensures that serialized XML matches the expected namespace declaration format by adding missing prefixed namespace declarations to root elements.
Constant Summary
- PREFIXED_NAMESPACES =
Prefixed namespaces that should be declared at root level in OOXML documents
{ "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" => "wp", "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" => "wp14", }.freeze
Class Method Summary
-
.normalize(xml) ⇒ String
Normalize XML to ensure prefixed namespace declarations are at root level.
-
.normalize_zip_content(zip_content, target_files = nil) ⇒ Hash
Normalize namespace declarations in a ZIP content hash.
-
.update_elements_namespace(elements, prefix) ⇒ Object
Update elements to use the correct namespace prefix.
Class Method Details
.normalize(xml) ⇒ String
Normalize XML to ensure prefixed namespace declarations are at root level
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 22

# Normalize XML so that each namespace in PREFIXED_NAMESPACES that is used
# somewhere in the document is declared, with its expected prefix, on the
# root element.
#
# The input is parsed with Nokogiri and re-serialized, so the returned string
# is the serializer's output rather than the original text, even when no
# declaration had to be added.
#
# @param xml [String] XML source to normalize
# @return [String] the re-serialized document, or +xml+ itself when the
#   parsed document has no root element
def self.normalize(xml)
  doc = Nokogiri::XML(xml)
  root = doc.root
  return xml unless root

  # prefix (nil for the default namespace) => URI, as declared on the root.
  declared = root.namespace_definitions.to_h { |ns| [ns.prefix, ns.href] }

  PREFIXED_NAMESPACES.each do |uri, expected_prefix|
    elements_with_ns = doc.xpath("//*[namespace-uri()='#{uri}']")
    next if elements_with_ns.empty?

    # The prefix is already taken at root level. If it maps to this URI
    # there is nothing to do; if it maps to another URI, writing a second
    # xmlns:<prefix> onto the root would emit a conflicting duplicate
    # declaration, so the document is left as it is.
    next if declared.key?(expected_prefix)

    if declared[nil] == uri
      # The root declares this URI as its default namespace. Namespace
      # declarations are not part of Node#attributes, so the switch to the
      # prefixed form has to go through the namespace API: add a real
      # definition and re-point every element that uses the URI to it.
      # The original default declaration stays on the root; it is redundant
      # once no element refers to it.
      prefixed = root.add_namespace_definition(expected_prefix, uri)
      elements_with_ns.each { |el| el.namespace = prefixed }
    else
      # Used further down the tree but not declared at root: add the
      # declaration as a literal attribute so it is serialized on the root.
      root["xmlns:#{expected_prefix}"] = uri
    end
  end

  doc.to_xml(indent: 0, save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
end
.normalize_zip_content(zip_content, target_files = nil) ⇒ Hash
Normalize namespace declarations in a ZIP content hash
91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 91

# Normalize namespace declarations in a ZIP content hash.
#
# Works on a shallow copy: +zip_content+ itself is not modified, and only
# the entries that get normalized are replaced in the returned hash.
#
# @param zip_content [Hash] entry path => entry content
# @param target_files [Array<String>, String, nil] path or paths to
#   normalize; when nil, every key ending in ".xml" or ".rels" is used
# @return [Hash] copy of +zip_content+ with the selected entries passed
#   through +normalize+
def self.normalize_zip_content(zip_content, target_files = nil)
  result = zip_content.dup

  # Array() lets callers pass a single path as well as a list of paths.
  paths = Array(
    target_files || result.keys.select { |key| key.end_with?(".xml", ".rels") }
  )

  paths.each do |path|
    content = result[path]
    next unless content # entry missing from the archive (or empty value)

    result[path] = normalize(content)
  end

  result
end
.update_elements_namespace(elements, prefix) ⇒ Object
Update elements to use the correct namespace prefix
72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 72

# Re-bind elements to a prefixed declaration of their namespace on the
# document root.
#
# For each element whose namespace URI is declared on its document's root,
# a definition +prefix+ => URI is ensured on the root and the element is
# pointed at it, so it is serialized with that prefix. Elements without a
# namespace, or whose namespace URI is not declared on the root, are left
# untouched.
#
# @param elements [Enumerable] elements to update (each must respond to
#   +namespace+, +namespace=+ and +document+)
# @param prefix [String] prefix the elements should use
# @return [Object] the result of +elements.each+
def self.update_elements_namespace(elements, prefix)
  elements.each do |el|
    href = el.namespace&.href
    next unless href

    root = el.document.root
    # Node#namespaces builds a fresh Hash on every call, so it can only be
    # queried here; changing that Hash would not change the document.
    next unless root.namespaces.value?(href)

    # A caller may have written the declaration as a plain attribute
    # (root["xmlns:<prefix>"] = uri). Drop it so that the real definition
    # added below is not serialized a second time.
    root.remove_attribute("xmlns:#{prefix}")

    definition = root.add_namespace_definition(prefix, href)
    # Guard against getting back a definition of the same prefix that is
    # bound to some other URI: never move an element into another namespace.
    el.namespace = definition if definition.href == href
  end
end