Module: Uniword::Infrastructure::XmlNamespaceNormalizer

Defined in:
lib/uniword/infrastructure/xml_namespace_normalizer.rb

Overview

XML normalization utilities for round-trip fidelity

Ensures that serialized XML matches the expected namespace declaration format by adding missing prefixed namespace declarations to root elements.

Constant Summary collapse

PREFIXED_NAMESPACES =

Prefixed namespaces that should be declared at root level in OOXML documents

{
  # Keys are namespace URIs; values are the prefixes expected on the root element.
  # DrawingML "wordprocessingDrawing" namespace (2006 schema URI).
  "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" => "wp",
  # Microsoft Word 2010 "wordprocessingDrawing" namespace.
  "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" => "wp14",
}.freeze

Class Method Summary collapse

Class Method Details

.normalize(xml) ⇒ String

Normalize XML to ensure prefixed namespace declarations are at root level

Parameters:

  • xml (String)

    XML content

Returns:

  • (String)

    Normalized XML content



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 22

# Make sure every known prefixed namespace that is used somewhere in the
# document is also declared on the root element under its expected prefix.
#
# @param xml [String] XML content
# @return [String] the re-serialized XML, or +xml+ itself when the parsed
#   document has no root element
def self.normalize(xml)
  document = Nokogiri::XML(xml)
  root = document.root
  return xml unless root

  # Snapshot of the root's declarations: prefix => URI, with the default
  # namespace stored under the nil key. It is not refreshed inside the loop.
  declared = root.namespace_definitions.to_h do |definition|
    [definition.prefix, definition.href]
  end

  PREFIXED_NAMESPACES.each do |uri, expected_prefix|
    # Skip namespaces that no element in the document belongs to.
    matching = document.xpath("//*[namespace-uri()='#{uri}']")
    next if matching.empty?

    # Nothing to do when the root already binds the expected prefix to this URI.
    next if declared[expected_prefix] == uri

    prefixed_attr = "xmlns:#{expected_prefix}"

    # Used somewhere below the root but not the root's default namespace:
    # simply add the prefixed declaration.
    unless declared[nil] == uri
      root[prefixed_attr] = uri
      next
    end

    # The URI is the root's default namespace: swap the plain "xmlns"
    # attribute for the prefixed one.
    # NOTE(review): Nokogiri normally reports namespace declarations through
    # namespace_definitions rather than attributes, so this lookup may never
    # match for parsed input - confirm.
    xmlns_attr = root.attributes["xmlns"]
    next unless xmlns_attr && xmlns_attr.value == uri

    root.remove_attribute("xmlns")
    root[prefixed_attr] = uri
    update_elements_namespace(matching, expected_prefix)
  end

  document.to_xml(indent: 0, save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
end

.normalize_zip_content(zip_content, target_files = nil) ⇒ Hash

Normalize namespace declarations in a ZIP content hash

Parameters:

  • zip_content (Hash)

    File path => content mapping

  • target_files (Array<String>) (defaults to: nil)

    Files to normalize; when nil, every entry whose path ends in .xml or .rels is normalized

Returns:

  • (Hash)

    Updated zip content



91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 91

# Run {normalize} over selected entries of a ZIP content hash.
#
# The input hash is not modified; a shallow copy is returned with the
# selected entries replaced by their normalized content.
#
# @param zip_content [Hash] file path => content mapping
# @param target_files [Array<String>, nil] paths to normalize; when nil,
#   every key ending in ".xml" or ".rels" is selected
# @return [Hash] copy of +zip_content+ with the selected entries normalized
def self.normalize_zip_content(zip_content, target_files = nil)
  paths = target_files || zip_content.keys.select do |name|
    name.end_with?(".xml", ".rels")
  end

  paths.each_with_object(zip_content.dup) do |path, normalized|
    content = normalized[path]
    # Entries that are missing or nil are left untouched.
    normalized[path] = normalize(content) if content
  end
end

.update_elements_namespace(elements, prefix) ⇒ Object

Declare the given prefix on the document root for each element's namespace URI (the elements themselves are not modified)

Parameters:

  • elements (Nokogiri::XML::NodeSet)

    Elements to update

  • prefix (String)

    The prefix to use



72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/uniword/infrastructure/xml_namespace_normalizer.rb', line 72

# Ensure the document root declares +prefix+ for the namespace URI used by
# each of the given elements.
#
# Despite the method name, the element nodes themselves are never modified:
# for every element whose namespace URI appears among the values of
# +root.namespaces+, an "xmlns:<prefix>" attribute with that URI is written
# on the root. Elements with no namespace, or with a URI the root does not
# list, are skipped.
#
# @param elements [Nokogiri::XML::NodeSet] elements whose namespace should be
#   declared on the root
# @param prefix [String] the prefix to declare
# @return [Object] +elements+, as returned by +each+
def self.update_elements_namespace(elements, prefix)
  elements.each do |el|
    # An element without a namespace has nothing to declare (and previously
    # raised NoMethodError on +nil.href+).
    href = el.namespace&.href
    next unless href

    root = el.document.root
    # +namespaces+ builds a new Hash on every call, so deleting from it (as
    # this method once did) never affected the document; only the lookup
    # matters here.
    next unless root.namespaces.value?(href)

    root["xmlns:#{prefix}"] = href
  end
end