Module: Lutaml::Xml::DeclarationHandler

Included in:
Adapter::BaseAdapter
Defined in:
lib/lutaml/xml/declaration_handler.rb

Overview

DeclarationHandler provides XML declaration and DOCTYPE handling for all XML adapter implementations.

This module implements Issue #1: XML Declaration Preservation across Nokogiri, Oga, and Ox adapters.

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.extract_attribute(content, attr_name) ⇒ String?

Extract an attribute value from declaration content. Uses simple string parsing (no regular expressions) to avoid ReDoS.

Parameters:

  • content (String)

    the declaration content (between <?xml and ?>)

  • attr_name (String)

    the attribute name to find

Returns:

  • (String, nil)

    the attribute value or nil if not found



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/lutaml/xml/declaration_handler.rb', line 58

# Extract a quoted pseudo-attribute value from XML declaration content.
#
# Plain string scanning is used instead of a regular expression so the
# work stays linear and cannot trigger regex backtracking (ReDoS).
#
# Whitespace (space, tab, CR, LF) is tolerated on both sides of the "="
# sign, as permitted by the XML `Eq` production, and the name must start
# at a token boundary so that e.g. "xversion=" is not taken for "version=".
#
# @param content [String] the declaration content (between <?xml and ?>)
# @param attr_name [String] the attribute name to find
# @return [String, nil] the attribute value, or nil if the attribute is
#   absent, unquoted, or its closing quote is missing
def self.extract_attribute(content, attr_name)
  whitespace = [" ", "\t", "\r", "\n"]
  # A closing quote also counts as a boundary so attributes written without
  # a separating space (version="1.0"encoding="...") are still found.
  boundary = whitespace + ['"', "'"]
  search_from = 0

  while (name_start = content.index(attr_name, search_from))
    # Resume after this occurrence if it turns out not to be the attribute.
    search_from = name_start + 1

    # Reject occurrences embedded in a longer name.
    next unless name_start.zero? || boundary.include?(content[name_start - 1])

    # Optional whitespace, then "=" (content[pos] is nil past the end).
    pos = name_start + attr_name.length
    pos += 1 while whitespace.include?(content[pos])
    next unless content[pos] == "="

    # Optional whitespace, then the opening quote.
    pos += 1
    pos += 1 while whitespace.include?(content[pos])

    quote = content[pos]
    return nil unless ['"', "'"].include?(quote)

    # The value ends at the next occurrence of the same quote character.
    end_quote = content.index(quote, pos + 1)
    return nil unless end_quote

    return content[(pos + 1)...end_quote]
  end

  nil
end

.extract_xml_declaration(xml) ⇒ Hash

Extract XML declaration information from input string

Detects if input had an XML declaration and extracts version/encoding/standalone. This is used for round-trip preservation of declarations.

Parameters:

  • xml (String)

    the XML string to parse

Returns:

  • (Hash)

    declaration info { version:, encoding:, standalone:, had_declaration: }



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/lutaml/xml/declaration_handler.rb', line 16

# Extract XML declaration information from an input string.
#
# Leading whitespace is ignored. The input counts as having a declaration
# only when it starts with the "<?xml" target followed by whitespace (or
# directly by "?"), and a closing "?>" is found within the first 100
# characters. Other processing instructions whose target merely begins
# with "xml" (for example <?xml-stylesheet ...?>) are not declarations.
#
# String operations are used instead of a regular expression to avoid
# regex backtracking (ReDoS).
#
# @param xml [String] the XML string to inspect
# @return [Hash] { had_declaration: false } when no declaration is found,
#   otherwise { version:, encoding:, standalone:, had_declaration: true }
#   where version falls back to "1.0" and encoding/standalone may be nil
def self.extract_xml_declaration(xml)
  # Strip leading whitespace
  trimmed = xml.lstrip

  # Fast prefix check - no regex needed
  return { had_declaration: false } unless trimmed.start_with?("<?xml")

  # "<?xml" is also the prefix of other PI targets such as
  # "<?xml-stylesheet"; the declaration target must end right here.
  # trimmed[5] is nil when the input is exactly "<?xml".
  unless [" ", "\t", "\r", "\n", "?"].include?(trimmed[5])
    return { had_declaration: false }
  end

  # Find the end of the declaration (?>)
  # Limit search to first 100 chars to avoid scanning entire document
  search_region = trimmed[0, 100]
  end_pos = search_region.index("?>", 5)
  return { had_declaration: false } unless end_pos

  # Extract content between <?xml and ?>
  decl_content = trimmed[5...end_pos]

  {
    # Version defaults to "1.0" when it cannot be read
    version: extract_attribute(decl_content, "version") || "1.0",
    # Encoding and standalone may be absent (nil)
    encoding: extract_attribute(decl_content, "encoding"),
    standalone: extract_attribute(decl_content, "standalone"),
    had_declaration: true,
  }
end

Instance Method Details

#generate_declaration(options, xml_declaration = nil) ⇒ String

Generate XML declaration string

Uses stored declaration info if available, otherwise uses defaults. Supports custom version strings, encoding, and standalone options.

Parameters:

  • options (Hash)

    serialization options

    • :declaration => String for custom version, true for default

    • :encoding => String or true for UTF-8

    • :standalone => String ("yes"/"no"), true ("yes"), false ("no"), :preserve

  • xml_declaration (Hash) (defaults to: nil)

    extracted declaration info from input

Returns:

  • (String)

    the XML declaration (includes trailing newline)



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/lutaml/xml/declaration_handler.rb', line 133

# Build the XML declaration line for serialized output.
#
# Each pseudo-attribute is resolved from the explicit option first and
# falls back to the declaration info captured from the input:
#
# * version    - options[:declaration] when it is a String; "1.0" when it
#                is true; otherwise the input version, or "1.0"
# * encoding   - options[:encoding] when it is a String; "UTF-8" when it
#                is true; otherwise the input encoding (omitted if none)
# * standalone - when the :standalone key is present: a String is used
#                as-is, true => "yes", false => "no", :preserve => input
#                value, anything else omits it; when the key is absent the
#                input value is used (omitted if none)
#
# @param options [Hash] serialization options
# @param xml_declaration [Hash, nil] extracted declaration info from input;
#   falls back to @xml_declaration when nil
# @return [String] the XML declaration, including a trailing newline
def generate_declaration(options, xml_declaration = nil)
  # Adapter instances keep the parsed declaration in an instance variable.
  xml_declaration ||= @xml_declaration

  # Version: custom string > forced default > input > default.
  requested_version = options[:declaration]
  version = "1.0"
  if requested_version.is_a?(String)
    version = requested_version
  elsif requested_version != true
    version = xml_declaration&.dig(:version) || "1.0"
  end

  # Encoding: explicit option > input > none.
  requested_encoding = options[:encoding]
  encoding =
    if requested_encoding.is_a?(String)
      requested_encoding
    elsif requested_encoding == true
      "UTF-8"
    else
      xml_declaration&.dig(:encoding)
    end

  # Standalone: start from the input value and override it only when the
  # caller supplied the :standalone key.
  input_standalone = xml_declaration&.dig(:standalone)
  standalone = input_standalone
  if options.key?(:standalone)
    requested_standalone = options[:standalone]
    standalone =
      case requested_standalone
      when String then requested_standalone
      when true then "yes"
      when false then "no"
      when :preserve then input_standalone
      end
  end

  # Assemble the pseudo-attributes in the order required by XML.
  attributes = ["version=\"#{version}\""]
  attributes << "encoding=\"#{encoding}\"" if encoding
  attributes << "standalone=\"#{standalone}\"" if standalone

  "<?xml #{attributes.join(' ')}?>\n"
end

#generate_doctype_declaration(doctype) ⇒ String?

Generate DOCTYPE declaration from doctype hash

Supports both PUBLIC and SYSTEM DTDs. Formats:

<!DOCTYPE name PUBLIC "public_id" "system_id">

<!DOCTYPE name SYSTEM "system_id">

Parameters:

  • doctype (Hash)

    the doctype information

    • :name => root element name

    • :public_id => public identifier (optional)

    • :system_id => system identifier (optional)

Returns:

  • (String, nil)

    the DOCTYPE declaration or nil if no doctype



201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/lutaml/xml/declaration_handler.rb', line 201

# Generate a DOCTYPE declaration line from a doctype hash.
#
# Output forms:
#   <!DOCTYPE name>
#   <!DOCTYPE name PUBLIC "public_id">
#   <!DOCTYPE name PUBLIC "public_id" "system_id">
#   <!DOCTYPE name SYSTEM "system_id">
#
# Identifiers are wrapped in double quotes, except that an identifier
# containing a double quote (and no single quote) is wrapped in single
# quotes so the emitted literal stays well-formed.
#
# @param doctype [Hash, nil] doctype info with :name and optional
#   :public_id / :system_id
# @return [String, nil] the DOCTYPE declaration with a trailing newline,
#   or nil when doctype is nil/false
def generate_doctype_declaration(doctype)
  return nil unless doctype

  # XML literals may be delimited by either quote character; pick the one
  # that does not occur inside the value. Falls back to double quotes when
  # both occur (no valid delimiter exists for such a value).
  quote_literal = lambda do |value|
    text = value.to_s
    delimiter = text.include?('"') && !text.include?("'") ? "'" : '"'
    "#{delimiter}#{text}#{delimiter}"
  end

  public_id = doctype[:public_id]
  system_id = doctype[:system_id]
  parts = ["<!DOCTYPE #{doctype[:name]}"]

  if public_id
    parts << "PUBLIC #{quote_literal.call(public_id)}"
    # After PUBLIC the system identifier follows without its own keyword.
    parts << quote_literal.call(system_id) if system_id
  elsif system_id
    parts << "SYSTEM #{quote_literal.call(system_id)}"
  end

  "#{parts.join(' ')}>\n"
end

#should_include_declaration?(options, xml_declaration = nil) ⇒ Boolean

Determine if XML declaration should be included in output

Supports multiple modes:

  • false: omit declaration

  • true: force include with defaults

  • :preserve: include if input had one

  • String: custom version string

Parameters:

  • options (Hash)

    serialization options

  • xml_declaration (Hash) (defaults to: nil)

    extracted declaration info from input

Returns:

  • (Boolean)

    true if declaration should be included



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/lutaml/xml/declaration_handler.rb', line 94

# Decide whether the serialized output should start with an XML declaration.
#
# The :declaration option is interpreted as follows:
# * false       - never emit a declaration
# * true        - always emit one
# * String      - always emit one (the string is a custom version)
# * :preserve, any other value, or option absent - emit one only if the
#   input document had a declaration
#
# @param options [Hash] serialization options
# @param xml_declaration [Hash, nil] extracted declaration info from input;
#   falls back to @xml_declaration when nil
# @return [Boolean] true if the declaration should be included
def should_include_declaration?(options, xml_declaration = nil)
  # Adapter instances keep the parsed declaration in an instance variable.
  xml_declaration ||= @xml_declaration

  # Shared answer for every "preserve from input" path.
  input_had_declaration = xml_declaration&.dig(:had_declaration) || false
  return input_had_declaration unless options.key?(:declaration)

  case options[:declaration]
  when true, String then true
  when false then false
  else input_had_declaration
  end
end