Class: Moxml::Adapter::Base

Inherits:
Object
  • Object
show all
Extended by:
XmlUtils
Defined in:
lib/moxml/adapter/base.rb

Direct Known Subclasses

Libxml, Nokogiri, Oga, Ox, Rexml

Constant Summary collapse

ENTITY_MARKER =

Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character sentinel chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities like `&copy;` are converted to this marker before parsing, then restored during serialization. Standard XML entities (`&amp;` `&lt;` `&gt;` `&quot;` `&apos;`) are NOT converted.

"\u{FFFC}\u{FEFF}"
# Regexp source for an entity name: a letter or underscore followed by any
# number of word characters, dots, colons, or hyphens. Interpolated into the
# regexps below.
ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
# Matches a named entity reference (`&name;`) and captures the name.
ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
# Matches the U+FFFC U+FEFF marker followed by `name;` and captures the name.
ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
# NOTE(review): as shown here this pattern has no marker prefix, so it would
# match any `name;` sequence. The prefix may have been lost when this page
# was rendered -- confirm against lib/moxml/adapter/base.rb.
SERIALIZED_ENTITY_MARKER_RE =
/(#{ENTITY_NAME_PATTERN});/
# Entity names that preprocess_entities leaves untouched.
STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze

Class Method Summary collapse

Methods included from XmlUtils

encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri

Class Method Details

.actual_native(child_native, _parent_native) ⇒ Object

Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).



217
218
219
# File 'lib/moxml/adapter/base.rb', line 217

# Return the actual native node after an add_child operation.
#
# This default hands back +child_native+ unchanged and ignores the parent.
# Adapters where node identity may change on insertion are expected to
# override it.
#
# @param child_native the native node that was added
# @param _parent_native the native parent (unused here)
# @return the same +child_native+ object
def actual_native(child_native, _parent_native)
  child_native
end

.create_cdata(content, owner_doc: nil) ⇒ Object



125
126
127
# File 'lib/moxml/adapter/base.rb', line 125

# Build a native CDATA node.
#
# The content is passed through normalize_xml_value before being handed to
# the adapter's create_native_cdata together with the owner document.
#
# @param content the CDATA content
# @param owner_doc the document the node is created for (nil by default)
# @return whatever create_native_cdata returns
def create_cdata(content, owner_doc: nil)
  cdata_text = normalize_xml_value(content)
  create_native_cdata(cdata_text, owner_doc)
end

.create_comment(content, owner_doc: nil) ⇒ Object



129
130
131
132
# File 'lib/moxml/adapter/base.rb', line 129

# Build a native comment node.
#
# The raw content is validated first (validate_comment_content); only then
# is it normalized and passed to the adapter's create_native_comment.
#
# @param content the comment text
# @param owner_doc the document the node is created for (nil by default)
# @return whatever create_native_comment returns
def create_comment(content, owner_doc: nil)
  validate_comment_content(content)

  comment_text = normalize_xml_value(content)
  create_native_comment(comment_text, owner_doc)
end

.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object



144
145
146
147
148
149
150
# File 'lib/moxml/adapter/base.rb', line 144

# Build a native XML declaration.
#
# Each argument is checked by its own validator (version, encoding,
# standalone, in that order) before the adapter's create_native_declaration
# is called with the unmodified values.
#
# @param version the declared XML version (defaults to "1.0")
# @param encoding the declared encoding (defaults to "UTF-8")
# @param standalone the standalone value (defaults to nil)
# @return whatever create_native_declaration returns
def create_declaration(version = "1.0", encoding = "UTF-8",
                       standalone = nil)
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)
  create_native_declaration(version, encoding, standalone)
end

.create_doctype(name, external_id, system_id) ⇒ Object



134
135
136
# File 'lib/moxml/adapter/base.rb', line 134

# Build a native doctype node by delegating directly to the adapter's
# create_native_doctype. No validation is performed at this level.
#
# @param name the doctype name
# @param external_id the external identifier
# @param system_id the system identifier
# @return whatever create_native_doctype returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end

.create_document(_native_doc = nil) ⇒ Object



107
108
109
110
111
112
113
# File 'lib/moxml/adapter/base.rb', line 107

# Placeholder for document creation; this base version always raises.
#
# @param _native_doc ignored by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+
def create_document(_native_doc = nil)
  error = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document", adapter: name
  )
  raise error
end

.create_element(name, owner_doc: nil) ⇒ Object



115
116
117
118
# File 'lib/moxml/adapter/base.rb', line 115

# Build a native element node.
#
# The name is checked with validate_element_name, then passed together with
# the owner document to the adapter's create_native_element.
#
# @param name the element name
# @param owner_doc the document the node is created for (nil by default)
# @return whatever create_native_element returns
def create_element(name, owner_doc: nil)
  validate_element_name(name)
  create_native_element(name, owner_doc)
end

.create_entity_reference(name) ⇒ Object



170
171
172
173
# File 'lib/moxml/adapter/base.rb', line 170

# Build a native entity reference node.
#
# The name is checked with validate_entity_reference_name, then passed to
# the adapter's create_native_entity_reference.
#
# @param name the entity name
# @return whatever create_native_entity_reference returns
def create_entity_reference(name)
  validate_entity_reference_name(name)
  create_native_entity_reference(name)
end

.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/moxml/adapter/base.rb', line 152

# Declare a namespace on an element after validating prefix and URI.
#
# A prefixed declaration with an empty URI is always rejected. Beyond that,
# :strict mode validates the prefix (when one is given) and validates the
# URI strictly; any other mode validates only the URI, leniently.
#
# @param element the native element receiving the declaration
# @param prefix the namespace prefix, or nil
# @param uri the namespace URI
# @param namespace_validation_mode :strict (default) or any other value for
#   lenient URI validation
# @return whatever create_native_namespace returns
# @raise [NamespaceError] when a prefix is given and the URI is empty
def create_namespace(element, prefix, uri,
                     namespace_validation_mode: :strict)
  # Keep the short-circuit: the URI is only inspected when a prefix is given.
  if prefix && uri.to_s.empty?
    error = NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix, uri: uri
    )
    raise error
  end

  strict = namespace_validation_mode == :strict
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end

.create_processing_instruction(target, content) ⇒ Object



138
139
140
141
142
# File 'lib/moxml/adapter/base.rb', line 138

# Build a native processing instruction node.
#
# The target is validated first (validate_pi_target); the content is then
# normalized and both are passed to the adapter's
# create_native_processing_instruction.
#
# @param target the processing instruction target
# @param content the processing instruction content
# @return whatever create_native_processing_instruction returns
def create_processing_instruction(target, content)
  validate_pi_target(target)

  pi_content = normalize_xml_value(content)
  create_native_processing_instruction(target, pi_content)
end

.create_text(content, owner_doc: nil) ⇒ Object



120
121
122
123
# File 'lib/moxml/adapter/base.rb', line 120

# Build a native text node.
#
# @param content the text content
# @param owner_doc the document the node is created for (nil by default)
# @return whatever create_native_text returns
def create_text(content, owner_doc: nil)
  # Hand the native layer its own copy: Ox freezes the content it is given.
  text_copy = normalize_xml_value(content).dup
  create_native_text(text_copy, owner_doc)
end

.duplicate_node(node) ⇒ Object



187
188
189
# File 'lib/moxml/adapter/base.rb', line 187

# Copy a native node by calling +dup+ on it.
#
# @param node the native node to copy
# @return the result of +node.dup+
def duplicate_node(node)
  node.dup
end

.entity_reference_name(node) ⇒ Object



183
184
185
# File 'lib/moxml/adapter/base.rb', line 183

# Read the entity name of an entity reference node via its +name+ method.
#
# @param node the native entity reference node
# @return the result of +node.name+
def entity_reference_name(node)
  node.name
end

.has_declaration?(_native_doc, wrapper) ⇒ Boolean

Check if the native document has an XML declaration

Parameters:

  • native_doc

    the native document object

  • wrapper (Moxml::Document)

    the wrapper with has_xml_declaration flag

Returns:

  • (Boolean)


207
208
209
# File 'lib/moxml/adapter/base.rb', line 207

# Check if the native document has an XML declaration.
#
# This default does not inspect the native document at all; it reports the
# wrapper's +has_xml_declaration+ value.
#
# @param _native_doc the native document object (unused here)
# @param wrapper [Moxml::Document] the wrapper with has_xml_declaration flag
# @return [Boolean]
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end

.in_scope_namespaces(element) ⇒ Object

Returns all namespaces in scope for this element, including inherited from ancestors. Adapters with native support (Nokogiri) override this. Default walks the ancestor chain.



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/moxml/adapter/base.rb', line 224

# Collect every namespace in scope for an element, including those
# inherited from its ancestors.
#
# Starting at +element+, the walk climbs through +parent+ for as long as the
# current node is an element. The nearest declaration wins: a prefix already
# recorded from a closer element is never overwritten by an ancestor.
#
# @param element the native element to start from
# @return [Array] the namespaces, in the order their prefixes were first seen
def in_scope_namespaces(element)
  by_prefix = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      by_prefix[key] = definition unless by_prefix.key?(key)
    end
    current = parent(current)
  end

  by_prefix.values
end

.parse(_xml, _options = {}) ⇒ Object



74
75
76
77
78
79
80
# File 'lib/moxml/adapter/base.rb', line 74

# Placeholder for DOM parsing; this base version always raises.
#
# @param _xml ignored by this base implementation
# @param _options ignored by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+
def parse(_xml, _options = {})
  error = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse", adapter: name
  )
  raise error
end

.patch_node(node, _parent = nil) ⇒ Object



191
192
193
194
# File 'lib/moxml/adapter/base.rb', line 191

# Hook for adapters that need to adjust a native node. This default makes
# no change and returns the node as given.
#
# @param node the native node
# @param _parent unused by this default
# @return the same +node+ object
def patch_node(node, _parent = nil)
  # monkey-patch the native node if necessary
  node
end

.prepare_for_new_document(node, _target_doc) ⇒ Object



196
197
198
199
200
201
# File 'lib/moxml/adapter/base.rb', line 196

# Hook called when a node is moved into another document. This default
# makes no change and returns the node as given.
#
# @param node the native node being moved
# @param _target_doc the destination document (unused here)
# @return the same +node+ object
def prepare_for_new_document(node, _target_doc)
  # Hook for adapters that need special handling when moving nodes
  # between documents (e.g., LibXML's document.import)
  # Default: no-op for backward compatibility
  node
end

.preprocess_entities(xml) ⇒ Object

Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/moxml/adapter/base.rb', line 27

# Replace non-standard named entity references with marker sequences before
# the XML is handed to a parser.
#
# The input is first brought to UTF-8:
# - binary strings are reinterpreted as UTF-8 when their bytes are valid,
#   otherwise transcoded from ASCII-8BIT with replacement characters;
# - UTF-8 strings are used as they are;
# - any other encoding is transcoded with String#encode.
#
# References whose name is listed in STANDARD_ENTITIES are left untouched;
# every other `&name;` becomes ENTITY_MARKER followed by `name;`.
#
# @param xml [String, nil] the XML source
# @return [String] "" for nil input, otherwise the UTF-8 string with
#   non-standard entity references replaced
def preprocess_entities(xml)
  return "" if xml.nil?

  utf8 =
    case xml.encoding
    when Encoding::BINARY
      # Binary input is assumed to hold UTF-8 bytes. When it does not,
      # transcode with replacement so the gsub below cannot raise.
      candidate = xml.dup.force_encoding("UTF-8")
      if candidate.valid_encoding?
        candidate
      else
        xml.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
      end
    when Encoding::UTF_8
      xml
    else
      xml.encode("UTF-8")
    end

  # Without an ampersand there is nothing to mark, so skip the regexp scan
  # and the extra string allocation.
  return utf8 unless utf8.include?("&")

  utf8.gsub(ENTITY_NAME_RE) do |reference|
    entity = ::Regexp.last_match(1)
    next reference if STANDARD_ENTITIES.include?(entity)

    "#{ENTITY_MARKER}#{entity};"
  end
end

.remove_declaration(_native_doc) ⇒ Object

Clear the declaration state from the native document. Called when a Declaration node is removed from a document.



213
# File 'lib/moxml/adapter/base.rb', line 213

# Clear the declaration state from the native document. Called when a
# Declaration node is removed from a document.
#
# This default has an empty body: it does nothing and returns nil.
#
# @param _native_doc the native document (unused here)
# @return [nil]
def remove_declaration(_native_doc); end

.restore_entities(text) ⇒ Object

Restore entity markers back to named entity references.



57
58
59
60
61
62
63
64
# File 'lib/moxml/adapter/base.rb', line 57

# Restore entity markers back to named entity references.
#
# Non-String values are returned untouched. Strings that are not tagged as
# UTF-8 are copied and re-tagged (bytes unchanged) because the markers are
# UTF-8 characters. Two substitutions then run in sequence, each rewriting
# its match as `&name;`: first ENTITY_MARKER_RE, then
# SERIALIZED_ENTITY_MARKER_RE.
#
# @param text the serialized text, or any other object
# @return [String, Object] the restored string, or +text+ itself when it is
#   not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  utf8 = text
  utf8 = text.dup.force_encoding("UTF-8") unless text.encoding == Encoding::UTF_8

  utf8
    .gsub(ENTITY_MARKER_RE, '&\1;')
    .gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
end

.sax_parse(_xml, _handler) ⇒ void

This method returns an undefined value.

Parse XML using SAX (event-driven) parsing

SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.

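Parameters:

  • _xml

    the XML input to parse

  • _handler

    the handler that receives the SAX events

Raises:

  • (Moxml::NotImplementedError)

    always in this base implementation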



92
93
94
95
96
97
98
# File 'lib/moxml/adapter/base.rb', line 92

# Placeholder for SAX (event-driven) parsing; this base version always
# raises.
#
# @param _xml ignored by this base implementation
# @param _handler ignored by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+
def sax_parse(_xml, _handler)
  error = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse", adapter: name
  )
  raise error
end

.sax_supported? ⇒ Boolean

Check if this adapter supports SAX parsing

Returns:

  • (Boolean)

    true if SAX parsing is supported



103
104
105
# File 'lib/moxml/adapter/base.rb', line 103

# Check if this adapter supports SAX parsing.
#
# Support is detected by looking at where +sax_parse+ resolves: if its owner
# is anything other than the singleton class of Moxml::Adapter::Base, the
# method has been overridden and SAX parsing is reported as supported.
#
# @return [Boolean] true if SAX parsing is supported
def sax_supported?
  method(:sax_parse).owner != Moxml::Adapter::Base.singleton_class
end

.set_attribute_name(attribute, name) ⇒ Object



175
176
177
# File 'lib/moxml/adapter/base.rb', line 175

# Rename an attribute by assigning to its +name+ writer.
#
# @param attribute the native attribute
# @param name the new attribute name
# @return the assigned +name+
def set_attribute_name(attribute, name)
  attribute.name = name
end

.set_attribute_value(attribute, value) ⇒ Object



179
180
181
# File 'lib/moxml/adapter/base.rb', line 179

# Change an attribute's value by assigning to its +value+ writer.
#
# @param attribute the native attribute
# @param value the new attribute value
# @return the assigned +value+
def set_attribute_value(attribute, value)
  attribute.value = value
end

.set_root(_doc, _element) ⇒ Object



66
67
68
69
70
71
72
# File 'lib/moxml/adapter/base.rb', line 66

# Placeholder for setting a document's root element; this base version
# always raises.
#
# @param _doc ignored by this base implementation
# @param _element ignored by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+
def set_root(_doc, _element)
  error = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root", adapter: name
  )
  raise error
end