Class: Moxml::Adapter::Base

Inherits:
Object
  • Object
show all
Extended by:
XmlUtils
Defined in:
lib/moxml/adapter/base.rb

Direct Known Subclasses

Libxml, Nokogiri, Oga, Ox, Rexml

Constant Summary collapse

ENTITY_MARKER =

Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character sentinel chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities such as `&copy;` are converted to this marker before parsing, then restored during serialization. Standard XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`) are NOT converted.

"\u{FFFC}\u{FEFF}"
ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
SERIALIZED_ENTITY_MARKER_RE =
/(#{ENTITY_NAME_PATTERN});/
STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze

Class Method Summary collapse

Methods included from XmlUtils

encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri

Class Method Details

.actual_native(child_native, _parent_native) ⇒ Object

Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).



217
218
219
# File 'lib/moxml/adapter/base.rb', line 217

# Hook returning the native node that is actually attached after an
# add_child operation. This default hands back the child unchanged;
# adapters whose node identity can change on insertion override it.
#
# @param child_native the native node that was added
# @param _parent_native the native parent (ignored by this default)
# @return the same object passed as +child_native+
def actual_native(child_native, _parent_native)
  child_native
end

.create_cdata(content, owner_doc: nil) ⇒ Object



129
130
131
# File 'lib/moxml/adapter/base.rb', line 129

# Builds a native CDATA node from +content+.
#
# The content is passed through normalize_xml_value first, and the
# normalized value is handed to the adapter-specific
# create_native_cdata together with +owner_doc+.
#
# @param content the CDATA payload
# @param owner_doc the owning document, or nil
# @return whatever create_native_cdata returns
def create_cdata(content, owner_doc: nil)
  normalized = normalize_xml_value(content)
  create_native_cdata(normalized, owner_doc)
end

.create_comment(content, owner_doc: nil) ⇒ Object



133
134
135
136
# File 'lib/moxml/adapter/base.rb', line 133

# Builds a native comment node from +content+.
#
# The raw content is checked with validate_comment_content before it
# is normalized; the normalized value is then passed to the
# adapter-specific create_native_comment together with +owner_doc+.
#
# @param content the comment text
# @param owner_doc the owning document, or nil
# @return whatever create_native_comment returns
def create_comment(content, owner_doc: nil)
  # Validation runs on the caller's value, not on the normalized one.
  validate_comment_content(content)

  normalized = normalize_xml_value(content)
  create_native_comment(normalized, owner_doc)
end

.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object



148
149
150
151
152
153
154
# File 'lib/moxml/adapter/base.rb', line 148

# Builds a native XML declaration node.
#
# Each argument is checked by its matching validate_declaration_*
# helper, in the order version, encoding, standalone, before the
# adapter-specific create_native_declaration is called.
#
# @param version the XML version (defaults to "1.0")
# @param encoding the declared encoding (defaults to "UTF-8")
# @param standalone the standalone value (defaults to nil)
# @return whatever create_native_declaration returns
def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
  # Keep this order: the first failing validator decides the outcome.
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)

  create_native_declaration(version, encoding, standalone)
end

.create_doctype(name, external_id, system_id) ⇒ Object



138
139
140
# File 'lib/moxml/adapter/base.rb', line 138

# Builds a native DOCTYPE node.
#
# The three arguments are forwarded unchanged to the adapter-specific
# create_native_doctype; no validation happens here.
#
# @param name the doctype name
# @param external_id the external identifier
# @param system_id the system identifier
# @return whatever create_native_doctype returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end

.create_document(_native_doc = nil) ⇒ Object



111
112
113
114
115
116
117
# File 'lib/moxml/adapter/base.rb', line 111

# Placeholder for document creation; concrete adapters must override it.
#
# @param _native_doc ignored by this default implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature
#   "create_document" and this adapter's +name+
def create_document(_native_doc = nil)
  error = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document",
    adapter: name,
  )
  raise error
end

.create_element(name, owner_doc: nil) ⇒ Object



119
120
121
122
# File 'lib/moxml/adapter/base.rb', line 119

# Builds a native element node.
#
# The name is checked with validate_element_name, then the
# adapter-specific create_native_element is called with the name and
# +owner_doc+.
#
# @param name the element name
# @param owner_doc the owning document, or nil
# @return whatever create_native_element returns
def create_element(name, owner_doc: nil)
  # Reject bad names before any native node is allocated.
  validate_element_name(name)

  create_native_element(name, owner_doc)
end

.create_entity_reference(name) ⇒ Object



174
175
176
177
# File 'lib/moxml/adapter/base.rb', line 174

# Builds a native entity-reference node.
#
# The name is checked with validate_entity_reference_name, then
# passed to the adapter-specific create_native_entity_reference.
#
# @param name the entity name
# @return whatever create_native_entity_reference returns
def create_entity_reference(name)
  # Validate first so a bad name never reaches the native layer.
  validate_entity_reference_name(name)

  create_native_entity_reference(name)
end

.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/moxml/adapter/base.rb', line 156

# Declares a namespace on +element+.
#
# A prefixed declaration with an empty (or nil) URI is rejected before
# any other validation runs. After that, strict mode validates the
# prefix (when one is given) and the URI with mode: :strict; any other
# mode validates only the URI with mode: :lenient. The declaration is
# then created by the adapter-specific create_native_namespace.
#
# @param element the native element receiving the declaration
# @param prefix the namespace prefix, or nil for the default namespace
# @param uri the namespace URI
# @param namespace_validation_mode :strict (default) or anything else
#   for lenient validation
# @raise [NamespaceError] when +prefix+ is given and +uri+ is empty
# @return whatever create_native_namespace returns
def create_namespace(element, prefix, uri, namespace_validation_mode: :strict)
  prefixed_without_uri = prefix && uri.to_s.empty?
  if prefixed_without_uri
    raise NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix,
      uri: uri,
    )
  end

  strict = namespace_validation_mode == :strict
  # Prefix validation is a strict-mode-only check.
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end

.create_processing_instruction(target, content) ⇒ Object



142
143
144
145
146
# File 'lib/moxml/adapter/base.rb', line 142

# Builds a native processing-instruction node.
#
# The target is checked with validate_pi_target; the content is
# normalized with normalize_xml_value before both are passed to the
# adapter-specific create_native_processing_instruction.
#
# @param target the PI target
# @param content the PI content
# @return whatever create_native_processing_instruction returns
def create_processing_instruction(target, content)
  validate_pi_target(target)

  normalized = normalize_xml_value(content)
  create_native_processing_instruction(target, normalized)
end

.create_text(content, owner_doc: nil) ⇒ Object



124
125
126
127
# File 'lib/moxml/adapter/base.rb', line 124

# Builds a native text node from +content+.
#
# The content is normalized with normalize_xml_value and a copy of the
# result is passed to the adapter-specific create_native_text.
#
# @param content the text payload
# @param owner_doc the owning document, or nil
# @return whatever create_native_text returns
def create_text(content, owner_doc: nil)
  # Ox freezes the string it is given, so hand it a private copy and
  # leave the caller's object untouched.
  private_copy = normalize_xml_value(content).dup
  create_native_text(private_copy, owner_doc)
end

.duplicate_node(node) ⇒ Object



191
192
193
# File 'lib/moxml/adapter/base.rb', line 191

# Copies a native node by calling +dup+ on it.
#
# @param node the native node to copy
# @return the object returned by +node.dup+
def duplicate_node(node)
  node.dup
end

.entity_reference_name(node) ⇒ Object



187
188
189
# File 'lib/moxml/adapter/base.rb', line 187

# Reads the entity name from a native entity-reference node through
# its +name+ reader.
#
# @param node the native entity-reference node
# @return the value of +node.name+
def entity_reference_name(node)
  node.name
end

.has_declaration?(_native_doc, wrapper) ⇒ Boolean

Check if the native document has an XML declaration

Parameters:

  • native_doc

    the native document object

  • wrapper (Moxml::Document)

    the wrapper with has_xml_declaration flag

Returns:

  • (Boolean)


211
212
213
# File 'lib/moxml/adapter/base.rb', line 211

# Reports whether the document has an XML declaration.
#
# This default ignores the native document and reads the flag kept on
# the wrapper.
#
# @param _native_doc the native document object (unused here)
# @param wrapper [Moxml::Document] the wrapper exposing
#   +has_xml_declaration+
# @return [Boolean] the wrapper's +has_xml_declaration+ value
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end

.in_scope_namespaces(element) ⇒ Object

Returns all namespaces in scope for this element, including inherited from ancestors. Adapters with native support (Nokogiri) override this. Default walks the ancestor chain.



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/moxml/adapter/base.rb', line 224

# Collects every namespace in scope for +element+, including those
# declared on its ancestors.
#
# The walk starts at +element+ and follows +parent+ upwards, stopping
# at the first node that is nil or whose node_type is not :element.
# When the same prefix is declared at several levels, the declaration
# met first (closest to +element+) is kept.
#
# @param element the native element to start from
# @return [Array] namespace objects, one per distinct prefix, in the
#   order the prefixes were first encountered
def in_scope_namespaces(element)
  by_prefix = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      # Inner declarations shadow outer ones, so never overwrite.
      by_prefix[key] = definition unless by_prefix.key?(key)
    end
    current = parent(current)
  end

  by_prefix.values
end

.parse(_xml, _options = {}) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/moxml/adapter/base.rb', line 77

# Placeholder for DOM parsing; concrete adapters must override it.
#
# @param _xml ignored by this default implementation
# @param _options ignored by this default implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature
#   "parse" and this adapter's +name+
def parse(_xml, _options = {})
  error = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse",
    adapter: name,
  )
  raise error
end

.patch_node(node, _parent = nil) ⇒ Object



195
196
197
198
# File 'lib/moxml/adapter/base.rb', line 195

# Hook that lets an adapter monkey-patch a native node when needed.
# This default does nothing and returns the node as given.
#
# @param node the native node
# @param _parent the native parent (ignored by this default)
# @return the same +node+ object
def patch_node(node, _parent = nil)
  # No patching required at the base level.
  node
end

.prepare_for_new_document(node, _target_doc) ⇒ Object



200
201
202
203
204
205
# File 'lib/moxml/adapter/base.rb', line 200

# Hook called when a node is moved between documents, for adapters
# that need special handling there (e.g., LibXML's document.import).
# This default is a no-op, kept for backward compatibility.
#
# @param node the native node being moved
# @param _target_doc the destination document (ignored by this default)
# @return the same +node+ object
def prepare_for_new_document(node, _target_doc)
  # Nothing to adapt at the base level; pass the node through.
  node
end

.preprocess_entities(xml) ⇒ Object

Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/moxml/adapter/base.rb', line 30

# Replaces non-standard named entity references (anything matching
# ENTITY_NAME_RE whose name is not in STANDARD_ENTITIES) with
# ENTITY_MARKER-prefixed placeholders so they survive parsing.
#
# Encoding handling:
# * nil yields an empty string.
# * Binary input is assumed to hold UTF-8 bytes; if it does not, it is
#   transcoded with invalid/undefined bytes replaced.
# * UTF-8 input is used as is.
# * Any other encoding is transcoded with String#encode (which may
#   raise on unconvertible data).
#
# Input without "&" is returned without further processing. Input that
# does contain "&" but carries invalid UTF-8 bytes is scrubbed first,
# because String#gsub with a Regexp raises ArgumentError on strings
# with a broken byte sequence.
#
# @param xml [String, nil] raw XML text
# @return [String] UTF-8 tagged text with non-standard entities marked
def preprocess_entities(xml)
  return "" if xml.nil?

  str = if xml.encoding == Encoding::BINARY
          # Binary strings are assumed to be UTF-8. If the bytes are
          # not valid UTF-8, fall back to transcoding with replacement
          # to avoid raising on gsub.
          retagged = xml.dup.force_encoding("UTF-8")
          if retagged.valid_encoding?
            retagged
          else
            xml.encode("UTF-8", "ASCII-8BIT",
                       invalid: :replace, undef: :replace)
          end
        elsif xml.encoding == Encoding::UTF_8
          xml
        else
          xml.encode("UTF-8")
        end
  # Fast path: no `&` means no entity references to mark — skip
  # the regex scan and string allocation entirely.
  return str unless str.include?("&")

  # A UTF-8 tagged string can still contain invalid bytes; give it the
  # same replacement treatment as the binary branch instead of letting
  # gsub raise.
  str = str.scrub unless str.valid_encoding?

  str.gsub(ENTITY_NAME_RE) do |match|
    entity_name = ::Regexp.last_match(1)
    STANDARD_ENTITIES.include?(entity_name) ? match : "#{ENTITY_MARKER}#{entity_name};"
  end
end

.restore_entities(text) ⇒ Object

Restore entity markers back to named entity references.



60
61
62
63
64
65
66
67
# File 'lib/moxml/adapter/base.rb', line 60

# Turns entity markers back into named entity references (`&name;`).
#
# Non-String input is returned untouched. A String not tagged as UTF-8
# is duplicated and re-tagged (not transcoded) as UTF-8, because the
# marker patterns are UTF-8. ENTITY_MARKER_RE is applied first, then
# SERIALIZED_ENTITY_MARKER_RE on the result.
#
# @param text the serialized output, or any non-String value
# @return [String, Object] the text with markers restored, or the
#   original value when it is not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  utf8 = text
  # Re-tag a copy so the caller's string keeps its own encoding.
  utf8 = text.dup.force_encoding("UTF-8") unless text.encoding == Encoding::UTF_8

  [ENTITY_MARKER_RE, SERIALIZED_ENTITY_MARKER_RE].reduce(utf8) do |restored, marker_re|
    restored.gsub(marker_re, '&\1;')
  end
end

.sax_parse(_xml, _handler) ⇒ void

This method returns an undefined value.

Parse XML using SAX (event-driven) parsing

SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.

Parameters:

  • xml (String, IO)

    XML string or IO object to parse

  • handler (Moxml::SAX::Handler)

    Handler object receiving events

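Raises:

  • (Moxml::NotImplementedError)

    always in this base implementation; adapters that support SAX parsing override the method
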

95
96
97
98
99
100
101
# File 'lib/moxml/adapter/base.rb', line 95

# Placeholder for SAX (event-driven) parsing; adapters that support it
# must override this method.
#
# @param _xml [String, IO] XML string or IO object (ignored here)
# @param _handler [Moxml::SAX::Handler] handler object (ignored here)
# @raise [Moxml::NotImplementedError] always, tagged with the feature
#   "sax_parse" and this adapter's +name+
# @return [void]
def sax_parse(_xml, _handler)
  error = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse",
    adapter: name,
  )
  raise error
end

.sax_supported? ⇒ Boolean

Check if this adapter supports SAX parsing

Returns:

  • (Boolean)

    true if SAX parsing is supported



106
107
108
109
# File 'lib/moxml/adapter/base.rb', line 106

# Reports whether this adapter supports SAX parsing.
#
# Support means the receiver responds to +sax_parse+ and the method it
# resolves to is not the one owned by Moxml::Adapter::Base's singleton
# class, i.e. the adapter supplies its own implementation.
#
# @return [Boolean] true if SAX parsing is supported
def sax_supported?
  return false unless respond_to?(:sax_parse)

  base_singleton = Moxml::Adapter::Base.singleton_class
  method(:sax_parse).owner != base_singleton
end

.set_attribute_name(attribute, name) ⇒ Object



179
180
181
# File 'lib/moxml/adapter/base.rb', line 179

# Renames a native attribute through its +name=+ writer.
#
# @param attribute the native attribute
# @param name the new attribute name
# @return the assigned +name+ (the value of the assignment expression)
def set_attribute_name(attribute, name)
  attribute.name = name
end

.set_attribute_value(attribute, value) ⇒ Object



183
184
185
# File 'lib/moxml/adapter/base.rb', line 183

# Updates a native attribute through its +value=+ writer.
#
# @param attribute the native attribute
# @param value the new attribute value
# @return the assigned +value+ (the value of the assignment expression)
def set_attribute_value(attribute, value)
  attribute.value = value
end

.set_root(_doc, _element) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/moxml/adapter/base.rb', line 69

# Placeholder for setting a document's root element; concrete adapters
# must override it.
#
# @param _doc ignored by this default implementation
# @param _element ignored by this default implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature
#   "set_root" and this adapter's +name+
def set_root(_doc, _element)
  error = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root",
    adapter: name,
  )
  raise error
end