Class: Moxml::Adapter::Base

Inherits:
Object
  • Object
show all
Extended by:
XmlUtils
Defined in:
lib/moxml/adapter/base.rb

Direct Known Subclasses

Libxml, Nokogiri, Oga, Ox, Rexml

Constant Summary collapse

ENTITY_MARKER =

Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) followed by U+FEFF (BOM) is a two-character sentinel, chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities such as &copy; are converted to this marker before parsing, then restored during serialization. Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.

"\u{FFFC}\u{FEFF}"
ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
SERIALIZED_ENTITY_MARKER_RE =
/&#xFFFC;&#xFEFF;(#{ENTITY_NAME_PATTERN});/
STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze

Class Method Summary collapse

Methods included from XmlUtils

encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri

Class Method Details

.actual_native(child_native, _parent_native) ⇒ Object

Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).



207
208
209
# File 'lib/moxml/adapter/base.rb', line 207

# Returns the native node to use after an add_child operation.
#
# The base implementation hands back +child_native+ untouched and ignores
# the parent argument.
#
# @param child_native the native node that was added
# @param _parent_native the native parent node (unused here)
# @return the +child_native+ argument itself
def actual_native(child_native, _parent_native)
  child_native
end

.create_cdata(content, owner_doc: nil) ⇒ Object



119
120
121
# File 'lib/moxml/adapter/base.rb', line 119

# Builds a CDATA node from +content+.
#
# The content is passed through +normalize_xml_value+ and the result is
# handed, together with +owner_doc+, to +create_native_cdata+.
#
# @param content the CDATA content
# @param owner_doc the owning document, or nil
# @return whatever +create_native_cdata+ returns
def create_cdata(content, owner_doc: nil)
  normalized = normalize_xml_value(content)
  create_native_cdata(normalized, owner_doc)
end

.create_comment(content, owner_doc: nil) ⇒ Object



123
124
125
126
# File 'lib/moxml/adapter/base.rb', line 123

# Builds a comment node from +content+.
#
# The raw content is checked with +validate_comment_content+ first; the
# normalized content (+normalize_xml_value+) is then handed, together with
# +owner_doc+, to +create_native_comment+.
#
# @param content the comment content
# @param owner_doc the owning document, or nil
# @return whatever +create_native_comment+ returns
def create_comment(content, owner_doc: nil)
  validate_comment_content(content)
  normalized = normalize_xml_value(content)
  create_native_comment(normalized, owner_doc)
end

.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object



138
139
140
141
142
143
144
# File 'lib/moxml/adapter/base.rb', line 138

# Builds an XML declaration node.
#
# Each argument is checked by its own validator (version, then encoding,
# then standalone) before +create_native_declaration+ is called.
#
# @param version the XML version (defaults to "1.0")
# @param encoding the declared encoding (defaults to "UTF-8")
# @param standalone the standalone value (defaults to nil)
# @return whatever +create_native_declaration+ returns
def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)

  create_native_declaration(version, encoding, standalone)
end

.create_doctype(name, external_id, system_id) ⇒ Object



128
129
130
# File 'lib/moxml/adapter/base.rb', line 128

# Builds a doctype node.
#
# All three arguments are forwarded unchanged to +create_native_doctype+;
# no validation happens at this level.
#
# @param name the doctype name
# @param external_id the external identifier
# @param system_id the system identifier
# @return whatever +create_native_doctype+ returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end

.create_document(_native_doc = nil) ⇒ Object



101
102
103
104
105
106
107
# File 'lib/moxml/adapter/base.rb', line 101

# Placeholder for document creation.
#
# This implementation always raises, tagging the error with the feature
# name and with +name+ as the adapter.
#
# @param _native_doc an optional native document (unused here)
# @raise [Moxml::NotImplementedError] always
def create_document(_native_doc = nil)
  error = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document",
    adapter: name,
  )
  raise error
end

.create_element(name, owner_doc: nil) ⇒ Object



109
110
111
112
# File 'lib/moxml/adapter/base.rb', line 109

# Builds an element node.
#
# The name is checked with +validate_element_name+ before the element is
# created through +create_native_element+.
#
# @param name the element name
# @param owner_doc the owning document, or nil
# @return whatever +create_native_element+ returns
def create_element(name, owner_doc: nil)
  validate_element_name(name)

  create_native_element(name, owner_doc)
end

.create_entity_reference(name) ⇒ Object



164
165
166
167
# File 'lib/moxml/adapter/base.rb', line 164

# Builds an entity reference node.
#
# The name is checked with +validate_entity_reference_name+ before the
# node is created through +create_native_entity_reference+.
#
# @param name the entity name
# @return whatever +create_native_entity_reference+ returns
def create_entity_reference(name)
  validate_entity_reference_name(name)

  create_native_entity_reference(name)
end

.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/moxml/adapter/base.rb', line 146

# Builds a namespace declaration on +element+.
#
# A prefixed declaration with an empty (or nil) URI is rejected up front.
# In +:strict+ mode the prefix (when given) and the URI are both validated
# strictly; in any other mode only the URI is validated, leniently.
#
# @param element the element receiving the declaration
# @param prefix the namespace prefix, or nil for no prefix
# @param uri the namespace URI
# @param namespace_validation_mode +:strict+ (default) selects strict
#   validation; any other value selects lenient URI validation
# @return whatever +create_native_namespace+ returns
# @raise [NamespaceError] when a prefix is given with an empty URI
def create_namespace(element, prefix, uri, namespace_validation_mode: :strict)
  if prefix && uri.to_s.empty?
    raise NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix,
      uri: uri,
    )
  end

  strict = namespace_validation_mode == :strict
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end

.create_processing_instruction(target, content) ⇒ Object



132
133
134
135
136
# File 'lib/moxml/adapter/base.rb', line 132

# Builds a processing instruction node.
#
# The target is checked with +validate_pi_target+; the content is
# normalized with +normalize_xml_value+ before both are handed to
# +create_native_processing_instruction+.
#
# @param target the processing instruction target
# @param content the processing instruction content
# @return whatever +create_native_processing_instruction+ returns
def create_processing_instruction(target, content)
  validate_pi_target(target)
  normalized = normalize_xml_value(content)
  create_native_processing_instruction(target, normalized)
end

.create_text(content, owner_doc: nil) ⇒ Object



114
115
116
117
# File 'lib/moxml/adapter/base.rb', line 114

# Builds a text node from +content+.
#
# @param content the text content
# @param owner_doc the owning document, or nil
# @return whatever +create_native_text+ returns
def create_text(content, owner_doc: nil)
  # Hand over a copy of the normalized value: Ox freezes the content.
  text = normalize_xml_value(content).dup
  create_native_text(text, owner_doc)
end

.duplicate_node(node) ⇒ Object



181
182
183
# File 'lib/moxml/adapter/base.rb', line 181

# Copies a node.
#
# The base implementation simply calls +dup+ on the node.
#
# @param node the node to copy
# @return the result of +node.dup+
def duplicate_node(node)
  node.dup
end

.entity_reference_name(node) ⇒ Object



177
178
179
# File 'lib/moxml/adapter/base.rb', line 177

# Reads the name of an entity reference node.
#
# The base implementation returns +node.name+.
#
# @param node the entity reference node
# @return the value of +node.name+
def entity_reference_name(node)
  node.name
end

.has_declaration?(_native_doc, wrapper) ⇒ Boolean

Check if the native document has an XML declaration

Parameters:

  • native_doc

    the native document object

  • wrapper (Moxml::Document)

    the wrapper with has_xml_declaration flag

Returns:

  • (Boolean)


201
202
203
# File 'lib/moxml/adapter/base.rb', line 201

# Reports whether the document has an XML declaration.
#
# The base implementation does not inspect the native document; it
# returns the wrapper's +has_xml_declaration+ value.
#
# @param _native_doc the native document object (unused here)
# @param wrapper [Moxml::Document] the wrapper carrying the
#   +has_xml_declaration+ flag
# @return [Boolean] the wrapper's +has_xml_declaration+ value
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end

.in_scope_namespaces(element) ⇒ Object

Returns all namespaces in scope for this element, including inherited from ancestors. Adapters with native support (Nokogiri) override this. Default walks the ancestor chain.



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/moxml/adapter/base.rb', line 214

# Collects the namespaces in scope for +element+.
#
# Starting at +element+, the chain of parents is followed for as long as
# each node is an element. For every prefix the first declaration found
# (the one closest to +element+) is kept; declarations further up with the
# same prefix are ignored.
#
# @param element the element to start from
# @return [Array] the kept namespace objects, in discovery order
def in_scope_namespaces(element)
  by_prefix = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      next if by_prefix.key?(key)

      by_prefix[key] = definition
    end
    current = parent(current)
  end

  by_prefix.values
end

.parse(_xml, _options = {}) ⇒ Object



67
68
69
70
71
72
73
# File 'lib/moxml/adapter/base.rb', line 67

# Placeholder for XML parsing.
#
# This implementation always raises, tagging the error with the feature
# name and with +name+ as the adapter.
#
# @param _xml the XML input (unused here)
# @param _options parse options (unused here)
# @raise [Moxml::NotImplementedError] always
def parse(_xml, _options = {})
  error = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse",
    adapter: name,
  )
  raise error
end

.patch_node(node, _parent = nil) ⇒ Object



185
186
187
188
# File 'lib/moxml/adapter/base.rb', line 185

# Hook for monkey-patching the native node if necessary.
#
# The base implementation leaves the node alone and returns it.
#
# @param node the native node
# @param _parent the parent node (unused here)
# @return the +node+ argument itself
def patch_node(node, _parent = nil)
  node
end

.prepare_for_new_document(node, _target_doc) ⇒ Object



190
191
192
193
194
195
# File 'lib/moxml/adapter/base.rb', line 190

# Hook for adapters that need special handling when moving nodes between
# documents (e.g., LibXML's document.import).
#
# The default is a no-op, kept for backward compatibility: the node is
# returned as given.
#
# @param node the node being moved
# @param _target_doc the destination document (unused here)
# @return the +node+ argument itself
def prepare_for_new_document(node, _target_doc)
  node
end

.preprocess_entities(xml) ⇒ Object

Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/moxml/adapter/base.rb', line 30

# Replaces non-standard entity references with marker sequences.
#
# The input is first brought to UTF-8:
# * binary strings are reinterpreted as UTF-8 when their bytes are valid
#   UTF-8, otherwise transcoded from ASCII-8BIT with replacement so the
#   later +gsub+ does not raise;
# * UTF-8 strings are used as they are;
# * anything else is transcoded with +encode("UTF-8")+.
#
# Every match of ENTITY_NAME_RE whose name is not listed in
# STANDARD_ENTITIES is then rewritten as ENTITY_MARKER + name + ";".
#
# @param xml [String, nil] the XML text
# @return [String] the rewritten text; "" when +xml+ is nil
def preprocess_entities(xml)
  return "" if xml.nil?

  utf8 =
    case xml.encoding
    when Encoding::BINARY
      reinterpreted = xml.dup.force_encoding("UTF-8")
      if reinterpreted.valid_encoding?
        reinterpreted
      else
        xml.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
      end
    when Encoding::UTF_8
      xml
    else
      xml.encode("UTF-8")
    end

  utf8.gsub(ENTITY_NAME_RE) do |reference|
    entity = ::Regexp.last_match(1)
    STANDARD_ENTITIES.include?(entity) ? reference : "#{ENTITY_MARKER}#{entity};"
  end
end

.restore_entities(text) ⇒ Object

Restore entity markers back to named entity references.



50
51
52
53
54
55
56
57
# File 'lib/moxml/adapter/base.rb', line 50

# Turns entity markers back into named entity references.
#
# Non-String input is returned untouched. Strings that are not tagged as
# UTF-8 are copied and retagged as UTF-8, since the markers are UTF-8
# characters. Matches of ENTITY_MARKER_RE, and then of
# SERIALIZED_ENTITY_MARKER_RE, are rewritten as "&name;".
#
# @param text the serialized text
# @return [String] the text with markers replaced, or +text+ itself when
#   it is not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  utf8 =
    if text.encoding == Encoding::UTF_8
      text
    else
      text.dup.force_encoding("UTF-8")
    end

  utf8
    .gsub(ENTITY_MARKER_RE, '&\1;')
    .gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
end

.sax_parse(_xml, _handler) ⇒ void

This method returns an undefined value.

Parse XML using SAX (event-driven) parsing

SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.

Parameters:

  • xml (String, IO)

    XML string or IO object to parse

  • handler (Moxml::SAX::Handler)

    Handler object receiving events

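Raises:

  • (Moxml::NotImplementedError)

    raised by this base implementation, which does not provide SAX parsing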



85
86
87
88
89
90
91
# File 'lib/moxml/adapter/base.rb', line 85

# Placeholder for SAX (event-driven) parsing.
#
# This implementation always raises, tagging the error with the feature
# name and with +name+ as the adapter.
#
# @param _xml the XML input (unused here)
# @param _handler the handler that would receive events (unused here)
# @raise [Moxml::NotImplementedError] always
def sax_parse(_xml, _handler)
  error = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse",
    adapter: name,
  )
  raise error
end

.sax_supported? ⇒ Boolean

Check if this adapter supports SAX parsing

Returns:

  • (Boolean)

    true if SAX parsing is supported



96
97
98
99
# File 'lib/moxml/adapter/base.rb', line 96

# Reports whether this adapter supports SAX parsing.
#
# Support is detected by looking at where +sax_parse+ is defined: when the
# receiver responds to it and the method's owner is anything other than
# the singleton class of Moxml::Adapter::Base, the answer is true.
#
# @return [Boolean] true if SAX parsing is supported
def sax_supported?
  return false unless respond_to?(:sax_parse)

  method(:sax_parse).owner != Moxml::Adapter::Base.singleton_class
end

.set_attribute_name(attribute, name) ⇒ Object



169
170
171
# File 'lib/moxml/adapter/base.rb', line 169

# Renames an attribute.
#
# The base implementation assigns through the attribute's +name=+ writer.
#
# @param attribute the native attribute
# @param name the new name
# @return the +name+ argument (the value of the assignment expression)
def set_attribute_name(attribute, name)
  attribute.name = name
end

.set_attribute_value(attribute, value) ⇒ Object



173
174
175
# File 'lib/moxml/adapter/base.rb', line 173

# Changes an attribute's value.
#
# The base implementation assigns through the attribute's +value=+ writer.
#
# @param attribute the native attribute
# @param value the new value
# @return the +value+ argument (the value of the assignment expression)
def set_attribute_value(attribute, value)
  attribute.value = value
end

.set_root(_doc, _element) ⇒ Object



59
60
61
62
63
64
65
# File 'lib/moxml/adapter/base.rb', line 59

# Placeholder for setting a document's root element.
#
# This implementation always raises, tagging the error with the feature
# name and with +name+ as the adapter.
#
# @param _doc the document (unused here)
# @param _element the element that would become the root (unused here)
# @raise [Moxml::NotImplementedError] always
def set_root(_doc, _element)
  error = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root",
    adapter: name,
  )
  raise error
end