Class: Moxml::Adapter::Base

Inherits:
Object
  • Object
show all
Extended by:
XmlUtils
Defined in:
lib/moxml/adapter/base.rb

Direct Known Subclasses

Libxml, Nokogiri, Oga, Ox, Rexml

Constant Summary collapse

ENTITY_MARKER =

Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character sentinel chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities like &copy; are converted to this marker before parsing, then restored during serialization. Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.

"\u{FFFC}\u{FEFF}"
ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
SERIALIZED_ENTITY_MARKER_RE =
/(#{ENTITY_NAME_PATTERN});/
STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze

Class Method Summary collapse

Methods included from XmlUtils

encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri

Class Method Details

.actual_native(child_native, _parent_native) ⇒ Object

Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).



212
213
214
# File 'lib/moxml/adapter/base.rb', line 212

# Returns the native node to use after an add_child operation.
#
# The base implementation hands +child_native+ back unchanged and never
# looks at the parent; adapters where node identity may change override it.
#
# @param child_native the native node that was added
# @param _parent_native the native parent (unused here)
# @return the same +child_native+ object
def actual_native(child_native, _parent_native)
  child_native
end

.create_cdata(content, owner_doc: nil) ⇒ Object



124
125
126
# File 'lib/moxml/adapter/base.rb', line 124

# Builds a CDATA node from +content+.
#
# The content is first run through normalize_xml_value; the result and
# +owner_doc+ are then handed to the adapter's create_native_cdata.
#
# @param content the CDATA payload
# @param owner_doc optional document forwarded to the native factory
# @return whatever create_native_cdata returns
def create_cdata(content, owner_doc: nil)
  normalized = normalize_xml_value(content)
  create_native_cdata(normalized, owner_doc)
end

.create_comment(content, owner_doc: nil) ⇒ Object



128
129
130
131
# File 'lib/moxml/adapter/base.rb', line 128

# Builds a comment node from +content+.
#
# The raw content is checked with validate_comment_content before it is
# normalized; the normalized value and +owner_doc+ are then passed to the
# adapter's create_native_comment.
#
# @param content the comment text
# @param owner_doc optional document forwarded to the native factory
# @return whatever create_native_comment returns
def create_comment(content, owner_doc: nil)
  validate_comment_content(content)

  normalized = normalize_xml_value(content)
  create_native_comment(normalized, owner_doc)
end

.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object



143
144
145
146
147
148
149
# File 'lib/moxml/adapter/base.rb', line 143

# Builds an XML declaration node.
#
# Each argument is passed to its validator (version, then encoding, then
# standalone) before all three are forwarded to create_native_declaration.
#
# @param version declaration version, "1.0" by default
# @param encoding declaration encoding, "UTF-8" by default
# @param standalone standalone value, nil by default
# @return whatever create_native_declaration returns
def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)

  create_native_declaration(version, encoding, standalone)
end

.create_doctype(name, external_id, system_id) ⇒ Object



133
134
135
# File 'lib/moxml/adapter/base.rb', line 133

# Creates a doctype node by delegating to the adapter's
# create_native_doctype. The three arguments are forwarded untouched;
# no validation happens at this level.
#
# @param name doctype name
# @param external_id external identifier
# @param system_id system identifier
# @return whatever create_native_doctype returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end

.create_document(_native_doc = nil) ⇒ Object



106
107
108
109
110
111
112
# File 'lib/moxml/adapter/base.rb', line 106

# Abstract hook for creating a document.
#
# The base implementation never succeeds: it raises
# Moxml::NotImplementedError tagged with the feature "create_document"
# and the value of +name+ as the adapter.
#
# @param _native_doc optional native document (unused here)
# @raise [Moxml::NotImplementedError] always
def create_document(_native_doc = nil)
  error = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document",
    adapter: name,
  )
  raise error
end

.create_element(name, owner_doc: nil) ⇒ Object



114
115
116
117
# File 'lib/moxml/adapter/base.rb', line 114

# Creates an element node named +name+.
#
# The name goes through validate_element_name first (presumably raising on
# an invalid name — confirm in XmlUtils); then +name+ and +owner_doc+ are
# handed to the adapter's create_native_element.
#
# @param name element name
# @param owner_doc optional document forwarded to the native factory
# @return whatever create_native_element returns
def create_element(name, owner_doc: nil)
  validate_element_name(name)
  create_native_element(name, owner_doc)
end

.create_entity_reference(name) ⇒ Object



169
170
171
172
# File 'lib/moxml/adapter/base.rb', line 169

# Creates an entity reference node for +name+.
#
# The name goes through validate_entity_reference_name first (presumably
# raising on an invalid name — confirm in XmlUtils), then is handed to the
# adapter's create_native_entity_reference.
#
# @param name entity name
# @return whatever create_native_entity_reference returns
def create_entity_reference(name)
  validate_entity_reference_name(name)
  create_native_entity_reference(name)
end

.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/moxml/adapter/base.rb', line 151

# Declares a namespace on +element+.
#
# A truthy +prefix+ combined with a URI whose string form is empty is
# rejected up front. Otherwise the URI is validated in :strict mode when
# +namespace_validation_mode+ is :strict and in :lenient mode for any other
# value; the prefix itself is validated only in strict mode and only when
# one is given. The declaration is then created by create_native_namespace.
#
# @param element the element receiving the declaration
# @param prefix namespace prefix, or nil
# @param uri namespace URI
# @param namespace_validation_mode :strict (default) or any other value
#   for lenient URI validation
# @return whatever create_native_namespace returns
# @raise [NamespaceError] when a prefix is given with an empty URI
def create_namespace(element, prefix, uri, namespace_validation_mode: :strict)
  prefixed_without_uri = prefix && uri.to_s.empty?
  if prefixed_without_uri
    raise NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix,
      uri: uri,
    )
  end

  strict = namespace_validation_mode == :strict
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end

.create_processing_instruction(target, content) ⇒ Object



137
138
139
140
141
# File 'lib/moxml/adapter/base.rb', line 137

# Builds a processing instruction node.
#
# +target+ is checked with validate_pi_target; +content+ is normalized with
# normalize_xml_value before both are handed to the adapter's
# create_native_processing_instruction.
#
# @param target the PI target
# @param content the PI content
# @return whatever create_native_processing_instruction returns
def create_processing_instruction(target, content)
  validate_pi_target(target)

  normalized = normalize_xml_value(content)
  create_native_processing_instruction(target, normalized)
end

.create_text(content, owner_doc: nil) ⇒ Object



119
120
121
122
# File 'lib/moxml/adapter/base.rb', line 119

# Builds a text node from +content+.
#
# The normalized content is duplicated before it reaches the adapter's
# create_native_text, so the native layer always gets its own copy.
#
# @param content the text
# @param owner_doc optional document forwarded to the native factory
# @return whatever create_native_text returns
def create_text(content, owner_doc: nil)
  # Ox freezes the content it is given, hence the private copy.
  text = normalize_xml_value(content).dup
  create_native_text(text, owner_doc)
end

.duplicate_node(node) ⇒ Object



186
187
188
# File 'lib/moxml/adapter/base.rb', line 186

# Returns a copy of +node+ obtained by calling its own #dup.
#
# @param node the native node to copy
# @return the result of +node.dup+
def duplicate_node(node)
  node.dup
end

.entity_reference_name(node) ⇒ Object



182
183
184
# File 'lib/moxml/adapter/base.rb', line 182

# Returns the name of an entity reference node by reading +node.name+.
#
# @param node the native entity reference node
# @return the value of +node.name+
def entity_reference_name(node)
  node.name
end

.has_declaration?(_native_doc, wrapper) ⇒ Boolean

Check if the native document has an XML declaration

Parameters:

  • native_doc

    the native document object

  • wrapper (Moxml::Document)

    the wrapper with has_xml_declaration flag

Returns:

  • (Boolean)


206
207
208
# File 'lib/moxml/adapter/base.rb', line 206

# Reports whether the document has an XML declaration.
#
# This default implementation ignores the native document and simply
# returns the wrapper's +has_xml_declaration+ value.
#
# @param _native_doc the native document object (unused here)
# @param wrapper the wrapper exposing +has_xml_declaration+
# @return the value of +wrapper.has_xml_declaration+
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end

.in_scope_namespaces(element) ⇒ Object

Returns all namespaces in scope for this element, including those inherited from ancestors. Adapters with native support (Nokogiri) override this. The default implementation walks the ancestor chain.



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/moxml/adapter/base.rb', line 219

# Collects every namespace in scope for +element+.
#
# Starting at the element and following +parent+ upwards, the namespace
# definitions of each node are recorded by prefix. The first definition
# seen for a prefix wins, so declarations closer to +element+ shadow those
# on ancestors. The walk stops at the first node that is nil or whose
# node_type is not :element.
#
# @param element the native element to start from
# @return [Array] the winning namespace objects, in discovery order
def in_scope_namespaces(element)
  seen = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      next if seen.key?(key)

      seen[key] = definition
    end
    current = parent(current)
  end

  seen.values
end

.parse(_xml, _options = {}) ⇒ Object



72
73
74
75
76
77
78
# File 'lib/moxml/adapter/base.rb', line 72

# Abstract hook for parsing XML.
#
# The base implementation never succeeds: it raises
# Moxml::NotImplementedError tagged with the feature "parse" and the value
# of +name+ as the adapter.
#
# @param _xml the XML input (unused here)
# @param _options parse options (unused here)
# @raise [Moxml::NotImplementedError] always
def parse(_xml, _options = {})
  error = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse",
    adapter: name,
  )
  raise error
end

.patch_node(node, _parent = nil) ⇒ Object



190
191
192
193
# File 'lib/moxml/adapter/base.rb', line 190

# Hook for adapters that need to monkey-patch a native node.
#
# The base implementation changes nothing: it ignores the optional parent
# and returns +node+ as given.
#
# @param node the native node
# @param _parent optional parent (unused here)
# @return the same +node+ object
def patch_node(node, _parent = nil)
  # monkey-patch the native node if necessary
  node
end

.prepare_for_new_document(node, _target_doc) ⇒ Object



195
196
197
198
199
200
# File 'lib/moxml/adapter/base.rb', line 195

# Hook invoked when a node is moved into another document.
#
# The base implementation ignores the target document and returns +node+
# unchanged.
#
# @param node the native node being moved
# @param _target_doc the destination document (unused here)
# @return the same +node+ object
def prepare_for_new_document(node, _target_doc)
  # Hook for adapters that need special handling when moving nodes
  # between documents (e.g., LibXML's document.import)
  # Default: no-op for backward compatibility
  node
end

.preprocess_entities(xml) ⇒ Object

Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/moxml/adapter/base.rb', line 30

# Replaces non-standard named entity references with marker sequences
# before parsing.
#
# Every match of ENTITY_NAME_RE whose name is not listed in
# STANDARD_ENTITIES is rewritten to ENTITY_MARKER followed by the entity
# name and ";". Standard entities are left exactly as written.
#
# The input is brought to valid UTF-8 first so the substitution cannot
# raise on malformed bytes:
# * BINARY input is reinterpreted as UTF-8 when its bytes are valid UTF-8,
#   otherwise transcoded with invalid/undefined bytes replaced;
# * UTF-8 input containing invalid byte sequences is scrubbed (the invalid
#   bytes become the replacement character);
# * any other encoding is transcoded with String#encode, which may still
#   raise on characters it cannot convert.
#
# @param xml [String, nil] the XML source
# @return [String] a UTF-8 string ("" when +xml+ is nil)
def preprocess_entities(xml)
  return "" if xml.nil?

  str = if xml.encoding == Encoding::BINARY
          # Binary strings are assumed to be UTF-8. If the bytes are
          # not valid UTF-8, fall back to transcoding with replacement
          # to avoid raising on gsub.
          reinterpreted = xml.dup.force_encoding("UTF-8")
          if reinterpreted.valid_encoding?
            reinterpreted
          else
            xml.encode("UTF-8",
                       "ASCII-8BIT", invalid: :replace, undef: :replace)
          end
        elsif xml.encoding == Encoding::UTF_8
          # A string tagged UTF-8 may still carry invalid bytes; gsub would
          # raise ArgumentError on those, so scrub them like the binary case.
          xml.valid_encoding? ? xml : xml.scrub
        else
          xml.encode("UTF-8")
        end

  str.gsub(ENTITY_NAME_RE) do |reference|
    entity = ::Regexp.last_match(1)
    STANDARD_ENTITIES.include?(entity) ? reference : "#{ENTITY_MARKER}#{entity};"
  end
end

.restore_entities(text) ⇒ Object

Restore entity markers back to named entity references.



55
56
57
58
59
60
61
62
# File 'lib/moxml/adapter/base.rb', line 55

# Turns entity markers back into named entity references.
#
# Non-String input is returned untouched. A String that is not tagged
# UTF-8 is copied and re-tagged as UTF-8 (bytes unchanged), because the
# marker patterns are UTF-8. Two substitutions then run in order: matches
# of ENTITY_MARKER_RE, followed by matches of SERIALIZED_ENTITY_MARKER_RE;
# each match becomes "&name;".
#
# @param text the serialized text (any object)
# @return [String, Object] the restored string, or +text+ itself when it
#   is not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  # Force UTF-8 encoding since markers are UTF-8 characters
  utf8 = text
  utf8 = text.dup.force_encoding("UTF-8") unless text.encoding == Encoding::UTF_8

  utf8
    .gsub(ENTITY_MARKER_RE, '&\1;')
    .gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
end

.sax_parse(_xml, _handler) ⇒ void

This method returns an undefined value.

Parse XML using SAX (event-driven) parsing

SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.

Parameters:

  • xml (String, IO)

    XML string or IO object to parse

  • handler (Moxml::SAX::Handler)

    Handler object receiving events

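Raises:

  • (Moxml::NotImplementedError)

    always in the base class; adapters that support SAX parsing are expected to override this method
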

90
91
92
93
94
95
96
# File 'lib/moxml/adapter/base.rb', line 90

# Abstract hook for SAX (event-driven) parsing.
#
# The base implementation never succeeds: it raises
# Moxml::NotImplementedError tagged with the feature "sax_parse" and the
# value of +name+ as the adapter.
#
# @param _xml the XML input (unused here)
# @param _handler the event handler (unused here)
# @raise [Moxml::NotImplementedError] always
def sax_parse(_xml, _handler)
  error = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse",
    adapter: name,
  )
  raise error
end

.sax_supported? ⇒ Boolean

Check if this adapter supports SAX parsing

Returns:

  • (Boolean)

    true if SAX parsing is supported



101
102
103
104
# File 'lib/moxml/adapter/base.rb', line 101

# Reports whether this adapter provides its own SAX parser.
#
# The answer is true only when the receiver responds to +sax_parse+ and
# that method is not the one owned by Moxml::Adapter::Base's singleton
# class, i.e. it has been overridden somewhere else.
#
# @return [Boolean]
def sax_supported?
  return false unless respond_to?(:sax_parse)

  method(:sax_parse).owner != Moxml::Adapter::Base.singleton_class
end

.set_attribute_name(attribute, name) ⇒ Object



174
175
176
# File 'lib/moxml/adapter/base.rb', line 174

# Renames an attribute by assigning +name+ through its +name=+ writer.
#
# @param attribute the native attribute
# @param name the new name
def set_attribute_name(attribute, name)
  attribute.name = name
end

.set_attribute_value(attribute, value) ⇒ Object



178
179
180
# File 'lib/moxml/adapter/base.rb', line 178

# Updates an attribute by assigning +value+ through its +value=+ writer.
#
# @param attribute the native attribute
# @param value the new value
def set_attribute_value(attribute, value)
  attribute.value = value
end

.set_root(_doc, _element) ⇒ Object



64
65
66
67
68
69
70
# File 'lib/moxml/adapter/base.rb', line 64

# Abstract hook for setting a document's root element.
#
# The base implementation never succeeds: it raises
# Moxml::NotImplementedError tagged with the feature "set_root" and the
# value of +name+ as the adapter.
#
# @param _doc the native document (unused here)
# @param _element the native element (unused here)
# @raise [Moxml::NotImplementedError] always
def set_root(_doc, _element)
  error = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root",
    adapter: name,
  )
  raise error
end