Class: Moxml::Adapter::Base
- Inherits:
-
Object
- Object
- Moxml::Adapter::Base
show all
- Extended by:
- XmlUtils
- Defined in:
- lib/moxml/adapter/base.rb
Constant Summary
collapse
- ENTITY_MARKER =
Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) + U+FEFF (BOM) is a two-character sentinel chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities like &copy; are converted to this marker before parsing, then restored during serialization. Standard XML entities (&amp; &lt; &gt; &quot; &apos;) are NOT converted.
"\u{FFFC}\u{FEFF}"
- ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
- ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
- ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
- SERIALIZED_ENTITY_MARKER_RE =
/&#xFFFC;&#xFEFF;(#{ENTITY_NAME_PATTERN});/
- STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze
Class Method Summary
collapse
-
.actual_native(child_native, _parent_native) ⇒ Object
Return the actual native node after an add_child operation.
-
.create_cdata(content, owner_doc: nil) ⇒ Object
-
.create_comment(content, owner_doc: nil) ⇒ Object
-
.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object
-
.create_doctype(name, external_id, system_id) ⇒ Object
-
.create_document(_native_doc = nil) ⇒ Object
-
.create_element(name, owner_doc: nil) ⇒ Object
-
.create_entity_reference(name) ⇒ Object
-
.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object
-
.create_processing_instruction(target, content) ⇒ Object
-
.create_text(content, owner_doc: nil) ⇒ Object
-
.duplicate_node(node) ⇒ Object
-
.entity_reference_name(node) ⇒ Object
-
.has_declaration?(_native_doc, wrapper) ⇒ Boolean
Check if the native document has an XML declaration.
-
.in_scope_namespaces(element) ⇒ Object
Returns all namespaces in scope for this element, including inherited from ancestors.
-
.parse(_xml, _options = {}) ⇒ Object
-
.patch_node(node, _parent = nil) ⇒ Object
-
.prepare_for_new_document(node, _target_doc) ⇒ Object
-
.preprocess_entities(xml) ⇒ Object
Replace non-standard entity references with markers before parsing.
-
.restore_entities(text) ⇒ Object
Restore entity markers back to named entity references.
-
.sax_parse(_xml, _handler) ⇒ void
Parse XML using SAX (event-driven) parsing.
-
.sax_supported? ⇒ Boolean
Check if this adapter supports SAX parsing.
-
.set_attribute_name(attribute, name) ⇒ Object
-
.set_attribute_value(attribute, value) ⇒ Object
-
.set_root(_doc, _element) ⇒ Object
Methods included from XmlUtils
encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri
Class Method Details
.actual_native(child_native, _parent_native) ⇒ Object
Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).
207
208
209
|
# File 'lib/moxml/adapter/base.rb', line 207
# Returns the native node to use after an add_child operation.
# This base implementation hands +child_native+ back untouched and does not
# look at the parent.
#
# @param child_native [Object] the native node that was added
# @param _parent_native [Object] the native parent (unused here)
# @return [Object] +child_native+ itself
def actual_native(child_native, _parent_native)
  child_native
end
|
.create_cdata(content, owner_doc: nil) ⇒ Object
119
120
121
|
# File 'lib/moxml/adapter/base.rb', line 119
# Builds a CDATA node: the content is passed through +normalize_xml_value+
# and the result is handed to +create_native_cdata+ together with the
# owning document.
#
# @param content [Object] raw CDATA content
# @param owner_doc [Object, nil] document forwarded to the native factory
# @return [Object] whatever +create_native_cdata+ returns
def create_cdata(content, owner_doc: nil)
  normalized = normalize_xml_value(content)
  create_native_cdata(normalized, owner_doc)
end
|
.create_comment(content, owner_doc: nil) ⇒ Object
123
124
125
126
|
# File 'lib/moxml/adapter/base.rb', line 123
# Builds a comment node: validates the content, normalizes it with
# +normalize_xml_value+ and delegates to the native comment factory.
#
# NOTE(review): the method name and both callee names were missing from this
# extracted listing. +create_comment+ and +validate_comment_content+ are taken
# from the method summary / XmlUtils list on this page; +create_native_comment+
# is inferred from the sibling create_native_* helpers — confirm against
# lib/moxml/adapter/base.rb (lines 123-126).
#
# @param content [Object] raw comment content
# @param owner_doc [Object, nil] document forwarded to the native factory
# @return [Object] whatever the native comment factory returns
def create_comment(content, owner_doc: nil)
  validate_comment_content(content)
  create_native_comment(normalize_xml_value(content), owner_doc)
end
|
.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object
138
139
140
141
142
143
144
|
# File 'lib/moxml/adapter/base.rb', line 138
# Builds an XML declaration node. The version, encoding and standalone values
# are validated (in that order) before being handed to
# +create_native_declaration+.
#
# @param version [String] declaration version, "1.0" by default
# @param encoding [String] declaration encoding, "UTF-8" by default
# @param standalone [Object, nil] standalone value, nil by default
# @return [Object] whatever +create_native_declaration+ returns
def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)

  create_native_declaration(version, encoding, standalone)
end
|
.create_doctype(name, external_id, system_id) ⇒ Object
128
129
130
|
# File 'lib/moxml/adapter/base.rb', line 128
# Builds a DOCTYPE node by forwarding all three arguments, unchanged, to
# +create_native_doctype+.
#
# @param name [Object] doctype name
# @param external_id [Object] external identifier
# @param system_id [Object] system identifier
# @return [Object] whatever +create_native_doctype+ returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end
|
.create_document(_native_doc = nil) ⇒ Object
101
102
103
104
105
106
107
|
# File 'lib/moxml/adapter/base.rb', line 101
# Placeholder for document creation: this base implementation always raises.
#
# @param _native_doc [Object, nil] ignored here
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the receiver's +name+
def create_document(_native_doc = nil)
  not_implemented = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document",
    adapter: name,
  )
  raise not_implemented
end
|
.create_element(name, owner_doc: nil) ⇒ Object
109
110
111
112
|
# File 'lib/moxml/adapter/base.rb', line 109
# Builds an element node: the name is checked with +validate_element_name+
# first, then +create_native_element+ is called with the name and the
# owning document.
#
# @param name [Object] element name
# @param owner_doc [Object, nil] document forwarded to the native factory
# @return [Object] whatever +create_native_element+ returns
def create_element(name, owner_doc: nil)
  validate_element_name(name)

  create_native_element(name, owner_doc)
end
|
.create_entity_reference(name) ⇒ Object
164
165
166
167
|
# File 'lib/moxml/adapter/base.rb', line 164
# Builds an entity reference node: the name is checked with
# +validate_entity_reference_name+ and then passed to
# +create_native_entity_reference+.
#
# @param name [Object] entity name
# @return [Object] whatever +create_native_entity_reference+ returns
def create_entity_reference(name)
  validate_entity_reference_name(name)

  create_native_entity_reference(name)
end
|
.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
# File 'lib/moxml/adapter/base.rb', line 146
# Builds a namespace declaration on +element+.
#
# A truthy prefix combined with an empty URI (after +to_s+) is rejected up
# front. In :strict mode the prefix (when given) and the URI are validated
# strictly; in any other mode only the URI is validated, leniently.
#
# @param element [Object] element receiving the declaration
# @param prefix [Object, nil] namespace prefix, nil for none
# @param uri [Object] namespace URI
# @param namespace_validation_mode [Symbol] :strict (default) or anything
#   else for lenient URI validation
# @return [Object] whatever +create_native_namespace+ returns
# @raise [NamespaceError] when a prefix is given with an empty URI
def create_namespace(element, prefix, uri, namespace_validation_mode: :strict)
  if prefix && uri.to_s.empty?
    raise NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix,
      uri: uri,
    )
  end

  strict = namespace_validation_mode == :strict
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end
|
.create_processing_instruction(target, content) ⇒ Object
132
133
134
135
136
|
# File 'lib/moxml/adapter/base.rb', line 132
# Builds a processing instruction node: the target is checked with
# +validate_pi_target+, the content is run through +normalize_xml_value+, and
# both are handed to +create_native_processing_instruction+.
#
# @param target [Object] processing instruction target
# @param content [Object] processing instruction content
# @return [Object] whatever +create_native_processing_instruction+ returns
def create_processing_instruction(target, content)
  validate_pi_target(target)

  normalized = normalize_xml_value(content)
  create_native_processing_instruction(target, normalized)
end
|
.create_text(content, owner_doc: nil) ⇒ Object
114
115
116
117
|
# File 'lib/moxml/adapter/base.rb', line 114
# Builds a text node: the content is passed through +normalize_xml_value+,
# the result is duplicated, and the copy is handed to +create_native_text+
# together with the owning document.
#
# @param content [Object] raw text content
# @param owner_doc [Object, nil] document forwarded to the native factory
# @return [Object] whatever +create_native_text+ returns
def create_text(content, owner_doc: nil)
  text = normalize_xml_value(content).dup
  create_native_text(text, owner_doc)
end
|
.duplicate_node(node) ⇒ Object
181
182
183
|
# File 'lib/moxml/adapter/base.rb', line 181
# Returns a copy of +node+ obtained by calling +dup+ on it.
#
# @param node [Object] node to copy
# @return [Object] the result of +node.dup+
def duplicate_node(node)
  node.dup
end
|
.entity_reference_name(node) ⇒ Object
177
178
179
|
# File 'lib/moxml/adapter/base.rb', line 177
# Reads the entity name from an entity reference node by calling +name+ on it.
#
# @param node [Object] node responding to +name+
# @return [Object] the value of +node.name+
def entity_reference_name(node)
  node.name
end
|
.has_declaration?(_native_doc, wrapper) ⇒ Boolean
Check if the native document has an XML declaration
201
202
203
|
# File 'lib/moxml/adapter/base.rb', line 201
# Reports whether the document has an XML declaration. This base
# implementation ignores the native document and returns the wrapper's
# +has_xml_declaration+ value.
#
# @param _native_doc [Object] native document (unused here)
# @param wrapper [Object] object responding to +has_xml_declaration+
# @return [Object] the value of +wrapper.has_xml_declaration+
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end
|
.in_scope_namespaces(element) ⇒ Object
Returns all namespaces in scope for this element, including inherited from ancestors. Adapters with native support (Nokogiri) override this. Default walks the ancestor chain.
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
|
# File 'lib/moxml/adapter/base.rb', line 214
# Collects the namespaces in scope for +element+ by walking from the element
# up through its parents for as long as each node is an :element.
#
# Namespaces are keyed by prefix; the first definition seen for a prefix wins,
# so declarations nearer to +element+ take precedence over ancestors'.
#
# @param element [Object, nil] native node to start from
# @return [Array<Object>] the winning namespace objects, in discovery order
def in_scope_namespaces(element)
  by_prefix = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      by_prefix[key] = definition unless by_prefix.key?(key)
    end
    current = parent(current)
  end

  by_prefix.values
end
|
.parse(_xml, _options = {}) ⇒ Object
67
68
69
70
71
72
73
|
# File 'lib/moxml/adapter/base.rb', line 67
# Placeholder for parsing: this base implementation always raises.
#
# @param _xml [Object] ignored here
# @param _options [Hash] ignored here
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the receiver's +name+
def parse(_xml, _options = {})
  not_implemented = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse",
    adapter: name,
  )
  raise not_implemented
end
|
.patch_node(node, _parent = nil) ⇒ Object
185
186
187
188
|
# File 'lib/moxml/adapter/base.rb', line 185
# Hook for adjusting a node; this base implementation returns +node+ as-is
# and ignores the optional parent.
#
# @param node [Object] node to patch
# @param _parent [Object, nil] parent node (unused here)
# @return [Object] +node+ itself
def patch_node(node, _parent = nil)
  node
end
|
.prepare_for_new_document(node, _target_doc) ⇒ Object
190
191
192
193
194
195
|
# File 'lib/moxml/adapter/base.rb', line 190
# Hook called with a node and a target document; this base implementation
# returns +node+ as-is and ignores the target document.
#
# @param node [Object] node being prepared
# @param _target_doc [Object] destination document (unused here)
# @return [Object] +node+ itself
def prepare_for_new_document(node, _target_doc)
  node
end
|
.preprocess_entities(xml) ⇒ Object
Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/moxml/adapter/base.rb', line 30
# Replaces non-standard named entity references (anything matching
# ENTITY_NAME_RE whose name is not in STANDARD_ENTITIES) with
# ENTITY_MARKER + "name;" so they can be put back after parsing.
#
# The input is first brought to UTF-8:
# * binary strings are re-tagged as UTF-8 when their bytes are valid UTF-8,
#   otherwise transcoded with invalid/undefined bytes replaced;
# * UTF-8 strings are used as-is when valid, otherwise scrubbed so that the
#   substitution below does not raise ArgumentError on invalid byte sequences
#   (this mirrors the replacement policy of the binary branch);
# * any other encoding is transcoded to UTF-8.
#
# @param xml [String, nil] XML source text
# @return [String] UTF-8 text with non-standard entities marked; "" for nil
def preprocess_entities(xml)
  return "" if xml.nil?

  utf8 =
    case xml.encoding
    when Encoding::BINARY
      retagged = xml.dup.force_encoding(Encoding::UTF_8)
      if retagged.valid_encoding?
        retagged
      else
        xml.encode(Encoding::UTF_8, Encoding::BINARY, invalid: :replace, undef: :replace)
      end
    when Encoding::UTF_8
      xml.valid_encoding? ? xml : xml.scrub
    else
      xml.encode(Encoding::UTF_8)
    end

  utf8.gsub(ENTITY_NAME_RE) do |reference|
    entity = Regexp.last_match(1)
    # Standard XML entities are left for the parser to resolve.
    STANDARD_ENTITIES.include?(entity) ? reference : "#{ENTITY_MARKER}#{entity};"
  end
end
|
.restore_entities(text) ⇒ Object
Restore entity markers back to named entity references.
50
51
52
53
54
55
56
57
|
# File 'lib/moxml/adapter/base.rb', line 50
# Turns entity markers back into named entity references ("&name;").
#
# Non-String input is returned unchanged. A string not tagged as UTF-8 is
# duplicated and re-tagged with force_encoding (its bytes are not
# transcoded). ENTITY_MARKER_RE is substituted first, then
# SERIALIZED_ENTITY_MARKER_RE.
#
# @param text [Object] serialized text, or any other value
# @return [Object] the restored string, or +text+ itself when not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  utf8 = text
  utf8 = text.dup.force_encoding("UTF-8") unless text.encoding == Encoding::UTF_8

  [ENTITY_MARKER_RE, SERIALIZED_ENTITY_MARKER_RE].reduce(utf8) do |restored, marker_re|
    restored.gsub(marker_re, '&\1;')
  end
end
|
.sax_parse(_xml, _handler) ⇒ void
This method returns an undefined value.
Parse XML using SAX (event-driven) parsing
SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.
85
86
87
88
89
90
91
|
# File 'lib/moxml/adapter/base.rb', line 85
# Placeholder for SAX parsing: this base implementation always raises.
#
# @param _xml [Object] ignored here
# @param _handler [Object] ignored here
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the receiver's +name+
def sax_parse(_xml, _handler)
  not_implemented = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse",
    adapter: name,
  )
  raise not_implemented
end
|
.sax_supported? ⇒ Boolean
Check if this adapter supports SAX parsing
96
97
98
99
|
# File 'lib/moxml/adapter/base.rb', line 96
# Reports whether the receiver provides its own SAX parser: it must respond
# to +sax_parse+, and the +sax_parse+ method it resolves must be owned by
# something other than Moxml::Adapter::Base's singleton class.
#
# @return [Boolean]
def sax_supported?
  return false unless respond_to?(:sax_parse)

  base_singleton = Moxml::Adapter::Base.singleton_class
  method(:sax_parse).owner != base_singleton
end
|
.set_attribute_name(attribute, name) ⇒ Object
169
170
171
|
# File 'lib/moxml/adapter/base.rb', line 169
# Renames an attribute by assigning +name+ through the attribute's +name=+
# writer.
#
# @param attribute [Object] object responding to +name=+
# @param name [Object] new attribute name
# @return [Object] +name+ (the value of the assignment expression)
def set_attribute_name(attribute, name)
  attribute.name = name
end
|
.set_attribute_value(attribute, value) ⇒ Object
173
174
175
|
# File 'lib/moxml/adapter/base.rb', line 173
# Updates an attribute by assigning +value+ through the attribute's +value=+
# writer.
#
# @param attribute [Object] object responding to +value=+
# @param value [Object] new attribute value
# @return [Object] +value+ (the value of the assignment expression)
def set_attribute_value(attribute, value)
  attribute.value = value
end
|
.set_root(_doc, _element) ⇒ Object
59
60
61
62
63
64
65
|
# File 'lib/moxml/adapter/base.rb', line 59
# Placeholder for setting a document's root: this base implementation always
# raises.
#
# @param _doc [Object] ignored here
# @param _element [Object] ignored here
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the receiver's +name+
def set_root(_doc, _element)
  not_implemented = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root",
    adapter: name,
  )
  raise not_implemented
end
|