Class: Moxml::Adapter::Base
- Inherits:
-
Object
- Object
- Moxml::Adapter::Base
show all
- Extended by:
- XmlUtils
- Defined in:
- lib/moxml/adapter/base.rb
Constant Summary
collapse
- ENTITY_MARKER =
Entity marker for adapters that resolve entities during parsing. U+FFFC (Object Replacement Character) followed by U+FEFF (BOM) is a two-character sentinel, chosen because this exact sequence followed by a valid entity name pattern is vanishingly unlikely in real XML content. Non-standard entities such as &amp;copy; are converted to this marker before parsing, then restored during serialization. The standard XML entities (&amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos;) are NOT converted.
"\u{FFFC}\u{FEFF}"
- ENTITY_NAME_PATTERN =
"[a-zA-Z_][\\w.:-]*"
- ENTITY_NAME_RE =
/&(#{ENTITY_NAME_PATTERN});/
- ENTITY_MARKER_RE =
/\u{FFFC}\u{FEFF}(#{ENTITY_NAME_PATTERN});/
- SERIALIZED_ENTITY_MARKER_RE =
/(#{ENTITY_NAME_PATTERN});/
- STANDARD_ENTITIES =
%w[amp lt gt quot apos].freeze
Class Method Summary
collapse
-
.actual_native(child_native, _parent_native) ⇒ Object
Return the actual native node after an add_child operation.
-
.create_cdata(content, owner_doc: nil) ⇒ Object
-
.create_comment(content, owner_doc: nil) ⇒ Object
-
.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object
-
.create_doctype(name, external_id, system_id) ⇒ Object
-
.create_document(_native_doc = nil) ⇒ Object
-
.create_element(name, owner_doc: nil) ⇒ Object
-
.create_entity_reference(name) ⇒ Object
-
.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object
-
.create_processing_instruction(target, content) ⇒ Object
-
.create_text(content, owner_doc: nil) ⇒ Object
-
.duplicate_node(node) ⇒ Object
-
.entity_reference_name(node) ⇒ Object
-
.has_declaration?(_native_doc, wrapper) ⇒ Boolean
Check if the native document has an XML declaration.
-
.in_scope_namespaces(element) ⇒ Object
Returns all namespaces in scope for this element, including inherited from ancestors.
-
.parse(_xml, _options = {}) ⇒ Object
-
.patch_node(node, _parent = nil) ⇒ Object
-
.prepare_for_new_document(node, _target_doc) ⇒ Object
-
.preprocess_entities(xml) ⇒ Object
Replace non-standard entity references with markers before parsing.
-
.restore_entities(text) ⇒ Object
Restore entity markers back to named entity references.
-
.sax_parse(_xml, _handler) ⇒ void
Parse XML using SAX (event-driven) parsing.
-
.sax_supported? ⇒ Boolean
Check if this adapter supports SAX parsing.
-
.set_attribute_name(attribute, name) ⇒ Object
-
.set_attribute_value(attribute, value) ⇒ Object
-
.set_root(_doc, _element) ⇒ Object
Methods included from XmlUtils
encode_entities, normalize_xml_value, validate_comment_content, validate_declaration_encoding, validate_declaration_standalone, validate_declaration_version, validate_element_name, validate_entity_reference_name, validate_pi_target, validate_prefix, validate_uri
Class Method Details
.actual_native(child_native, _parent_native) ⇒ Object
Return the actual native node after an add_child operation. Override for adapters where node identity may change (e.g., LibXML doc.root=).
212
213
214
|
# File 'lib/moxml/adapter/base.rb', line 212
# Returns the native node that should be used after an add_child operation.
#
# This default hands back +child_native+ untouched and ignores the parent.
#
# @param child_native the native node that was added
# @param _parent_native unused by this default implementation
# @return the same object that was passed as +child_native+
def actual_native(child_native, _parent_native)
  child_native
end
|
.create_cdata(content, owner_doc: nil) ⇒ Object
124
125
126
|
# File 'lib/moxml/adapter/base.rb', line 124
# Creates a CDATA node.
#
# +content+ is run through normalize_xml_value, and the result is handed to
# create_native_cdata together with +owner_doc+.
#
# @param content the CDATA payload
# @param owner_doc forwarded unchanged to create_native_cdata (nil by default)
# @return whatever create_native_cdata returns
def create_cdata(content, owner_doc: nil)
  normalized = normalize_xml_value(content)
  create_native_cdata(normalized, owner_doc)
end
|
.create_comment(content, owner_doc: nil) ⇒ Object
128
129
130
131
|
# File 'lib/moxml/adapter/base.rb', line 128
# Creates a comment node.
#
# +content+ is validated first, then normalized with normalize_xml_value and
# handed to the native comment constructor together with +owner_doc+.
#
# NOTE(review): the rendered listing had lost the method name and both callee
# names. They are reconstructed here from the class method summary
# (create_comment), the XmlUtils helper list (validate_comment_content) and
# the create_native_* naming used by the sibling factories
# (create_native_comment) -- confirm against lib/moxml/adapter/base.rb.
#
# @param content the comment text
# @param owner_doc forwarded unchanged to the native constructor (nil by default)
# @return whatever the native comment constructor returns
def create_comment(content, owner_doc: nil)
  validate_comment_content(content)
  create_native_comment(normalize_xml_value(content), owner_doc)
end
|
.create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil) ⇒ Object
143
144
145
146
147
148
149
|
# File 'lib/moxml/adapter/base.rb', line 143
# Creates an XML declaration node.
#
# Each argument is checked by its matching validate_declaration_* helper, in
# the order version, encoding, standalone, before create_native_declaration
# is called with all three values.
#
# @param version declaration version (defaults to "1.0")
# @param encoding declaration encoding (defaults to "UTF-8")
# @param standalone standalone value (defaults to nil)
# @return whatever create_native_declaration returns
def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
  validate_declaration_version(version)
  validate_declaration_encoding(encoding)
  validate_declaration_standalone(standalone)

  create_native_declaration(version, encoding, standalone)
end
|
.create_doctype(name, external_id, system_id) ⇒ Object
133
134
135
|
# File 'lib/moxml/adapter/base.rb', line 133
# Creates a DOCTYPE node by delegating directly to create_native_doctype.
#
# No validation is performed here; the three arguments are forwarded as-is.
#
# @param name the doctype name
# @param external_id the external identifier
# @param system_id the system identifier
# @return whatever create_native_doctype returns
def create_doctype(name, external_id, system_id)
  create_native_doctype(name, external_id, system_id)
end
|
.create_document(_native_doc = nil) ⇒ Object
106
107
108
109
110
111
112
|
# File 'lib/moxml/adapter/base.rb', line 106
# Placeholder for document creation; this base version always raises.
#
# @param _native_doc unused by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+ as the adapter
def create_document(_native_doc = nil)
  error = Moxml::NotImplementedError.new(
    "create_document not implemented",
    feature: "create_document",
    adapter: name,
  )
  raise error
end
|
.create_element(name, owner_doc: nil) ⇒ Object
114
115
116
117
|
# File 'lib/moxml/adapter/base.rb', line 114
# Creates an element node.
#
# The name is checked with validate_element_name before the element is built
# by create_native_element.
#
# @param name the element name
# @param owner_doc forwarded unchanged to create_native_element (nil by default)
# @return whatever create_native_element returns
def create_element(name, owner_doc: nil)
  validate_element_name(name)

  create_native_element(name, owner_doc)
end
|
.create_entity_reference(name) ⇒ Object
169
170
171
172
|
# File 'lib/moxml/adapter/base.rb', line 169
# Creates an entity reference node.
#
# The name is checked with validate_entity_reference_name before the node is
# built by create_native_entity_reference.
#
# @param name the entity name
# @return whatever create_native_entity_reference returns
def create_entity_reference(name)
  validate_entity_reference_name(name)

  create_native_entity_reference(name)
end
|
.create_namespace(element, prefix, uri, namespace_validation_mode: :strict) ⇒ Object
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
# File 'lib/moxml/adapter/base.rb', line 151
# Creates a namespace declaration on +element+.
#
# A prefixed declaration with an empty URI (nil or "") is rejected up front.
# In :strict mode the prefix (when present) and the URI are both validated;
# in any other mode only the URI is validated, using the :lenient mode.
#
# @param element the element receiving the declaration
# @param prefix the namespace prefix, or nil for a default namespace
# @param uri the namespace URI
# @param namespace_validation_mode :strict (default) selects strict checks;
#   any other value selects lenient URI validation
# @return whatever create_native_namespace returns
# @raise [NamespaceError] when +prefix+ is given and +uri+ is empty
def create_namespace(element, prefix, uri, namespace_validation_mode: :strict)
  if prefix && uri.to_s.empty?
    raise NamespaceError.new(
      "Prefixed namespace declaration cannot have an empty URI",
      prefix: prefix,
      uri: uri,
    )
  end

  strict = namespace_validation_mode == :strict
  # The prefix is only ever validated in strict mode.
  validate_prefix(prefix) if strict && prefix
  validate_uri(uri, mode: strict ? :strict : :lenient)

  create_native_namespace(element, prefix, uri)
end
|
.create_processing_instruction(target, content) ⇒ Object
137
138
139
140
141
|
# File 'lib/moxml/adapter/base.rb', line 137
# Creates a processing instruction node.
#
# The target is checked with validate_pi_target; the content is normalized
# with normalize_xml_value before both are passed to
# create_native_processing_instruction.
#
# @param target the processing instruction target
# @param content the processing instruction content
# @return whatever create_native_processing_instruction returns
def create_processing_instruction(target, content)
  validate_pi_target(target)

  normalized = normalize_xml_value(content)
  create_native_processing_instruction(target, normalized)
end
|
.create_text(content, owner_doc: nil) ⇒ Object
119
120
121
122
|
# File 'lib/moxml/adapter/base.rb', line 119
# Creates a text node.
#
# The content is normalized with normalize_xml_value and a duplicate of that
# result (not the result itself) is passed to create_native_text.
#
# @param content the text content
# @param owner_doc forwarded unchanged to create_native_text (nil by default)
# @return whatever create_native_text returns
def create_text(content, owner_doc: nil)
  text_copy = normalize_xml_value(content).dup
  create_native_text(text_copy, owner_doc)
end
|
.duplicate_node(node) ⇒ Object
186
187
188
|
# File 'lib/moxml/adapter/base.rb', line 186
# Duplicates a native node by calling +dup+ on it.
#
# @param node the native node to copy
# @return the result of node.dup
def duplicate_node(node)
  node.dup
end
|
.entity_reference_name(node) ⇒ Object
182
183
184
|
# File 'lib/moxml/adapter/base.rb', line 182
# Reads the name of an entity reference node via its +name+ reader.
#
# @param node the native entity reference node
# @return the value of node.name
def entity_reference_name(node)
  node.name
end
|
.has_declaration?(_native_doc, wrapper) ⇒ Boolean
Check if the native document has an XML declaration
206
207
208
|
# File 'lib/moxml/adapter/base.rb', line 206
# Reports whether the document has an XML declaration.
#
# This default ignores the native document and answers from the wrapper's
# +has_xml_declaration+ reader.
#
# @param _native_doc unused by this default implementation
# @param wrapper object responding to has_xml_declaration
# @return the value of wrapper.has_xml_declaration
def has_declaration?(_native_doc, wrapper)
  wrapper.has_xml_declaration
end
|
.in_scope_namespaces(element) ⇒ Object
Returns all namespaces in scope for this element, including inherited from ancestors. Adapters with native support (Nokogiri) override this. Default walks the ancestor chain.
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
# File 'lib/moxml/adapter/base.rb', line 219
# Collects every namespace in scope for +element+, including those inherited
# from ancestors.
#
# Walks upward from +element+ via +parent+, stopping at the first node whose
# node_type is not :element (or when there is no parent). For each prefix the
# first definition found wins, so declarations nearer to +element+ shadow
# those further up the chain.
#
# @param element the native element to start from
# @return [Array] the namespace definitions, in the order their prefixes were
#   first seen
def in_scope_namespaces(element)
  seen = {}
  current = element

  while current && node_type(current) == :element
    namespace_definitions(current).each do |definition|
      key = namespace_prefix(definition)
      # A nearer declaration already claimed this prefix.
      next if seen.key?(key)

      seen[key] = definition
    end
    current = parent(current)
  end

  seen.values
end
|
.parse(_xml, _options = {}) ⇒ Object
72
73
74
75
76
77
78
|
# File 'lib/moxml/adapter/base.rb', line 72
# Placeholder for XML parsing; this base version always raises.
#
# @param _xml unused by this base implementation
# @param _options unused by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+ as the adapter
def parse(_xml, _options = {})
  error = Moxml::NotImplementedError.new(
    "parse not implemented",
    feature: "parse",
    adapter: name,
  )
  raise error
end
|
.patch_node(node, _parent = nil) ⇒ Object
190
191
192
193
|
# File 'lib/moxml/adapter/base.rb', line 190
# Hook for adjusting a node; this default returns +node+ unchanged.
#
# @param node the native node
# @param _parent unused by this default implementation
# @return the same object that was passed as +node+
def patch_node(node, _parent = nil)
  node
end
|
.prepare_for_new_document(node, _target_doc) ⇒ Object
195
196
197
198
199
200
|
# File 'lib/moxml/adapter/base.rb', line 195
# Hook for readying a node to be placed in another document; this default
# returns +node+ unchanged.
#
# @param node the native node
# @param _target_doc unused by this default implementation
# @return the same object that was passed as +node+
def prepare_for_new_document(node, _target_doc)
  node
end
|
.preprocess_entities(xml) ⇒ Object
Replace non-standard entity references with markers before parsing. Always returns a UTF-8 encoded string.
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
# File 'lib/moxml/adapter/base.rb', line 30
# Replaces non-standard entity references with marker sequences before
# parsing.
#
# The input is first brought to UTF-8:
# * UTF-8 strings are used as they are;
# * binary strings are reinterpreted as UTF-8 when their bytes are valid
#   UTF-8, otherwise transcoded from ASCII-8BIT with invalid/undefined bytes
#   replaced;
# * any other encoding is transcoded with String#encode.
#
# Every match of ENTITY_NAME_RE whose name is not listed in STANDARD_ENTITIES
# is then rewritten as ENTITY_MARKER + name + ";". Standard entities are left
# untouched.
#
# @param xml [String, nil] the XML source
# @return [String] the processed text; "" when +xml+ is nil
def preprocess_entities(xml)
  return "" if xml.nil?

  utf8 =
    case xml.encoding
    when Encoding::UTF_8
      xml
    when Encoding::BINARY
      # Work on copies so the caller's string keeps its encoding.
      reinterpreted = xml.dup.force_encoding("UTF-8")
      if reinterpreted.valid_encoding?
        reinterpreted
      else
        xml.dup.encode("UTF-8", "ASCII-8BIT", invalid: :replace, undef: :replace)
      end
    else
      xml.encode("UTF-8")
    end

  utf8.gsub(ENTITY_NAME_RE) do |reference|
    entity = ::Regexp.last_match(1)
    next reference if STANDARD_ENTITIES.include?(entity)

    "#{ENTITY_MARKER}#{entity};"
  end
end
|
.restore_entities(text) ⇒ Object
Restore entity markers back to named entity references.
55
56
57
58
59
60
61
62
|
# File 'lib/moxml/adapter/base.rb', line 55
# Turns entity markers back into named entity references.
#
# Non-String input is returned untouched. Strings that are not tagged as
# UTF-8 are copied and re-tagged as UTF-8 (bytes unchanged). Matches of
# ENTITY_MARKER_RE and then SERIALIZED_ENTITY_MARKER_RE are each replaced by
# "&" + captured name + ";".
#
# @param text the serialized text (or any other object)
# @return [String, Object] the text with markers restored, or +text+ itself
#   when it is not a String
def restore_entities(text)
  return text unless text.is_a?(String)

  utf8 = text
  # Re-tag a copy so the caller's string keeps its own encoding.
  utf8 = text.dup.force_encoding("UTF-8") unless text.encoding == Encoding::UTF_8

  utf8
    .gsub(ENTITY_MARKER_RE, '&\1;')
    .gsub(SERIALIZED_ENTITY_MARKER_RE, '&\1;')
end
|
.sax_parse(_xml, _handler) ⇒ void
This method returns an undefined value.
Parse XML using SAX (event-driven) parsing
SAX parsing provides a memory-efficient way to process XML by triggering events as the document is parsed, rather than building a complete DOM tree.
90
91
92
93
94
95
96
|
# File 'lib/moxml/adapter/base.rb', line 90
# Placeholder for SAX (event-driven) parsing; this base version always
# raises.
#
# @param _xml unused by this base implementation
# @param _handler unused by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+ as the adapter
def sax_parse(_xml, _handler)
  error = Moxml::NotImplementedError.new(
    "sax_parse not implemented",
    feature: "sax_parse",
    adapter: name,
  )
  raise error
end
|
.sax_supported? ⇒ Boolean
Check if this adapter supports SAX parsing
101
102
103
104
|
# File 'lib/moxml/adapter/base.rb', line 101
# Reports whether this adapter supports SAX parsing.
#
# The answer is true only when the receiver responds to sax_parse and that
# method is owned by something other than Moxml::Adapter::Base's singleton
# class, i.e. the raising placeholder has been overridden.
#
# @return [Boolean]
def sax_supported?
  return false unless respond_to?(:sax_parse)

  placeholder_owner = Moxml::Adapter::Base.singleton_class
  method(:sax_parse).owner != placeholder_owner
end
|
.set_attribute_name(attribute, name) ⇒ Object
174
175
176
|
# File 'lib/moxml/adapter/base.rb', line 174
# Renames an attribute through its +name=+ writer.
#
# @param attribute the native attribute
# @param name the new attribute name
# @return the assigned +name+
def set_attribute_name(attribute, name)
  attribute.name = name
end
|
.set_attribute_value(attribute, value) ⇒ Object
178
179
180
|
# File 'lib/moxml/adapter/base.rb', line 178
# Updates an attribute through its +value=+ writer.
#
# @param attribute the native attribute
# @param value the new attribute value
# @return the assigned +value+
def set_attribute_value(attribute, value)
  attribute.value = value
end
|
.set_root(_doc, _element) ⇒ Object
64
65
66
67
68
69
70
|
# File 'lib/moxml/adapter/base.rb', line 64
# Placeholder for setting a document's root element; this base version
# always raises.
#
# @param _doc unused by this base implementation
# @param _element unused by this base implementation
# @raise [Moxml::NotImplementedError] always, tagged with the feature name
#   and the value of +name+ as the adapter
def set_root(_doc, _element)
  error = Moxml::NotImplementedError.new(
    "set_root not implemented",
    feature: "set_root",
    adapter: name,
  )
  raise error
end
|