Class: Makiri::XML::Document
- Includes:
- NodeMethods
- Defined in:
- lib/makiri/xml/document.rb,
ext/makiri/glue/ruby_xml.c
Overview
XML-specific document conveniences. The XML node leaves and the document itself are defined in C (ext/makiri/glue/ruby_xml*.c); construction sugar that is pure composition over the public surface lives here, not on the abstract Makiri::Document (which carries no construction).
Class Method Summary collapse
-
.Makiri::XML::Document.new ⇒ Document
A new, empty XML document (no root element) to build up programmatically with #create_element etc.
-
.parse(*args) ⇒ Object
source is a String or any object responding to #read (an IO / File / StringIO); max_bytes overrides the default arena memory ceiling for this parse.
Instance Method Summary collapse
- #at_xpath(*args) ⇒ Object
- #create_cdata(t) ⇒ Object
- #create_cdata_node(t) ⇒ Object
- #create_comment(t) ⇒ Object
-
#create_element(*args) ⇒ Object
create_element(name, content = nil, attributes = {}) -> Element.
- #create_processing_instruction(rb_target, rb_data) ⇒ Object
- #create_text_node(t) ⇒ Object
-
#fragment(source) ⇒ DocumentFragment
Parse source into a fragment bound to this document, resolving names against the document’s in-scope (root) namespaces, so the fragment’s nodes can be spliced in with Node#add_child and friends.
-
#import_node(*args) ⇒ Object
Makiri::XML::Document#import_node(node, deep = false) - the DOM importNode for an XML document.
-
#internal_subset ⇒ Object
The document’s DOCTYPE as a Makiri::XML::DocumentType (aliased Makiri::XML::DTD), or nil if the document had no `<!DOCTYPE …>`.
-
#root ⇒ Object
The document’s root element.
-
#root=(node) ⇒ Makiri::XML::Element
Set (or replace) the document’s root element: with an existing root it replaces that root, otherwise it appends one (subject to the single-root rule).
-
#xpath(*args) ⇒ Object
xpath / at_xpath work on the document and on any XML node (rooted at that node), so they live on the shared XML node behavior module + the document.
Methods included from NodeMethods
#<<, #==, #[], #[]=, #add_child, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #at_css, #attribute_nodes, #before, #canonicalize, #child, #children, #clone_node, #collect_namespaces, #content, #content=, #css, #delete, #document, #element_children, #eql?, #hash, #inner_html, #inner_text, #key?, #last_element_child, #local_name, #matches?, #name, #name=, #namespace, #namespace_definitions, #namespace_uri, #namespaces, #next, #next_sibling, #node_type, #outer_html, #parent, #pointer_id, #prefix, #previous, #previous_sibling, #remove, #remove_attribute, #remove_attribute_ns, #replace, #set_attribute_ns, #text, #to_html, #to_s, #to_xml, #unlink, #value
Methods inherited from Document
Methods inherited from Node
#add_class, #append_class, #at, #attribute, #attribute?, #attributes, #blank?, #cdata?, #classes, #clone, #comment?, #document?, #document_fragment?, #dup, #each, #element?, #inspect, #path, #processing_instruction?, #remove_class, #search, #set_attribute, #text?, #to_h, #traverse
Class Method Details
.Makiri::XML::Document.new ⇒ Document
A new, empty XML document (no root element) to build up programmatically with #create_element etc. and #add_child / #root=, like Nokogiri. Any arguments (Nokogiri accepts a version / encoding) are accepted and ignored.
558 559 560 561 562 563 |
# File 'ext/makiri/glue/ruby_xml.c', line 558
/* Makiri::XML::Document.new: hand back a brand-new, empty XML document.
 * Every argument (and the receiving class) is accepted and deliberately
 * ignored. */
static VALUE
mkr_xml_document_s_new(int argc, VALUE *argv, VALUE klass)
{
    (void)klass;
    (void)argv;
    (void)argc;

    VALUE empty_doc = mkr_xml_new_empty_document();
    return empty_doc;
}
|
.Makiri::XML::Document.parse(source, max_bytes: nil) ⇒ Makiri::XML::Document .Makiri::XML(source, max_bytes: nil) ⇒ Makiri::XML::Document
source is a String or any object responding to #read (an IO / File / StringIO); max_bytes overrides the default arena memory ceiling for this parse. Read a non-UTF-8 file in binary mode (File.binread / “rb”) so the encoding is autodetected from its BOM / declaration.
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'ext/makiri/glue/ruby_xml.c', line 103
/*
 * Makiri::XML::Document.parse(source, max_bytes: nil) -> Makiri::XML::Document
 *
 * Parses +source+ (a String, or any object responding to #read) into a new XML
 * document. The steps run in a fixed order: validate options, read and decode
 * the input, copy it into a C-owned buffer, wrap an empty handle, parse with
 * the GVL released, then attach the parsed document to the handle.
 *
 * Raises mkr_eXmlSyntaxError for malformed XML or an unsupported XML version,
 * mkr_eXmlLimitExceeded when the parser reports the budget was exceeded, and
 * mkr_eError for allocation failures or any other parse failure. The option
 * validation and decode helpers may raise as well.
 */
static VALUE
mkr_xml_s_parse(int argc, VALUE *argv, VALUE self)
{
(void)self;
VALUE rb_source, rb_opts;
/* "1:" = one required positional argument plus an optional keyword hash. */
rb_scan_args(argc, argv, "1:", &rb_source, &rb_opts);
mkr_xml_limits_t limits = mkr_xml_parse_limits(rb_opts); /* validates; may raise */
/* A max_bytes of 0 means "not overridden": fall back to the built-in ceiling. */
size_t budget = limits.max_bytes ? limits.max_bytes : (size_t)MKR_XML_MAX_BYTES;
/* Read an IO/File-like source (an object responding to #read), like the HTML
 * entry; a String passes straight through. */
if (rb_respond_to(rb_source, rb_intern("read"))) {
rb_source = rb_funcall(rb_source, rb_intern("read"), 0);
}
/* Strict decode under the GVL: invalid UTF-8 / undecodable byte / NUL all
 * raise Makiri::XML::SyntaxError here (no U+FFFD repair). Passing the budget
 * lets decode reject an over-budget input (LimitExceeded) before its
 * validation copy and the GVL-release copy below - so a hostile oversized
 * document is not materialised twice for a doomed parse. */
VALUE decoded = mkr_xml_decode_input(rb_String(rb_source), budget);
/* Copy the decoded bytes into a private C buffer up front - BEFORE allocating
 * any Ruby object (the wrap below) - so there is NO GC point between obtaining
 * +decoded+ and copying it, and the parse can then run with the GVL released
 * without racing GC/compaction on the String's backing store. */
mkr_owned_bytes_t source = {0};
if (mkr_ruby_copy_bytes(decoded, &source) != 0) {
rb_raise(mkr_eError, "out of memory copying XML source");
}
/* Build an empty XML handle and wrap it (doc == NULL) so a failure mid-parse
 * frees cleanly via GC (mkr_parsed_destroy -> the XML branch ->
 * mkr_xml_doc_destroy(NULL), a no-op). The source is already copied, so this
 * Ruby allocation cannot disturb it. */
mkr_parsed_t *parsed = mkr_parsed_new_xml(NULL);
if (parsed == NULL) {
/* Not yet wrapped, so nothing else will release the copied source. */
mkr_owned_bytes_clear(&source);
rb_raise(mkr_eError, "out of memory allocating XML document");
}
VALUE obj = mkr_wrap_document(parsed); /* GC owns +parsed+ from here */
/* The worker receives only the C buffer and the limits; the last two fields
 * (initialised to NULL / MKR_XML_OK) are read back below as result / status. */
mkr_xml_parse_nogvl_t args = { source.ptr, source.len, limits, NULL, MKR_XML_OK };
rb_thread_call_without_gvl(mkr_xml_parse_nogvl, &args, NULL, NULL);
/* Release the private copy before any raise below, so no error path leaks it. */
mkr_owned_bytes_clear(&source);
if (args.result == NULL) {
/* Map the parser status onto the Ruby exception hierarchy. rb_raise does
 * not return, so the attach step below only runs on success. */
switch (args.status) {
case MKR_XML_ERR_SYNTAX: rb_raise(mkr_eXmlSyntaxError, "malformed XML"); break;
case MKR_XML_ERR_LIMIT: rb_raise(mkr_eXmlLimitExceeded, "XML document budget exceeded"); break;
case MKR_XML_ERR_VERSION: rb_raise(mkr_eXmlSyntaxError,
"unsupported XML version (only XML 1.0 is supported)"); break;
default: rb_raise(mkr_eError, "failed to parse XML document"); break;
}
}
/* Success: hand the parsed document to the already-wrapped handle. */
mkr_parsed_set_xml_doc(parsed, args.result);
return obj;
}
|
Instance Method Details
#at_xpath(*args) ⇒ Object
298 299 300 301 302 303 304 |
# File 'ext/makiri/glue/ruby_xml.c', line 298
/* Document#at_xpath(expr, namespaces = nil): evaluate an XPath expression
 * against the document through the shared runner. The trailing 1 is the only
 * difference from #xpath (which passes 0) - presumably "first match only". */
static VALUE
mkr_xml_doc_at_xpath(int argc, VALUE *argv, VALUE self)
{
    VALUE rb_expr = Qnil;
    VALUE rb_namespaces = Qnil;

    /* "11" = one required argument followed by one optional argument. */
    rb_scan_args(argc, argv, "11", &rb_expr, &rb_namespaces);

    return mkr_xml_doc_xpath_run(self, rb_expr, rb_namespaces, 1);
}
|
#create_cdata(t) ⇒ Object
1309 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1309
/* Document#create_cdata(t): build a CDATA-section node from +t+ via the shared
 * character-data constructor; "CDATA content" is the label handed to it. */
static VALUE
mkr_xml_doc_create_cdata(VALUE self, VALUE t)
{
    return mkr_xml_doc_create_chardata(self, t,
                                       MKR_XML_NODE_TYPE_CDATA_SECTION,
                                       "CDATA content");
}
|
#create_cdata_node(t) ⇒ Object
1309 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1309
/* Document#create_cdata_node(t): served by the same C function as
 * #create_cdata - builds a CDATA-section node from +t+ via the shared
 * character-data constructor. */
static VALUE
mkr_xml_doc_create_cdata(VALUE self, VALUE t)
{
    return mkr_xml_doc_create_chardata(self, t,
                                       MKR_XML_NODE_TYPE_CDATA_SECTION,
                                       "CDATA content");
}
|
#create_comment(t) ⇒ Object
1308 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1308
/* Document#create_comment(t): build a comment node from +t+ via the shared
 * character-data constructor; "comment content" is the label handed to it. */
static VALUE
mkr_xml_doc_create_comment(VALUE self, VALUE t)
{
    return mkr_xml_doc_create_chardata(self, t,
                                       MKR_XML_NODE_TYPE_COMMENT,
                                       "comment content");
}
|
#create_element(*args) ⇒ Object
create_element(name, content = nil, attributes = {}) -> Element. Nokogiri-style trailing arguments: a Hash sets attributes, any other (non-nil) argument is the element’s text content.
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1260
/*
 * Document#create_element(name, content = nil, attributes = {}) -> Element
 *
 * Creates a new element named +name+ in this document. Trailing arguments
 * follow the Nokogiri convention: a Hash supplies attributes, any other
 * non-nil value supplies the text content. When several arguments of the same
 * kind are given, the last one wins (each match overwrites the previous one).
 */
static VALUE
mkr_xml_doc_create_element(int argc, VALUE *argv, VALUE self)
{
VALUE rb_name, rb_rest;
/* "1*" = one required argument; everything after it is collected in rb_rest. */
rb_scan_args(argc, argv, "1*", &rb_name, &rb_rest);
VALUE rb_content = Qnil, rb_attrs = Qnil;
/* Classify each trailing argument: Hash -> attributes, non-nil -> content,
 * nil -> skipped. */
for (long i = 0; i < RARRAY_LEN(rb_rest); i++) {
VALUE a = RARRAY_AREF(rb_rest, i);
if (RB_TYPE_P(a, T_HASH)) {
rb_attrs = a;
} else if (!NIL_P(a)) {
rb_content = a;
}
}
mkr_xml_doc_t *xdoc = mkr_xml_node_xdoc(self);
mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "element name");
mkr_xml_node_t *el = NULL;
mkr_xml_mut_status_t st = mkr_xml_new_element(xdoc, nv.ptr, mkr_xml_u32_len(nv.len), &el);
/* Keep the Ruby value behind nv.ptr alive until the call above has finished
 * with the borrowed bytes. */
RB_GC_GUARD(nv.value);
/* NOTE(review): presumably raises when +st+ is not a success status - confirm. */
mkr_xml_mut_check(st);
if (!NIL_P(rb_content)) {
mkr_ruby_borrowed_text_t tv = mkr_ruby_verified_text(rb_content, "element content");
st = mkr_xml_set_content(xdoc, el, tv.ptr, mkr_xml_u32_len(tv.len));
RB_GC_GUARD(tv.value);
mkr_xml_mut_check(st);
}
/* Wrap first: the attribute callback receives the wrapped element as its
 * extra argument. */
VALUE rb_el = mkr_wrap_xml_node(el, self);
if (!NIL_P(rb_attrs)) {
rb_hash_foreach(rb_attrs, mkr_xml_create_attr_i, rb_el);
}
return rb_el;
}
|
#create_processing_instruction(rb_target, rb_data) ⇒ Object
1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1311
/*
 * Document#create_processing_instruction(target, data)
 *
 * Creates a processing-instruction node in this document from the two
 * verified text arguments and returns it wrapped as a Ruby node.
 */
static VALUE
mkr_xml_doc_create_pi(VALUE self, VALUE rb_target, VALUE rb_data)
{
    mkr_xml_doc_t *owner = mkr_xml_node_xdoc(self);

    /* Verify both arguments before touching the document. */
    mkr_ruby_borrowed_text_t target_txt = mkr_ruby_verified_text(rb_target, "PI target");
    mkr_ruby_borrowed_text_t data_txt = mkr_ruby_verified_text(rb_data, "PI data");

    mkr_xml_node_t *node = NULL;
    mkr_xml_mut_status_t status = mkr_xml_new_pi(owner,
                                                 target_txt.ptr, mkr_xml_u32_len(target_txt.len),
                                                 data_txt.ptr, mkr_xml_u32_len(data_txt.len),
                                                 &node);

    /* Keep the Ruby values behind the borrowed pointers alive across the call. */
    RB_GC_GUARD(target_txt.value);
    RB_GC_GUARD(data_txt.value);

    mkr_xml_mut_check(status);
    return mkr_wrap_xml_node(node, self);
}
|
#create_text_node(t) ⇒ Object
1307 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1307
/* Document#create_text_node(t): build a text node from +t+ via the shared
 * character-data constructor; "text content" is the label handed to it. */
static VALUE
mkr_xml_doc_create_text_node(VALUE self, VALUE t)
{
    return mkr_xml_doc_create_chardata(self, t,
                                       MKR_XML_NODE_TYPE_TEXT,
                                       "text content");
}
|
#fragment(source) ⇒ DocumentFragment
Parse source into a fragment bound to this document, resolving names against the document’s in-scope (root) namespaces, so the fragment’s nodes can be spliced in with Node#add_child and friends.
585 586 587 588 589 590 591 592 593 594 595 |
# File 'ext/makiri/glue/ruby_xml.c', line 585
/*
 * Document#fragment(source) -> DocumentFragment
 *
 * Parses +source+ into a fragment bound to this document and returns it
 * wrapped as a Ruby node. Raises mkr_eError when the receiver has no
 * underlying XML document.
 */
static VALUE
mkr_xml_doc_fragment(VALUE self, VALUE rb_source)
{
    mkr_xml_doc_t *owner = mkr_parsed_xml_doc(mkr_doc_parsed(self));

    if (!owner) {
        rb_raise(mkr_eError, "the document has no arena");
    }

    /* NOTE(review): the meaning of the trailing 1 is not visible here - confirm
     * against mkr_xml_fragment_into. */
    mkr_xml_node_t *fragment_node = mkr_xml_fragment_into(owner, rb_source, 1);
    return mkr_wrap_xml_node(fragment_node, self);
}
|
#import_node(*args) ⇒ Object
Makiri::XML::Document#import_node(node, deep = false) - the DOM importNode for an XML document. A same-representation (XML) node is deep/shallow-copied into this document’s arena (namespaces re-resolved when it is later linked); an HTML node is TRANSLATED across representations (lxb -> mkr) by ruby_cross_import.c. The result is detached and owned by this document; the source is untouched. Fails closed (no partial node returned).
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 |
# File 'ext/makiri/glue/ruby_xml_node.c', line 1332
/*
 * Makiri::XML::Document#import_node(node, deep = false)
 *
 * The DOM importNode for an XML document. An XML node is copied into this
 * document with mkr_xml_copy_node; an HTML node is translated with
 * mkr_cross_html_to_xml. Both helpers write the new node through the +copy+
 * out-parameter, which is why it is passed by address (&copy).
 *
 * Raises TypeError when +node+ is neither an XML nor an HTML Makiri node.
 * Each helper's status goes through mkr_xml_mut_check before +copy+ is
 * wrapped (presumably raising on failure - confirm).
 */
static VALUE
mkr_xml_doc_import_node(int argc, VALUE *argv, VALUE self)
{
    VALUE node_v, deep_v;
    /* "11" = required node, optional deep flag (nil, hence false, when omitted). */
    rb_scan_args(argc, argv, "11", &node_v, &deep_v);
    int deep = RTEST(deep_v);
    mkr_xml_doc_t *xdoc = mkr_parsed_xml_doc(mkr_doc_parsed(self));
    mkr_xml_node_t *copy = NULL;
    switch (mkr_node_kind(node_v)) {
    case MKR_NODE_KIND_XML:
        mkr_xml_mut_check(mkr_xml_copy_node(xdoc, mkr_xml_node_unwrap(node_v), deep, &copy));
        break;
    case MKR_NODE_KIND_HTML:
        mkr_xml_mut_check(mkr_cross_html_to_xml(xdoc, mkr_html_node_unwrap(node_v), deep, &copy));
        break;
    default:
        /* rb_raise does not return, so +copy+ is never wrapped while NULL here. */
        rb_raise(rb_eTypeError, "import_node expects a Makiri node");
    }
    return mkr_wrap_xml_node(copy, self);
}
|
#internal_subset ⇒ Object
The document’s DOCTYPE as a Makiri::XML::DocumentType (aliased Makiri::XML::DTD), or nil if the document had no `<!DOCTYPE …>`. Mirrors Nokogiri’s Document#internal_subset. The DTD’s name and external/system identifiers are read; the DTD body is NOT parsed (no entity/element declarations are loaded - &name; stays an undefined-entity error and no external subset is fetched). The doctype node is kept off the tree, so XPath never sees it (XPath 1.0 has no doctype node type).
485 486 487 488 489 490 491 492 |
# File 'ext/makiri/glue/ruby_xml.c', line 485
/*
 * Document#internal_subset
 *
 * Returns the document's doctype node wrapped as a Ruby node, or nil when the
 * receiver has no underlying XML document or that document has no doctype.
 */
static VALUE
mkr_xml_doc_internal_subset(VALUE self)
{
    mkr_xml_doc_t *owner = mkr_parsed_xml_doc(mkr_doc_parsed(self));

    if (owner != NULL && owner->doctype != NULL) {
        return mkr_wrap_xml_node(owner->doctype, self);
    }
    return Qnil;
}
|
#root ⇒ Object
The document’s root element.
471 472 473 474 475 476 |
# File 'ext/makiri/glue/ruby_xml.c', line 471
/*
 * Document#root
 *
 * Returns the document's root element wrapped as a Ruby node, or nil when the
 * receiver has no underlying XML document.
 */
static VALUE
mkr_xml_doc_root(VALUE self)
{
    mkr_xml_doc_t *owner = mkr_parsed_xml_doc(mkr_doc_parsed(self));

    if (owner == NULL) {
        return Qnil;
    }
    return mkr_wrap_xml_node(owner->root, self);
}
|
#root=(node) ⇒ Makiri::XML::Element
Set (or replace) the document’s root element: with an existing root it replaces that root, otherwise it appends one (subject to the single-root rule). Pure composition over Node#replace / Node#add_child; Nokogiri-compatible. XML only - an HTML5 document has a fixed html/head/body structure, so a free-form root is not meaningful there.
18 19 20 21 |
# File 'lib/makiri/xml/document.rb', line 18
def root=(node)
  r = root
  r ? r.replace(node) : add_child(node)
end
|
#xpath(*args) ⇒ Object
xpath / at_xpath work on the document and on any XML node (rooted at that node), so they live on the shared XML node behavior module + the document.
290 291 292 293 294 295 296 |
# File 'ext/makiri/glue/ruby_xml.c', line 290
/* Document#xpath(expr, namespaces = nil): evaluate an XPath expression against
 * the document through the shared runner. The trailing 0 is the only
 * difference from #at_xpath (which passes 1) - presumably "all matches". */
static VALUE
mkr_xml_doc_xpath(int argc, VALUE *argv, VALUE self)
{
    VALUE rb_expr = Qnil;
    VALUE rb_namespaces = Qnil;

    /* "11" = one required argument followed by one optional argument. */
    rb_scan_args(argc, argv, "11", &rb_expr, &rb_namespaces);

    return mkr_xml_doc_xpath_run(self, rb_expr, rb_namespaces, 0);
}
|