Module: Makiri::HTML::NodeMethods

Defined in:
lib/makiri/html/node_methods.rb,
ext/makiri/makiri.c

Overview

The lxb_dom reader/query methods are defined in C on this module and included into every HTML leaf (including the generic Makiri::HTML::Node). The Nokogiri-compatible aliases over those readers live here (not on Makiri::Node) so they resolve against the HTML readers at definition time.

Instance Method Summary

Instance Method Details

#<<(rb_child) ⇒ Object

node << child -> node (chainable).



118
119
120
121
122
123
# File 'ext/makiri/glue/ruby_html_mutate.c', line 118

/* Node#<< : append rb_child through mkr_node_add_child(), but return the
 * receiver (not the child) so that calls can be chained. */
static VALUE
mkr_node_append(VALUE self, VALUE rb_child)
{
    (void) mkr_node_add_child(self, rb_child);   /* the returned child is not needed here */
    return self;
}

#<=>(other) ⇒ Object

Node#<=> : document (pre-order) position, so an array of nodes can be sorted. Returns -1 / 0 / 1, or nil when the nodes are not comparable: a non-node, different documents or detached subtrees (no common root), or an attribute node (attributes are not in the first_child/next chain, so their order is not defined here). Included via Comparable, which gives <, >, between?, etc.



754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
# File 'ext/makiri/glue/ruby_html_node.c', line 754

/* Node#<=> : compare two nodes by document (pre-order) position.
 *
 * Returns INT2FIX(-1) when self comes before other, INT2FIX(1) when it comes
 * after, INT2FIX(0) when both wrappers unwrap to the same node pointer, and
 * Qnil when no order is defined here: other is not a Makiri node or its
 * document is an XML document, either node is an attribute, the two nodes
 * have different owner documents, or they do not share a root. */
static VALUE
mkr_node_spaceship(VALUE self, VALUE other)
{
    if (!rb_obj_is_kind_of(other, mkr_cNode)
        || rb_obj_is_kind_of(mkr_node_document(other), mkr_cXmlDocument)) {
        return Qnil;   /* not a node, or an XML node - never order-comparable to HTML */
    }
    lxb_dom_node_t *a = mkr_html_node_unwrap(self);
    lxb_dom_node_t *b = mkr_html_node_unwrap(other);
    if (a == b) {
        return INT2FIX(0);
    }
    /* Attributes and cross-document pairs are rejected before any tree walk. */
    if (a->type == LXB_DOM_NODE_TYPE_ATTRIBUTE
        || b->type == LXB_DOM_NODE_TYPE_ATTRIBUTE
        || a->owner_document != b->owner_document) {
        return Qnil;
    }

    size_t da = mkr_node_depth(a), db = mkr_node_depth(b);
    lxb_dom_node_t *pa = a, *pb = b;   /* cursors that climb; a and b stay fixed */

    /* Raise the deeper node to the other's depth; if it lands on the other,
     * that other is an ancestor and so comes first in pre-order. */
    if (da > db) {
        for (size_t k = 0; k < da - db; k++) pa = pa->parent;
        if (pa == b) return INT2FIX(1);   /* b is an ancestor of a */
    } else if (db > da) {
        for (size_t k = 0; k < db - da; k++) pb = pb->parent;
        if (pb == a) return INT2FIX(-1);  /* a is an ancestor of b */
    }

    /* Climb both until they share a parent (the lowest common ancestor). */
    while (pa->parent != pb->parent) {
        if (pa->parent == NULL || pb->parent == NULL) {
            return Qnil;                  /* different trees */
        }
        pa = pa->parent;
        pb = pb->parent;
    }
    /* The loop also exits when both parents are NULL; that is not a shared parent. */
    if (pa->parent == NULL) {
        return Qnil;                      /* two distinct roots */
    }

    /* pa and pb are distinct siblings: earlier in the child list comes first. */
    for (lxb_dom_node_t *c = pa->parent->first_child; c != NULL; c = c->next) {
        if (c == pa) return INT2FIX(-1);
        if (c == pb) return INT2FIX(1);
    }
    return Qnil; /* unreachable for a well-formed tree */
}

#==(other) ⇒ Object

Pointer identity: equal iff both wrappers resolve to the same node pointer (an HTML node is thus never equal to an XML node).



133
134
135
136
137
138
139
140
# File 'ext/makiri/glue/ruby_node.c', line 133

/* Node#== / Node#eql? : Qtrue only when other is a Makiri node and both
 * wrappers report the same mkr_node_id(); Qfalse for everything else. */
VALUE
mkr_node_equals(VALUE self, VALUE other)
{
    if (rb_obj_is_kind_of(other, mkr_cNode)) {
        if (mkr_node_id(self) == mkr_node_id(other)) {
            return Qtrue;
        }
    }
    /* Either not a node at all, or a node with a different id. */
    return Qfalse;
}

#[](rb_name) ⇒ Object Also known as: attr, get_attribute

node[name] -> String or nil (nil when the node is not an element or the attribute is absent).



598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
# File 'ext/makiri/glue/ruby_html_node.c', line 598

/* Node#[] : value of the attribute named rb_name as a Ruby String.
 * Returns Qnil when the receiver is not an element or the element has no
 * such attribute. */
static VALUE
mkr_node_aref(VALUE self, VALUE rb_name)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    if (target->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        return Qnil;
    }

    mkr_ruby_borrowed_text_t key = mkr_ruby_verified_text(rb_name, "attribute name");
    const lxb_char_t *key_bytes = (const lxb_char_t *)key.ptr;
    size_t key_len = key.len;
    lxb_dom_element_t *element = lxb_dom_interface_element(target);

    /* Presence is tested separately from the value lookup, so an absent
     * attribute yields nil before the value is ever fetched. */
    if (lxb_dom_element_has_attribute(element, key_bytes, key_len)) {
        size_t value_len = 0;
        const lxb_char_t *value =
            lxb_dom_element_get_attribute(element, key_bytes, key_len, &value_len);
        RB_GC_GUARD(key.value);   /* key_bytes borrows from this Ruby object */
        return mkr_ruby_str_from_borrowed(
            mkr_borrowed_text((const char *)value, value_len));
    }
    return Qnil;
}

#[]=(rb_name, rb_value) ⇒ Object

element[name] = value -> value



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'ext/makiri/glue/ruby_html_mutate.c', line 214

/* Node#[]= : set attribute rb_name to rb_value on an element and return
 * rb_value. Raises mkr_eError when the receiver is not an element or when
 * Lexbor returns no attribute from the set call. */
static VALUE
mkr_node_aset(VALUE self, VALUE rb_name, VALUE rb_value)
{
    lxb_dom_node_t *target = mkr_node_unwrap_mutable(self);
    if (target->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
    }

    mkr_ruby_borrowed_text_t key = mkr_ruby_verified_text(rb_name, "attribute name");
    mkr_ruby_borrowed_text_t val = mkr_ruby_verified_text(rb_value, "attribute value");

    lxb_dom_element_t *element = lxb_dom_interface_element(target);
    const lxb_char_t *key_bytes = (const lxb_char_t *)key.ptr;
    const lxb_char_t *val_bytes = (const lxb_char_t *)val.ptr;
    lxb_dom_attr_t *written =
        lxb_dom_element_set_attribute(element, key_bytes, key.len, val_bytes, val.len);

    /* Both byte pointers borrow from the Ruby objects; keep them alive
     * until the Lexbor call has returned. */
    RB_GC_GUARD(key.value);
    RB_GC_GUARD(val.value);

    if (written == NULL) {
        rb_raise(mkr_eError, "failed to set attribute");
    }
    mkr_invalidate_index(self);
    return rb_value;
}

#add_child(rb_child) ⇒ Object

node.add_child(child) -> child. Appends child as the last child. A document fragment contributes its children rather than itself.



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'ext/makiri/glue/ruby_html_mutate.c', line 98

/* Node#add_child(child) -> child. Appends rb_child as the receiver's last
 * child; when rb_child is a document fragment its children are moved over
 * one by one instead of the fragment node itself. */
static VALUE
mkr_node_add_child(VALUE self, VALUE rb_child)
{
    lxb_dom_node_t *target   = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *incoming = mkr_arg_node(rb_child);

    mkr_prepare_insert(target, incoming);

    if (!mkr_is_fragment(incoming)) {
        lxb_dom_node_insert_child(target, incoming);
    } else {
        /* Always take the fragment's current first child, so the loop ends
         * once the fragment has been emptied. */
        for (lxb_dom_node_t *moved = incoming->first_child; moved != NULL;
             moved = incoming->first_child) {
            lxb_dom_node_remove(moved);
            lxb_dom_node_insert_child(target, moved);
        }
    }

    mkr_invalidate_index(self);
    return rb_child;
}

#add_next_sibling(rb_node) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'ext/makiri/glue/ruby_html_mutate.c', line 147

/* Node#add_next_sibling(node) -> node. Inserts rb_node directly after the
 * receiver; a document fragment contributes its children (order preserved)
 * rather than itself. Raises mkr_eError when the receiver has no parent. */
static VALUE
mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
{
    lxb_dom_node_t *anchor   = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *incoming = mkr_arg_node(rb_node);

    if (anchor->parent == NULL) {
        rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
    }
    mkr_prepare_insert(anchor, incoming);

    if (!mkr_is_fragment(incoming)) {
        lxb_dom_node_insert_after(anchor, incoming);
    } else {
        /* Each moved child becomes the next insertion point, which keeps
         * the fragment's children in their original order after the receiver. */
        lxb_dom_node_t *tail = anchor;
        for (lxb_dom_node_t *moved = incoming->first_child; moved != NULL;
             moved = incoming->first_child) {
            lxb_dom_node_remove(moved);
            lxb_dom_node_insert_after(tail, moved);
            tail = moved;
        }
    }

    mkr_invalidate_index(self);
    return rb_node;
}

#add_previous_sibling(rb_node) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'ext/makiri/glue/ruby_html_mutate.c', line 125

/* Node#add_previous_sibling(node) -> node. Inserts rb_node directly before
 * the receiver; a document fragment contributes its children rather than
 * itself. Raises mkr_eError when the receiver has no parent. */
static VALUE
mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
{
    lxb_dom_node_t *anchor   = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *incoming = mkr_arg_node(rb_node);

    if (anchor->parent == NULL) {
        rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
    }
    mkr_prepare_insert(anchor, incoming);

    if (!mkr_is_fragment(incoming)) {
        lxb_dom_node_insert_before(anchor, incoming);
    } else {
        /* Inserting every child before the same anchor, first child first,
         * leaves them in their original order ahead of the receiver. */
        for (lxb_dom_node_t *moved = incoming->first_child; moved != NULL;
             moved = incoming->first_child) {
            lxb_dom_node_remove(moved);
            lxb_dom_node_insert_before(anchor, moved);
        }
    }

    mkr_invalidate_index(self);
    return rb_node;
}

#after(rb_node) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'ext/makiri/glue/ruby_html_mutate.c', line 147

/* Node#add_next_sibling(node) -> node. Inserts rb_node directly after the
 * receiver; a document fragment contributes its children (order preserved)
 * rather than itself. Raises mkr_eError when the receiver has no parent. */
static VALUE
mkr_node_add_next_sibling(VALUE self, VALUE rb_node)
{
    lxb_dom_node_t *anchor   = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *incoming = mkr_arg_node(rb_node);

    if (anchor->parent == NULL) {
        rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
    }
    mkr_prepare_insert(anchor, incoming);

    if (!mkr_is_fragment(incoming)) {
        lxb_dom_node_insert_after(anchor, incoming);
    } else {
        /* Each moved child becomes the next insertion point, which keeps
         * the fragment's children in their original order after the receiver. */
        lxb_dom_node_t *tail = anchor;
        for (lxb_dom_node_t *moved = incoming->first_child; moved != NULL;
             moved = incoming->first_child) {
            lxb_dom_node_remove(moved);
            lxb_dom_node_insert_after(tail, moved);
            tail = moved;
        }
    }

    mkr_invalidate_index(self);
    return rb_node;
}

#ancestors ⇒ Object

Ancestor elements, nearest first (parent, grandparent, … root).



559
560
561
562
563
564
565
566
567
568
569
570
571
# File 'ext/makiri/glue/ruby_html_node.c', line 559

/* Node#ancestors : NodeSet of the receiver's element ancestors, collected
 * by following ->parent upward, so the nearest ancestor is pushed first.
 * Non-element ancestors are skipped. */
static VALUE
mkr_node_ancestors(VALUE self)
{
    lxb_dom_node_t *start = mkr_html_node_unwrap(self);
    VALUE owner  = mkr_node_document(self);
    VALUE result = mkr_node_set_new(owner);

    lxb_dom_node_t *up = start->parent;
    while (up != NULL) {
        if (up->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            mkr_node_set_push(result, (mkr_raw_node_t *)up);
        }
        up = up->parent;
    }
    return result;
}

#at_css(rb_selector) ⇒ Object

Node#at_css: the first matching descendant, or nil.



261
262
263
264
265
266
267
268
269
270
271
272
# File 'ext/makiri/glue/ruby_html_css.c', line 261

/* Node#at_css(selector) : run the compiled selector with the find-first
 * callback rooted at the receiver and wrap the node it recorded, or return
 * Qnil when nothing was recorded. */
static VALUE
mkr_node_at_css(VALUE self, VALUE rb_selector)
{
    lxb_dom_node_t *scope = mkr_html_node_unwrap(self);
    mkr_css_first_ctx_t search = { .root = scope, .found = NULL };

    mkr_with_compiled_selector(rb_selector, scope, mkr_run_find_first, &search);

    if (search.found == NULL) {
        return Qnil;
    }
    return mkr_wrap_html_node(search.found, mkr_node_document(self));
}

#at_xpath(*args) ⇒ Object

First matching node (for a node-set), or the scalar value otherwise.



738
739
740
741
742
743
744
745
746
747
748
# File 'ext/makiri/glue/ruby_xpath.c', line 738

/* Node#at_xpath(expr [, handler], **opts) : evaluate the expression through
 * mkr_node_xpath_run(). A NodeSet result is reduced to its #first element;
 * any other result is returned unchanged. */
static VALUE
mkr_node_at_xpath(int argc, VALUE *argv, VALUE self)
{
    VALUE rb_expr, handler, opts;
    /* "11:" = one required argument, one optional argument, keyword options. */
    rb_scan_args(argc, argv, "11:", &rb_expr, &handler, &opts);

    VALUE outcome =
        mkr_node_xpath_run(self, rb_expr, handler, mkr_ns_matching_lax(opts), 1);
    if (!rb_obj_is_kind_of(outcome, mkr_cNodeSet)) {
        return outcome;
    }
    return rb_funcall(outcome, rb_intern("first"), 0);
}

#attribute_nodes ⇒ Object

element.attribute_nodes -> NodeSet of Attribute nodes (document order). Empty for non-elements. These wrap the bare lxb_dom_attr_t; navigating back with Attribute#parent goes through the compat attr->owner index.



681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
# File 'ext/makiri/glue/ruby_html_node.c', line 681

/* Element#attribute_nodes : NodeSet holding each attribute of the receiver
 * in Lexbor's first/next attribute order. A non-element receiver yields an
 * empty set. */
static VALUE
mkr_node_attribute_nodes(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    VALUE owner  = mkr_node_document(self);
    VALUE result = mkr_node_set_new(owner);

    if (target->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        lxb_dom_element_t *element = lxb_dom_interface_element(target);
        for (lxb_dom_attr_t *cursor = lxb_dom_element_first_attribute(element);
             cursor != NULL;
             cursor = lxb_dom_element_next_attribute(cursor)) {
            mkr_node_set_push(result, (mkr_raw_node_t *)lxb_dom_interface_node(cursor));
        }
    }
    return result;
}

#before(rb_node) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'ext/makiri/glue/ruby_html_mutate.c', line 125

/* Node#add_previous_sibling(node) -> node. Inserts rb_node directly before
 * the receiver; a document fragment contributes its children rather than
 * itself. Raises mkr_eError when the receiver has no parent. */
static VALUE
mkr_node_add_previous_sibling(VALUE self, VALUE rb_node)
{
    lxb_dom_node_t *anchor   = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *incoming = mkr_arg_node(rb_node);

    if (anchor->parent == NULL) {
        rb_raise(mkr_eError, "cannot add a sibling to a node with no parent");
    }
    mkr_prepare_insert(anchor, incoming);

    if (!mkr_is_fragment(incoming)) {
        lxb_dom_node_insert_before(anchor, incoming);
    } else {
        /* Inserting every child before the same anchor, first child first,
         * leaves them in their original order ahead of the receiver. */
        for (lxb_dom_node_t *moved = incoming->first_child; moved != NULL;
             moved = incoming->first_child) {
            lxb_dom_node_remove(moved);
            lxb_dom_node_insert_before(anchor, moved);
        }
    }

    mkr_invalidate_index(self);
    return rb_node;
}

#child ⇒ Object

First child node (any type), or nil.



523
524
525
526
527
528
# File 'ext/makiri/glue/ruby_html_node.c', line 523

/* Node#child : wrap the receiver's first child (of any node type) in the
 * receiver's document. The first_child pointer is passed to the wrapper as-is. */
static VALUE
mkr_node_child(VALUE self)
{
    lxb_dom_node_t *first = mkr_html_node_unwrap(self)->first_child;
    VALUE owner = mkr_node_document(self);
    return mkr_wrap_html_node(first, owner);
}

#children ⇒ Object

All child nodes as a NodeSet.



531
532
533
534
535
536
537
538
539
540
541
# File 'ext/makiri/glue/ruby_html_node.c', line 531

/* Node#children : NodeSet of every direct child of the receiver, in
 * first_child -> next order, regardless of node type. */
static VALUE
mkr_node_children(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    VALUE owner  = mkr_node_document(self);
    VALUE result = mkr_node_set_new(owner);

    lxb_dom_node_t *kid = target->first_child;
    while (kid != NULL) {
        mkr_node_set_push(result, (mkr_raw_node_t *)kid);
        kid = kid->next;
    }
    return result;
}

#clone_node(*args) ⇒ Object

Node#clone_node(deep = false): a shallow (or deep, with deep truthy) copy of this node, owned by the same document and detached from any parent - the DOM cloneNode, whose `deep` defaults to false (a missing/nil/false argument => shallow). Built on the same import_node + <template>-content fixup the fragment parser uses, so a deep-cloned <template> carries its contents (which import_node alone omits). Fails closed: a NULL import (e.g. OOM) raises rather than returning a partial node.



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'ext/makiri/glue/ruby_doc.c', line 318

/* Node#clone_node(deep = false) : import a copy of the receiver into its own
 * owner document and wrap it. The optional argument is tested with RTEST, so
 * a missing, nil or false argument requests a shallow copy. Raises
 * mkr_eError when the import returns NULL. */
VALUE
mkr_node_clone_node(int argc, VALUE *argv, VALUE self)
{
    VALUE deep_arg;
    rb_scan_args(argc, argv, "01", &deep_arg);
    const bool want_deep = RTEST(deep_arg);

    lxb_dom_node_t *source = mkr_html_node_unwrap(self);
    lxb_dom_document_t *owner = source->owner_document;

    lxb_dom_node_t *copy = lxb_dom_document_import_node(owner, source, want_deep);
    if (copy == NULL) {
        rb_raise(mkr_eError, "failed to clone node");
    }

    /* The <template>-content fixup is only applied to deep copies. */
    if (want_deep) {
        mkr_fixup_template_content(owner, source, copy);
    }
    return mkr_wrap_html_node(copy, mkr_node_document(self));
}

#content ⇒ Object

Concatenated text content of this node and its descendants. The DOM spec makes a Document’s textContent null; we instead return the text of the root element (matching the intuitive, Nokogiri-like Document#text).



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'ext/makiri/glue/ruby_html_node.c', line 398

/* Text content of this node and its descendants, returned as a new UTF-8
 * Ruby String.
 *
 * A Document receiver is first replaced by its root element; with no root an
 * empty string is returned. Elements and document fragments take the fast
 * path below (text index, else an iterative walk). Every other node kind
 * goes through lxb_dom_node_text_content(), with an empty string when that
 * returns NULL. */
static VALUE
mkr_node_content(VALUE self)
{
    lxb_dom_node_t *node = mkr_html_node_unwrap(self);
    if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
        node = lxb_dom_document_root((lxb_dom_document_t *)node);
        if (node == NULL) {
            return rb_utf8_str_new("", 0);
        }
    }

    /* Fast path for elements / fragments (the common case, incl. document text).
     *
     * Preferred: the per-document text index (dom_adapter/text_index.c) maps
     * this node to the contiguous, document-order run of its descendants' text
     * slices, so we serve a single pre-sized memcpy run with no per-extraction
     * tree walk - the walk is otherwise the dominant, cache-bound cost. Built
     * lazily on first use and dropped on any mutation, so a slice can never
     * point at reallocated/detached storage.
     *
     * Fallback (index unavailable - node outside the indexed tree, e.g. a
     * fragment, or a build OOM): stream each descendant text/CDATA node's data
     * straight into the Ruby string via an iterative pre-order walk (stack-safe;
     * skips Lexbor's intermediate arena buffer + copy). */
    if (node->type == LXB_DOM_NODE_TYPE_ELEMENT
        || node->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        mkr_parsed_t *parsed = mkr_doc_parsed(mkr_node_document(self));
        const mkr_borrowed_text_t *slices;
        size_t nslices, total;
        /* Index hit: the out-parameters are only read when the lookup succeeds. */
        if (parsed != NULL
            && mkr_parsed_text_slices(parsed, node, &slices, &nslices, &total)) {
            return mkr_ruby_str_from_slices(slices, nslices, total);
        }

        VALUE str = rb_utf8_str_new(NULL, 0);
        for (lxb_dom_node_t *n = node->first_child; n != NULL;) {
            if (n->type == LXB_DOM_NODE_TYPE_TEXT
                || n->type == LXB_DOM_NODE_TYPE_CDATA_SECTION) {
                const lexbor_str_t *d = &lxb_dom_interface_character_data(n)->data;
                if (d->data != NULL && d->length != 0) {
                    rb_str_cat(str, (const char *)d->data, (long)d->length);
                }
            }
            /* Pre-order step: descend first; otherwise climb until some node
             * has a next sibling, stopping once we are back at the start node. */
            if (n->first_child != NULL) { n = n->first_child; continue; }
            while (n != node && n->next == NULL) { n = n->parent; }
            if (n == node) { break; }
            n = n->next;
        }
        return str;
    }

    /* Character-data and other node kinds keep the general (proven) path. */
    size_t len = 0;
    lxb_char_t *text = lxb_dom_node_text_content(node, &len);
    if (text == NULL) {
        return rb_utf8_str_new("", 0);
    }
    VALUE str = rb_utf8_str_new((const char *)text, len);
    /* The Ruby string was built from the bytes above; hand Lexbor's buffer back. */
    lxb_dom_document_destroy_text(node->owner_document, text);
    return str;
}

#content=(rb_text) ⇒ Object

node.content = text -> text. DOM textContent setter: for an element this replaces all children with a single text node; for a text/comment/cdata node it sets the data.



441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'ext/makiri/glue/ruby_html_mutate.c', line 441

/* Node#content=(text) -> text. Passes the verified bytes of rb_text to
 * lxb_dom_node_text_content_set() on the receiver. Raises mkr_eError when
 * Lexbor reports a status other than LXB_STATUS_OK. */
static VALUE
mkr_node_set_content(VALUE self, VALUE rb_text)
{
    lxb_dom_node_t *target = mkr_node_unwrap_mutable(self);
    mkr_ruby_borrowed_text_t payload = mkr_ruby_verified_text(rb_text, "node content");

    const lxb_char_t *bytes = (const lxb_char_t *)payload.ptr;
    lxb_status_t status = lxb_dom_node_text_content_set(target, bytes, payload.len);
    RB_GC_GUARD(payload.value);   /* bytes borrows from this Ruby object */

    if (status != LXB_STATUS_OK) {
        rb_raise(mkr_eError, "failed to set node content");
    }
    mkr_invalidate_index(self);
    return rb_text;
}

#css(rb_selector) ⇒ Object

Node#css: collect every matching descendant into a NodeSet (document order). Raises Makiri::CSS::SyntaxError on a bad selector, Makiri::Error on an over-large result.



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'ext/makiri/glue/ruby_html_css.c', line 233

/* Node#css(selector) : collect every node matched below the receiver into a
 * NodeSet.
 *
 * Raises mkr_eError when the find callback flagged an overflow (more than
 * MKR_NODE_SET_MAX results) or an out-of-memory condition; in both cases the
 * collected buffer is freed before raising. Errors raised while compiling
 * the selector propagate from mkr_with_compiled_selector(). */
static VALUE
mkr_node_css(VALUE self, VALUE rb_selector)
{
    lxb_dom_node_t *root = mkr_html_node_unwrap(self);
    VALUE document = mkr_node_document(self);

    /* Allocate the result set BEFORE the find runs. It is a GC-managed
     * object, so creating it early costs nothing on the error paths, and it
     * means no raise-capable Ruby allocation sits between the find
     * (which may malloc ctx.nodes) and the rb_ensure that frees that buffer. */
    VALUE set = mkr_node_set_new(document);

    /* A syntax error raises inside mkr_with_compiled_selector BEFORE the find
     * runs, so ctx.nodes is still NULL there - nothing leaks on that path. */
    mkr_css_ctx_t ctx = { .nodes = NULL, .count = 0, .cap = 0,
                          .root = root, .overflow = 0, .oom = 0 };
    mkr_with_compiled_selector(rb_selector, root, mkr_run_find, &ctx);

    if (ctx.overflow || ctx.oom) {
        free(ctx.nodes);
        if (ctx.overflow) {
            rb_raise(mkr_eError, "CSS result set exceeded the node limit (%u)",
                     MKR_NODE_SET_MAX);
        }
        rb_raise(mkr_eError, "out of memory collecting CSS results");
    }

    /* rb_ensure runs mkr_css_free_nodes whether or not the fill raises. */
    mkr_css_fill_t fill = { set, &ctx };
    rb_ensure(mkr_css_fill_set, (VALUE)&fill, mkr_css_free_nodes, (VALUE)&ctx);
    return set;
}

#delete(rb_name) ⇒ Object

element.delete(name) -> self. Removes the attribute if present.



457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'ext/makiri/glue/ruby_html_mutate.c', line 457

/* Element#delete(name) -> self. Asks Lexbor to remove the named attribute
 * from an element; a non-element receiver is returned untouched. */
static VALUE
mkr_node_delete(VALUE self, VALUE rb_name)
{
    lxb_dom_node_t *target = mkr_node_unwrap_mutable(self);

    if (target->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        mkr_ruby_borrowed_text_t key = mkr_ruby_verified_text(rb_name, "attribute name");
        lxb_dom_element_t *element = lxb_dom_interface_element(target);
        lxb_dom_element_remove_attribute(element, (const lxb_char_t *)key.ptr, key.len);
        RB_GC_GUARD(key.value);   /* key.ptr borrows from this Ruby object */
        mkr_invalidate_index(self);
    }
    return self;
}

#document ⇒ Object




464
465
466
467
468
# File 'ext/makiri/glue/ruby_html_node.c', line 464

/* Node#document : method-table entry point that simply forwards to
 * mkr_node_document() for the receiver. */
static VALUE
mkr_node_get_document(VALUE self)
{
    VALUE owner = mkr_node_document(self);
    return owner;
}

#element_children ⇒ Object

Child elements only, as a NodeSet.



544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'ext/makiri/glue/ruby_html_node.c', line 544

/* Node#element_children : NodeSet of the receiver's direct children that
 * are elements, in first_child -> next order. */
static VALUE
mkr_node_element_children(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    VALUE owner  = mkr_node_document(self);
    VALUE result = mkr_node_set_new(owner);

    lxb_dom_node_t *kid = target->first_child;
    while (kid != NULL) {
        if (kid->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            mkr_node_set_push(result, (mkr_raw_node_t *)kid);
        }
        kid = kid->next;
    }
    return result;
}

#elements ⇒ Object

Child elements only, as a NodeSet.



544
545
546
547
548
549
550
551
552
553
554
555
556
# File 'ext/makiri/glue/ruby_html_node.c', line 544

/* Node#element_children : NodeSet of the receiver's direct children that
 * are elements, in first_child -> next order. */
static VALUE
mkr_node_element_children(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    VALUE owner  = mkr_node_document(self);
    VALUE result = mkr_node_set_new(owner);

    lxb_dom_node_t *kid = target->first_child;
    while (kid != NULL) {
        if (kid->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            mkr_node_set_push(result, (mkr_raw_node_t *)kid);
        }
        kid = kid->next;
    }
    return result;
}

#eql?(other) ⇒ Boolean

Pointer identity: equal iff both wrappers resolve to the same node pointer (an HTML node is thus never equal to an XML node).

Returns:

  • (Boolean)


133
134
135
136
137
138
139
140
# File 'ext/makiri/glue/ruby_node.c', line 133

/* Node#== / Node#eql? : Qtrue only when other is a Makiri node and both
 * wrappers report the same mkr_node_id(); Qfalse for everything else. */
VALUE
mkr_node_equals(VALUE self, VALUE other)
{
    if (rb_obj_is_kind_of(other, mkr_cNode)) {
        if (mkr_node_id(self) == mkr_node_id(other)) {
            return Qtrue;
        }
    }
    /* Either not a node at all, or a node with a different id. */
    return Qfalse;
}

#first_element_childObject



573
574
575
576
577
578
579
580
581
# File 'ext/makiri/glue/ruby_html_node.c', line 573

/* Node#first_element_child : scan the children forward from first_child and
 * wrap the first one whose type is ELEMENT. When there is none, the wrapper
 * receives NULL. */
static VALUE
mkr_node_first_element_child(VALUE self)
{
    lxb_dom_node_t *hit = NULL;
    for (lxb_dom_node_t *kid = mkr_html_node_unwrap(self)->first_child;
         kid != NULL; kid = kid->next) {
        if (kid->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            hit = kid;
            break;
        }
    }
    return mkr_wrap_html_node(hit, mkr_node_document(self));
}

#hash ⇒ Object

Stable hash derived from the node pointer, so a == b implies a.hash == b.hash even across separately-created wrappers. Shares the pointer value with #pointer_id.



155
156
157
158
159
# File 'ext/makiri/glue/ruby_node.c', line 155

/* Node#hash : reuses the value produced by mkr_node_pointer_id() as the
 * hash, so the two methods always agree for a given wrapper. */
VALUE
mkr_node_hash(VALUE self)
{
    VALUE pointer_id = mkr_node_pointer_id(self);
    return pointer_id;
}

#inner_html(*args) ⇒ Object

Inner HTML: the node’s children, without the node’s own tag.



139
140
141
142
143
144
145
# File 'ext/makiri/glue/ruby_html_serialize.c', line 139

/* Node#inner_html(*args) : serialize the receiver through
 * mkr_html_serialize() with the deep flag set. The pretty option is derived
 * from the call arguments before the node is unwrapped. */
static VALUE
mkr_node_inner_html(int argc, VALUE *argv, VALUE self)
{
    const int deep = 1;
    int pretty = mkr_serialize_pretty_opt(argc, argv);
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);

    return mkr_html_serialize(target, deep, pretty);
}

#inner_html=(rb_html) ⇒ Object

element.inner_html = html -> html. Replaces the element’s children.



527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
# File 'ext/makiri/glue/ruby_html_mutate.c', line 527

/* Element#inner_html=(html) -> html. Detaches the element's current children
 * and appends the nodes produced by parsing rb_html as a fragment in the
 * context of this element.
 *
 * Raises mkr_eError when the receiver is not an element. Anything raised by
 * mkr_parse_fragment_into() propagates; by then the old children are already
 * detached, so the index is invalidated before the parse as well as after. */
static VALUE
mkr_node_set_inner_html(VALUE self, VALUE rb_html)
{
    lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
    if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        rb_raise(mkr_eError, "inner_html= requires an element");
    }

    /* Detach existing children (arena reclaims them at document destroy). */
    lxb_dom_node_t *c;
    while ((c = node->first_child) != NULL) {
        lxb_dom_node_remove(c);
    }

    /* The tree has changed from here on. Invalidate now so that a raise out
     * of the parse below cannot leave an index describing the detached
     * children. NOTE(review): assumes mkr_invalidate_index() is safe to call
     * more than once - confirm against its definition. */
    mkr_invalidate_index(self);

    mkr_parse_fragment_into(node, rb_html, node->owner_document,
                            mkr_emit_append, node);
    mkr_invalidate_index(self);
    return rb_html;
}

#inner_text ⇒ Object

Concatenated text content of this node and its descendants. The DOM spec makes a Document’s textContent null; we instead return the text of the root element (matching the intuitive, Nokogiri-like Document#text).



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'ext/makiri/glue/ruby_html_node.c', line 398

/* Text content of this node and its descendants, returned as a new UTF-8
 * Ruby String.
 *
 * A Document receiver is first replaced by its root element; with no root an
 * empty string is returned. Elements and document fragments take the fast
 * path below (text index, else an iterative walk). Every other node kind
 * goes through lxb_dom_node_text_content(), with an empty string when that
 * returns NULL. */
static VALUE
mkr_node_content(VALUE self)
{
    lxb_dom_node_t *node = mkr_html_node_unwrap(self);
    if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
        node = lxb_dom_document_root((lxb_dom_document_t *)node);
        if (node == NULL) {
            return rb_utf8_str_new("", 0);
        }
    }

    /* Fast path for elements / fragments (the common case, incl. document text).
     *
     * Preferred: the per-document text index (dom_adapter/text_index.c) maps
     * this node to the contiguous, document-order run of its descendants' text
     * slices, so we serve a single pre-sized memcpy run with no per-extraction
     * tree walk - the walk is otherwise the dominant, cache-bound cost. Built
     * lazily on first use and dropped on any mutation, so a slice can never
     * point at reallocated/detached storage.
     *
     * Fallback (index unavailable - node outside the indexed tree, e.g. a
     * fragment, or a build OOM): stream each descendant text/CDATA node's data
     * straight into the Ruby string via an iterative pre-order walk (stack-safe;
     * skips Lexbor's intermediate arena buffer + copy). */
    if (node->type == LXB_DOM_NODE_TYPE_ELEMENT
        || node->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        mkr_parsed_t *parsed = mkr_doc_parsed(mkr_node_document(self));
        const mkr_borrowed_text_t *slices;
        size_t nslices, total;
        /* Index hit: the out-parameters are only read when the lookup succeeds. */
        if (parsed != NULL
            && mkr_parsed_text_slices(parsed, node, &slices, &nslices, &total)) {
            return mkr_ruby_str_from_slices(slices, nslices, total);
        }

        VALUE str = rb_utf8_str_new(NULL, 0);
        for (lxb_dom_node_t *n = node->first_child; n != NULL;) {
            if (n->type == LXB_DOM_NODE_TYPE_TEXT
                || n->type == LXB_DOM_NODE_TYPE_CDATA_SECTION) {
                const lexbor_str_t *d = &lxb_dom_interface_character_data(n)->data;
                if (d->data != NULL && d->length != 0) {
                    rb_str_cat(str, (const char *)d->data, (long)d->length);
                }
            }
            /* Pre-order step: descend first; otherwise climb until some node
             * has a next sibling, stopping once we are back at the start node. */
            if (n->first_child != NULL) { n = n->first_child; continue; }
            while (n != node && n->next == NULL) { n = n->parent; }
            if (n == node) { break; }
            n = n->next;
        }
        return str;
    }

    /* Character-data and other node kinds keep the general (proven) path. */
    size_t len = 0;
    lxb_char_t *text = lxb_dom_node_text_content(node, &len);
    if (text == NULL) {
        return rb_utf8_str_new("", 0);
    }
    VALUE str = rb_utf8_str_new((const char *)text, len);
    /* The Ruby string was built from the bytes above; hand Lexbor's buffer back. */
    lxb_dom_document_destroy_text(node->owner_document, text);
    return str;
}

#key?(rb_name) ⇒ Boolean Also known as: has_attribute?

node.key?(name) -> true/false

Returns:

  • (Boolean)


622
623
624
625
626
627
628
629
630
631
632
633
634
# File 'ext/makiri/glue/ruby_html_node.c', line 622

/* Node#key?(name) : Qtrue when the receiver is an element carrying an
 * attribute with that name, Qfalse otherwise (including for non-elements). */
static VALUE
mkr_node_has_key(VALUE self, VALUE rb_name)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    if (target->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        return Qfalse;
    }

    mkr_ruby_borrowed_text_t key = mkr_ruby_verified_text(rb_name, "attribute name");
    lxb_dom_element_t *element = lxb_dom_interface_element(target);
    const lxb_char_t *key_bytes = (const lxb_char_t *)key.ptr;
    bool present = lxb_dom_element_has_attribute(element, key_bytes, key.len);
    RB_GC_GUARD(key.value);   /* key_bytes borrows from this Ruby object */

    if (present) {
        return Qtrue;
    }
    return Qfalse;
}

#keys ⇒ Object

node.keys -> [String, …] of attribute names (document order).



637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
# File 'ext/makiri/glue/ruby_html_node.c', line 637

/* Node#keys : Ruby Array with the qualified name of each attribute, in
 * Lexbor's first/next attribute order. A non-element receiver yields an
 * empty Array. */
static VALUE
mkr_node_keys(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    VALUE names = rb_ary_new();

    if (target->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        lxb_dom_element_t *element = lxb_dom_interface_element(target);
        for (lxb_dom_attr_t *cursor = lxb_dom_element_first_attribute(element);
             cursor != NULL;
             cursor = lxb_dom_element_next_attribute(cursor)) {
            size_t name_len = 0;
            const lxb_char_t *raw = lxb_dom_attr_qualified_name(cursor, &name_len);
            VALUE rb_key = mkr_ruby_str_from_borrowed(
                mkr_borrowed_text((const char *)raw, name_len));
            rb_ary_push(names, rb_key);
        }
    }
    return names;
}

#last_element_child ⇒ Object



583
584
585
586
587
588
589
590
591
# File 'ext/makiri/glue/ruby_html_node.c', line 583

/* Node#last_element_child : scan the children backward from last_child and
 * wrap the first one whose type is ELEMENT. When there is none, the wrapper
 * receives NULL. */
static VALUE
mkr_node_last_element_child(VALUE self)
{
    lxb_dom_node_t *hit = NULL;
    for (lxb_dom_node_t *kid = mkr_html_node_unwrap(self)->last_child;
         kid != NULL; kid = kid->prev) {
        if (kid->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            hit = kid;
            break;
        }
    }
    return mkr_wrap_html_node(hit, mkr_node_document(self));
}

#line ⇒ Object

node.line -> 1-based source line, or nil when unknown.

The line comes from the byte offset stamped onto the node at parse time (source-location tracking) resolved against the document’s line table. Returns nil for nodes the tracker could not place (e.g. parser-inserted implicit <html>/<head>/<body>, or any node when tracking was disabled).



720
721
722
723
724
725
726
727
# File 'ext/makiri/glue/ruby_html_node.c', line 720

/* Node#line -> Integer or nil. Resolves the receiver's source line through
 * mkr_parsed_node_line(); a result of 0 is reported as nil.
 *
 * Also returns nil when the document has no parsed state, instead of handing
 * a NULL pointer to the lookup. The line is converted with SIZET2NUM because
 * it is a size_t, which may be wider than unsigned long on some platforms. */
static VALUE
mkr_node_line(VALUE self)
{
    lxb_dom_node_t *node = mkr_html_node_unwrap(self);
    mkr_parsed_t   *p    = mkr_doc_parsed(mkr_node_document(self));
    if (p == NULL) {
        return Qnil;   /* no parsed state, so no line information */
    }
    size_t line = mkr_parsed_node_line(p, node);
    return line == 0 ? Qnil : SIZET2NUM(line);
}

#local_name ⇒ Object

Local name (DOM `localName`): the name without any prefix - “div” for <div>, “path” for an SVG <path>, “href” for an xlink:href attribute. Defined on Element and Attribute only; nil for the other node kinds (the DOM gives a Text/Comment/Document no localName).



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'ext/makiri/glue/ruby_html_node.c', line 124

/* Node#local_name : the un-prefixed name of an element or attribute as a
 * Ruby String; Qnil for every other node type. */
static VALUE
mkr_node_local_name(VALUE self)
{
    lxb_dom_node_t *target = mkr_html_node_unwrap(self);
    const lxb_char_t *text = NULL;
    size_t text_len = 0;

    if (target->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        text = lxb_dom_element_local_name(lxb_dom_interface_element(target), &text_len);
    } else if (target->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        /* Lexbor's stored local_name is lower-cased even when the qualified
         * name keeps its case (set_attribute_ns is case-sensitive), so the
         * case-preserved local name is taken as the tail of the qualified
         * name, using the stored local name only for its length. */
        lxb_dom_attr_t *attr = lxb_dom_interface_attr(target);
        size_t qualified_len = 0, local_len = 0;
        const lxb_char_t *qualified = lxb_dom_attr_qualified_name(attr, &qualified_len);
        (void) lxb_dom_attr_local_name(attr, &local_len);

        if (qualified == NULL || qualified_len < local_len) {
            /* No usable qualified name: fall back to the stored local name. */
            text = lxb_dom_attr_local_name(attr, &text_len);
        } else {
            text = qualified + (qualified_len - local_len);
            text_len = local_len;
        }
    } else {
        return Qnil;
    }

    return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)text, text_len));
}

#matches?(rb_selector) ⇒ Boolean

Node#matches?(selector): does THIS node match the CSS selector? (Like Nokogiri - tested against the node itself, not its descendants.) A malformed selector raises Makiri::CSS::SyntaxError.

Returns:

  • (Boolean)


277
278
279
280
281
282
283
284
# File 'ext/makiri/glue/ruby_html_css.c', line 277

/*
 * Node#matches?(selector) -> true or false.
 *
 * Hands the selector, this node and mkr_run_match to
 * mkr_with_compiled_selector; the verdict is read back from the int passed as
 * the final argument (non-zero means the node matched).
 */
static VALUE
mkr_node_matches(VALUE self, VALUE rb_selector)
{
    int hit = 0;
    lxb_dom_node_t *subject = mkr_html_node_unwrap(self);

    mkr_with_compiled_selector(rb_selector, subject, mkr_run_match, &hit);
    if (hit) {
        return Qtrue;
    }
    return Qfalse;
}

#nameObject Also known as: node_name

Node name. Matches Nokogiri: lowercase tag name for HTML elements (Lexbor lowercases during tokenization), and for the other kinds the Nokogiri-style names “text”/“comment”/“document” (the DOM names without their “#” prefix) plus “#cdata-section”.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'ext/makiri/glue/ruby_html_node.c', line 86

/*
 * Node#name -> String.
 *
 * Elements and attributes report their qualified name; text, comment, CDATA
 * and document nodes report a fixed literal; any other kind falls through to
 * Lexbor's generic lxb_dom_node_name.
 */
static VALUE
mkr_node_name(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);
    const char *fixed = NULL;          /* set for the kinds with a literal name */
    const lxb_char_t *borrowed = NULL; /* set for the kinds named by Lexbor */
    size_t borrowed_len = 0;

    switch (dom->type) {
    case LXB_DOM_NODE_TYPE_ELEMENT:
        borrowed = lxb_dom_element_qualified_name(lxb_dom_interface_element(dom),
                                                  &borrowed_len);
        break;
    case LXB_DOM_NODE_TYPE_ATTRIBUTE:
        borrowed = lxb_dom_attr_qualified_name(lxb_dom_interface_attr(dom),
                                               &borrowed_len);
        break;
    case LXB_DOM_NODE_TYPE_TEXT:
        fixed = "text";
        break;
    case LXB_DOM_NODE_TYPE_COMMENT:
        fixed = "comment";
        break;
    case LXB_DOM_NODE_TYPE_CDATA_SECTION:
        fixed = "#cdata-section";
        break;
    case LXB_DOM_NODE_TYPE_DOCUMENT:
        fixed = "document";
        break;
    default:
        borrowed = lxb_dom_node_name(dom, &borrowed_len);
        break;
    }

    if (fixed != NULL) {
        return rb_utf8_str_new_cstr(fixed);
    }
    return mkr_ruby_str_from_borrowed(
        mkr_borrowed_text((const char *)borrowed, borrowed_len));
}

#name=(rb_name) ⇒ Object Also known as: node_name=

element.name = new_name -> new_name. Renames the element in place (identity preserved): create a throwaway element with the new name so the document interns it, copy its name fields onto this node, then discard it.



406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'ext/makiri/glue/ruby_html_mutate.c', line 406

/*
 * element.name = new_name -> new_name.
 *
 * Renames an element in place by borrowing the name fields of a freshly
 * created element with the requested name.
 *
 * Raises mkr_eError when self is not an element or when the throwaway element
 * cannot be created. Returns rb_name unchanged.
 */
static VALUE
mkr_node_set_name(VALUE self, VALUE rb_name)
{
    lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
    if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        rb_raise(mkr_eError, "name= is only supported on elements");
    }
    mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_name, "element name");
    /* Creating an element in the owner document yields the name fields that
     * are copied below. */
    lxb_dom_element_t *fresh = lxb_dom_document_create_element(
        node->owner_document, (const lxb_char_t *)nv.ptr, nv.len, NULL);
    /* Keep the Ruby string behind nv.ptr alive until Lexbor has consumed it. */
    RB_GC_GUARD(nv.value);
    if (fresh == NULL) {
        rb_raise(mkr_eError, "failed to rename element");
    }

    /* Copy every name-related field so the node's identity (pointer, children,
     * attributes) is untouched while its name changes. */
    lxb_dom_element_t *el = lxb_dom_interface_element(node);
    el->node.local_name = fresh->node.local_name;
    el->node.prefix     = fresh->node.prefix;
    el->node.ns         = fresh->node.ns;
    el->upper_name      = fresh->upper_name;
    el->qualified_name  = fresh->qualified_name;

    /* The donor element is no longer needed once its fields are copied. */
    lxb_dom_node_destroy(lxb_dom_interface_node(fresh));
    /* The element's tag id (local_name) is the key the element-by-tag index
     * buckets on and the //tag fast path serves from; renaming changes it, so
     * the persisted index would otherwise miss the element under its new name
     * (a truncated, wrong //newtag result). Drop the indexes like every other
     * mutator. */
    mkr_invalidate_index(self);
    return rb_name;
}

#namespace_uriObject

Namespace URI (DOM `namespaceURI`).

Element: resolved from node->ns, so - DOM-faithfully - an HTML element is in the XHTML namespace (“http://www.w3.org/1999/xhtml”), not nil (an HTML element is never namespaceless; this is what browsers’ DOM and `namespace-uri()` return). SVG/MathML elements get their own URI; nil only when truly unnamespaced (LXB_NS__UNDEF).

Attribute: nil for an unprefixed attribute (class, id, …); for a prefixed one, the parser-assigned foreign-content namespace keyed on the prefix (xlink/xml/xmlns), else nil.

Other node kinds: nil.



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'ext/makiri/glue/ruby_html_node.c', line 225

static VALUE
mkr_node_namespace_uri(VALUE self)
{
    lxb_dom_node_t *node = mkr_html_node_unwrap(self);

    if (node->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        if (node->ns == LXB_NS__UNDEF) {
            return Qnil;
        }
        lxb_dom_document_t *doc = node->owner_document;
        if (doc == NULL || doc->ns == NULL) {
            return Qnil;
        }
        size_t len = 0;
        const lxb_char_t *uri = lxb_ns_by_id(doc->ns, node->ns, &len);
        if (uri == NULL || len == 0) {
            return Qnil;
        }
        return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)uri, len));
    }

    if (node->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        lxb_dom_attr_t *at = lxb_dom_interface_attr(node);

        /* An attribute set via set_attribute_ns records its OWN namespace on the
         * attr node - distinguishable because it differs from the owner element's
         * ns (a normally-set/parsed attr inherits the element's). Resolve it from
         * the interned id; LXB_NS__UNDEF (set by set_attribute_ns(nil, ...)) is
         * the null namespace. */
        if (at->owner != NULL && node->ns != at->owner->node.ns) {
            if (node->ns == LXB_NS__UNDEF) {
                return Qnil;
            }
            lxb_dom_document_t *doc = node->owner_document;
            if (doc != NULL && doc->ns != NULL) {
                size_t len = 0;
                const lxb_char_t *uri = lxb_ns_by_id(doc->ns, node->ns, &len);
                if (uri != NULL && len != 0) {
                    return mkr_ruby_str_from_borrowed(mkr_borrowed_text((const char *)uri, len));
                }
            }
            return Qnil;
        }

        size_t qlen = 0, llen = 0;
        const lxb_char_t *q = lxb_dom_attr_qualified_name(at, &qlen);
        (void) lxb_dom_attr_local_name(at, &llen);
        if (q == NULL || qlen <= llen + 1) {
            return Qnil;   /* unprefixed attribute => no namespace */
        }
        const char *uri = mkr_attr_ns_for_prefix((const char *)q, qlen - llen - 1);
        return uri ? rb_utf8_str_new_cstr(uri) : Qnil;
    }

    return Qnil;
}

#nextObject



488
489
490
491
492
493
# File 'ext/makiri/glue/ruby_html_node.c', line 488

/*
 * Node#next / Node#next_sibling -> the node linked through ->next, wrapped
 * against this node's document.
 */
static VALUE
mkr_node_next(VALUE self)
{
    lxb_dom_node_t *sibling = mkr_html_node_unwrap(self)->next;
    VALUE owner = mkr_node_document(self);

    return mkr_wrap_html_node(sibling, owner);
}

#next_elementObject



502
503
504
505
506
507
508
509
510
# File 'ext/makiri/glue/ruby_html_node.c', line 502

/*
 * Node#next_element -> the first following sibling whose type is ELEMENT,
 * wrapped against this node's document (the scan may end on NULL).
 */
static VALUE
mkr_node_next_element(VALUE self)
{
    lxb_dom_node_t *cursor;

    for (cursor = mkr_html_node_unwrap(self)->next; cursor != NULL; cursor = cursor->next) {
        if (cursor->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            break;
        }
    }
    return mkr_wrap_html_node(cursor, mkr_node_document(self));
}

#next_siblingObject



488
489
490
491
492
493
# File 'ext/makiri/glue/ruby_html_node.c', line 488

/*
 * Node#next / Node#next_sibling -> the node linked through ->next, wrapped
 * against this node's document.
 */
static VALUE
mkr_node_next(VALUE self)
{
    lxb_dom_node_t *sibling = mkr_html_node_unwrap(self)->next;
    VALUE owner = mkr_node_document(self);

    return mkr_wrap_html_node(sibling, owner);
}

#node_typeObject Also known as: type

Numeric DOM node type (LXB_DOM_NODE_TYPE_*).



323
324
325
326
327
# File 'ext/makiri/glue/ruby_html_node.c', line 323

/*
 * Node#node_type -> Integer: the node's LXB_DOM_NODE_TYPE_* value.
 */
static VALUE
mkr_node_get_type(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);
    int kind = (int)dom->type;

    return INT2NUM(kind);
}

#outer_html(*args) ⇒ Object

Outer HTML: the node itself plus its descendants. Pass `pretty: true` for indented output.



125
126
127
128
129
130
131
132
133
134
135
136
# File 'ext/makiri/glue/ruby_html_serialize.c', line 125

/*
 * Node#to_html / #outer_html / #to_s -> serialized markup.
 *
 * The pretty flag is read from the arguments by mkr_serialize_pretty_opt.
 */
static VALUE
mkr_node_to_html(int argc, VALUE *argv, VALUE self)
{
    int want_pretty = mkr_serialize_pretty_opt(argc, argv);
    lxb_dom_node_t *subject = mkr_html_node_unwrap(self);
    int children_only = 0;

    /* A document fragment has no tag of its own; "outer" == its children, so
     * the deep (children) serializer is the right one (the tree serializer
     * rejects a fragment node). */
    if (subject->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        children_only = 1;
    }
    return mkr_html_serialize(subject, children_only, want_pretty);
}

#outer_html=(rb_html) ⇒ Object

node.outer_html = html -> html. Replaces the node itself with the nodes parsed from html (parsed as a fragment in the context of the node’s parent element).



548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
# File 'ext/makiri/glue/ruby_html_mutate.c', line 548

/*
 * node.outer_html = html -> html.
 *
 * Parses html in the context of the node's parent element, inserts the
 * resulting nodes before the node, then detaches the node. Raises mkr_eError
 * unless the node has a parent that is an element.
 */
static VALUE
mkr_node_set_outer_html(VALUE self, VALUE rb_html)
{
    lxb_dom_node_t *target  = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *context = target->parent;
    int has_element_parent =
        (context != NULL && context->type == LXB_DOM_NODE_TYPE_ELEMENT);

    if (!has_element_parent) {
        rb_raise(mkr_eError, "outer_html= requires a node with a parent element");
    }

    /* Emit the parsed nodes in front of the target first; the target itself is
     * only unlinked once its replacements are in place. */
    mkr_parse_fragment_into(context, rb_html, target->owner_document,
                            mkr_emit_before, target);
    lxb_dom_node_remove(target);

    mkr_invalidate_index(self);
    return rb_html;
}

#parentObject



470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
# File 'ext/makiri/glue/ruby_html_node.c', line 470

/*
 * Node#parent -> the parent node wrapper.
 *
 * Attributes are resolved through the parse record's attr->owner lookup;
 * every other node kind uses node->parent directly.
 */
static VALUE
mkr_node_parent(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);
    VALUE owner_doc = mkr_node_document(self);
    lxb_dom_node_t *up;

    if (dom->type != LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        up = dom->parent;
    }
    else {
        /* Lexbor never links an attribute back to its element, so node->parent
         * is NULL for attributes. Resolve via the compat attr->owner index. */
        up = mkr_parsed_attr_owner(mkr_doc_parsed(owner_doc),
                                   lxb_dom_interface_attr(dom));
    }
    return mkr_wrap_html_node(up, owner_doc);
}

#parse(rb_html) ⇒ Object

node.parse(html) -> NodeSet of nodes parsed as a fragment in this element’s context (its own tag + namespace). Matches Nokogiri’s Node#parse and is the way to reach a foreign (SVG/MathML) fragment context.



463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'ext/makiri/glue/ruby_doc.c', line 463

/*
 * node.parse(html) -> the "children" of a fragment built with this element's
 * tag id and namespace id as the parsing context.
 *
 * Raises ArgumentError when self is not an element.
 */
static VALUE
mkr_node_parse(VALUE self, VALUE rb_html)
{
    lxb_dom_node_t *context = mkr_html_node_unwrap(self);

    if (context->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        rb_raise(rb_eArgError, "Node#parse requires an element context");
    }

    VALUE owner_doc = mkr_node_document(self);
    lxb_tag_id_t context_tag = (lxb_tag_id_t)context->local_name;
    lxb_ns_id_t  context_ns  = (lxb_ns_id_t)context->ns;
    VALUE fragment = mkr_build_fragment_ctx(owner_doc, rb_html, context_tag, context_ns);

    return rb_funcall(fragment, rb_intern("children"), 0);
}

#pointer_idObject

Nokogiri-compatible identity: the underlying node pointer as an Integer. Stable for the node’s lifetime and unique among currently-live nodes; a freed-then-reallocated node may reuse an address (same caveat as Nokogiri::XML::Node#pointer_id). a.pointer_id == b.pointer_id iff a.eql?(b).



146
147
148
149
150
# File 'ext/makiri/glue/ruby_node.c', line 146

/*
 * Node#pointer_id -> Integer: the value of mkr_node_id(self), widened to
 * unsigned long long before conversion to a Ruby Integer.
 */
VALUE
mkr_node_pointer_id(VALUE self)
{
    unsigned long long identity = (unsigned long long)mkr_node_id(self);

    return ULL2NUM(identity);
}

#prefixObject

Namespace prefix (DOM `prefix`): nil unless the qualified name is `prefix:local` - typically nil for HTML5-parsed content. Derived from the qualified-vs-local length (qualified == prefix “:” local), so a colon inside a local name can’t be mistaken for a separator. Element/Attribute only.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'ext/makiri/glue/ruby_html_node.c', line 164

/*
 * Node#prefix -> String or nil.
 *
 * For an element or attribute, returns the leading part of the qualified name
 * that precedes the separator and the local name; nil when the qualified name
 * is not longer than the local name plus one byte. nil for other node kinds.
 */
static VALUE
mkr_node_prefix(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);
    const lxb_char_t *qualified = NULL;
    size_t qualified_len = 0;
    size_t local_len = 0;

    if (dom->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        lxb_dom_element_t *element = lxb_dom_interface_element(dom);
        qualified = lxb_dom_element_qualified_name(element, &qualified_len);
        (void) lxb_dom_element_local_name(element, &local_len);
    }
    else if (dom->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        lxb_dom_attr_t *attribute = lxb_dom_interface_attr(dom);
        qualified = lxb_dom_attr_qualified_name(attribute, &qualified_len);
        (void) lxb_dom_attr_local_name(attribute, &local_len);
    }
    else {
        return Qnil;
    }

    /* Only the lengths are compared: the prefix is whatever precedes the last
     * (local_len + 1) bytes of the qualified name. */
    int has_prefix = (qualified != NULL && qualified_len > local_len + 1);
    if (!has_prefix) {   /* no "prefix:" segment */
        return Qnil;
    }

    size_t prefix_len = qualified_len - local_len - 1;
    return mkr_ruby_str_from_borrowed(
        mkr_borrowed_text((const char *)qualified, prefix_len));
}

#previousObject



495
496
497
498
499
500
# File 'ext/makiri/glue/ruby_html_node.c', line 495

/*
 * Node#previous / Node#previous_sibling -> the node linked through ->prev,
 * wrapped against this node's document.
 */
static VALUE
mkr_node_previous(VALUE self)
{
    lxb_dom_node_t *sibling = mkr_html_node_unwrap(self)->prev;
    VALUE owner = mkr_node_document(self);

    return mkr_wrap_html_node(sibling, owner);
}

#previous_elementObject



512
513
514
515
516
517
518
519
520
# File 'ext/makiri/glue/ruby_html_node.c', line 512

/*
 * Node#previous_element -> the nearest preceding sibling whose type is
 * ELEMENT, wrapped against this node's document (the scan may end on NULL).
 */
static VALUE
mkr_node_previous_element(VALUE self)
{
    lxb_dom_node_t *cursor;

    for (cursor = mkr_html_node_unwrap(self)->prev; cursor != NULL; cursor = cursor->prev) {
        if (cursor->type == LXB_DOM_NODE_TYPE_ELEMENT) {
            break;
        }
    }
    return mkr_wrap_html_node(cursor, mkr_node_document(self));
}

#previous_siblingObject



495
496
497
498
499
500
# File 'ext/makiri/glue/ruby_html_node.c', line 495

/*
 * Node#previous / Node#previous_sibling -> the node linked through ->prev,
 * wrapped against this node's document.
 */
static VALUE
mkr_node_previous(VALUE self)
{
    lxb_dom_node_t *sibling = mkr_html_node_unwrap(self)->prev;
    VALUE owner = mkr_node_document(self);

    return mkr_wrap_html_node(sibling, owner);
}

#removeObject

node.remove / node.unlink -> node. Detaches from the tree (still usable).



171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'ext/makiri/glue/ruby_html_mutate.c', line 171

/*
 * node.remove / node.unlink -> node.
 *
 * Unlinks the node from its parent and drops the document indexes. A node
 * that already has no parent is returned untouched. Raises mkr_eError for an
 * attribute node.
 */
static VALUE
mkr_node_remove(VALUE self)
{
    lxb_dom_node_t *target = mkr_node_unwrap_mutable(self);

    if (target->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        rb_raise(mkr_eError, "use delete(name) to remove an attribute");
    }
    if (target->parent == NULL) {
        return self;   /* already detached: nothing to unlink or invalidate */
    }

    lxb_dom_node_remove(target);
    mkr_invalidate_index(self);
    return self;
}

#remove_attribute(rb_name) ⇒ Object

element.delete(name) / element.remove_attribute(name) -> self. Removes the attribute if present.



457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'ext/makiri/glue/ruby_html_mutate.c', line 457

/*
 * element.delete(name) -> self.
 *
 * Asks Lexbor to remove the attribute with the given name and then drops the
 * document indexes. A non-element receiver is returned unchanged.
 */
static VALUE
mkr_node_delete(VALUE self, VALUE rb_name)
{
    lxb_dom_node_t *dom = mkr_node_unwrap_mutable(self);

    if (dom->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        mkr_ruby_borrowed_text_t attr_name =
            mkr_ruby_verified_text(rb_name, "attribute name");
        lxb_dom_element_t *element = lxb_dom_interface_element(dom);

        lxb_dom_element_remove_attribute(
            element, (const lxb_char_t *)attr_name.ptr, attr_name.len);
        /* Keep the Ruby string behind attr_name.ptr alive across the call. */
        RB_GC_GUARD(attr_name.value);
        mkr_invalidate_index(self);
    }
    return self;
}

#remove_attribute_ns(rb_ns, rb_local) ⇒ Object

element.remove_attribute_ns(namespace_or_nil, local_name) -> nil. Removes the attribute matching (namespace, local name) - the DOM key - so a namespaced attribute is removed without disturbing a same-qualified-name one in another namespace (which removal by qualified name, case-insensitive for HTML, would).



366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'ext/makiri/glue/ruby_html_mutate.c', line 366

/*
 * element.remove_attribute_ns(namespace_or_nil, local_name) -> nil.
 *
 * Looks the attribute up by (namespace id, local name) and removes it when
 * found, dropping the document indexes. nil or "" as the namespace selects
 * the null namespace (LXB_NS__UNDEF). A non-element receiver is a no-op.
 *
 * A non-empty namespace that cannot be turned into a namespace id is treated
 * as "no such attribute": the call returns nil without touching the element.
 */
static VALUE
mkr_node_remove_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_local)
{
    lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
    if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        return Qnil;
    }
    lxb_dom_element_t *el = lxb_dom_interface_element(node);

    mkr_ruby_borrowed_text_t lv = mkr_ruby_verified_text(rb_local, "attribute local name");

    lxb_ns_id_t want_ns = LXB_NS__UNDEF;
    VALUE ns_guard = Qnil;
    if (!NIL_P(rb_ns)) {
        mkr_ruby_borrowed_text_t nv = mkr_ruby_verified_text(rb_ns, "namespace");
        ns_guard = nv.value;
        if (nv.len > 0) {
            const lxb_ns_data_t *d = NULL;
            if (node->owner_document != NULL && node->owner_document->ns != NULL) {
                d = lxb_ns_append(node->owner_document->ns,
                                  (const lxb_char_t *)nv.ptr, nv.len);
            }
            if (d == NULL) {
                /* The caller named a real namespace but no id could be
                 * obtained for it. Falling back to LXB_NS__UNDEF would point
                 * the lookup at the null namespace and could remove a
                 * different attribute than the one requested, so stop here. */
                RB_GC_GUARD(lv.value);
                RB_GC_GUARD(ns_guard);
                return Qnil;
            }
            want_ns = d->ns_id;
        }
    }

    lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns,
                               (const lxb_char_t *)lv.ptr, lv.len);
    if (attr != NULL) {
        lxb_dom_element_attr_remove(el, attr);
        mkr_invalidate_index(self);
    }

    /* Keep the Ruby strings behind the borrowed pointers alive until here. */
    RB_GC_GUARD(lv.value);
    RB_GC_GUARD(ns_guard);
    return Qnil;
}

#replace(rb_other) ⇒ Object

node.replace(other) -> other. Puts other where node is, detaches node.



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'ext/makiri/glue/ruby_html_mutate.c', line 186

/*
 * node.replace(other) -> other.
 *
 * Inserts other (or, for a fragment, each of its children in order) in front
 * of the node, then detaches the node and drops the document indexes. Raises
 * mkr_eError when the node has no parent.
 *
 * NOTE(review): other == self is not special-cased here; presumably
 * mkr_prepare_insert rejects or handles that case - confirm.
 */
static VALUE
mkr_node_replace(VALUE self, VALUE rb_other)
{
    lxb_dom_node_t *anchor      = mkr_node_unwrap_mutable(self);
    lxb_dom_node_t *replacement = mkr_arg_node(rb_other);

    if (anchor->parent == NULL) {
        rb_raise(mkr_eError, "cannot replace a node with no parent");
    }
    mkr_prepare_insert(anchor, replacement);

    if (!mkr_is_fragment(replacement)) {
        lxb_dom_node_insert_before(anchor, replacement);
    }
    else {
        /* Drain the fragment front-to-back; each child is unlinked from the
         * fragment before it is placed ahead of the anchor, so document order
         * is preserved. */
        for (lxb_dom_node_t *child = replacement->first_child;
             child != NULL;
             child = replacement->first_child) {
            lxb_dom_node_remove(child);
            lxb_dom_node_insert_before(anchor, child);
        }
    }

    lxb_dom_node_remove(anchor);
    mkr_invalidate_index(self);
    return rb_other;
}

#set_attribute_ns(rb_ns, rb_qname, rb_value) ⇒ Object

element.set_attribute_ns(namespace_or_nil, qualified_name, value) -> value.

Stores the attribute under its qualified name (case-preserved - setAttributeNS is case-sensitive, unlike the HTML setAttribute family) and records its OWN namespace on the attr node, so namespaceURI / getAttributeNS resolve it. The namespace URI is interned in the document’s ns table; nil/“” stores the null namespace (LXB_NS__UNDEF).



282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'ext/makiri/glue/ruby_html_mutate.c', line 282

/*
 * element.set_attribute_ns(namespace_or_nil, qualified_name, value) -> value.
 *
 * Finds an existing attribute by (namespace id, local name) and updates its
 * value, or creates and appends a new attribute named by the qualified name.
 * nil or "" as the namespace selects the null namespace (LXB_NS__UNDEF).
 *
 * Raises mkr_eError when self is not an element, when a non-empty namespace
 * cannot be turned into a namespace id, or when Lexbor fails to create, name
 * or assign the attribute.
 */
static VALUE
mkr_node_set_attribute_ns(VALUE self, VALUE rb_ns, VALUE rb_qname, VALUE rb_value)
{
    lxb_dom_node_t *node = mkr_node_unwrap_mutable(self);
    if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
        rb_raise(mkr_eError, "cannot set an attribute on a non-element node");
    }
    lxb_dom_element_t *el = lxb_dom_interface_element(node);

    mkr_ruby_borrowed_text_t qv = mkr_ruby_verified_text(rb_qname, "attribute qualified name");
    mkr_ruby_borrowed_text_t vv = mkr_ruby_verified_text(rb_value, "attribute value");

    mkr_ruby_borrowed_text_t nv = {0};
    bool have_ns = false;
    if (!NIL_P(rb_ns)) {
        nv = mkr_ruby_verified_text(rb_ns, "namespace");
        have_ns = nv.len > 0;
    }

    /* Intern the wanted namespace (null/"" => LXB_NS__UNDEF) so the existing
     * attribute is matched on (namespace, local name) - the DOM key - rather than
     * the qualified name. */
    lxb_ns_id_t want_ns = LXB_NS__UNDEF;
    if (have_ns) {
        const lxb_ns_data_t *d = NULL;
        if (node->owner_document != NULL && node->owner_document->ns != NULL) {
            d = lxb_ns_append(node->owner_document->ns,
                              (const lxb_char_t *)nv.ptr, nv.len);
        }
        if (d == NULL) {
            /* A real namespace was requested but no id could be obtained.
             * Continuing with LXB_NS__UNDEF would match - and overwrite - a
             * null-namespace attribute with the same local name, so fail
             * before touching the element. */
            rb_raise(mkr_eError, "failed to set namespaced attribute");
        }
        want_ns = d->ns_id;
    }

    /* Split the qualified name at its first ':' to get the local name used
     * for the lookup; without a colon the whole name is the local name. */
    const lxb_char_t *qn = (const lxb_char_t *)qv.ptr;
    mkr_span_t qspan = mkr_span((const char *)qn, qv.len);
    size_t colon_off;
    bool has_colon = mkr_span_find(&qspan, ':', &colon_off);
    const lxb_char_t *local = has_colon ? qn + colon_off + 1 : qn;
    size_t local_len = has_colon ? qv.len - colon_off - 1 : qv.len;

    /* A match keeps its qualified name (so re-setting with a different prefix
     * leaves the prefix unchanged); only the value updates. A miss appends a new
     * attribute, even when its qualified name collides with an existing one in a
     * different namespace - the namespace-aware setter splits prefix/local and
     * records the namespace; a null namespace just sets the bare name. */
    lxb_dom_attr_t *attr = mkr_attr_find_ns(el, want_ns, local, local_len);
    if (attr != NULL) {
        if (lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
            rb_raise(mkr_eError, "failed to set attribute value");
        }
    }
    else {
        attr = lxb_dom_attr_interface_create(node->owner_document);
        if (attr == NULL) {
            rb_raise(mkr_eError, "failed to create attribute");
        }
        /* A fresh attr is calloc'd, so node.ns is already LXB_NS__UNDEF for the
         * null-namespace case; only the namespaced setter changes it. */
        lxb_status_t st;
        if (have_ns) {
            st = lxb_dom_attr_set_name_ns(attr, (const lxb_char_t *)nv.ptr, nv.len,
                                          (const lxb_char_t *)qv.ptr, qv.len, false);
        }
        else {
            st = lxb_dom_attr_set_name(attr, (const lxb_char_t *)qv.ptr, qv.len, false);
        }
        if (st != LXB_STATUS_OK
            || lxb_dom_attr_set_value(attr, (const lxb_char_t *)vv.ptr, vv.len) != LXB_STATUS_OK) {
            /* Leave the un-appended attr for the document arena to free wholesale
             * (the module's "never destroy a detached node" convention). */
            rb_raise(mkr_eError, "failed to set namespaced attribute");
        }
        lxb_dom_element_attr_append(el, attr);
    }

    /* Keep the Ruby strings behind the borrowed pointers alive until here. */
    RB_GC_GUARD(qv.value);
    RB_GC_GUARD(vv.value);
    RB_GC_GUARD(nv.value);
    mkr_invalidate_index(self);
    return rb_value;
}

#tag_nameObject

Element#tag_name (DOM `tagName`): the qualified name, uppercased for an HTML element in an HTML document (“DIV”), as the DOM specifies - unlike #name, which is the lowercase qualified name. SVG/MathML elements keep their case. nil for non-element nodes.



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'ext/makiri/glue/ruby_html_node.c', line 288

/*
 * Element#tag_name -> String or nil.
 *
 * Returns Lexbor's tag name for an element; nil for any other node kind or
 * when Lexbor reports no name.
 */
static VALUE
mkr_node_tag_name(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);

    if (dom->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        size_t tag_len = 0;
        const lxb_char_t *tag =
            lxb_dom_element_tag_name(lxb_dom_interface_element(dom), &tag_len);

        if (tag != NULL) {
            return mkr_ruby_str_from_borrowed(
                mkr_borrowed_text((const char *)tag, tag_len));
        }
    }
    return Qnil;
}

#targetObject

ProcessingInstruction#target (DOM `target`): the PI’s target name (the “xml” in <?xml …?>). nil for non-PI nodes. The PI’s data is read via #content / #text like any character-data node.



309
310
311
312
313
314
315
316
317
318
319
320
# File 'ext/makiri/glue/ruby_html_node.c', line 309

/*
 * ProcessingInstruction#target -> String, or nil when self is not a
 * processing-instruction node.
 */
static VALUE
mkr_node_pi_target(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);

    if (dom->type == LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION) {
        size_t target_len = 0;
        const lxb_char_t *target = lxb_dom_processing_instruction_target(
            lxb_dom_interface_processing_instruction(dom), &target_len);

        return mkr_ruby_str_from_borrowed(
            mkr_borrowed_text((const char *)target, target_len));
    }
    return Qnil;
}

#textObject

Concatenated text content of this node and its descendants. The DOM spec makes a Document’s textContent null; we instead return the text of the root element (matching the intuitive, Nokogiri-like Document#text).



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'ext/makiri/glue/ruby_html_node.c', line 398

/*
 * Node#text / #content -> String.
 *
 * Returns the concatenated text of the node's subtree as a UTF-8 Ruby String.
 *   - Document: the text of the root element ("" when there is no root).
 *   - Element / fragment: served from the per-document text slices when
 *     available, otherwise gathered by walking the descendants and appending
 *     each text / CDATA node's data.
 *   - Any other node kind: Lexbor's lxb_dom_node_text_content ("" when it
 *     returns NULL); the temporary buffer is released after copying.
 */
static VALUE
mkr_node_content(VALUE self)
{
    lxb_dom_node_t *node = mkr_html_node_unwrap(self);
    if (node->type == LXB_DOM_NODE_TYPE_DOCUMENT) {
        /* Redirect a document to its root element; the element path below
         * then handles it. */
        node = lxb_dom_document_root((lxb_dom_document_t *)node);
        if (node == NULL) {
            return rb_utf8_str_new("", 0);
        }
    }

    /* Fast path for elements / fragments (the common case, incl. document text).
     *
     * Preferred: the per-document text index (dom_adapter/text_index.c) maps
     * this node to the contiguous, document-order run of its descendants' text
     * slices, so we serve a single pre-sized memcpy run with no per-extraction
     * tree walk - the walk is otherwise the dominant, cache-bound cost. Built
     * lazily on first use and dropped on any mutation, so a slice can never
     * point at reallocated/detached storage.
     *
     * Fallback (index unavailable - node outside the indexed tree, e.g. a
     * fragment, or a build OOM): stream each descendant text/CDATA node's data
     * straight into the Ruby string via an iterative pre-order walk (stack-safe;
     * skips Lexbor's intermediate arena buffer + copy). */
    if (node->type == LXB_DOM_NODE_TYPE_ELEMENT
        || node->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        mkr_parsed_t *parsed = mkr_doc_parsed(mkr_node_document(self));
        const mkr_borrowed_text_t *slices;
        size_t nslices, total;
        if (parsed != NULL
            && mkr_parsed_text_slices(parsed, node, &slices, &nslices, &total)) {
            return mkr_ruby_str_from_slices(slices, nslices, total);
        }

        VALUE str = rb_utf8_str_new(NULL, 0);
        /* Pre-order walk without recursion: descend to the first child when
         * there is one; otherwise climb until a node with a next sibling is
         * found, stopping once the climb reaches the starting node. */
        for (lxb_dom_node_t *n = node->first_child; n != NULL;) {
            if (n->type == LXB_DOM_NODE_TYPE_TEXT
                || n->type == LXB_DOM_NODE_TYPE_CDATA_SECTION) {
                const lexbor_str_t *d = &lxb_dom_interface_character_data(n)->data;
                if (d->data != NULL && d->length != 0) {
                    rb_str_cat(str, (const char *)d->data, (long)d->length);
                }
            }
            if (n->first_child != NULL) { n = n->first_child; continue; }
            while (n != node && n->next == NULL) { n = n->parent; }
            if (n == node) { break; }
            n = n->next;
        }
        return str;
    }

    /* Character-data and other node kinds keep the general (proven) path. */
    size_t len = 0;
    lxb_char_t *text = lxb_dom_node_text_content(node, &len);
    if (text == NULL) {
        return rb_utf8_str_new("", 0);
    }
    /* Copy into a Ruby-owned string first, then hand the Lexbor buffer back to
     * the owning document. */
    VALUE str = rb_utf8_str_new((const char *)text, len);
    lxb_dom_document_destroy_text(node->owner_document, text);
    return str;
}

#to_html(*args) ⇒ Object

Outer HTML: the node itself plus its descendants. Pass `pretty: true` for indented output.



125
126
127
128
129
130
131
132
133
134
135
136
# File 'ext/makiri/glue/ruby_html_serialize.c', line 125

/*
 * Node#to_html / #outer_html / #to_s -> serialized markup.
 *
 * The pretty flag is read from the arguments by mkr_serialize_pretty_opt.
 */
static VALUE
mkr_node_to_html(int argc, VALUE *argv, VALUE self)
{
    int want_pretty = mkr_serialize_pretty_opt(argc, argv);
    lxb_dom_node_t *subject = mkr_html_node_unwrap(self);
    int children_only = 0;

    /* A document fragment has no tag of its own; "outer" == its children, so
     * the deep (children) serializer is the right one (the tree serializer
     * rejects a fragment node). */
    if (subject->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        children_only = 1;
    }
    return mkr_html_serialize(subject, children_only, want_pretty);
}

#to_s(*args) ⇒ Object

Outer HTML: the node itself plus its descendants. Pass `pretty: true` for indented output.



125
126
127
128
129
130
131
132
133
134
135
136
# File 'ext/makiri/glue/ruby_html_serialize.c', line 125

/*
 * Node#to_html / #outer_html / #to_s -> serialized markup.
 *
 * The pretty flag is read from the arguments by mkr_serialize_pretty_opt.
 */
static VALUE
mkr_node_to_html(int argc, VALUE *argv, VALUE self)
{
    int want_pretty = mkr_serialize_pretty_opt(argc, argv);
    lxb_dom_node_t *subject = mkr_html_node_unwrap(self);
    int children_only = 0;

    /* A document fragment has no tag of its own; "outer" == its children, so
     * the deep (children) serializer is the right one (the tree serializer
     * rejects a fragment node). */
    if (subject->type == LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT) {
        children_only = 1;
    }
    return mkr_html_serialize(subject, children_only, want_pretty);
}

#unlinkObject

node.remove / node.unlink -> node. Detaches from the tree (still usable).



171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'ext/makiri/glue/ruby_html_mutate.c', line 171

/*
 * node.remove / node.unlink -> node.
 *
 * Unlinks the node from its parent and drops the document indexes. A node
 * that already has no parent is returned untouched. Raises mkr_eError for an
 * attribute node.
 */
static VALUE
mkr_node_remove(VALUE self)
{
    lxb_dom_node_t *target = mkr_node_unwrap_mutable(self);

    if (target->type == LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        rb_raise(mkr_eError, "use delete(name) to remove an attribute");
    }
    if (target->parent == NULL) {
        return self;   /* already detached: nothing to unlink or invalidate */
    }

    lxb_dom_node_remove(target);
    mkr_invalidate_index(self);
    return self;
}

#valueObject

attr.value -> the attribute’s value String. For non-attribute nodes, falls back to text content (matching the loose Nokogiri-ish meaning of #value).



701
702
703
704
705
706
707
708
709
710
711
712
# File 'ext/makiri/glue/ruby_html_node.c', line 701

/*
 * Node#value -> String.
 *
 * Attribute: the attribute's value. Any other node kind: whatever
 * mkr_node_content returns for it.
 */
static VALUE
mkr_node_value(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);

    if (dom->type != LXB_DOM_NODE_TYPE_ATTRIBUTE) {
        return mkr_node_content(self);
    }

    size_t value_len = 0;
    const lxb_char_t *value =
        lxb_dom_attr_value(lxb_dom_interface_attr(dom), &value_len);
    return mkr_ruby_str_from_borrowed(
        mkr_borrowed_text((const char *)value, value_len));
}

#valuesObject

node.values -> [String, …] of attribute values (document order).



658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
# File 'ext/makiri/glue/ruby_html_node.c', line 658

/*
 * node.values -> Array of Strings.
 *
 * Collects the value of every attribute in the order Lexbor's attribute list
 * yields them. A non-element receiver gives an empty Array.
 */
static VALUE
mkr_node_values(VALUE self)
{
    lxb_dom_node_t *dom = mkr_html_node_unwrap(self);
    VALUE collected = rb_ary_new();

    if (dom->type == LXB_DOM_NODE_TYPE_ELEMENT) {
        lxb_dom_attr_t *cursor;

        for (cursor = lxb_dom_element_first_attribute(lxb_dom_interface_element(dom));
             cursor != NULL;
             cursor = lxb_dom_element_next_attribute(cursor)) {
            size_t value_len = 0;
            const lxb_char_t *value = lxb_dom_attr_value(cursor, &value_len);
            VALUE rb_value = mkr_ruby_str_from_borrowed(
                mkr_borrowed_text((const char *)value, value_len));

            rb_ary_push(collected, rb_value);
        }
    }
    return collected;
}

#xpath(*args) ⇒ Object



729
730
731
732
733
734
735
# File 'ext/makiri/glue/ruby_xpath.c', line 729

/*
 * Node#xpath(expression, handler = nil, **opts).
 *
 * Accepts one required argument, one optional positional argument and an
 * optional keyword hash ("11:"), then delegates to mkr_node_xpath_run with the
 * flag derived from the options by mkr_ns_matching_lax and a trailing 0.
 */
static VALUE
mkr_node_xpath(int argc, VALUE *argv, VALUE self)
{
    VALUE expression;
    VALUE custom_handler;
    VALUE keywords;

    rb_scan_args(argc, argv, "11:", &expression, &custom_handler, &keywords);

    return mkr_node_xpath_run(self, expression, custom_handler,
                              mkr_ns_matching_lax(keywords), 0);
}