Class: Makiri::NodeSet

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/makiri/node_set.rb,
ext/makiri/makiri.c

Overview

An ordered collection of nodes, returned by xpath/css queries.

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.new(*args) ⇒ Object

NodeSet.new(document_or_node, list = []) -> NodeSet. Mirrors Nokogiri: the first argument is the owning Document (or any node, whose document is used), which the set pins as a GC keepalive; the optional list seeds the set. Every listed node MUST belong to that document. A node from another document or representation would be re-wrapped under the wrong document/kind, so it is rejected with ArgumentError (fail-closed). The check matters because the set’s storage is representation-opaque and relies on the document kind to interpret its nodes; rejecting foreign nodes is what prevents HTML/XML type confusion.



409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# File 'ext/makiri/glue/ruby_node_set.c', line 409

/*
 * NodeSet.new(document_or_node, list = nil) -> NodeSet
 *
 * Builds a set owned by a document. The first argument is either a Document
 * or a Node (in which case the node's document is used). The optional second
 * argument must be array-convertible; each element must be a Node whose
 * document is that same document, otherwise ArgumentError is raised.
 *
 * Raises TypeError when the first argument is neither a Document nor a Node,
 * or when the second argument cannot be converted to an Array.
 */
static VALUE
mkr_node_set_s_new(int argc, VALUE *argv, VALUE klass)
{
    VALUE owner_arg, seed_arg;
    VALUE document, result, seeds;
    long idx;

    (void)klass;
    rb_scan_args(argc, argv, "11", &owner_arg, &seed_arg);

    /* Resolve the owning document from the first argument. */
    if (rb_obj_is_kind_of(owner_arg, mkr_cDocument)) {
        document = owner_arg;
    } else {
        if (!rb_obj_is_kind_of(owner_arg, mkr_cNode)) {
            rb_raise(rb_eTypeError, "expected a Makiri::Document or Node as the first argument");
        }
        document = mkr_node_document(owner_arg);
    }

    result = mkr_node_set_new(document);
    if (NIL_P(seed_arg)) {
        return result;                     /* no seed list: empty set */
    }

    seeds = rb_check_array_type(seed_arg);
    if (NIL_P(seeds)) {
        rb_raise(rb_eTypeError, "expected an Array of nodes as the second argument");
    }

    /* The length is re-read on every pass, so the bound always reflects the
     * array's current size. */
    idx = 0;
    while (idx < RARRAY_LEN(seeds)) {
        VALUE candidate = RARRAY_AREF(seeds, idx);

        /* Fail closed: only nodes of the owning document may enter the set.
         * The document is only looked up once the element is known to be a Node. */
        if (!rb_obj_is_kind_of(candidate, mkr_cNode) || mkr_node_document(candidate) != document) {
            rb_raise(rb_eArgError,
                     "every node must be a Makiri node belonging to the given document");
        }
        mkr_node_set_push(result, (mkr_raw_node_t *)mkr_node_raw(candidate));
        idx++;
    }
    return result;
}

Instance Method Details

#&(other) ⇒ Object

self & other -> intersection (self order, deduped).



370
371
372
373
374
# File 'ext/makiri/glue/ruby_node_set.c', line 370

/*
 * NodeSet#&(other): forwards both operands to mkr_node_set_op_filter and
 * returns whatever that helper returns.
 */
static VALUE
mkr_node_set_op_and(VALUE self, VALUE other)
{
    /* NOTE(review): the trailing 1 presumably selects "keep nodes that are
     * also in other" (mkr_node_set_op_minus passes 0) -- confirm against
     * mkr_node_set_op_filter. */
    return mkr_node_set_op_filter(self, other, 1);
}

#+(other) ⇒ Object

self + other -> concatenation (duplicates kept).



324
325
326
327
328
329
330
331
332
333
# File 'ext/makiri/glue/ruby_node_set.c', line 324

/*
 * NodeSet#+(other): returns a new set on self's document holding every entry
 * of self followed by every entry of other. Entries are appended
 * unconditionally, so duplicates are kept.
 */
static VALUE
mkr_node_set_op_plus(VALUE self, VALUE other)
{
    mkr_node_set_data_t *lhs = mkr_node_set_get(self);
    mkr_node_set_data_t *rhs = mkr_node_set_other(lhs, other);
    VALUE sum = mkr_node_set_new(lhs->document);
    mkr_node_set_data_t *operands[2];
    int which;

    /* Walk the operands in output order: self first, then other. */
    operands[0] = lhs;
    operands[1] = rhs;
    for (which = 0; which < 2; which++) {
        mkr_node_set_data_t *src = operands[which];
        size_t idx = 0;

        while (idx < src->count) {
            mkr_node_set_push(sum, src->nodes[idx]);
            idx++;
        }
    }
    return sum;
}

#-(other) ⇒ Object

self - other -> difference (self order, deduped).



377
378
379
380
381
# File 'ext/makiri/glue/ruby_node_set.c', line 377

/*
 * NodeSet#-(other): forwards both operands to mkr_node_set_op_filter and
 * returns whatever that helper returns.
 */
static VALUE
mkr_node_set_op_minus(VALUE self, VALUE other)
{
    /* NOTE(review): the trailing 0 presumably selects "keep nodes that are
     * NOT in other" (mkr_node_set_op_and passes 1) -- confirm against
     * mkr_node_set_op_filter. */
    return mkr_node_set_op_filter(self, other, 0);
}

#[](*args) ⇒ Object

set[index] -> Node or nil (negative indices count from the end). set[start, length] -> a new NodeSet (nil if start is out of range). set[range] -> a new NodeSet (nil if the range start is out of range). Mirrors Array#[].



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'ext/makiri/glue/ruby_node_set.c', line 143

/*
 * NodeSet#[] in its three call shapes:
 *
 *   set[index]          -> the wrapped node, or nil when out of range
 *   set[start, length]  -> slice as a new set, or nil when start is out of
 *                          range or length is negative
 *   set[range]          -> slice as a new set, or nil when the range start is
 *                          out of range
 *
 * Negative index/start values are offset by the set's size. Any other
 * argument count raises ArgumentError.
 */
static VALUE
mkr_node_set_aref(int argc, VALUE *argv, VALUE self)
{
    mkr_node_set_data_t *set;
    long total;

    TypedData_Get_Struct(self, mkr_node_set_data_t, &mkr_node_set_type, set);
    total = (long)set->count;

    rb_check_arity(argc, 1, 2);

    if (argc == 2) {
        /* (start, length) form; both conversions happen before any range checks. */
        long start = NUM2LONG(argv[0]);
        long span = NUM2LONG(argv[1]);

        if (start < 0) start += total;
        if (start < 0 || start > total || span < 0) return Qnil;
        /* start == total is allowed and yields a zero-length slice. */
        if (span > total - start) span = total - start;
        return mkr_node_set_slice(set, start, span);
    }

    if (rb_obj_is_kind_of(argv[0], rb_cRange)) {
        long start, span;

        /* err = 0: an out-of-range start is reported through the return
         * value rather than by raising. */
        if (rb_range_beg_len(argv[0], &start, &span, total, 0) != Qtrue) {
            return Qnil;
        }
        return mkr_node_set_slice(set, start, span);
    }

    {
        /* Plain integer index. */
        long idx = NUM2LONG(argv[0]);

        if (idx < 0) idx += total;
        if (idx >= 0 && idx < total) {
            return mkr_node_set_wrap(set, set->nodes[idx]);
        }
        return Qnil;
    }
}

#at(index) ⇒ Makiri::Node?

Index access; delegates to #[] with a single argument (the two-argument start/length form is not available through #at).

Returns:



30
31
32
# File 'lib/makiri/node_set.rb', line 30

# Looks up an entry by calling #[] with +index+ as its only argument.
#
# @param index [Integer] position, forwarded unchanged to #[]
# @return [Makiri::Node, nil] whatever #[] returns for +index+
def at(index)
  self[index]
end

#at_css(selector) ⇒ Makiri::Node?

First node matching the CSS selector across the set, or nil.

Returns:



62
63
64
# File 'lib/makiri/node_set.rb', line 62

# Runs #css with +selector+ and returns the first element of its result.
#
# @param selector [String] CSS selector, passed straight to #css
# @return [Makiri::Node, nil] the first match, or nil when #css yields nothing
def at_css(selector)
  matches = css(selector)
  matches.first
end

#at_xpath(expr) ⇒ Object

First node matching the XPath expression across the set (or the scalar value for a non-node-set result).



68
69
70
71
# File 'lib/makiri/node_set.rb', line 68

# Evaluates +expr+ through #xpath. A NodeSet result is reduced to its first
# node; any other result (a scalar) is returned untouched.
#
# @param expr [String] XPath expression, passed straight to #xpath
# @return [Object] first node of a NodeSet result, or the non-NodeSet result
def at_xpath(expr)
  outcome = xpath(expr)
  return outcome unless outcome.is_a?(NodeSet)

  outcome.first
end

#clone(freeze: nil) ⇒ Object

Like #dup (a new set over the same nodes), honouring Ruby’s freeze: keyword. (#dup is the native copy.)



97
98
99
100
101
# File 'lib/makiri/node_set.rb', line 97

# Copies the receiver via #dup, then applies Ruby's +freeze:+ convention:
# a truthy value freezes the copy, +false+ leaves it unfrozen, and +nil+
# (the default) freezes the copy only when the receiver is frozen.
#
# @param freeze [Boolean, nil] requested frozen state of the copy
# @return [Object] the copy produced by #dup
def clone(freeze: nil)
  dup.tap do |copy|
    keep_frozen = freeze.nil? ? frozen? : freeze
    copy.freeze if keep_frozen
  end
end

#css(selector) ⇒ Makiri::NodeSet

Run a CSS selector against every node and return the unioned matches.

Returns:



50
51
52
# File 'lib/makiri/node_set.rb', line 50

# Queries the set with a CSS selector by delegating to union_query with the
# :css query kind.
#
# @param selector [String] CSS selector, forwarded unchanged
# @return [Makiri::NodeSet] the result of union_query(:css, selector)
def css(selector)
  union_query(:css, selector)
end

#dup(*args) ⇒ Object

#dup / #clone: a new NodeSet over the same nodes (the nodes are shared - they are owned by the document arena - but the set itself is independent), like Nokogiri. Defined here because the allocator is undef’d, so Ruby’s default allocate-then-copy raises; any level/freeze argument is ignored.



387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'ext/makiri/glue/ruby_node_set.c', line 387

/*
 * NodeSet#dup: returns a new set on the same document containing the same
 * node pointers in the same order. Only the set is copied; the nodes
 * themselves are shared.
 */
static VALUE
mkr_node_set_dup(int argc, VALUE *argv, VALUE self)
{
    mkr_node_set_data_t *src;
    VALUE twin;
    size_t idx;

    /* Arguments are accepted but deliberately unused. */
    (void)argc;
    (void)argv;

    src = mkr_node_set_get(self);
    twin = mkr_node_set_new(src->document);

    /* Entries go through mkr_node_set_push one at a time so the copy gets the
     * same overflow-checked growth and cap enforcement as any other set.
     * Every entry is appended unconditionally, so the copy mirrors the source
     * entry for entry. */
    idx = 0;
    while (idx < src->count) {
        mkr_node_set_push(twin, src->nodes[idx]);
        idx++;
    }
    return twin;
}

#each ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'ext/makiri/glue/ruby_node_set.c', line 175

/*
 * NodeSet#each: yields each entry, wrapped via mkr_node_set_wrap, in storage
 * order and returns self. Without a block it returns an Enumerator instead.
 */
static VALUE
mkr_node_set_each(VALUE self)
{
    mkr_node_set_data_t *set;
    size_t pos = 0;

    TypedData_Get_Struct(self, mkr_node_set_data_t, &mkr_node_set_type, set);

    /* No block given: hand back an enumerator (no size function supplied). */
    RETURN_ENUMERATOR(self, 0, 0);

    /* count and nodes are read afresh on every pass. */
    while (pos < set->count) {
        rb_yield(mkr_node_set_wrap(set, set->nodes[pos]));
        pos++;
    }
    return self;
}

#empty? ⇒ Boolean

Returns:

  • (Boolean)


14
15
16
# File 'lib/makiri/node_set.rb', line 14

# Reports whether the set holds no entries, i.e. whether #length is zero.
#
# @return [Boolean] true when length is zero
def empty?
  length.zero?
end

#first ⇒ Makiri::Node?

Returns:



19
20
21
# File 'lib/makiri/node_set.rb', line 19

# Returns the entry at index 0 via #[].
#
# @return [Makiri::Node, nil] whatever #[] returns for index 0
def first
  self[0]
end

#inspect ⇒ Object



103
104
105
# File 'lib/makiri/node_set.rb', line 103

# Short debugging representation: the receiver's class name and its length.
# Individual entries are not listed.
#
# @return [String] e.g. "#<Makiri::NodeSet length=3>"
def inspect
  format("#<%s length=%s>", self.class.name, length)
end

#last ⇒ Makiri::Node?

Returns:



24
25
26
# File 'lib/makiri/node_set.rb', line 24

# Returns the entry at index (length - 1) via #[]. For an empty receiver that
# index is -1, and the result is whatever #[] returns for it.
#
# @return [Makiri::Node, nil] the final entry
def last
  tail_index = length - 1
  self[tail_index]
end

#length ⇒ Object



119
120
121
122
123
124
125
# File 'ext/makiri/glue/ruby_node_set.c', line 119

/*
 * NodeSet#length -> Integer
 *
 * Returns the number of entries currently stored in the set.
 */
static VALUE
mkr_node_set_length(VALUE self)
{
    mkr_node_set_data_t *s;
    TypedData_Get_Struct(self, mkr_node_set_data_t, &mkr_node_set_type, s);
    /* count is indexed with size_t elsewhere in this file, so it is presumably
     * a size_t. SIZET2NUM converts the full size_t range, whereas ULONG2NUM
     * narrows through unsigned long, which is only 32 bits wide on LLP64
     * targets such as 64-bit Windows. */
    return SIZET2NUM(s->count);
}

#remove ⇒ self Also known as: unlink

Detach every node in the set from its tree.

Returns:

  • (self)


89
90
91
92
# File 'lib/makiri/node_set.rb', line 89

# Calls #remove on every entry and returns the receiver. The entries are
# first copied into an Array via #to_a, so iteration runs over that snapshot
# rather than over the set itself.
#
# @return [self]
def remove
  tap { to_a.each(&:remove) }
end

#remove_attr(name) ⇒ self Also known as: remove_attribute

Remove the named attribute from every node in the set.

Returns:

  • (self)


81
82
83
84
# File 'lib/makiri/node_set.rb', line 81

# Calls +delete(name)+ on every entry yielded by #each, then returns the
# receiver.
#
# @param name [String] attribute name, forwarded unchanged to each node's #delete
# @return [self]
def remove_attr(name)
  tap do |set|
    set.each { |node| node.delete(name) }
  end
end

#search(path) ⇒ Makiri::NodeSet

CSS- or XPath-detecting query against every node (see Makiri::Node#search).

Returns:



75
76
77
# File 'lib/makiri/node_set.rb', line 75

# Queries the set by delegating to union_query with the :search query kind.
#
# @param path [String] CSS selector or XPath expression, forwarded unchanged
# @return [Makiri::NodeSet] the result of union_query(:search, path)
def search(path)
  union_query(:search, path)
end

#size ⇒ Integer

Returns:

  • (Integer)


9
10
11
# File 'lib/makiri/node_set.rb', line 9

# Returns the same value as #length.
#
# @return [Integer] number of entries, as reported by #length
def size
  length
end

#text ⇒ String Also known as: inner_text

Concatenated text content of every node in the set.

Returns:

  • (String)


43
44
45
# File 'lib/makiri/node_set.rb', line 43

# Collects #text from every entry, in iteration order, and joins the pieces
# with the default separator of Array#join.
#
# @return [String] the concatenated text
def text
  fragments = map(&:text)
  fragments.join
end

#to_html ⇒ String Also known as: to_s

Concatenated outer HTML of every node in the set.

Returns:

  • (String)


36
37
38
# File 'lib/makiri/node_set.rb', line 36

# Collects #to_html from every entry, in iteration order, and joins the
# pieces with the default separator of Array#join.
#
# @return [String] the concatenated markup
def to_html
  fragments = map(&:to_html)
  fragments.join
end

#xpath(expr) ⇒ Makiri::NodeSet

Run an XPath expression against every node and union the node-set results.

Returns:



56
57
58
# File 'lib/makiri/node_set.rb', line 56

# Queries the set with an XPath expression by delegating to union_query with
# the :xpath query kind.
#
# NOTE(review): #at_xpath guards against a non-NodeSet result from this
# method, so union_query(:xpath, ...) may return a scalar for some
# expressions -- confirm against union_query.
#
# @param expr [String] XPath expression, forwarded unchanged
# @return [Makiri::NodeSet] the result of union_query(:xpath, expr)
def xpath(expr)
  union_query(:xpath, expr)
end

#|(other) ⇒ Object

self | other -> union (deduped).



298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'ext/makiri/glue/ruby_node_set.c', line 298

static VALUE
mkr_node_set_op_or(VALUE self, VALUE other)
{
    mkr_node_set_data_t *s = mkr_node_set_get(self);
    mkr_node_set_data_t *o = mkr_node_set_other(s, other);
    VALUE result = mkr_node_set_new(s->document);
    mkr_node_set_data_t *r = mkr_node_set_get(result);

    mkr_ptrset_t seen = { NULL, 0 };
    if (s->count + o->count > MKR_NODE_SET_HASH_MIN) {
        mkr_ptrset_init(&seen, s->count + o->count);
    }
    mkr_node_set_data_t *srcs[2] = { s, o };
    for (int k = 0; k < 2; k++) {
        for (size_t i = 0; i < srcs[k]->count; i++) {
            mkr_raw_node_t *n = srcs[k]->nodes[i];
            int fresh = seen.cap ? mkr_ptrset_add(&seen, n)
                                 : !mkr_node_set_member(r, n);
            if (fresh) mkr_node_set_push(result, n);
        }
    }
    mkr_ptrset_free(&seen);
    return result;
}