Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- (unknown)
Instance Method Summary collapse
- #bit_and(other) ⇒ Object
- #bit_and!(other) ⇒ Object
-
#bit_at(*args) ⇒ Object
String#bit_at(n, lsb_first: true) -> true, false, or nil.
- #bit_count ⇒ Object
-
#bit_fields(*args) ⇒ Object
String#bit_fields(*bitlens, lsb_first: true) -> Array String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self.
- #bit_not ⇒ Object
- #bit_not! ⇒ Object
- #bit_or(other) ⇒ Object
- #bit_or!(other) ⇒ Object
-
#bit_run_count(*args) ⇒ Object
String#bit_run_count(pos, bit) -> Integer | nil.
-
#bit_runs(*args) ⇒ Object
String#bit_runs(lsb_first: true) -> Array String#bit_runs(lsb_first: true) { |bit, len| } -> self.
-
#bit_slice(*args) ⇒ Object
String#bit_slice(bit_offset, bit_length) -> String String#bit_slice(range) -> String.
-
#bit_splice(*args) ⇒ Object
String#bit_splice(bit_index, bit_length, str) -> self String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self String#bit_splice(range, str) -> self String#bit_splice(range, str, str_range) -> self.
- #bit_xor(other) ⇒ Object
- #bit_xor!(other) ⇒ Object
- #bits(*args) ⇒ Object
- #clear_bit(*args) ⇒ Object
-
#each_bit(*args) ⇒ Object
iterate bits ————————————————————.
-
#each_bit_field(*args) ⇒ Object
String#each_bit_field(*bitlens, lsb_first: true) -> self String#each_bit_field(*bitlens, lsb_first: true) -> Enumerator.
-
#each_bit_run(*args) ⇒ Object
String#each_bit_run(lsb_first: true) { |bit, len| } -> self String#each_bit_run(lsb_first: true) -> Enumerator.
-
#each_set_bit_offset(*args) ⇒ Object
iterate set-bit positions ———————————————–.
- #flip_bit(*args) ⇒ Object
- #set_bit(*args) ⇒ Object
- #set_bit_offsets(*args) ⇒ Object
Instance Method Details
#bit_and(other) ⇒ Object
926 927 928 929 930 931 932 933 934 935 936 937 |
# File 'ext/string_bits/string_bits.c', line 926
/*
 * String#bit_and(other) -> new_string
 *
 * Builds a new string in which every byte is the AND of the corresponding
 * bytes of self and other.  Operand validation is delegated to
 * check_binary_op_lengths and the result buffer comes from alloc_result
 * (both defined elsewhere in this file; presumably they enforce equal byte
 * lengths and size the result like self -- confirm against their bodies).
 */
static VALUE
rb_str_bit_and(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);

    const ssize_t nbytes = RSTRING_LEN(self);
    VALUE out = alloc_result(self);
    const unsigned char *lhs = (const unsigned char *)RSTRING_PTR(self);
    const unsigned char *rhs = (const unsigned char *)RSTRING_PTR(other);
    unsigned char *cursor = (unsigned char *)RSTRING_PTR(out);
    const unsigned char *const lhs_end = lhs + nbytes;

    /* Walk both operands in lock step, writing the combined byte. */
    while (lhs < lhs_end) {
        *cursor++ = (unsigned char)(*lhs++ & *rhs++);
    }
    return out;
}
|
#bit_and!(other) ⇒ Object
939 940 941 942 943 944 945 946 947 948 949 |
# File 'ext/string_bits/string_bits.c', line 939
/*
 * String#bit_and!(other) -> self
 *
 * In-place variant: ANDs every byte of self with the corresponding byte of
 * other.  rb_str_modify is called before the buffer pointer is taken so
 * that self is writable when we store into it.
 */
static VALUE
rb_str_bit_and_bang(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);
    rb_str_modify(self);

    ssize_t remaining = RSTRING_LEN(self);
    unsigned char *target = (unsigned char *)RSTRING_PTR(self);
    const unsigned char *mask = (const unsigned char *)RSTRING_PTR(other);

    for (; remaining > 0; remaining--, target++, mask++) {
        *target = (unsigned char)(*target & *mask);
    }
    return self;
}
|
#bit_at(*args) ⇒ Object
String#bit_at(n, lsb_first: true) -> true, false, or nil
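bit_at uses the flat (Arrow-style) convention: byte_index = n / 8 counted from the start of the string, and the bit within that byte is n % 8 counted from the LSB. For example, in "\xAA\xCC", bits 0..7 live in byte 0xAA and bits 8..15 live in byte 0xCC. An index at or beyond bytesize * 8 returns nil.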
str = "\xFF\xAA" # 11111111 10101010
str.bit_at(0) # => true (1st bit is set)
str.bit_at(7) # => true (8th bit is set)
str.bit_at(8) # => false (9th bit is clear)
str.bit_at(9) # => true (10th bit is set)
str.bit_at(16) # => nil
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 |
# File 'ext/string_bits/string_bits.c', line 351
/*
 * String#bit_at(n, lsb_first: true) -> true, false, or nil
 *
 * Tests a single bit of the receiver.
 *
 * Raises TypeError when n is not an Integer and ArgumentError when the
 * converted index is negative.  Returns nil when the index is at or past
 * bytesize * 8.  With lsb_first: false the bit position inside the byte is
 * mirrored (bit k becomes bit 7 - k) before the lookup.
 */
static VALUE
rb_str_bit_at(int argc, VALUE *argv, VALUE self)
{
    VALUE index_arg, kwargs;

    rb_scan_args(argc, argv, "1:", &index_arg, &kwargs);
    validate_option_hash(kwargs, SB_KW_LSB_FIRST);

    if (!rb_integer_type_p(index_arg)) {
        rb_raise(rb_eTypeError, "bit index must be an integer");
    }

    ssize_t bit_no = integer_to_bit_idx(index_arg);
    if (bit_no < 0) {
        rb_raise(rb_eArgError, "bit index must be non-negative");
    }

    const ssize_t nbits = RSTRING_LEN(self) * 8;
    if (bit_no >= nbits) {
        return Qnil;
    }

    if (!parse_lsb_first_opt(kwargs)) {
        /* Flipping the low three bits maps k -> 7 - k within the byte and
         * leaves the byte index untouched (bit_no is non-negative here). */
        bit_no ^= 7;
    }

    return test_bit(RSTRING_PTR(self), bit_no) ? Qtrue : Qfalse;
}
|
#bit_count ⇒ Object
383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 |
# File 'ext/string_bits/string_bits.c', line 383
/*
 * String#bit_count -> Integer
 *
 * Counts the set bits in the receiver's bytes.  The buffer is consumed in
 * three phases: 32-byte groups, then single 8-byte words, then a tail of
 * fewer than eight bytes that is packed into one zero-padded word.
 */
static VALUE
rb_str_bit_count(VALUE self)
{
    const ssize_t nbytes = RSTRING_LEN(self);
    const char *bytes = RSTRING_PTR(self);
    const ssize_t group_limit = nbytes - (nbytes % 32);
    const ssize_t word_limit = nbytes - (nbytes % 8);
    ssize_t total = 0;
    ssize_t pos = 0;

    /* memcpy is used instead of pointer casts so the loads are legal on
     * strict-alignment targets and do not violate strict aliasing. */
    while (pos < group_limit) {
        uint64_t lanes[4];
        memcpy(lanes, bytes + pos, sizeof lanes);
        total += sb_popcount64(lanes[0]);
        total += sb_popcount64(lanes[1]);
        total += sb_popcount64(lanes[2]);
        total += sb_popcount64(lanes[3]);
        pos += 32;
    }

    while (pos < word_limit) {
        uint64_t word;
        memcpy(&word, bytes + pos, sizeof word);
        total += sb_popcount64(word);
        pos += 8;
    }

    /* pos == word_limit here; at most seven bytes are left. */
    if (nbytes > word_limit) {
        const unsigned char *rest = (const unsigned char *)(bytes + word_limit);
        const ssize_t rest_len = nbytes - word_limit;
        uint64_t packed = 0;

        for (ssize_t k = 0; k < rest_len; k++) {
            packed |= (uint64_t)rest[k] << (k * 8);
        }
        total += sb_popcount64(packed);
    }

    return SSIZET2NUM(total);
}
|
#bit_fields(*args) ⇒ Object
String#bit_fields(*bitlens, lsb_first: true) -> Array String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self
Non-iterator complement of each_bit_field. Without a block, returns an Array of all extracted records. With a single bitlen the array is flat (matching each_bit_field(n).to_a); with multiple bitlens each record is itself an Array (matching each_bit_field(a, b, …).to_a).
With a block, behaves identically to each_bit_field called without the with: keyword — it yields one Integer per field and returns self.
Porting to Ruby Core:
1. Move alongside each_bit_field in string.c.
2. Share extract_uint64 and the bitlen validation logic.
3. Register with rb_define_method in Init_String().
1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 |
# File 'ext/string_bits/string_bits.c', line 1144
/*
 * String#bit_fields(*bitlens, lsb_first: true) -> Array
 * String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self
 *
 * Splits the receiver into consecutive records of sum(bitlens) bits and
 * extracts one unsigned Integer per field via extract_uint64 (defined
 * elsewhere in this file).
 *
 * Without a block: returns an Array of records (flat when there is exactly
 * one bitlen, otherwise one inner Array per record).
 * With a block: yields the fields of each record and returns self.
 *
 * Raises ArgumentError when no bitlen is given or a bitlen is outside
 * 1..64, and TypeError when a bitlen is not an Integer.  Trailing bits that
 * do not fill a whole record are ignored.
 */
static VALUE
rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
{
    VALUE rest, opts;
    rb_scan_args(argc, argv, "*:", &rest, &opts);
    validate_option_hash(opts, SB_KW_LSB_FIRST);

    ssize_t num_fields = RARRAY_LEN(rest);
    if (num_fields == 0) {
        rb_raise(rb_eArgError, "wrong number of arguments (given 0, expected 1+)");
    }

    /* The field count is caller-controlled (splat), so use ALLOCV_N: it
     * stays on the stack for small sizes and moves to a GC-managed temporary
     * buffer for large ones instead of overflowing the machine stack. */
    VALUE bitlens_holder, vals_holder;
    ssize_t *bitlens = ALLOCV_N(ssize_t, bitlens_holder, num_fields);
    ssize_t step = 0;
    for (ssize_t f = 0; f < num_fields; f++) {
        VALUE v = RARRAY_AREF(rest, f);
        if (!rb_integer_type_p(v)) {
            rb_raise(rb_eTypeError, "bitlen must be an integer");
        }
        ssize_t bl = NUM2SSIZET(v);
        if (bl <= 0) {
            rb_raise(rb_eArgError, "bitlen must be positive");
        }
        if (bl > 64) {
            rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
        }
        bitlens[f] = bl;
        step += bl;
    }

    int lsb_first = parse_lsb_first_opt(opts);
    ssize_t iterations = (RSTRING_LEN(self) * 8) / step;
    int have_block = rb_block_given_p();
    VALUE result = have_block ? Qnil : rb_ary_new_capa(iterations);
    VALUE *field_vals = ALLOCV_N(VALUE, vals_holder, num_fields);

    for (ssize_t iter = 0; iter < iterations; iter++) {
        /* A yielded block may have mutated self.  Re-read BOTH the pointer
         * and the length every record; stop as soon as the live string can
         * no longer supply a complete record, so extract_uint64 is never
         * handed a length larger than the current buffer. */
        ssize_t src_len = RSTRING_LEN(self);
        ssize_t field_bit = iter * step;
        if (field_bit + step > src_len * 8) {
            break;
        }
        const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);

        for (ssize_t f = 0; f < num_fields; f++) {
            uint64_t val = extract_uint64(src, src_len, field_bit, bitlens[f], lsb_first);
            field_vals[f] = ULL2NUM(val);
            field_bit += bitlens[f];
        }

        if (have_block) {
            rb_yield_values2((int)num_fields, field_vals);
        } else if (num_fields == 1) {
            rb_ary_push(result, field_vals[0]);
        } else {
            rb_ary_push(result, rb_ary_new_from_values(num_fields, field_vals));
        }
    }

    ALLOCV_END(vals_holder);
    ALLOCV_END(bitlens_holder);
    return have_block ? self : result;
}
|
#bit_not ⇒ Object
905 906 907 908 909 910 911 912 913 914 |
# File 'ext/string_bits/string_bits.c', line 905
/*
 * String#bit_not -> new_string
 *
 * Returns a new string (allocated by alloc_result, defined elsewhere) whose
 * bytes are the one's complement of the receiver's bytes.
 */
static VALUE
rb_str_bit_not(VALUE self)
{
    const ssize_t nbytes = RSTRING_LEN(self);
    VALUE out = alloc_result(self);
    const unsigned char *in = (const unsigned char *)RSTRING_PTR(self);
    unsigned char *cursor = (unsigned char *)RSTRING_PTR(out);
    const unsigned char *const in_end = in + nbytes;

    while (in < in_end) {
        *cursor++ = (unsigned char)~*in++;
    }
    return out;
}
|
#bit_not! ⇒ Object
916 917 918 919 920 921 922 923 924 |
# File 'ext/string_bits/string_bits.c', line 916
/*
 * String#bit_not! -> self
 *
 * Complements every byte of the receiver in place.  rb_str_modify runs
 * first so the buffer is writable before it is touched.
 */
static VALUE
rb_str_bit_not_bang(VALUE self)
{
    rb_str_modify(self);

    ssize_t remaining = RSTRING_LEN(self);
    unsigned char *cursor = (unsigned char *)RSTRING_PTR(self);

    for (; remaining > 0; remaining--, cursor++) {
        *cursor = (unsigned char)~*cursor;
    }
    return self;
}
|
#bit_or(other) ⇒ Object
951 952 953 954 955 956 957 958 959 960 961 962 |
# File 'ext/string_bits/string_bits.c', line 951
/*
 * String#bit_or(other) -> new_string
 *
 * Builds a new string in which every byte is the OR of the corresponding
 * bytes of self and other.  Operand validation is delegated to
 * check_binary_op_lengths; the result buffer comes from alloc_result (both
 * defined elsewhere in this file).
 */
static VALUE
rb_str_bit_or(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);

    const ssize_t nbytes = RSTRING_LEN(self);
    VALUE out = alloc_result(self);
    const unsigned char *lhs = (const unsigned char *)RSTRING_PTR(self);
    const unsigned char *rhs = (const unsigned char *)RSTRING_PTR(other);
    unsigned char *cursor = (unsigned char *)RSTRING_PTR(out);
    const unsigned char *const lhs_end = lhs + nbytes;

    while (lhs < lhs_end) {
        *cursor++ = (unsigned char)(*lhs++ | *rhs++);
    }
    return out;
}
|
#bit_or!(other) ⇒ Object
964 965 966 967 968 969 970 971 972 973 974 |
# File 'ext/string_bits/string_bits.c', line 964
/*
 * String#bit_or!(other) -> self
 *
 * In-place variant: ORs every byte of self with the corresponding byte of
 * other.  rb_str_modify is called before the buffer pointer is taken.
 */
static VALUE
rb_str_bit_or_bang(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);
    rb_str_modify(self);

    ssize_t remaining = RSTRING_LEN(self);
    unsigned char *target = (unsigned char *)RSTRING_PTR(self);
    const unsigned char *mask = (const unsigned char *)RSTRING_PTR(other);

    for (; remaining > 0; remaining--, target++, mask++) {
        *target = (unsigned char)(*target | *mask);
    }
    return self;
}
|
#bit_run_count(*args) ⇒ Object
String#bit_run_count(pos, bit) -> Integer | nil
Returns the length of the consecutive run of ‘bit` starting at flat position `pos`. Returns nil when `pos` is out of range or the bit at `pos` does not equal `bit`.
‘bit` accepts 0, 1, false, or true (false/true are aliases for 0/1, matching the values yielded by each_bit_run).
Counts forward from ‘pos` toward higher bit indices.
Inspired by Gauche Scheme’s (bitvector-count-run bit bvec i).
Uses the same flat LSB-first addressing as bit_at by default: byte pos / 8, bit pos % 8 within that byte.
Porting to Ruby Core:
1. Move to string.c; register in Init_String().
2. Reuse integer_to_bit_idx for consistent Bignum handling.
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 |
# File 'ext/string_bits/string_bits.c', line 1294
/*
 * String#bit_run_count(pos, bit) -> Integer | nil
 *
 * Measures the run of consecutive bits equal to `bit` that starts at
 * position `pos` and extends toward higher positions.
 *
 * Returns nil when pos is negative or past the last bit, or when the bit
 * at pos differs from `bit`.  Raises TypeError when pos is not an Integer
 * and ArgumentError when bit is not one of 0, 1, false, true.
 *
 * In the default LSB-first mode the count is delegated to count_run_lsb;
 * otherwise bits are probed one at a time through logical_get_bit (both
 * helpers are defined elsewhere in this file).
 */
static VALUE
rb_str_bit_run_count(int argc, VALUE *argv, VALUE self)
{
    VALUE pos_arg, bit_arg, kwargs;

    rb_scan_args(argc, argv, "20:", &pos_arg, &bit_arg, &kwargs);
    validate_option_hash(kwargs, SB_KW_LSB_FIRST);
    const int lsb_first = parse_lsb_first_opt(kwargs);

    if (!rb_integer_type_p(pos_arg)) {
        rb_raise(rb_eTypeError, "position must be an integer");
    }

    /* Identity comparison only: exactly true/1 or false/0 are accepted. */
    const int wants_one = (bit_arg == Qtrue || bit_arg == INT2FIX(1));
    const int wants_zero = (bit_arg == Qfalse || bit_arg == INT2FIX(0));
    if (!wants_one && !wants_zero) {
        rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
    }
    const int want = wants_one ? 1 : 0;

    const ssize_t start = integer_to_bit_idx(pos_arg);
    const ssize_t nbytes = RSTRING_LEN(self);
    const ssize_t nbits = nbytes * 8;
    if (start < 0 || start >= nbits) {
        return Qnil;
    }

    const unsigned char *bytes = (const unsigned char *)RSTRING_PTR(self);

    if (lsb_first) {
        const int first = (bytes[start >> 3] >> (start & 7)) & 1;
        if (first != want) {
            return Qnil;
        }
        return SSIZET2NUM(count_run_lsb(bytes, nbytes, start, want));
    }

    if (logical_get_bit(bytes, start, 0) != want) {
        return Qnil;
    }

    /* `stop` is the first position after the run. */
    ssize_t stop = start + 1;
    while (stop < nbits && logical_get_bit(bytes, stop, 0) == want) {
        stop++;
    }
    return SSIZET2NUM(stop - start);
}
|
#bit_runs(*args) ⇒ Object
String#bit_runs(lsb_first: true) -> Array String#bit_runs(lsb_first: true) { |bit, len| } -> self
Non-iterator complement of each_bit_run. Without a block, collects all (bit, run_length) pairs into an Array and returns it. With a block, yields each pair and returns self.
Follows the same pattern as String#bytes vs String#each_byte.
Porting to Ruby Core:
1. Move to string.c alongside each_bit_run; register in Init_String().
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 |
# File 'ext/string_bits/string_bits.c', line 1401
/*
 * String#bit_runs(lsb_first: true) -> Array
 * String#bit_runs(lsb_first: true) { |bit, len| } -> self
 *
 * Walks the receiver as a sequence of runs of identical bits.  Without a
 * block, returns an Array of [bit, run_length] pairs (bit is true/false);
 * with a block, yields each pair and returns self.  An empty receiver
 * produces an empty Array (or self when a block is given).
 *
 * LSB-first runs are measured by count_run_lsb; otherwise bits are probed
 * individually with logical_get_bit (both defined elsewhere in this file).
 */
static VALUE
rb_str_bit_runs(int argc, VALUE *argv, VALUE self)
{
    int lsb_first = parse_lsb_first(argc, argv);
    /* Upper bound for the walk: bits appended by a block are not visited. */
    const ssize_t initial_len = RSTRING_LEN(self);
    int have_block = rb_block_given_p();
    VALUE result = have_block ? Qnil : rb_ary_new();
    ssize_t pos = 0;

    for (;;) {
        /* A yielded block may have shrunk or reallocated self, so both the
         * length and the pointer are re-read for every run.  Using the
         * stale length here would let the run scanner read past the end
         * of the live buffer. */
        ssize_t src_len = RSTRING_LEN(self);
        if (src_len > initial_len) {
            src_len = initial_len;
        }
        const ssize_t total_bits = src_len * 8;
        if (pos >= total_bits) {
            break;
        }
        const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);

        int bit;
        ssize_t run;
        if (lsb_first) {
            bit = (src[pos >> 3] >> (pos & 7)) & 1;
            run = count_run_lsb(src, src_len, pos, bit);
        } else {
            bit = logical_get_bit(src, pos, 0);
            run = 1;
            while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
                run++;
            }
        }

        VALUE bval = bit ? Qtrue : Qfalse;
        VALUE lval = SSIZET2NUM(run);
        if (have_block) {
            rb_yield_values(2, bval, lval);
        } else {
            rb_ary_push(result, rb_assoc_new(bval, lval));
        }
        pos += run;
    }

    return have_block ? self : result;
}
|
#bit_slice(*args) ⇒ Object
String#bit_slice(bit_offset, bit_length) -> String String#bit_slice(range) -> String
str = "\xFF\x00" # 11111111 00000000
str.bit_slice(4, 8) # => "\xF0" (11110000)
721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 |
# File 'ext/string_bits/string_bits.c', line 721
/*
 * String#bit_slice(bit_offset, bit_length) -> String or nil
 * String#bit_slice(range)                  -> String or nil
 *
 * Copies a run of bits out of the receiver into a new string of
 * ceil(length / 8) bytes; unused high bits of the last byte are zero.
 *
 * Returns nil when: the single argument is not a Range, either integer
 * argument is not an Integer or is negative, the range is rejected by
 * sb_range_beg_len, or the offset lies past the end of the string.
 * A length reaching past the end is clamped to the available bits.
 *
 * The result (including the empty result for a zero-length slice) carries
 * the receiver's encoding.
 */
static VALUE
rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
{
    ssize_t src_len = RSTRING_LEN(self);
    ssize_t total_bits = src_len * 8;
    ssize_t offset, length;
    VALUE v0, v1, opts;

    int n_pos = rb_scan_args(argc, argv, "11:", &v0, &v1, &opts);
    validate_option_hash(opts, SB_KW_LSB_FIRST);
    int lsb_first = parse_lsb_first_opt(opts);

    if (n_pos == 1 && rb_obj_is_kind_of(v0, rb_cRange)) {
        ssize_t beg, len;
        if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0))) {
            return Qnil;
        }
        offset = beg;
        length = len;
    }
    else if (n_pos == 2) {
        if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
            return Qnil;
        }
        offset = integer_to_bit_idx(v0);
        length = integer_to_bit_idx(v1);
        if (offset < 0 || length < 0) return Qnil;
    }
    else if (n_pos == 1) {
        return Qnil;
    }
    else {
        rb_raise(rb_eArgError,
                 "wrong number of arguments (given %d, expected 1 or 2)", n_pos);
    }

    if (offset > total_bits) return Qnil;
    ssize_t available = total_bits - offset;
    if (length > available) length = available;

    if (length == 0) {
        /* Keep the empty result consistent with non-empty ones, which are
         * tagged with the receiver's encoding below. */
        VALUE empty = rb_str_new("", 0);
        rb_enc_associate(empty, rb_enc_get(self));
        return empty;
    }

    ssize_t out_bytes = (length + 7) / 8;
    VALUE result = rb_str_buf_new(out_bytes);
    rb_str_resize(result, out_bytes);
    rb_enc_associate(result, rb_enc_get(self));

    unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
    const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
    /* Start from all-zero so padding bits in the final byte are clear. */
    memset(dst, 0, out_bytes);

    if (lsb_first) {
        bit_copy_core(dst, 0, src, src_len, offset, length);
    } else {
        /* MSB-first: logical bit k of a byte is physical bit 7 - k.  Walk
         * the source one byte at a time, convert the logical sub-range that
         * falls inside that byte into its physical range, and copy that
         * chunk to the next free destination position. */
        const ssize_t last_logical = offset + length - 1;
        ssize_t dst_bit = 0;
        ssize_t start_byte = offset >> 3;
        ssize_t end_byte = last_logical >> 3;
        for (ssize_t b = start_byte; b <= end_byte; b++) {
            ssize_t b_start_l = b << 3;
            ssize_t b_end_l = b_start_l + 7;
            ssize_t l_min = (offset > b_start_l) ? offset : b_start_l;
            ssize_t l_max = (last_logical < b_end_l) ? last_logical : b_end_l;
            ssize_t p_min = b_start_l + (7 - (l_max & 7L));
            ssize_t p_max = b_start_l + (7 - (l_min & 7L));
            ssize_t chunk_len = p_max - p_min + 1;
            bit_copy_core(dst, dst_bit, src, src_len, p_min, chunk_len);
            dst_bit += chunk_len;
        }
    }
    return result;
}
|
#bit_splice(*args) ⇒ Object
String#bit_splice(bit_index, bit_length, str) -> self String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self String#bit_splice(range, str) -> self String#bit_splice(range, str, str_range) -> self
Writes bits from str into self at bit-level granularity. The inverse of bit_slice: where bit_slice reads a sub-sequence of bits, bit_splice writes one.
The destination and source bit lengths must be equal; bit_splice does not resize self (sub-byte resize is undefined). This mirrors the constraint that bytesplice imposes when the replacement has the same byte length.
Negative indices count backward from the end, exactly as in bytesplice. Returns self.
Porting to Ruby Core:
1. Move to string.c; register in Init_String().
2. Use rb_str_modify_expand if resize support is ever added.
3. bit_copy_core moves with it; share ebs_extract with bit_slice.
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 |
# File 'ext/string_bits/string_bits.c', line 1465
/*
 * String#bit_splice(bit_index, bit_length, str) -> self
 * String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self
 * String#bit_splice(range, str) -> self
 * String#bit_splice(range, str, str_range) -> self
 *
 * Overwrites a run of bits in the receiver with an equally long run of
 * bits taken from str.  The receiver is never resized.
 *
 * Negative integer offsets are taken relative to the end of the respective
 * string.  Range arguments are resolved by sb_range_beg_len (defined
 * elsewhere in this file).
 *
 * Raises:
 *   TypeError     - non-Integer index/length, non-String source, or a
 *                   non-Range third argument in the range form
 *   ArgumentError - unsupported argument count, Integer source, or
 *                   destination/source bit lengths that differ
 *   IndexError    - destination or source bit range out of bounds
 */
static VALUE
rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
{
    ssize_t dst_bit_off, dst_bit_len;
    ssize_t src_bit_off, src_bit_len;
    VALUE str;
    ssize_t dst_total = RSTRING_LEN(self) * 8;

    VALUE v0, v1, v2, v3, v4, opts;
    int n_pos = rb_scan_args(argc, argv, "23:", &v0, &v1, &v2, &v3, &v4, &opts);
    validate_option_hash(opts, SB_KW_LSB_FIRST);
    int lsb_first = parse_lsb_first_opt(opts);

    if (n_pos == 2 && rb_obj_is_kind_of(v0, rb_cRange)) {
        /* bit_splice(range, str) */
        ssize_t beg, len;
        sb_range_beg_len(v0, &beg, &len, dst_total, 1);
        dst_bit_off = beg;
        dst_bit_len = len;
        str = v1;
        Check_Type(str, T_STRING);
        src_bit_off = 0;
        src_bit_len = dst_bit_len;
    }
    else if (n_pos == 3 && rb_obj_is_kind_of(v0, rb_cRange)) {
        /* bit_splice(range, str, str_range) */
        ssize_t beg, len;
        sb_range_beg_len(v0, &beg, &len, dst_total, 1);
        dst_bit_off = beg;
        dst_bit_len = len;
        str = v1;
        Check_Type(str, T_STRING);
        if (!rb_obj_is_kind_of(v2, rb_cRange)) {
            rb_raise(rb_eTypeError, "third argument must be a Range");
        }
        ssize_t src_total = RSTRING_LEN(str) * 8;
        sb_range_beg_len(v2, &beg, &len, src_total, 1);
        src_bit_off = beg;
        src_bit_len = len;
    }
    else if (n_pos == 3) {
        /* bit_splice(bit_index, bit_length, str) */
        if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
            rb_raise(rb_eTypeError, "bit index and length must be integers");
        }
        dst_bit_off = integer_to_bit_idx(v0);
        dst_bit_len = integer_to_bit_idx(v1);
        if (dst_bit_off < 0) dst_bit_off += dst_total;
        /*
         * Integer source support was prototyped here, but it is intentionally
         * disabled in the current proposal to keep the public API limited to
         * String-to-String splicing.
         */
        if (rb_integer_type_p(v2)) {
            rb_raise(rb_eArgError,
                     "bit_splice source must be a String in the current proposal");
        }
        str = v2;
        Check_Type(str, T_STRING);
        src_bit_off = 0;
        src_bit_len = dst_bit_len;
    }
    else if (n_pos == 5) {
        /* bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) */
        if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) ||
            !rb_integer_type_p(v3) || !rb_integer_type_p(v4)) {
            rb_raise(rb_eTypeError, "bit indices and lengths must be integers");
        }
        dst_bit_off = integer_to_bit_idx(v0);
        dst_bit_len = integer_to_bit_idx(v1);
        if (dst_bit_off < 0) dst_bit_off += dst_total;
        str = v2;
        Check_Type(str, T_STRING);
        ssize_t src_total = RSTRING_LEN(str) * 8;
        src_bit_off = integer_to_bit_idx(v3);
        src_bit_len = integer_to_bit_idx(v4);
        if (src_bit_off < 0) src_bit_off += src_total;
    }
    else {
        rb_raise(rb_eArgError,
                 "wrong number of arguments (given %d, expected 2, 3, or 5)", n_pos);
    }

    /* Bounds checks are written as "len > total - off" rather than
     * "off + len > total": the sum of two caller-supplied ssize_t values
     * can overflow (undefined behaviour) and wrap to a small or negative
     * number, which would let an enormous length slip past validation and
     * reach bit_copy_core.  The subtraction cannot overflow once
     * 0 <= off <= total has been established. */
    if (dst_bit_off < 0 || dst_bit_len < 0 ||
        dst_bit_off > dst_total || dst_bit_len > dst_total - dst_bit_off) {
        rb_raise(rb_eIndexError,
                 "bit_splice: destination range [%ld, %ld] out of bounds (total %ld bits)",
                 dst_bit_off, dst_bit_len, dst_total);
    }

    ssize_t src_total_bits = RSTRING_LEN(str) * 8;
    if (src_bit_off < 0 || src_bit_len < 0 ||
        src_bit_off > src_total_bits || src_bit_len > src_total_bits - src_bit_off) {
        rb_raise(rb_eIndexError,
                 "bit_splice: source range [%ld, %ld] out of bounds (total %ld bits)",
                 src_bit_off, src_bit_len, src_total_bits);
    }

    if (dst_bit_len != src_bit_len) {
        rb_raise(rb_eArgError,
                 "bit_splice: destination length (%ld) must equal source length (%ld)",
                 dst_bit_len, src_bit_len);
    }

    if (dst_bit_len == 0) return self;

    /* Guard against self-aliasing: duplicate src before modifying self */
    VALUE src_str = (str == self) ? rb_str_dup(str) : str;

    rb_str_modify(self);
    unsigned char *dst = (unsigned char *)RSTRING_PTR(self);
    const unsigned char *src = (const unsigned char *)RSTRING_PTR(src_str);
    ssize_t src_len_bytes = RSTRING_LEN(src_str);

    if (lsb_first) {
        bit_copy_core(dst, dst_bit_off, src, src_len_bytes, src_bit_off, dst_bit_len);
    } else {
        /* MSB-first: for every destination byte touched, translate the
         * logical bit sub-range inside that byte into its physical range
         * (logical k <-> physical 7 - k) and fill it with the next
         * chunk_len source bits. */
        const ssize_t dst_last = dst_bit_off + dst_bit_len - 1;
        ssize_t current_src_bit = src_bit_off;
        ssize_t start_byte = dst_bit_off >> 3;
        ssize_t end_byte = dst_last >> 3;
        for (ssize_t b = start_byte; b <= end_byte; b++) {
            ssize_t b_start_l = b << 3;
            ssize_t b_end_l = b_start_l + 7;
            ssize_t l_min = (dst_bit_off > b_start_l) ? dst_bit_off : b_start_l;
            ssize_t l_max = (dst_last < b_end_l) ? dst_last : b_end_l;
            ssize_t p_min = b_start_l + (7 - (l_max & 7L));
            ssize_t p_max = b_start_l + (7 - (l_min & 7L));
            ssize_t chunk_len = p_max - p_min + 1;
            bit_copy_core(dst, p_min, src, src_len_bytes, current_src_bit, chunk_len);
            current_src_bit += chunk_len;
        }
    }

    /* Keep the (possibly duplicated) source alive while `src` is in use. */
    RB_GC_GUARD(src_str);
    return self;
}
|
#bit_xor(other) ⇒ Object
976 977 978 979 980 981 982 983 984 985 986 987 |
# File 'ext/string_bits/string_bits.c', line 976
/*
 * String#bit_xor(other) -> new_string
 *
 * Builds a new string in which every byte is the XOR of the corresponding
 * bytes of self and other.  Operand validation is delegated to
 * check_binary_op_lengths; the result buffer comes from alloc_result (both
 * defined elsewhere in this file).
 */
static VALUE
rb_str_bit_xor(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);

    const ssize_t nbytes = RSTRING_LEN(self);
    VALUE out = alloc_result(self);
    const unsigned char *lhs = (const unsigned char *)RSTRING_PTR(self);
    const unsigned char *rhs = (const unsigned char *)RSTRING_PTR(other);
    unsigned char *cursor = (unsigned char *)RSTRING_PTR(out);
    const unsigned char *const lhs_end = lhs + nbytes;

    while (lhs < lhs_end) {
        *cursor++ = (unsigned char)(*lhs++ ^ *rhs++);
    }
    return out;
}
|
#bit_xor!(other) ⇒ Object
989 990 991 992 993 994 995 996 997 998 999 |
# File 'ext/string_bits/string_bits.c', line 989
/*
 * String#bit_xor!(other) -> self
 *
 * In-place variant: XORs every byte of self with the corresponding byte of
 * other.  rb_str_modify is called before the buffer pointer is taken.
 */
static VALUE
rb_str_bit_xor_bang(VALUE self, VALUE other)
{
    check_binary_op_lengths(self, other);
    rb_str_modify(self);

    ssize_t remaining = RSTRING_LEN(self);
    unsigned char *target = (unsigned char *)RSTRING_PTR(self);
    const unsigned char *mask = (const unsigned char *)RSTRING_PTR(other);

    for (; remaining > 0; remaining--, target++, mask++) {
        *target = (unsigned char)(*target ^ *mask);
    }
    return self;
}
|
#bits(*args) ⇒ Object
454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 |
# File 'ext/string_bits/string_bits.c', line 454
/*
 * String#bits(lsb_first: true) -> Array
 * String#bits(lsb_first: true) { |bit| } -> self
 *
 * Produces every bit of the receiver as true/false, byte by byte.  Within
 * a byte, bits are visited 0..7 when lsb_first is true and 7..0 otherwise.
 * Without a block the bits are collected into an Array; with a block each
 * bit is yielded and self is returned.
 */
static VALUE
rb_str_bits(int argc, VALUE *argv, VALUE self)
{
    int lsb_first = parse_lsb_first(argc, argv);
    /* Snapshot of the length: bytes appended by a block are not visited. */
    const ssize_t len = RSTRING_LEN(self);
    int have_block = rb_block_given_p();
    VALUE ary = have_block ? Qnil : rb_ary_new_capa(len * 8);

    /* The buffer pointer and live length are re-read for every byte: a
     * yielded block may shrink or reallocate self, which would turn a
     * cached RSTRING_PTR into a dangling or overrun pointer. */
    for (ssize_t i = 0; i < len && i < RSTRING_LEN(self); i++) {
        const unsigned char b = (unsigned char)RSTRING_PTR(self)[i];

        for (int k = 0; k < 8; k++) {
            const int j = lsb_first ? k : 7 - k;
            VALUE bit = ((b >> j) & 1) ? Qtrue : Qfalse;
            if (have_block) {
                rb_yield(bit);
            } else {
                rb_ary_push(ary, bit);
            }
        }
    }
    return have_block ? self : ary;
}
|
#clear_bit(*args) ⇒ Object
872 873 874 875 876 |
# File 'ext/string_bits/string_bits.c', line 872
/*
 * String#clear_bit(*args)
 *
 * Entry point that forwards its arguments unchanged to rb_str_mutate_bits
 * (defined elsewhere in this file) with the SB_MUT_CLEAR mode and returns
 * whatever that helper returns.
 */
static VALUE
rb_str_clear_bit(int argc, VALUE *argv, VALUE self)
{
    VALUE outcome = rb_str_mutate_bits(argc, argv, self, SB_MUT_CLEAR);

    return outcome;
}
|
#each_bit(*args) ⇒ Object
iterate bits ————————————————————
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 |
# File 'ext/string_bits/string_bits.c', line 429
/*
 * String#each_bit(lsb_first: true) { |bit| } -> self
 * String#each_bit(lsb_first: true)            -> Enumerator
 *
 * Yields every bit of the receiver as true/false, byte by byte.  Within a
 * byte, bits are visited 0..7 when lsb_first is true and 7..0 otherwise.
 * Returns an Enumerator when no block is given.
 */
static VALUE
rb_str_each_bit(int argc, VALUE *argv, VALUE self)
{
    RETURN_ENUMERATOR(self, argc, argv);
    int lsb_first = parse_lsb_first(argc, argv);
    /* Snapshot of the length: bytes appended by the block are not visited. */
    const ssize_t len = RSTRING_LEN(self);

    /* The buffer pointer and live length are re-read for every byte: the
     * block may shrink or reallocate self, which would turn a cached
     * RSTRING_PTR into a dangling or overrun pointer. */
    for (ssize_t i = 0; i < len && i < RSTRING_LEN(self); i++) {
        const unsigned char b = (unsigned char)RSTRING_PTR(self)[i];

        for (int k = 0; k < 8; k++) {
            const int j = lsb_first ? k : 7 - k;
            rb_yield(((b >> j) & 1) ? Qtrue : Qfalse);
        }
    }
    return self;
}
|
#each_bit_field(*args) ⇒ Object
String#each_bit_field(*bitlens, lsb_first: true) -> self String#each_bit_field(*bitlens, lsb_first: true) -> Enumerator
Iterates over the string as a sequence of packed bit-field records. Each positional argument specifies the width (in bits) of one field in the record. On each iteration, one Integer per field is yielded (LSB-first bit layout). Each bitlen must be in the range 1..64.
lsb_first: true (default) – intra-byte field extraction uses bit 0..7. lsb_first: false – intra-byte field extraction uses bit 7..0.
Incomplete trailing bits (when bytesize*8 is not a multiple of sum(bitlens)) are silently dropped, matching the behavior of Enumerable#each_slice.
Porting to Ruby Core:
1. Move extract_uint64 and this function into string.c.
2. Register with rb_define_method in Init_String().
3. Replace ALLOCA_N with stack arrays for small field counts and heap otherwise.
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 |
# File 'ext/string_bits/string_bits.c', line 1073
/*
 * String#each_bit_field(*bitlens, lsb_first: true) { |*fields| } -> self
 * String#each_bit_field(*bitlens, lsb_first: true)               -> Enumerator
 *
 * Treats the receiver as consecutive records of sum(bitlens) bits and
 * yields one unsigned Integer per field for each record, extracted via
 * extract_uint64 (defined elsewhere in this file).  Returns an Enumerator
 * when no block is given.
 *
 * Raises ArgumentError when no bitlen is given or a bitlen is outside
 * 1..64, and TypeError when a bitlen is not an Integer.  Trailing bits that
 * do not fill a whole record are ignored.
 */
static VALUE
rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
{
    RETURN_ENUMERATOR(self, argc, argv);

    VALUE rest, opts;
    rb_scan_args(argc, argv, "*:", &rest, &opts);
    validate_option_hash(opts, SB_KW_LSB_FIRST);

    ssize_t num_fields = RARRAY_LEN(rest);
    if (num_fields == 0) {
        rb_raise(rb_eArgError, "wrong number of arguments (given 0, expected 1+)");
    }

    /* The field count is caller-controlled (splat), so use ALLOCV_N: it
     * stays on the stack for small sizes and moves to a GC-managed temporary
     * buffer for large ones instead of overflowing the machine stack. */
    VALUE bitlens_holder, vals_holder;
    ssize_t *bitlens = ALLOCV_N(ssize_t, bitlens_holder, num_fields);
    ssize_t step = 0;
    for (ssize_t f = 0; f < num_fields; f++) {
        VALUE v = RARRAY_AREF(rest, f);
        if (!rb_integer_type_p(v)) {
            rb_raise(rb_eTypeError, "bitlen must be an integer");
        }
        ssize_t bl = NUM2SSIZET(v);
        if (bl <= 0) {
            rb_raise(rb_eArgError, "bitlen must be positive");
        }
        if (bl > 64) {
            rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
        }
        bitlens[f] = bl;
        step += bl;
    }

    int lsb_first = parse_lsb_first_opt(opts);
    ssize_t iterations = (RSTRING_LEN(self) * 8) / step;
    VALUE *field_vals = ALLOCV_N(VALUE, vals_holder, num_fields);

    for (ssize_t iter = 0; iter < iterations; iter++) {
        /* The block may have mutated self.  Re-read BOTH the pointer and
         * the length every record; stop as soon as the live string can no
         * longer supply a complete record, so extract_uint64 is never
         * handed a length larger than the current buffer. */
        ssize_t src_len = RSTRING_LEN(self);
        ssize_t field_bit = iter * step;
        if (field_bit + step > src_len * 8) {
            break;
        }
        const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);

        for (ssize_t f = 0; f < num_fields; f++) {
            uint64_t val = extract_uint64(src, src_len, field_bit, bitlens[f], lsb_first);
            field_vals[f] = ULL2NUM(val);
            field_bit += bitlens[f];
        }
        rb_yield_values2((int)num_fields, field_vals);
    }

    ALLOCV_END(vals_holder);
    ALLOCV_END(bitlens_holder);
    return self;
}
|
#each_bit_run(*args) ⇒ Object
String#each_bit_run(lsb_first: true) { |bit, len| } -> self String#each_bit_run(lsb_first: true) -> Enumerator
Yields (bit, run_length) pairs for each consecutive run of identical bits. Run-length boundary detection and counting happen entirely in C, replacing the Ruby-level current/count state machine required when using each_bit.
For random data (~50% density) each_bit_run yields ~half as many times as each_bit. For structured data (sparse validity bitmaps, sensor bursts) the ratio is proportional to the average run length.
lsb_first: true (default) iterates bit 0..7 within each byte. lsb_first: false iterates bit 7..0 within each byte.
Porting to Ruby Core:
1. Move to string.c; register in Init_String().
2. count_run_lsb / count_run_msb move with it.
1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 |
# File 'ext/string_bits/string_bits.c', line 1351
/*
 * String#each_bit_run(lsb_first: true) { |bit, len| } -> self
 * String#each_bit_run(lsb_first: true)                -> Enumerator
 *
 * Yields (bit, run_length) for each run of identical consecutive bits,
 * where bit is true/false.  Returns an Enumerator when no block is given
 * and yields nothing for an empty receiver.
 *
 * LSB-first runs are measured by count_run_lsb; otherwise bits are probed
 * individually with logical_get_bit (both defined elsewhere in this file).
 */
static VALUE
rb_str_each_bit_run(int argc, VALUE *argv, VALUE self)
{
    RETURN_ENUMERATOR(self, argc, argv);
    int lsb_first = parse_lsb_first(argc, argv);
    /* Upper bound for the walk: bits appended by the block are not visited. */
    const ssize_t initial_len = RSTRING_LEN(self);
    ssize_t pos = 0;

    for (;;) {
        /* The block may have shrunk or reallocated self, so both the length
         * and the pointer are re-read for every run.  Using the stale
         * length here would let the run scanner read past the end of the
         * live buffer. */
        ssize_t src_len = RSTRING_LEN(self);
        if (src_len > initial_len) {
            src_len = initial_len;
        }
        const ssize_t total_bits = src_len * 8;
        if (pos >= total_bits) {
            break;
        }
        const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);

        int bit;
        ssize_t run;
        if (lsb_first) {
            bit = (src[pos >> 3] >> (pos & 7)) & 1;
            run = count_run_lsb(src, src_len, pos, bit);
        } else {
            bit = logical_get_bit(src, pos, 0);
            run = 1;
            while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
                run++;
            }
        }

        rb_yield_values(2, bit ? Qtrue : Qfalse, SSIZET2NUM(run));
        pos += run;
    }
    return self;
}
|
#each_set_bit_offset(*args) ⇒ Object
iterate set-bit positions ———————————————–
485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 |
# File 'ext/string_bits/string_bits.c', line 485
/*
 * String#each_set_bit_offset(lsb_first: true) { |offset| } -> self
 * String#each_set_bit_offset(lsb_first: true)              -> Enumerator
 *
 * Yields the position of every set bit in the receiver.  Returns an
 * Enumerator when no block is given.
 *
 * lsb_first: true  - positions are byte_index * 8 + bit (bit counted from
 *                    the LSB), in ascending order.
 * lsb_first: false - bytes are visited in order and, within a byte, the
 *                    highest set bit first; each physical position is
 *                    mapped through physical_to_count_from (defined
 *                    elsewhere in this file) before being yielded.
 */
static VALUE
rb_str_each_set_bit_offset(int argc, VALUE *argv, VALUE self)
{
    RETURN_ENUMERATOR(self, argc, argv);
    int lsb_first = parse_lsb_first(argc, argv);
    /* Snapshot of the length: bytes appended by the block are not visited.
     * The buffer pointer and live length are re-read before every load
     * because the block may shrink or reallocate self; a cached RSTRING_PTR
     * would then dangle or be read past its end. */
    const ssize_t len = RSTRING_LEN(self);

    if (lsb_first) {
        ssize_t bi = 0;
#if SB_LITTLE_ENDIAN
        /* On little-endian, an 8-byte load keeps the flat LSB-first bit
         * numbering: word bit k is position bi * 8 + k.  memcpy avoids
         * unaligned-load faults on strict-alignment platforms. */
        while (bi + 8 <= len && bi + 8 <= RSTRING_LEN(self)) {
            uint64_t w;
            memcpy(&w, RSTRING_PTR(self) + bi, 8);
            while (w != 0) {
                int bit = sb_ctzll(w);
                rb_yield(SSIZET2NUM(bi * 8 + bit));
                w &= w - 1; /* clear lowest set bit */
            }
            bi += 8;
        }
#endif
        /* Remaining bytes (all bytes on non-little-endian builds). */
        for (; bi < len && bi < RSTRING_LEN(self); bi++) {
            unsigned int b = (unsigned char)RSTRING_PTR(self)[bi];
            while (b != 0) {
                int bit = sb_ctz8(b);
                rb_yield(SSIZET2NUM(bi * 8 + bit));
                b &= b - 1; /* clear lowest set bit */
            }
        }
    }
    else {
        /* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7 */
        for (ssize_t bi = 0; bi < len && bi < RSTRING_LEN(self); bi++) {
            unsigned int b = (unsigned char)RSTRING_PTR(self)[bi];
            while (b != 0) {
                int bit = sb_highest_bit8(b);
                ssize_t physical = bi * 8 + bit;
                rb_yield(SSIZET2NUM(physical_to_count_from(physical, 0)));
                b ^= (1u << bit); /* clear highest set bit */
            }
        }
    }
    return self;
}
|
#flip_bit(*args) ⇒ Object
878 879 880 881 882 |
# File 'ext/string_bits/string_bits.c', line 878
/*
 * String#flip_bit(*args)
 *
 * Entry point that forwards its arguments unchanged to rb_str_mutate_bits
 * (defined elsewhere in this file) with the SB_MUT_FLIP mode and returns
 * whatever that helper returns.
 */
static VALUE
rb_str_flip_bit(int argc, VALUE *argv, VALUE self)
{
    VALUE outcome = rb_str_mutate_bits(argc, argv, self, SB_MUT_FLIP);

    return outcome;
}
|
#set_bit(*args) ⇒ Object
866 867 868 869 870 |
# File 'ext/string_bits/string_bits.c', line 866
/*
 * String#set_bit(*args)
 *
 * Entry point that forwards its arguments unchanged to rb_str_mutate_bits
 * (defined elsewhere in this file) with the SB_MUT_SET mode and returns
 * whatever that helper returns.
 */
static VALUE
rb_str_set_bit(int argc, VALUE *argv, VALUE self)
{
    VALUE outcome = rb_str_mutate_bits(argc, argv, self, SB_MUT_SET);

    return outcome;
}
|
#set_bit_offsets(*args) ⇒ Object
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 |
# File 'ext/string_bits/string_bits.c', line 544
/*
 * String#set_bit_offsets(lsb_first: true) -> Array
 * String#set_bit_offsets(lsb_first: true) { |offset| } -> self
 *
 * Collects (or, with a block, yields) the position of every set bit in the
 * receiver.
 *
 * lsb_first: true  - positions are byte_index * 8 + bit (bit counted from
 *                    the LSB), in ascending order.
 * lsb_first: false - bytes are visited in order and, within a byte, the
 *                    highest set bit first; each physical position is
 *                    mapped through physical_to_count_from (defined
 *                    elsewhere in this file).
 */
static VALUE
rb_str_set_bit_offsets(int argc, VALUE *argv, VALUE self)
{
    int lsb_first = parse_lsb_first(argc, argv);
    /* Snapshot of the length: bytes appended by a block are not visited. */
    const ssize_t len = RSTRING_LEN(self);
    int have_block = rb_block_given_p();
    VALUE ary = Qnil;

    if (!have_block) {
        /* Pre-size the Array with popcount to avoid repeated reallocation.
         * memcpy avoids unaligned-load issues on strict-alignment platforms.
         * No Ruby code runs inside this scan, so one pointer read suffices. */
        const unsigned char *scan = (const unsigned char *)RSTRING_PTR(self);
        ssize_t count = 0;
        ssize_t nw = len >> 3;
        for (ssize_t wi = 0; wi < nw; wi++) {
            uint64_t w;
            memcpy(&w, scan + wi * 8, 8);
            count += sb_popcount64(w);
        }
        for (ssize_t bi = nw << 3; bi < len; bi++) {
            count += sb_popcount64((uint64_t)scan[bi]);
        }
        ary = rb_ary_new_capa(count);
    }

    /* From here on a block may run between loads.  The buffer pointer and
     * live length are therefore re-read before every load: a block that
     * shrinks or reallocates self would otherwise leave a cached
     * RSTRING_PTR dangling or read past its end. */
    if (lsb_first) {
        ssize_t bi = 0;
#if SB_LITTLE_ENDIAN
        /* Word bit k of an 8-byte little-endian load is position bi * 8 + k. */
        while (bi + 8 <= len && bi + 8 <= RSTRING_LEN(self)) {
            uint64_t w;
            memcpy(&w, RSTRING_PTR(self) + bi, 8);
            while (w != 0) {
                int bit = sb_ctzll(w);
                VALUE pos = SSIZET2NUM(bi * 8 + bit);
                have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
                w &= w - 1; /* clear lowest set bit */
            }
            bi += 8;
        }
#endif
        /* Remaining bytes (all bytes on non-little-endian builds). */
        for (; bi < len && bi < RSTRING_LEN(self); bi++) {
            unsigned int b = (unsigned char)RSTRING_PTR(self)[bi];
            while (b != 0) {
                int bit = sb_ctz8(b);
                VALUE pos = SSIZET2NUM(bi * 8 + bit);
                have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
                b &= b - 1; /* clear lowest set bit */
            }
        }
    }
    else {
        for (ssize_t bi = 0; bi < len && bi < RSTRING_LEN(self); bi++) {
            unsigned int b = (unsigned char)RSTRING_PTR(self)[bi];
            while (b != 0) {
                int bit = sb_highest_bit8(b);
                ssize_t physical = bi * 8 + bit;
                VALUE pos = SSIZET2NUM(physical_to_count_from(physical, 0));
                have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
                b ^= (1u << bit); /* clear highest set bit */
            }
        }
    }
    return have_block ? self : ary;
}
|