Module: Twilic::Core::ProtocolHelpers

Defined in:
lib/twilic/core/protocol_helpers.rb

Class Method Summary collapse

Class Method Details

.abs64(v) ⇒ Object



222
223
224
# File 'lib/twilic/core/protocol_helpers.rb', line 222

# Returns the magnitude of +v+.
#
# @param v [Integer] signed value
# @return [Integer] +v+ without its sign
def abs64(v)
  v.abs
end

.bit_width_signed(min, max) ⇒ Object



211
212
213
214
# File 'lib/twilic/core/protocol_helpers.rb', line 211

# Bit width of the distance between two bounds, regardless of which one is
# passed first.
#
# @param min [Integer] one bound
# @param max [Integer] the other bound
# @return [Integer] result of bit_width_u64 for the absolute difference
def bit_width_signed(min, max)
  bit_width_u64((max - min).abs)
end

.bit_width_u64(v) ⇒ Object



216
217
218
219
220
# File 'lib/twilic/core/protocol_helpers.rb', line 216

# Number of binary digits needed to write +v+; zero counts as one digit.
#
# @param v [Integer] value to measure (expected to be non-negative)
# @return [Integer] digit count, never less than 1 for non-negative input
def bit_width_u64(v)
  return v.bit_length if v.positive?

  # Zero renders as "0" (length 1). A negative value would also land here and
  # have its "-" sign counted, exactly as the string-based measurement does.
  v.to_s(2).length
end

.column_null_strategy_local(values, present_bits) ⇒ Object



8
9
10
11
12
13
14
15
16
17
# File 'lib/twilic/core/protocol_helpers.rb', line 8

# Chooses how a column's nulls are represented.
#
# * no nulls           -> ALL_PRESENT_ELIDED, no bitmap
# * nulls <= length/4  -> INVERTED_PRESENCE_BITMAP with every bit flipped
# * otherwise          -> PRESENCE_BITMAP with a copy of +present_bits+
#
# @param values [Array] column values, each responding to #kind
# @param present_bits [Array<Boolean>] one presence flag per value
# @return [Array] triple of [null strategy, bitmap or nil, has_presence flag]
def column_null_strategy_local(values, present_bits)
  nulls = values.count { |v| v.kind == Model::ValueKind::NULL }

  if nulls.zero?
    [Model::NullStrategy::ALL_PRESENT_ELIDED, nil, false]
  elsif nulls <= values.length / 4 # integer division
    flipped = present_bits.map { |present| !present }
    [Model::NullStrategy::INVERTED_PRESENCE_BITMAP, flipped, true]
  else
    [Model::NullStrategy::PRESENCE_BITMAP, present_bits.dup, true]
  end
end

.common_prefix_len(a, b) ⇒ Object



226
227
228
229
230
231
232
233
# File 'lib/twilic/core/protocol_helpers.rb', line 226

# Length, in bytes, of the longest prefix shared by two strings.
#
# @param a [String] first string
# @param b [String] second string
# @return [Integer] number of leading bytes that are equal in both
def common_prefix_len(a, b)
  limit = [a.bytesize, b.bytesize].min
  first_mismatch = (0...limit).find { |pos| a.getbyte(pos) != b.getbyte(pos) }
  first_mismatch || limit
end

.control_bitpack_decode_bytes(input) ⇒ Object



269
270
271
# File 'lib/twilic/core/protocol_helpers.rb', line 269

# Returns a fresh binary-encoded (ASCII-8BIT) copy of +input+; the bytes are
# not transformed.
# NOTE(review): presumably a pass-through placeholder for a bit-pack decoder — confirm.
#
# @param input [String] payload bytes
# @return [String] new unfrozen String with the same bytes, binary encoding
def control_bitpack_decode_bytes(input)
  input.dup.force_encoding(Encoding::BINARY)
end

.control_bitpack_encode_bytes(input) ⇒ Object



265
266
267
# File 'lib/twilic/core/protocol_helpers.rb', line 265

# Returns a fresh binary-encoded (ASCII-8BIT) copy of +input+; the bytes are
# not transformed.
# NOTE(review): presumably a pass-through placeholder for a bit-pack encoder — confirm.
#
# @param input [String] payload bytes
# @return [String] new unfrozen String with the same bytes, binary encoding
def control_bitpack_encode_bytes(input)
  input.dup.force_encoding(Encoding::BINARY)
end

.control_fse_decode_bytes(input) ⇒ Object



285
286
287
# File 'lib/twilic/core/protocol_helpers.rb', line 285

# Returns a copy of +input+ made with #dup; this implementation applies no
# transformation to the payload.
# NOTE(review): presumably a pass-through placeholder for an FSE decoder — confirm.
#
# @param input [Object] payload to copy (presumably a String of bytes — TODO confirm)
# @return [Object] duplicate of +input+
def control_fse_decode_bytes(input)
  input.dup
end

.control_fse_encode_bytes(input) ⇒ Object



281
282
283
# File 'lib/twilic/core/protocol_helpers.rb', line 281

# Returns a copy of +input+ made with #dup; this implementation applies no
# transformation to the payload.
# NOTE(review): presumably a pass-through placeholder for an FSE encoder — confirm.
#
# @param input [Object] payload to copy (presumably a String of bytes — TODO confirm)
# @return [Object] duplicate of +input+
def control_fse_encode_bytes(input)
  input.dup
end

.control_huffman_decode_bytes(input) ⇒ Object



277
278
279
# File 'lib/twilic/core/protocol_helpers.rb', line 277

# Returns a copy of +input+ made with #dup; this implementation applies no
# transformation to the payload.
# NOTE(review): presumably a pass-through placeholder for a Huffman decoder — confirm.
#
# @param input [Object] payload to copy (presumably a String of bytes — TODO confirm)
# @return [Object] duplicate of +input+
def control_huffman_decode_bytes(input)
  input.dup
end

.control_huffman_encode_bytes(input) ⇒ Object



273
274
275
# File 'lib/twilic/core/protocol_helpers.rb', line 273

# Returns a copy of +input+ made with #dup; this implementation applies no
# transformation to the payload.
# NOTE(review): presumably a pass-through placeholder for a Huffman encoder — confirm.
#
# @param input [Object] payload to copy (presumably a String of bytes — TODO confirm)
# @return [Object] duplicate of +input+
def control_huffman_encode_bytes(input)
  input.dup
end

.deltas(values) ⇒ Object



185
186
187
# File 'lib/twilic/core/protocol_helpers.rb', line 185

# First-order differences of a sequence. The first element is carried over
# unchanged; every later element is replaced by its difference from the
# element before it.
#
# @param values [Array<Numeric>] input sequence
# @return [Array<Numeric>] new array of the same length
def deltas(values)
  return [] if values.empty?

  [values.first] + values.each_cons(2).map { |before, after| after - before }
end

.diff_message(prev, current) ⇒ Object



341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/twilic/core/protocol_helpers.rb', line 341

# Builds a positional patch that turns the fields of +prev+ into the fields of
# +current+. Fields are obtained through message_fields and compared index by
# index:
#
# * present in both and Model.equal -> KEEP (no value)
# * present in both, different      -> REPLACE_SCALAR with a clone of the new value
# * only in +current+               -> INSERT_FIELD with a clone of the new value
# * only in +prev+                  -> DELETE_FIELD (no value)
#
# @param prev [Object] previous message
# @param current [Object] new message
# @return [Array] pair of [Array<Model::PatchOperation>, 0]; the second
#   element is always the literal 0 (its meaning is not visible in this block)
def diff_message(prev, current)
  old_fields = message_fields(prev)
  new_fields = message_fields(current)
  field_count = [old_fields.length, new_fields.length].max

  ops = Array.new(field_count) do |field_id|
    in_old = field_id < old_fields.length
    in_new = field_id < new_fields.length

    opcode, value =
      if in_old && in_new
        if Model.equal(old_fields[field_id], new_fields[field_id])
          [Model::PatchOpcode::KEEP, nil]
        else
          [Model::PatchOpcode::REPLACE_SCALAR, new_fields[field_id].clone_value]
        end
      elsif in_new
        [Model::PatchOpcode::INSERT_FIELD, new_fields[field_id].clone_value]
      else
        [Model::PatchOpcode::DELETE_FIELD, nil]
      end

    Model::PatchOperation.new(field_id: field_id, opcode: opcode, value: value)
  end

  [ops, 0]
end

.diff_template_columns(previous, current) ⇒ Object



311
312
313
314
315
316
317
318
319
320
321
# File 'lib/twilic/core/protocol_helpers.rb', line 311

# Determines which columns of +current+ differ from +previous+.
#
# A column counts as changed when it has no counterpart at the same index in
# +previous+, or when estimate_column_size differs between the two.
# NOTE(review): columns are compared by estimated size only, so two columns
# with different contents but equal estimated size are reported as unchanged
# — confirm this is intended.
#
# @param previous [Array] columns of the earlier batch
# @param current [Array] columns of the new batch
# @return [Array] pair of [Array<Boolean> mask (one flag per current column),
#   Array of the changed columns in order]
def diff_template_columns(previous, current)
  mask = current.each_with_index.map do |col, i|
    i >= previous.length || estimate_column_size(previous[i]) != estimate_column_size(col)
  end
  changed = current.select.with_index { |_col, i| mask[i] }
  [mask, changed]
end

.encoded_bytes_size(length) ⇒ Object



463
464
465
# File 'lib/twilic/core/protocol_helpers.rb', line 463

# Size of a length-prefixed byte run: the payload itself plus the varuint that
# carries its length.
#
# @param length [Integer] payload length in bytes
# @return [Integer] total encoded size in bytes
def encoded_bytes_size(length)
  prefix = varuint_size(length)
  prefix + length
end

.encoded_key_ref_size(key) ⇒ Object



471
472
473
474
475
476
477
# File 'lib/twilic/core/protocol_helpers.rb', line 471

# Encoded size of a key reference.
#
# @param key [Object] responds to #is_id, and to #id or #literal accordingly
# @return [Integer] 1 + varuint size of the id when the key is an id
#   reference, otherwise the encoded size of its literal string
def encoded_key_ref_size(key)
  return encoded_string_size(key.literal) unless key.is_id

  1 + varuint_size(key.id)
end

.encoded_string_size(value) ⇒ Object



467
468
469
# File 'lib/twilic/core/protocol_helpers.rb', line 467

# Encoded size of a string, measured on its byte length (not its character
# count).
#
# @param value [String] string to measure
# @return [Integer] result of encoded_bytes_size for the string's byte length
def encoded_string_size(value)
  # String#bytesize does not depend on the encoding, so no binary copy is needed.
  encoded_bytes_size(value.bytesize)
end

.estimate_column_size(column) ⇒ Object



427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/twilic/core/protocol_helpers.rb', line 427

# Rough encoded size of a column: a header (varuint field id + 4) plus a
# per-element-type payload estimate.
#
# Payload estimate by element kind:
# * BOOL   -> count / 8 + 2 (integer division)
# * I64    -> 4 per element
# * U64    -> 4 per element
# * F64    -> 8 per element
# * STRING -> sum of encoded_string_size over the strings
# * other  -> 0
#
# @param column [Object] responds to #field_id and #values (typed vector data)
# @return [Integer] estimated size in bytes
def estimate_column_size(column)
  data = column.values
  payload =
    case data.kind
    when Model::ElementType::BOOL then data.bools.length / 8 + 2
    when Model::ElementType::I64 then data.i64s.length * 4
    when Model::ElementType::U64 then data.u64s.length * 4
    when Model::ElementType::F64 then data.f64s.length * 8
    when Model::ElementType::STRING then data.strings.sum { |s| encoded_string_size(s) }
    else 0
    end

  varuint_size(column.field_id) + 4 + payload
end

.estimate_message_size(message) ⇒ Object



407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/twilic/core/protocol_helpers.rb', line 407

# Rough encoded size of a message, by message kind.
#
# * SCALAR      -> 1 + value size
# * ARRAY       -> 1 + varuint(count) + sum of value sizes
# * MAP         -> 1 + varuint(count) + sum of (key ref size + value size)
# * STATE_PATCH -> 3 + varuint(op count) + per op: varuint(field id) + 2 +
#                  value size (0 when the op carries no value)
# * any other   -> flat 16
#
# @param message [Object] responds to #kind and the accessor for that kind
# @return [Integer] estimated size in bytes
def estimate_message_size(message)
  case message.kind
  when Model::MessageKind::SCALAR
    estimate_value_size(message.scalar) + 1
  when Model::MessageKind::ARRAY
    items = message.array
    1 + varuint_size(items.length) + items.sum { |item| estimate_value_size(item) }
  when Model::MessageKind::MAP
    entries = message.map
    body = entries.sum { |entry| encoded_key_ref_size(entry.key) + estimate_value_size(entry.value) }
    1 + varuint_size(entries.length) + body
  when Model::MessageKind::STATE_PATCH
    ops = message.state_patch.operations
    body = ops.sum do |op|
      value_size = op.value ? estimate_value_size(op.value) : 0
      varuint_size(op.field_id) + 2 + value_size
    end
    3 + varuint_size(ops.length) + body
  else
    16
  end
end

.estimate_value_size(value) ⇒ Object



445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
# File 'lib/twilic/core/protocol_helpers.rb', line 445

# Rough encoded size of a single value, recursing into arrays and maps.
#
# * NULL, BOOL -> 1
# * I64        -> 2 + smallest fixed width holding the zigzag-encoded integer
# * U64        -> 2 + smallest fixed width holding the integer
# * F64        -> 9
# * STRING     -> 2 + encoded string size
# * BINARY     -> 1 + encoded byte-run size
# * ARRAY      -> 1 + varuint(count) + sum of element sizes
# * MAP        -> 1 + varuint(count) + sum of (encoded key string + value size)
# * any other  -> 1
#
# @param value [Object] responds to #kind and the accessor for that kind
# @return [Integer] estimated size in bytes
def estimate_value_size(value)
  case value.kind
  when Model::ValueKind::NULL, Model::ValueKind::BOOL
    1
  when Model::ValueKind::I64
    2 + smallest_u64_size(Wire.encode_zigzag(value.i64))
  when Model::ValueKind::U64
    2 + smallest_u64_size(value.u64)
  when Model::ValueKind::F64
    9
  when Model::ValueKind::STRING
    2 + encoded_string_size(value.str)
  when Model::ValueKind::BINARY
    1 + encoded_bytes_size(value.bin.bytesize)
  when Model::ValueKind::ARRAY
    items = value.arr
    1 + varuint_size(items.length) + items.sum { |item| estimate_value_size(item) }
  when Model::ValueKind::MAP
    entries = value.map
    body = entries.sum { |entry| encoded_string_size(entry.key) + estimate_value_size(entry.value) }
    1 + varuint_size(entries.length) + body
  else
    1
  end
end

.find_template_id(templates, columns) ⇒ Object



298
299
300
301
302
303
304
305
306
307
308
309
# File 'lib/twilic/core/protocol_helpers.rb', line 298

# Looks up the lowest template id whose field ids and null strategies match
# the given columns position by position.
#
# @param templates [Hash] template id => descriptor responding to #field_ids
#   and #null_strategies
# @param columns [Array] columns responding to #field_id and #null_strategy
# @return [Array] [id, true] for the first match in ascending id order,
#   [0, false] when nothing matches
def find_template_id(templates, columns)
  hit = templates.keys.sort.find do |id|
    tpl = templates[id]
    tpl.field_ids.length == columns.length &&
      columns.each_with_index.all? { |col, i|
        tpl.field_ids[i] == col.field_id && tpl.null_strategies[i] == col.null_strategy
      }
  end

  hit.nil? ? [0, false] : [hit, true]
end

.infer_column_codec_and_values(values) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/twilic/core/protocol_helpers.rb', line 48

# Chooses a codec and typed storage for a column's (null-free) values.
#
# When every value has the same kind among I64, U64, F64, BOOL or STRING, the
# raw payloads are extracted into the matching typed vector and the codec is
# chosen by the per-type selector (BOOL always uses DIRECT_BITPACK). Anything
# else, including mixed kinds, is stored as cloned values with the PLAIN codec.
#
# NOTE(review): the empty-input result carries +values: nil+, whereas the
# typed_data_* builders use +values: []+ — confirm consumers handle nil.
#
# @param values [Array] values responding to #kind and the typed accessors
# @return [Array] pair of [codec, Model::TypedVectorData]
def infer_column_codec_and_values(values)
  if values.empty?
    no_data = Model::TypedVectorData.new(
      kind: Model::ElementType::VALUE, bools: [], i64s: [], u64s: [], f64s: [],
      strings: [], binary: [], values: nil
    )
    return [Model::VectorCodec::PLAIN, no_data]
  end

  kinds = values.map(&:kind)
  if kinds.all?(Model::ValueKind::I64)
    ints = values.map(&:i64)
    [select_integer_codec(ints), typed_data_i64(ints)]
  elsif kinds.all?(Model::ValueKind::U64)
    uints = values.map(&:u64)
    [select_u64_codec(uints), typed_data_u64(uints)]
  elsif kinds.all?(Model::ValueKind::F64)
    floats = values.map(&:f64)
    [select_float_codec(floats), typed_data_f64(floats)]
  elsif kinds.all?(Model::ValueKind::BOOL)
    flags = values.map(&:bool)
    [Model::VectorCodec::DIRECT_BITPACK, typed_data_bool(flags)]
  elsif kinds.all?(Model::ValueKind::STRING)
    texts = values.map(&:str)
    [select_string_codec(texts), typed_data_string(texts)]
  else
    generic = Model::TypedVectorData.new(
      kind: Model::ElementType::VALUE, bools: [], i64s: [], u64s: [], f64s: [],
      strings: [], binary: [], values: values.map(&:clone_value)
    )
    [Model::VectorCodec::PLAIN, generic]
  end
end

.key_ref_field_identity(key, state) ⇒ Object



496
497
498
499
# File 'lib/twilic/core/protocol_helpers.rb', line 496

# Resolves a key reference to its string form, treating an empty string as
# "no identity".
#
# @param key [Object] key reference, passed through to key_ref_string
# @param state [Object] state, passed through to key_ref_string
# @return [String, nil] the resolved string, or nil when it is empty
def key_ref_field_identity(key, state)
  name = key_ref_string(key, state)
  name unless name.empty?
end

.key_ref_string(key, state) ⇒ Object



501
502
503
504
505
506
507
508
509
# File 'lib/twilic/core/protocol_helpers.rb', line 501

# Resolves a key reference to a string.
#
# Literal keys return their literal. Id keys are looked up in the state's key
# table; an id that is not found resolves to the empty string.
#
# @param key [Object] responds to #is_id, and to #id or #literal accordingly
# @param state [Object] responds to #key_table, whose #get_value(id) returns a
#   [value, found] pair
# @return [String] resolved key text, or "" for an unknown id
def key_ref_string(key, state)
  return key.literal unless key.is_id

  resolved, found = state.key_table.get_value(key.id)
  found ? resolved : ""
end

.merge_template_columns(previous, changed_mask, changed) ⇒ Object



323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
# File 'lib/twilic/core/protocol_helpers.rb', line 323

# Rebuilds a full column list from a previous batch plus the changed columns.
#
# For each mask position: a set bit takes the next unused entry of +changed+,
# a clear bit reuses the column at the same index of +previous+.
#
# @param previous [Array] columns of the earlier batch
# @param changed_mask [Array<Boolean>] one flag per output column
# @param changed [Array] replacement columns, in mask order
# @return [Array] merged columns, one per mask entry
# @raise [Object] Errors.invalid_data when +changed+ runs out, or when a clear
#   bit points past the end of +previous+
def merge_template_columns(previous, changed_mask, changed)
  next_changed = 0
  changed_mask.each_with_index.map do |is_changed, pos|
    if is_changed
      raise Errors.invalid_data("template changed column count mismatch") if next_changed >= changed.length

      replacement = changed[next_changed]
      next_changed += 1
      replacement
    else
      raise Errors.invalid_data("template reference out of range") if pos >= previous.length

      previous[pos]
    end
  end
end

.message_fields(message) ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/twilic/core/protocol_helpers.rb', line 366

# Extracts the positional field values of a message as independent clones.
#
# * ARRAY         -> the array elements
# * MAP           -> the entry values (keys are dropped)
# * SHAPED_OBJECT -> the shaped object's values
# * SCHEMA_OBJECT -> the schema object's fields
# * any other     -> no fields
#
# @param message [Object] responds to #kind and the accessor for that kind
# @return [Array] new array of values, each produced by #clone_value
def message_fields(message)
  originals =
    case message.kind
    when Model::MessageKind::ARRAY then message.array
    when Model::MessageKind::MAP then message.map.map(&:value)
    when Model::MessageKind::SHAPED_OBJECT then message.shaped_object.values
    when Model::MessageKind::SCHEMA_OBJECT then message.schema_object.fields
    else []
    end

  originals.map(&:clone_value)
end

.rebuild_message_like(base, fields) ⇒ Object



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/twilic/core/protocol_helpers.rb', line 381

# Builds a new message of the same kind as +base+ whose positional values are
# replaced by +fields+.
#
# MAP messages reuse the keys of +base+ by position. Shaped and schema objects
# keep their id and presence information (the presence data is duplicated when
# it is not nil).
#
# @param base [Object] message supplying kind, keys, ids and presence
# @param fields [Array] replacement values, in positional order
# @return [Object] message built through Model.message
# @raise [Object] Errors.invalid_data when a MAP receives more fields than
#   +base+ has entries, or when the kind of +base+ is not one of ARRAY, MAP,
#   SHAPED_OBJECT, SCHEMA_OBJECT
def rebuild_message_like(base, fields)
  case base.kind
  when Model::MessageKind::ARRAY
    Model.message(kind: Model::MessageKind::ARRAY, array: fields)
  when Model::MessageKind::MAP
    base_entries = base.map
    rebuilt = fields.each_with_index.map do |value, i|
      raise Errors.invalid_data("patch map shape mismatch") if i >= base_entries.length

      Model::MessageMapEntry.new(key: base_entries[i].key, value: value)
    end
    Model.message(kind: Model::MessageKind::MAP, map: rebuilt)
  when Model::MessageKind::SHAPED_OBJECT
    shaped = base.shaped_object
    replacement = Model::ShapedObjectMessage.new(
      shape_id: shaped.shape_id,
      presence: shaped.presence&.dup,
      has_presence: shaped.has_presence,
      values: fields
    )
    Model.message(kind: Model::MessageKind::SHAPED_OBJECT, shaped_object: replacement)
  when Model::MessageKind::SCHEMA_OBJECT
    schema = base.schema_object
    replacement = Model::SchemaObjectMessage.new(
      schema_id: schema.schema_id,
      presence: schema.presence&.dup,
      has_presence: schema.has_presence,
      fields: fields
    )
    Model.message(kind: Model::MessageKind::SCHEMA_OBJECT, schema_object: replacement)
  else
    raise Errors.invalid_data("state patch reconstruction unsupported for this message kind")
  end
end

.rle_decode_bytes(input) ⇒ Object



251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/twilic/core/protocol_helpers.rb', line 251

# Expands a run-length encoded payload made of (count, byte) pairs.
#
# @param input [String] encoded bytes; must have an even byte length
# @return [String] decoded bytes (empty string for empty input)
# @raise [Object] Errors.invalid_data("rle payload") when a count byte has no
#   value byte after it
def rle_decode_bytes(input)
  decoded = +""
  total = input.bytesize
  offset = 0
  while offset < total
    raise Errors.invalid_data("rle payload") if offset + 1 >= total

    count = input.getbyte(offset)
    byte = input.getbyte(offset + 1)
    # Append the whole run in one go; a count of zero contributes nothing.
    decoded << (byte.chr * count)
    offset += 2
  end
  decoded
end

.rle_encode_bytes(input) ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/twilic/core/protocol_helpers.rb', line 235

# Run-length encodes a byte string as a sequence of (count, byte) pairs, with
# each run capped at 255 so the count fits in one byte.
#
# The input is always read byte-wise. Indexing the string by character with
# byte offsets breaks on non-binary strings containing multibyte characters:
# the wrong data is emitted, or the index runs off the end of the string.
#
# @param input [String] bytes to encode (any encoding; only the bytes matter)
# @return [String, nil] binary (ASCII-8BIT) string of count/byte pairs, or nil
#   when +input+ is empty
def rle_encode_bytes(input)
  return nil if input.empty?

  out = String.new(encoding: Encoding::BINARY)
  total = input.bytesize
  i = 0
  while i < total
    byte = input.getbyte(i)
    j = i + 1
    # Extend the run while the same byte repeats, up to the one-byte count limit.
    j += 1 while j < total && input.getbyte(j) == byte && j - i < 255
    out << [j - i, byte].pack("C2")
    i = j
  end
  out
end

.rows_to_columns(rows) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/twilic/core/protocol_helpers.rb', line 23

# Pivots row-oriented values into columns.
#
# Rows shorter than the widest row are padded with Model.null_value. Every
# cell is cloned. For each column the null strategy is derived from all cells,
# while the codec and typed data are derived from the non-null cells only.
# The column's field id is its index and its dictionary id is nil.
#
# @param rows [Array<Array>] rows of values responding to #clone_value and #kind
# @return [Array<Model::Column>, nil] one column per position of the widest
#   row, or nil when +rows+ is empty
def rows_to_columns(rows)
  return nil if rows.empty?

  width = rows.map(&:length).max
  # Normalise every row to the full width first, then flip rows into columns.
  grid = rows.map do |row|
    Array.new(width) { |col| col < row.length ? row[col].clone_value : Model.null_value }
  end

  grid.transpose.each_with_index.map do |cells, col|
    present_bits = cells.map { |cell| cell.kind != Model::ValueKind::NULL }
    null_strategy, presence, has_presence = column_null_strategy_local(cells, present_bits)
    codec, typed = infer_column_codec_and_values(strip_nulls_local(cells))

    Model::Column.new(
      field_id: col,
      null_strategy: null_strategy,
      presence: presence,
      has_presence: has_presence,
      codec: codec,
      dictionary_id: nil,
      values: typed
    )
  end
end

.run_stats(values) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/twilic/core/protocol_helpers.rb', line 189

# Measures how run-heavy a sequence is.
#
# A run is a maximal stretch of consecutive equal elements.
#
# @param values [Array] sequence to analyse
# @return [Array<Float>] pair of [share of elements that sit in runs longer
#   than one, average run length]; [0.0, 0.0] for empty input
def run_stats(values)
  return [0.0, 0.0] if values.empty?

  run_lengths = values.chunk_while { |before, after| after == before }.map(&:length)
  in_repeats = run_lengths.select { |len| len > 1 }.sum
  [in_repeats.to_f / values.length, run_lengths.sum.to_f / run_lengths.length]
end

.run_stats_u64(values) ⇒ Object



207
208
209
# File 'lib/twilic/core/protocol_helpers.rb', line 207

# Run statistics for a column of unsigned values; delegates directly to
# run_stats with the same argument.
#
# @param values [Array] sequence to analyse
# @return [Array] whatever run_stats returns for +values+
def run_stats_u64(values)
  run_stats(values)
end

.select_float_codec(values) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/twilic/core/protocol_helpers.rb', line 156

# Picks a codec for a column of floats.
#
# Each value is reduced to its 64-bit little-endian double bit pattern. The
# number of positions where the pattern differs from the previous element is
# counted; XOR_FLOAT is chosen when at most half of the elements are such
# changes, PLAIN otherwise. Fewer than four values always yields PLAIN.
#
# @param values [Array<Float>] column values in order
# @return [Object] Model::VectorCodec::XOR_FLOAT or Model::VectorCodec::PLAIN
def select_float_codec(values)
  return Model::VectorCodec::PLAIN if values.length < 4

  patterns = values.map { |v| [v].pack("E").unpack1("Q<") }
  transitions = patterns.each_cons(2).count { |before, after| before != after }

  if transitions * 2 <= values.length
    Model::VectorCodec::XOR_FLOAT
  else
    Model::VectorCodec::PLAIN
  end
end

.select_integer_codec(values) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/twilic/core/protocol_helpers.rb', line 107

# Picks a vector codec for a column of signed integers by walking a fixed
# sequence of heuristics; the first rule that matches wins, so the order of
# the checks below is significant.
#
# @param values [Array<Integer>] column values in order
# @return [Object] one of the Model::VectorCodec constants
def select_integer_codec(values)
  # Fewer than four values: skip all analysis.
  return Model::VectorCodec::PLAIN if values.length < 4

  # deltas carries its first element over unchanged, so delta_vals[0] is
  # values[0] and dd[0] is delta_vals[0]. Only dd[1..] is counted for the
  # non-zero ratio, while delta_range_bits spans all of delta_vals.
  delta_vals = deltas(values)
  dd = deltas(delta_vals)
  non_zero_dd = (1...dd.length).count { |i| dd[i] != 0 }
  non_zero_ratio = dd.length > 1 ? non_zero_dd.to_f / (dd.length - 1) : 0.0
  delta_range_bits = bit_width_signed(delta_vals.min, delta_vals.max)
  # Rule 1: mostly constant second differences, or a very narrow delta range.
  return Model::VectorCodec::DELTA_DELTA_BITPACK if values.length >= 8 &&
    (non_zero_ratio <= 0.25 || delta_range_bits <= 2)

  # Rule 2: at least half of the items sit in runs and runs average 3+ items.
  repeated_ratio, avg_run = run_stats(values)
  return Model::VectorCodec::RLE if repeated_ratio >= 0.5 && avg_run >= 3.0

  # Rule 3: the value range (max - min) fits in 60 bits.
  range_bits = bit_width_signed(values.min, values.max)
  return Model::VectorCodec::FOR_BITPACK if range_bits <= 60

  # Everything below is reached only when range_bits > 60.
  # Rule 4: non-decreasing values whose delta range is at least 3 bits narrower.
  monotonic = values.each_cons(2).all? { |a, b| b >= a }
  return Model::VectorCodec::DELTA_FOR_BITPACK if values.length >= 8 && monotonic &&
    delta_range_bits <= range_bits - 3

  # Rule 5: every delta magnitude fits in 61 bits.
  max_abs_delta_bits = delta_vals.map { |v| bit_width_u64(abs64(v)) }.max
  return Model::VectorCodec::DELTA_BITPACK if max_abs_delta_bits <= 61

  # Rules 6-7: decide on the widest value magnitude.
  # NOTE(review): with range_bits > 60 at this point, max_bit_width <= 16 looks
  # unsatisfiable, which would make the SIMPLE8B branch unreachable — confirm.
  max_bit_width = values.map { |v| bit_width_u64(abs64(v)) }.max
  return Model::VectorCodec::SIMPLE8B if values.length >= 8 && max_bit_width <= 16 && !monotonic
  return Model::VectorCodec::DIRECT_BITPACK if max_bit_width < 64

  # Fallback when no rule applied.
  Model::VectorCodec::PLAIN
end

.select_string_codec(values) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/twilic/core/protocol_helpers.rb', line 169

# Picks a codec for a column of strings.
#
# * empty column                                  -> PLAIN
# * distinct strings are at most half the column  -> DICTIONARY
# * bytes shared with the preceding string (the first string is compared with
#   "") total more than twice the column length   -> PREFIX_DELTA
# * otherwise                                     -> PLAIN
#
# @param values [Array<String>] column values in order
# @return [Object] one of the Model::VectorCodec constants
def select_string_codec(values)
  return Model::VectorCodec::PLAIN if values.empty?
  return Model::VectorCodec::DICTIONARY if values.uniq.length * 2 <= values.length

  # Prepend "" so each string, including the first, has a predecessor.
  shared_bytes = [""].concat(values).each_cons(2).sum do |before, after|
    common_prefix_len(before.b, after.b)
  end

  if shared_bytes > values.length * 2
    Model::VectorCodec::PREFIX_DELTA
  else
    Model::VectorCodec::PLAIN
  end
end

.select_u64_codec(values) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/twilic/core/protocol_helpers.rb', line 138

# Picks a codec for a column of unsigned 64-bit integers.
#
# When every value fits in the signed 64-bit positive range, the decision is
# delegated to select_integer_codec (values are masked to 63 bits first).
# Otherwise a shorter rule list is applied in order: short columns use
# DIRECT_BITPACK, run-heavy columns use RLE, a value range of at most 60 bits
# uses FOR_BITPACK, then the widest value decides between SIMPLE8B,
# DIRECT_BITPACK and PLAIN.
#
# NOTE(review): the width-based rules are reached only when some value exceeds
# 0x7FFFFFFFFFFFFFFF, i.e. is 64 bits wide, so the SIMPLE8B and final
# DIRECT_BITPACK branches look unreachable for integer input — confirm.
#
# @param values [Array<Integer>] column values in order
# @return [Object] one of the Model::VectorCodec constants
def select_u64_codec(values)
  i64_max = 0x7FFFFFFFFFFFFFFF
  return select_integer_codec(values.map { |v| v & i64_max }) if values.all? { |v| v <= i64_max }
  return Model::VectorCodec::DIRECT_BITPACK if values.length < 4

  repeated_ratio, avg_run = run_stats_u64(values)
  return Model::VectorCodec::RLE if repeated_ratio >= 0.5 && avg_run >= 3.0
  return Model::VectorCodec::FOR_BITPACK if bit_width_u64(values.max - values.min) <= 60

  widest = values.map { |v| bit_width_u64(v) }.max
  if values.length >= 8 && widest <= 16
    Model::VectorCodec::SIMPLE8B
  elsif widest < 64
    Model::VectorCodec::DIRECT_BITPACK
  else
    Model::VectorCodec::PLAIN
  end
end

.smallest_u64_size(value) ⇒ Object



488
489
490
491
492
493
494
# File 'lib/twilic/core/protocol_helpers.rb', line 488

# Smallest fixed width, in bytes, among 1, 2, 4 and 8 that can hold +value+.
#
# @param value [Integer] unsigned value
# @return [Integer] 1, 2, 4 or 8
def smallest_u64_size(value)
  return 1 if value <= 0xFF
  return 2 if value <= 0xFFFF
  return 4 if value <= 0xFFFFFFFF

  8
end

.strip_nulls_local(values) ⇒ Object



19
20
21
# File 'lib/twilic/core/protocol_helpers.rb', line 19

# Returns the values whose kind is not NULL, in their original order.
#
# @param values [Array] values responding to #kind
# @return [Array] new array without the null values
def strip_nulls_local(values)
  values.select { |v| v.kind != Model::ValueKind::NULL }
end

.template_descriptor_from_columns(template_id, columns) ⇒ Object



289
290
291
292
293
294
295
296
# File 'lib/twilic/core/protocol_helpers.rb', line 289

# Builds a template descriptor that records, per column and in order, the
# field id, null strategy and codec.
#
# @param template_id [Object] id to store on the descriptor
# @param columns [Array] columns responding to #field_id, #null_strategy, #codec
# @return [Model::TemplateDescriptor] descriptor for the given column layout
def template_descriptor_from_columns(template_id, columns)
  field_ids = columns.map(&:field_id)
  null_strategies = columns.map(&:null_strategy)
  codecs = columns.map(&:codec)

  Model::TemplateDescriptor.new(
    template_id: template_id,
    field_ids: field_ids,
    null_strategies: null_strategies,
    codecs: codecs
  )
end

.typed_data_bool(data) ⇒ Object



97
98
99
100
# File 'lib/twilic/core/protocol_helpers.rb', line 97

# Wraps boolean payloads in typed vector data of kind BOOL; every other slot
# is an empty array.
#
# @param data [Array<Boolean>] payload stored (not copied) in the bools slot
# @return [Model::TypedVectorData]
def typed_data_bool(data)
  Model::TypedVectorData.new(
    kind: Model::ElementType::BOOL,
    bools: data,
    i64s: [],
    u64s: [],
    f64s: [],
    strings: [],
    binary: [],
    values: []
  )
end

.typed_data_f64(data) ⇒ Object



92
93
94
95
# File 'lib/twilic/core/protocol_helpers.rb', line 92

# Wraps float payloads in typed vector data of kind F64; every other slot is
# an empty array.
#
# @param data [Array<Float>] payload stored (not copied) in the f64s slot
# @return [Model::TypedVectorData]
def typed_data_f64(data)
  Model::TypedVectorData.new(
    kind: Model::ElementType::F64,
    bools: [],
    i64s: [],
    u64s: [],
    f64s: data,
    strings: [],
    binary: [],
    values: []
  )
end

.typed_data_i64(data) ⇒ Object



82
83
84
85
# File 'lib/twilic/core/protocol_helpers.rb', line 82

# Wraps signed integer payloads in typed vector data of kind I64; every other
# slot is an empty array.
#
# @param data [Array<Integer>] payload stored (not copied) in the i64s slot
# @return [Model::TypedVectorData]
def typed_data_i64(data)
  Model::TypedVectorData.new(
    kind: Model::ElementType::I64,
    bools: [],
    i64s: data,
    u64s: [],
    f64s: [],
    strings: [],
    binary: [],
    values: []
  )
end

.typed_data_string(data) ⇒ Object



102
103
104
105
# File 'lib/twilic/core/protocol_helpers.rb', line 102

# Wraps string payloads in typed vector data of kind STRING; every other slot
# is an empty array.
#
# @param data [Array<String>] payload stored (not copied) in the strings slot
# @return [Model::TypedVectorData]
def typed_data_string(data)
  Model::TypedVectorData.new(
    kind: Model::ElementType::STRING,
    bools: [],
    i64s: [],
    u64s: [],
    f64s: [],
    strings: data,
    binary: [],
    values: []
  )
end

.typed_data_u64(data) ⇒ Object



87
88
89
90
# File 'lib/twilic/core/protocol_helpers.rb', line 87

# Wraps unsigned integer payloads in typed vector data of kind U64; every
# other slot is an empty array.
#
# @param data [Array<Integer>] payload stored (not copied) in the u64s slot
# @return [Model::TypedVectorData]
def typed_data_u64(data)
  Model::TypedVectorData.new(
    kind: Model::ElementType::U64,
    bools: [],
    i64s: [],
    u64s: data,
    f64s: [],
    strings: [],
    binary: [],
    values: []
  )
end

.varuint_size(value) ⇒ Object



479
480
481
482
483
484
485
486
# File 'lib/twilic/core/protocol_helpers.rb', line 479

# Number of bytes a variable-length unsigned integer occupies when each byte
# carries 7 bits of the value.
#
# @param value [Integer] value to measure
# @return [Integer] byte count, at least 1
def varuint_size(value)
  # Anything below 0x80 (including zero) fits in a single byte.
  return 1 if value < 0x80

  # Otherwise one byte per started group of 7 bits.
  (value.bit_length + 6) / 7
end