Module: Twilic::Core::Dictionary
- Defined in:
- lib/twilic/core/dictionary.rb
Defined Under Namespace
Classes: WideU128
Class Method Summary
- .apply_dictionary_references(state, columns) ⇒ Object
- .decode_trained_dictionary_block(block, dictionary) ⇒ Object
- .decode_trained_dictionary_payload(payload) ⇒ Object
- .dictionary_payload_hash(payload) ⇒ Object
- .encode_trained_dictionary_block(values, dictionary) ⇒ Object
- .pack_fixed_width_u64(values, width, out) ⇒ Object
- .unpack_fixed_width_u64(bytes, count, width) ⇒ Object
- .wide_and(a, m) ⇒ Object
- .wide_from_u64(v) ⇒ Object
- .wide_mask(width) ⇒ Object
- .wide_or(a, b) ⇒ Object
- .wide_shl(w, n) ⇒ Object
- .wide_shr(w, n) ⇒ Object
- .wide_zero?(w) ⇒ Boolean
Class Method Details
.apply_dictionary_references(state, columns) ⇒ Object
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/twilic/core/dictionary.rb', line 195

# Attaches a freshly built dictionary to every string column that repeats
# itself enough to benefit from one.
#
# A column qualifies when it holds strings, uses the DICTIONARY or STRING_REF
# codec, has at least 16 values, and at most half of those values are
# distinct. For each qualifying column a dictionary id is allocated on
# +state+, the sorted distinct values are serialized as the payload, a
# profile is registered, and the entry in +columns+ is replaced by a copy
# carrying the new +dictionary_id+.
#
# @param state [Object] session state; must respond to
#   +allocate_dictionary_id+, +dictionaries+ and +dictionary_profiles+
# @param columns [Array] columns to inspect; qualifying entries are replaced
#   in place
# @return [Array] +columns+
def apply_dictionary_references(state, columns)
  columns.each_with_index do |column, i|
    next unless column.values.kind == Model::ElementType::STRING

    # Cheap attribute check first so ineligible columns never pay for `uniq`.
    codec = column.codec
    next unless codec == Model::VectorCodec::DICTIONARY || codec == Model::VectorCodec::STRING_REF

    values = column.values.strings
    next if values.length < 16

    unique = values.uniq
    # Skip columns where more than half of the values are distinct.
    next if unique.length.to_f / values.length > 0.5

    dict_id = state.allocate_dictionary_id
    payload = +""
    keys = unique.sort
    Wire.encode_varuint(keys.length, payload)
    keys.each { |item| Wire.encode_string(item, payload) }

    # NOTE(review): this expression previously read
    # `state..unknown_reference_policy`, which Ruby parses as a range
    # operator followed by a bare `unknown_reference_policy` call instead of
    # a reader on the state. The intended receiver is not visible from this
    # method: the policy is taken from `state.options` when the state exposes
    # one and from `state` itself otherwise. Confirm against the session
    # state class and collapse to the single correct receiver.
    policy_owner = state.respond_to?(:options) ? state.options : state
    stateless_retry =
      policy_owner.unknown_reference_policy == Session::UnknownReferencePolicy::STATELESS_RETRY

    profile = Session::DictionaryProfile.new(
      version: 1,
      hash: dictionary_payload_hash(payload),
      expires_at: 0,
      fallback: stateless_retry ? Session::DictionaryFallback::STATELESS_RETRY : Session::DictionaryFallback::FAIL_FAST
    )

    state.dictionaries[dict_id] = payload
    state.dictionary_profiles[dict_id] = profile
    columns[i] = column.with(dictionary_id: dict_id)
  end
end
.decode_trained_dictionary_block(block, dictionary) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/twilic/core/dictionary.rb', line 55

# Decodes a trained-dictionary block into the dictionary entries it refers to.
#
# As read here, a block is: one mode byte, a varuint count +n+, then +n+ ids
# stored either as consecutive varuints (mode 0) or as a bit-width byte
# followed by fixed-width packed ids that fill the rest of the block (mode 1).
#
# @param block [String] encoded block bytes
# @param dictionary [Array] entries indexed by id
# @return [Array] one dictionary entry per decoded id, in block order
# @raise [StandardError] whatever +Errors.invalid_data+ builds, for an unknown
#   mode, a count the block cannot hold, trailing bytes, or an id outside
#   +dictionary+
def decode_trained_dictionary_block(block, dictionary)
  reader = Wire::Reader.new(block)
  mode = reader.read_u8
  n = reader.read_varuint
  ids =
    case mode
    when 0
      # Each id consumes at least one byte, so a count above the number of
      # remaining bytes can never be satisfied. Rejecting it up front keeps an
      # untrusted count from sizing the array allocation below.
      if n > block.bytesize - reader.position
        raise Errors.invalid_data("trained dictionary block length")
      end

      Array.new(n) { reader.read_varuint }
    when 1
      bit_width = reader.read_u8
      remaining = block.bytesize - reader.position
      packed = reader.read_exact(remaining)
      unpack_fixed_width_u64(packed, n, bit_width)
    else
      raise Errors.invalid_data("trained dictionary block mode")
    end
  raise Errors.invalid_data("trained dictionary block trailing bytes") unless reader.eof?

  ids.map do |id|
    raise Errors.invalid_data("trained dictionary block id") if id >= dictionary.length

    dictionary[id]
  end
end
.decode_trained_dictionary_payload(payload) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/twilic/core/dictionary.rb', line 10

# Parses a dictionary payload: a varuint entry count followed by that many
# strings, with nothing after the last one.
#
# @param payload [String] encoded payload bytes
# @return [Array] the decoded strings, in payload order
# @raise [StandardError] whatever +Errors.invalid_data+ builds, when bytes
#   remain after the last entry
def decode_trained_dictionary_payload(payload)
  cursor = Wire::Reader.new(payload)
  entries = []
  # Append one entry at a time so the array only grows as entries are
  # actually read from the payload.
  cursor.read_varuint.times { entries << cursor.read_string }
  return entries if cursor.eof?

  raise Errors.invalid_data("trained dictionary payload trailing bytes")
end
.dictionary_payload_hash(payload) ⇒ Object
226 227 228 229 230 231 232 233 |
# File 'lib/twilic/core/dictionary.rb', line 226

# Computes the 64-bit FNV-1a hash of a dictionary payload.
#
# The multiplier is the standard 64-bit FNV prime, 0x100000001B3. The
# previous constant ended in ...1B1, which matches no FNV variant even though
# the offset basis is the FNV-1a one.
#
# @param payload [String] payload bytes to hash
# @return [Integer] hash value in the range 0...2**64
def dictionary_payload_hash(payload)
  h = 0xCBF29CE484222325 # FNV-1a 64-bit offset basis
  payload.each_byte do |b|
    h ^= b
    # Ruby integers do not wrap, so truncate to 64 bits after each multiply.
    h = (h * 0x00000100000001B3) & 0xFFFFFFFFFFFFFFFF
  end
  h
end
.encode_trained_dictionary_block(values, dictionary) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/twilic/core/dictionary.rb', line 22

# Encodes +values+ as ids into +dictionary+, choosing the smaller of two
# layouts.
#
# Mode 0 writes each id as a varuint. Mode 1 writes a bit-width byte followed
# by the ids packed at that fixed width. The bit-packed form is returned only
# when it is strictly smaller than the varuint form.
#
# @param values [Array] values to encode; each must be present in +dictionary+
# @param dictionary [Array] dictionary entries; a value's id is its index
#   (the last index wins if an entry is repeated)
# @return [Array] +[block, true]+ on success, or +[nil, false]+ when some
#   value is not in the dictionary
def encode_trained_dictionary_block(values, dictionary)
  if values.empty?
    out = +""
    out << "\x00"
    Wire.encode_varuint(0, out)
    return [out, true]
  end

  by_value = dictionary.each_with_index.to_h
  ids = values.map do |value|
    by_value.fetch(value) { return [nil, false] }
  end

  raw = +""
  raw << "\x00"
  Wire.encode_varuint(ids.length, raw)
  ids.each { |id| Wire.encode_varuint(id, raw) }

  # Width needed for the largest id. This used to be computed as
  # `64 - <number of binary digits>`, i.e. the count of leading zeros, which
  # made the packed form needlessly wide for small ids and too narrow (an
  # overflow error from the packer) for ids above 32 bits.
  bit_width = ids.max.bit_length
  packed = +""
  pack_fixed_width_u64(ids, bit_width, packed)

  bitpacked = +""
  bitpacked << "\x01"
  Wire.encode_varuint(ids.length, bitpacked)
  bitpacked << bit_width.chr
  bitpacked << packed

  return [bitpacked, true] if bitpacked.bytesize < raw.bytesize

  [raw, true]
end
.pack_fixed_width_u64(values, width, out) ⇒ Object
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/twilic/core/dictionary.rb', line 97

# Appends +values+ to +out+ as consecutive +width+-bit fields, lowest bits
# first within each byte; a final partial byte is padded with zero bits.
#
# With a width of 0 nothing is written, and every value must be zero.
#
# @param values [Array<Integer>] non-negative integers that fit in +width+ bits
# @param width [Integer] bits per value, 0..64
# @param out [String] mutable buffer the packed bytes are appended to
# @return [Object] not meaningful; the result is what was appended to +out+
# @raise [StandardError] whatever +Errors.invalid_data+ builds, when +width+
#   exceeds 64 or a value does not fit in +width+ bits
def pack_fixed_width_u64(values, width, out)
  raise Errors.invalid_data("fixed-width u64 bit width") if width > 64

  if width.zero?
    values.each do |value|
      raise Errors.invalid_data("fixed-width u64 value overflow") unless value.zero?
    end
    return
  end

  acc = WideU128.new(lo: 0, hi: 0)
  acc_bits = 0
  values.each do |value|
    # Checked for every width, 64 included: Ruby integers are unbounded, so a
    # negative or wider-than-64-bit value would otherwise spill into
    # neighbouring fields instead of being rejected.
    raise Errors.invalid_data("fixed-width u64 value overflow") unless (value >> width).zero?

    acc = wide_or(acc, wide_shl(wide_from_u64(value), acc_bits))
    acc_bits += width
    # Flush whole bytes so fewer than 8 bits are pending before the next value.
    while acc_bits >= 8
      out << (acc.lo & 0xFF).chr
      acc = wide_shr(acc, 8)
      acc_bits -= 8
    end
  end

  out << (acc.lo & 0xFF).chr if acc_bits.positive?
end
.unpack_fixed_width_u64(bytes, count, width) ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/twilic/core/dictionary.rb', line 122

# Reads +count+ consecutive +width+-bit fields from +bytes+, lowest bits
# first within each byte.
#
# Any bits or bytes left over after the last field must be zero. With a width
# of 0 every value decodes to 0 and +bytes+ may only contain zero bytes.
#
# @param bytes [String] packed input bytes
# @param count [Integer] number of values to read
# @param width [Integer] bits per value, 0..64
# @return [Array<Integer>] the decoded values
# @raise [StandardError] whatever +Errors.invalid_data+ builds, when +width+
#   exceeds 64, the input is too short, or non-zero padding follows the last
#   value
def unpack_fixed_width_u64(bytes, count, width)
  raise Errors.invalid_data("fixed-width u64 bit width") if width > 64

  if width.zero?
    # `bytes` is a String (it is read with bytesize/getbyte below) and String
    # has no `each`; iterate its bytes explicitly.
    bytes.each_byte { |b| raise Errors.invalid_data("fixed-width u64 trailing bytes") unless b.zero? }
    return Array.new(count, 0)
  end

  out = []
  acc = WideU128.new(lo: 0, hi: 0)
  acc_bits = 0
  idx = 0
  mask = wide_mask(width)
  count.times do
    # Pull in bytes until the accumulator holds at least one full value.
    while acc_bits < width
      raise Errors.invalid_data("fixed-width u64 underflow") if idx >= bytes.bytesize

      acc = wide_or(acc, wide_shl(wide_from_u64(bytes.getbyte(idx)), acc_bits))
      idx += 1
      acc_bits += 8
    end
    out << wide_and(acc, mask).lo
    acc = wide_shr(acc, width)
    acc_bits -= width
  end

  # Padding bits in the last consumed byte, and any bytes after it, must be 0.
  raise Errors.invalid_data("fixed-width u64 trailing bytes") unless wide_zero?(acc)

  while idx < bytes.bytesize
    raise Errors.invalid_data("fixed-width u64 trailing bytes") unless bytes.getbyte(idx).zero?

    idx += 1
  end
  out
end
.wide_and(a, m) ⇒ Object
159 160 161 |
# File 'lib/twilic/core/dictionary.rb', line 159

# Bitwise AND of two wide values, computed half by half.
#
# @param a [WideU128] value to mask
# @param m [WideU128] mask
# @return [WideU128] a new value holding the AND of both halves
def wide_and(a, m)
  low = a.lo & m.lo
  high = a.hi & m.hi
  WideU128.new(lo: low, hi: high)
end
.wide_from_u64(v) ⇒ Object
81 82 83 |
# File 'lib/twilic/core/dictionary.rb', line 81

# Widens an integer into a wide value: +v+ becomes the low half and the high
# half is zero.
#
# @param v [Integer] value for the low half (stored as given)
# @return [WideU128]
def wide_from_u64(v)
  upper = 0
  WideU128.new(lo: v, hi: upper)
end
.wide_mask(width) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/twilic/core/dictionary.rb', line 85

# Builds a wide value whose lowest +width+ bits are set.
#
# A width of exactly 64 sets only the low half. It used to be special-cased
# to set all 128 bits, which made the 64-bit mask larger than the 65-bit one.
#
# @param width [Integer] number of low bits to set, 0..128
# @return [WideU128] the mask
def wide_mask(width)
  return WideU128.new(lo: 0, hi: 0) if width.zero?
  # Covers 1..64: (1 << 64) - 1 is exactly the full low half.
  return WideU128.new(lo: (1 << width) - 1, hi: 0) if width <= 64

  WideU128.new(lo: 0xFFFFFFFFFFFFFFFF, hi: (1 << (width - 64)) - 1)
end
.wide_or(a, b) ⇒ Object
163 164 165 |
# File 'lib/twilic/core/dictionary.rb', line 163

# Bitwise OR of two wide values, computed half by half.
#
# @param a [WideU128] first operand
# @param b [WideU128] second operand
# @return [WideU128] a new value holding the OR of both halves
def wide_or(a, b)
  low = a.lo | b.lo
  high = a.hi | b.hi
  WideU128.new(lo: low, hi: high)
end
.wide_shl(w, n) ⇒ Object
167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/twilic/core/dictionary.rb', line 167

# Shifts a wide value left by +n+ bits; bits pushed past bit 127 are dropped.
#
# @param w [WideU128] value to shift
# @param n [Integer] shift distance in bits
# @return [WideU128] +w+ itself when +n+ is 0, zero when +n+ is 128 or more,
#   otherwise a new shifted value
def wide_shl(w, n)
  u64_max = 0xFFFFFFFFFFFFFFFF
  if n.zero?
    w
  elsif n >= 128
    WideU128.new(lo: 0, hi: 0)
  elsif n >= 64
    # The whole low half moves into the high half; the low half empties.
    WideU128.new(lo: 0, hi: (w.lo << (n - 64)) & u64_max)
  else
    # The top n bits of the low half carry into the bottom of the high half.
    carried = w.lo >> (64 - n)
    WideU128.new(lo: (w.lo << n) & u64_max, hi: ((w.hi << n) | carried) & u64_max)
  end
end
.wide_shr(w, n) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/twilic/core/dictionary.rb', line 181

# Shifts a wide value right by +n+ bits; bits pushed below bit 0 are dropped.
#
# @param w [WideU128] value to shift
# @param n [Integer] shift distance in bits
# @return [WideU128] +w+ itself when +n+ is 0, zero when +n+ is 128 or more,
#   otherwise a new shifted value
def wide_shr(w, n)
  u64_max = 0xFFFFFFFFFFFFFFFF
  if n.zero?
    w
  elsif n >= 128
    WideU128.new(lo: 0, hi: 0)
  elsif n >= 64
    # The high half moves down into the low half; the high half empties.
    WideU128.new(lo: w.hi >> (n - 64), hi: 0)
  else
    # The bottom n bits of the high half drop into the top of the low half.
    borrowed = w.hi << (64 - n)
    WideU128.new(lo: ((w.lo >> n) | borrowed) & u64_max, hi: w.hi >> n)
  end
end
.wide_zero?(w) ⇒ Boolean
155 156 157 |
# File 'lib/twilic/core/dictionary.rb', line 155

# Tells whether a wide value has no bits set in either half.
#
# @param w [WideU128] value to test
# @return [Boolean] true when both halves are zero
def wide_zero?(w)
  # The OR of the two halves is zero only when each half is zero.
  (w.lo | w.hi).zero?
end