Class: CBOR::Packer
- Inherits:
-
Object
- Object
- CBOR::Packer
- Defined in:
- lib/cbor-packed.rb
Constant Summary collapse
- CBOR_PACKED_HEX =
- CBOR_PACKED_MAPS =
Class Method Summary collapse
- .argref(n, casezero = 6) ⇒ Object
- .build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) ⇒ Object
- .build_maps(count, match_array, translate, prefixes) ⇒ Object
- .build_match_array(item) ⇒ Object
- .from_item(item) ⇒ Object
- .sharedref(i) ⇒ Object
Instance Method Summary collapse
- #has(o) ⇒ Object
-
#initialize(match_array, argument_array, translate, mixed) ⇒ Packer
constructor
A new instance of Packer.
- #pack(pa) ⇒ Object
Constructor Details
#initialize(match_array, argument_array, translate, mixed) ⇒ Packer
Returns a new instance of Packer.
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
# File 'lib/cbor-packed.rb', line 295 def initialize(match_array, argument_array, translate, mixed) @mixed = mixed @hit = translate # XXX: make sure we don't overwrite the existing prefix compression values! # (this should really be done downwards, ...) 16 x 1, 160 x 2, (512-48) x 3 match_array[0...16].each_with_index do |o, i| @hit[o] = CBOR::Simple.new(i) end # if m = match_array[16...128] # no allocation for simple(16...128) # m.each_with_index do |o, i| # @hit[o] = CBOR::Simple.new(i + 128) # end # end if m = match_array[16..-1] m.each_with_index do |o, i| @hit[o] = CBOR::Tagged.new(REF_TAG, (i >> 1) ^ -(i & 1)) end end # add one round of transitive matching @hit.each do |k, v| if r = @hit[v] @hit[k] = r end end # warn [:HIT, @hit['688921AF59455E4D35D2FFE5FD21CA0598D42374'.xeh]].inspect @match_array = match_array # @prefix = {} -- do that later @argument_array = argument_array end |
Class Method Details
.argref(n, casezero = 6) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/cbor-packed.rb', line 60 def self.argref(n, casezero = 6) case n when 0 casezero when 1..31 n + 224 when 32...0x1000 n + 0x7000 when 0x1000...0x10000000 n + 0x70000000 end end |
.build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/cbor-packed.rb', line 82 def self.build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) optimizable, others = strings.partition do |str| if /(?:(?<lc>(?:[0-9a-f][0-9a-f]){4,})|(?<uc>(?:[0-9A-F][0-9A-F]){4,}))$/ =~ str left = $` # warn [:lc, left, lc, uc].inspect bytes = [ if lc tag = HEX_LC_TAG fail if uc lc else fail unless uc tag = HEX_UC_TAG uc end].pack('H*').b if outer = translate[str] fail outer.inspect unless CBOR::Tagged === outer && left == outer.value # XXX else referto = bytes if (count[bytes] += 1) == 2 # patch up match_array... # translate[bytes] = sharedref(match_array.size) # happens later, too match_array << bytes referto = sharedref(match_array.size-1) end prefixes << left unless prefixes[-1] == left translate[str] = CBOR::Tagged.new(argref(prefixes.size-1), CBOR::Tagged.new(tag, referto)) unless count[bytes] > 2 # don't overwrite any entry made at == 2 end true end end others end |
.build_maps(count, match_array, translate, prefixes) ⇒ Object
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/cbor-packed.rb', line 117 def self.build_maps(count, match_array, translate, prefixes) klcount = Hash.new(0) mapvalues = Hash.new {|h, kl| # kl = key list h.member?(kl) or h[kl] = Hash.new {|hv, kv| hv.member?(kv) or hv[kv] = Hash.new(0) } } count.select {|o, _n| Hash === o}.each do |o, _n| kl = o.keys.sort klcount[kl] += 1 v = o.values_at(*kl) z = kl.zip(v) z.each do |k1, v1| mapvalues[kl][k1][v1] += 1 end end repkl = klcount.select {|kl, n| n > 1}.sort_by {|kl, n| -n} # sort not needed # fudge here -- ignore single children even in parent selection # parent selection klparents = {} repkl.map do |kl, n| klparent = {} mapvalues[kl].each do |k1, v1| repv = v1.to_a.select{|k2, v2| v2 > (n+3)/2} # low-hanging -> if repv != [] # -> should be zero or one of them # pp [k1, repv] # The k1 values are those that go into the head arrays # first .first: low-hanging, second .first: repeated member value klparent[k1] = repv.first.first end end # pp klparent if klparent != {} ix = prefixes.size prefixes << klparent constant_keys = Set[*klparent.keys] variable_keys = Set[*kl] - constant_keys klparents[kl] = [ klparent, argref(ix), constant_keys, variable_keys.to_a ] end end klmatch = {} count.select {|o, _n| Hash === o}.each do |o, _n| kl = o.keys.sort klp, ref, constant_keys, variable_kl = klparents[kl] if klp # split off parent, return to common code then oouter = o o = o.reject {|k| constant_keys.include? k} kl = variable_kl end if kl.size > 1 && (klp || klcount[kl] > 1) # TODO: tune unless ix = klmatch[kl] ix = prefixes.size prefixes << CBOR::Tagged.new(RECORD_TAG, kl) klmatch[kl] = ix end packed = translate[o] = CBOR::Tagged.new(argref(ix), o.values_at(*kl)) # needs packing end if packed && klp translate[oouter] = CBOR::Tagged.new(ref, packed) # merge Hashes end end end |
.build_match_array(item) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/cbor-packed.rb', line 30 def self.build_match_array(item) # Hash with all data items as keys and the number of times they occur as values: count = Hash.new(0) item.cbor_visit do |o| (count[o] += 1) == 1 # if the count gets > 1, we can stop visiting, so we return false in the block end # choose those matches that are occurring > 1, make first rough estimate of saving good_count = count .select {|k, v| v > 1} .map {|k, v| l = k.to_cbor.length # v-1: We can only save space for the second etc. # l-1: We'll need at least 1 byte for the reference gain = (v-1)*(l-1) [k, v, l] if gain > 0 }.compact .sort_by {|a| -a[1]} # good_count is now an array of [value, count, length] tuples that have potential gain, # sorted by descending number of references we'll get match_array = [] good_count.each {|a| match_array << a[0] if (REF_SIZE[match_array.size] || 4) < a[2] } # pp match_array [count, match_array] end |
.from_item(item) ⇒ Object
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/cbor-packed.rb', line 191 def self.from_item(item) count, match_array = build_match_array(item) # TODO: the below needs to be done with arrays and (hard!) maps as well # do this on the reverse to find common suffixes translate = {} # map (sub)items to their arg-packed versions prefixes = [] # provisional prefix table used in the above # select all strings (ignoring reference counts) and sort them strings = count.select {|k, v| String === k}.map(&:first).sort if strings != [] strings = build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) if CBOR_PACKED_HEX if strings != [] string_common = strings[1..-1].zip(strings).map{ |y, x| l = x.chars.zip(y.chars).take_while{|a, b| a == b}.length # should be bytes [x, l] } << [strings[-1], 0] end # string_common: list of strings/counts of number of /bytes/ matching with next # pp string_common end if string_common prefix_stack = [[0, false]] # sentinel pos = 0 # mirror prefix_stack[-1][0] tag_no = argref(prefixes.size) # REF_TAG string_common.each do |s, l| if l > pos + 2 + $compression_hack if t = prefix_stack[-1][1] # if we still have a prefix left prefixes << CBOR::Tagged.new(t, s[pos...l]) else prefixes << s[0...l] end prefix_stack << [l, tag_no] pos = l tag_no += 1 tag_no = 225 if tag_no == REF_TAG+1 # skip 224, as we always can use 6 here tag_no = 28704 if tag_no == 256 tag_no = 1879052288 if tag_no == 32768 end if t = prefix_stack[-1][1] # if we still have a viable prefix left translate[s] = CBOR::Tagged.new(t, s[pos..-1]) end # pop the prefix stack while l < pos prefix_stack.pop pos = prefix_stack[-1][0] end # pp prefix_stack # pp pos end end # pp [:TRANSLATE, translate] # pp [:PREFIXES, prefixes] # Note that we didn't compute a suffix table to mix in build_maps(count, match_array, translate, prefixes) if CBOR_PACKED_MAPS match_array_size = match_array.size mixed = match_array_size + prefixes.size <= STRAIGHT_11_SIZE if mixed && match_array_size != 0 # We need to shift by match_array_size translate = Hash[translate.map {|k, v| PP.pp v, STDERR newtag = v.tag if newtag == REF_TAG newtag = 224 end # this works only if <= STRAIGHT_11_SIZE (above) newtag += match_array_size [k, CBOR::Tagged.new(newtag, v.value)] }] prefixes = prefixes.map {|p| if CBOR::Tagged === p case p.tag when REF_TAG CBOR::Tagged.new(224 + match_array_size, p.value) when 224..255 CBOR::Tagged.new(p.tag + match_array_size, p.value) else # this works only if <= STRAIGHT_11_SIZE (above) p end else p end } # pp translate # pp prefixes end # Check match_array for direct entries of an arg-packed string match_array = match_array.map do |v| if r = translate[v] # puts "*** replacing #{v.inspect} by #{r.inspect}" r else v end end new(match_array, prefixes, translate, mixed) end |
Instance Method Details
#has(o) ⇒ Object
324 325 326 327 |
# File 'lib/cbor-packed.rb', line 324 def has(o) # warn [:HIT1].inspect if o == '688921AF59455E4D35D2FFE5FD21CA0598D42374'.xeh @hit[o] end |
#pack(pa) ⇒ Object
328 329 330 331 332 333 334 335 336 |
# File 'lib/cbor-packed.rb', line 328 def pack(pa) pa = pa.to_packed_cbor1(self) # should also sort by frequency... if @mixed mixed_array = @match_array + @argument_array CBOR::Tagged.new(MIXED_PACKED_TAG, [mixed_array, pa]) else CBOR::Tagged.new(PACKED_TAG, [@match_array, @argument_array, pa]) end end |