Class: CBOR::Packer

Inherits:
Object
  • Object
show all
Defined in:
lib/cbor-packed.rb

Constant Summary collapse

CBOR_PACKED_HEX =
CBOR_PACKED_MAPS =

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(match_array, argument_array, translate, mixed) ⇒ Packer

Returns a new instance of Packer.



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/cbor-packed.rb', line 295

def initialize(match_array, argument_array, translate, mixed)
  @mixed = mixed
  @hit = translate
  # XXX: make sure we don't overwrite the existing prefix compression values!
  # (this should really be done downwards, ...) 16 x 1, 160 x 2, (512-48) x 3
  match_array[0...16].each_with_index do |o, i|
    @hit[o] = CBOR::Simple.new(i)
  end
  # if m = match_array[16...128]    # no allocation for simple(16...128)
  #   m.each_with_index do |o, i|
  #     @hit[o] = CBOR::Simple.new(i + 128)
  #   end
  # end
  if m = match_array[16..-1]
    m.each_with_index do |o, i|
      @hit[o] = CBOR::Tagged.new(REF_TAG, (i >> 1) ^ -(i & 1))
    end
  end
  # add one round of transitive matching
  @hit.each do |k, v|
    if r = @hit[v]
      @hit[k] = r
    end
  end
  # warn [:HIT, @hit['688921AF59455E4D35D2FFE5FD21CA0598D42374'.xeh]].inspect
  @match_array = match_array
  # @prefix = {} -- do that later
  @argument_array = argument_array
end

Class Method Details

.argref(n, casezero = 6) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/cbor-packed.rb', line 60

def self.argref(n, casezero = 6)
  case n
  when 0
    casezero
  when 1..31
    n + 224
  when 32...0x1000
    n + 0x7000
  when 0x1000...0x10000000
    n + 0x70000000
  end
end

.build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/cbor-packed.rb', line 82

def self.build_bytes_from_hex(strings, count, match_array, item, translate, prefixes)
  optimizable, others = strings.partition do |str|
    if /(?:(?<lc>(?:[0-9a-f][0-9a-f]){4,})|(?<uc>(?:[0-9A-F][0-9A-F]){4,}))$/ =~ str
      left = $`
      # warn [:lc, left, lc, uc].inspect
      bytes = [
        if lc
          tag = HEX_LC_TAG
          fail if uc
          lc
        else
          fail unless uc
          tag = HEX_UC_TAG
          uc
        end].pack('H*').b
      if outer = translate[str]
        fail outer.inspect unless CBOR::Tagged === outer && left == outer.value # XXX
      else
        referto = bytes
        if (count[bytes] += 1) == 2 # patch up match_array...
          # translate[bytes] = sharedref(match_array.size) # happens later, too
          match_array << bytes
          referto = sharedref(match_array.size-1)
        end
        prefixes << left unless prefixes[-1] == left
        translate[str] = CBOR::Tagged.new(argref(prefixes.size-1),
                                          CBOR::Tagged.new(tag, referto)) unless count[bytes] > 2
        # don't overwrite any entry made at == 2
      end
      true
    end
  end
  others
end

.build_maps(count, match_array, translate, prefixes) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/cbor-packed.rb', line 117

def self.build_maps(count, match_array, translate, prefixes)
  klcount = Hash.new(0)
  mapvalues = Hash.new {|h, kl|      # kl = key list
    h.member?(kl) or h[kl] = Hash.new {|hv, kv|
      hv.member?(kv) or hv[kv] = Hash.new(0)
    }
  }

  count.select {|o, _n| Hash === o}.each do |o, _n|
    kl = o.keys.sort
    klcount[kl] += 1
    v = o.values_at(*kl)
    z = kl.zip(v)
    z.each do |k1, v1|
      mapvalues[kl][k1][v1] += 1
    end
  end
  repkl = klcount.select {|kl, n| n > 1}.sort_by {|kl, n| -n} # sort not needed
  # fudge here -- ignore single children even in parent selection

  # parent selection

  klparents = {}
  repkl.map do |kl, n|
    klparent = {}
    mapvalues[kl].each do |k1, v1|
      repv = v1.to_a.select{|k2, v2| v2 > (n+3)/2} # low-hanging ->
      if repv != []             # -> should be zero or one of them
        # pp [k1, repv]
        # The k1 values are those that go into the head arrays
        # first .first: low-hanging, second .first: repeated member value
        klparent[k1] = repv.first.first
      end
    end
    # pp klparent
    if klparent != {}
      ix = prefixes.size
      prefixes << klparent
      constant_keys = Set[*klparent.keys]
      variable_keys = Set[*kl] - constant_keys
      klparents[kl] = [
        klparent, argref(ix), constant_keys, variable_keys.to_a
      ]
    end
  end

  klmatch = {}

  count.select {|o, _n| Hash === o}.each do |o, _n|
    kl = o.keys.sort
    klp, ref, constant_keys, variable_kl = klparents[kl]
    if klp # split off parent, return to common code then
      oouter = o
      o = o.reject {|k| constant_keys.include? k}
      kl = variable_kl
    end
    if kl.size > 1 && (klp || klcount[kl] > 1) # TODO: tune
      unless ix = klmatch[kl]
        ix = prefixes.size
        prefixes << CBOR::Tagged.new(RECORD_TAG, kl)
        klmatch[kl] = ix
      end
      packed = translate[o] = CBOR::Tagged.new(argref(ix), o.values_at(*kl)) # needs packing
    end
    if packed && klp
      translate[oouter] = CBOR::Tagged.new(ref, packed) # merge Hashes
    end
  end

end

.build_match_array(item) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/cbor-packed.rb', line 30

def self.build_match_array(item)
  # Hash with all data items as keys and the number of times they occur as values:
  count = Hash.new(0)
  item.cbor_visit do |o|
    (count[o] += 1) == 1
    # if the count gets > 1, we can stop visiting, so we return false in the block
  end

  # choose those matches that are occurring > 1, make first rough estimate of saving
  good_count = count
                 .select {|k, v| v > 1}
                 .map {|k, v|
                    l = k.to_cbor.length
                    # v-1: We can only save space for the second etc.
                    # l-1: We'll need at least 1 byte for the reference
                    gain = (v-1)*(l-1)
                    [k, v, l] if gain > 0
                 }.compact
                 .sort_by {|a| -a[1]}
  # good_count is now an array of [value, count, length] tuples that have potential gain,
  # sorted by descending number of references we'll get

  match_array = []
  good_count.each {|a|
    match_array << a[0] if (REF_SIZE[match_array.size] || 4) < a[2]
  }
  # pp match_array
  [count, match_array]
end

.from_item(item) ⇒ Object



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/cbor-packed.rb', line 191

def self.from_item(item)
  count, match_array = build_match_array(item)

  # TODO: the below needs to be done with arrays and (hard!) maps as well
  # do this on the reverse to find common suffixes

  translate = {}            # map (sub)items to their arg-packed versions
  prefixes = []             # provisional prefix table used in the above

  # select all strings (ignoring reference counts) and sort them
  strings = count.select {|k, v| String === k}.map(&:first).sort
  if strings != []
    strings = build_bytes_from_hex(strings, count, match_array, item, translate, prefixes) if CBOR_PACKED_HEX
    if strings != []
      string_common = strings[1..-1].zip(strings).map{ |y, x|
        l = x.chars.zip(y.chars).take_while{|a, b| a == b}.length # should be bytes
        [x, l]
      } << [strings[-1], 0]
    end
    # string_common: list of strings/counts of number of /bytes/ matching with next
    # pp string_common
  end
  if string_common
    
  prefix_stack = [[0, false]] # sentinel
  pos = 0                     # mirror prefix_stack[-1][0]
  tag_no = argref(prefixes.size) # REF_TAG
  string_common.each do |s, l|
    if l > pos + 2 + $compression_hack
      if t = prefix_stack[-1][1] # if we still have a prefix left
        prefixes << CBOR::Tagged.new(t, s[pos...l])
      else
        prefixes << s[0...l]
      end
      prefix_stack << [l, tag_no]
      pos = l
      tag_no += 1
      tag_no = 225 if tag_no == REF_TAG+1 # skip 224, as we always can use 6 here
      tag_no = 28704 if tag_no == 256
      tag_no = 1879052288 if tag_no == 32768
    end
    if t = prefix_stack[-1][1] # if we still have a viable prefix left
      translate[s] = CBOR::Tagged.new(t, s[pos..-1])
    end
    # pop the prefix stack
    while l < pos
      prefix_stack.pop
      pos = prefix_stack[-1][0]
    end
    # pp prefix_stack
    # pp pos
  end
    
  end
  # pp [:TRANSLATE, translate]
  # pp [:PREFIXES, prefixes]
  # Note that we didn't compute a suffix table to mix in

  build_maps(count, match_array, translate, prefixes)  if CBOR_PACKED_MAPS

  match_array_size = match_array.size
  mixed = match_array_size + prefixes.size <= STRAIGHT_11_SIZE
  if mixed && match_array_size != 0
    # We need to shift by match_array_size
    translate = Hash[translate.map {|k, v|
                       PP.pp v, STDERR
                       newtag = v.tag
                       if newtag == REF_TAG
                         newtag = 224
                       end
                       # this works only if <= STRAIGHT_11_SIZE (above)
                       newtag += match_array_size
                       [k, CBOR::Tagged.new(newtag, v.value)]
                     }]
    prefixes = prefixes.map {|p|
      if CBOR::Tagged === p
        case p.tag
        when REF_TAG
          CBOR::Tagged.new(224 + match_array_size, p.value)
        when 224..255
          CBOR::Tagged.new(p.tag + match_array_size, p.value)
        else   # this works only if <= STRAIGHT_11_SIZE (above)
          p
        end
      else
        p
      end
    }
    # pp translate
    # pp prefixes
  end

  # Check match_array for direct entries of an arg-packed string
  match_array = match_array.map do |v|
    if r = translate[v]
      # puts "*** replacing #{v.inspect} by #{r.inspect}"
      r
    else
      v
    end
  end

  new(match_array, prefixes, translate, mixed)
end

.sharedref(i) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/cbor-packed.rb', line 73

def self.sharedref(i)
  if i < 16
    CBOR::Simple.new(i)
  else
    i -= 16
    CBOR::Tagged.new(REF_TAG, (i >> 1) ^ -(i & 1))
  end
end

Instance Method Details

#has(o) ⇒ Object



324
325
326
327
# File 'lib/cbor-packed.rb', line 324

def has(o)
  # warn [:HIT1].inspect if o == '688921AF59455E4D35D2FFE5FD21CA0598D42374'.xeh
  @hit[o]
end

#pack(pa) ⇒ Object



328
329
330
331
332
333
334
335
336
# File 'lib/cbor-packed.rb', line 328

def pack(pa)
  pa = pa.to_packed_cbor1(self) # should also sort by frequency...
  if @mixed
    mixed_array = @match_array + @argument_array
    CBOR::Tagged.new(MIXED_PACKED_TAG, [mixed_array, pa])
  else
    CBOR::Tagged.new(PACKED_TAG, [@match_array, @argument_array, pa])
  end
end