Class: Philiprehberger::Counter

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/philiprehberger/counter.rb,
lib/philiprehberger/counter/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

VERSION =
'0.4.0'

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(enumerable = nil) ⇒ Counter

Create a new Counter from an enumerable

Parameters:

  • enumerable (Enumerable, nil) (defaults to: nil)

    initial elements to count

Raises:

  • (Error)

    if enumerable is given but is not an Enumerable

15
16
17
18
19
20
21
22
# File 'lib/philiprehberger/counter.rb', line 15

# Build a counter, optionally tallying the elements of +enumerable+.
#
# The backing hash defaults to 0, so keys that were never counted read as 0.
#
# @param enumerable [Enumerable, nil] initial elements to count; nil (or
#   false) leaves the counter empty
# @raise [Error] if a truthy argument is not Enumerable
def initialize(enumerable = nil)
  @counts = Hash.new(0)
  if enumerable
    raise Error, 'argument must be Enumerable' unless enumerable.is_a?(Enumerable)

    enumerable.each do |element|
      @counts[element] += 1
    end
  end
end

Class Method Details

.from_json(str) ⇒ Counter

Deserialize a counter from a JSON string

Parameters:

  • str (String)

    JSON string

Returns:

  • (Counter)

    counter rebuilt from the JSON data

207
208
209
210
211
212
# File 'lib/philiprehberger/counter.rb', line 207

# Rebuild a counter from its JSON representation.
#
# The parsed data is iterated as key/count pairs and each pair is fed to
# #increment on a fresh counter.
#
# @param str [String] JSON text, expected to be an object mapping keys to counts
# @return [Counter] the newly built counter
def self.from_json(str)
  parsed = JSON.parse(str)
  new.tap do |restored|
    parsed.each { |key, count| restored.increment(key, count) }
  end
end

Instance Method Details

#[](key) ⇒ Integer

Get count for a key

Parameters:

  • key (Object)

Returns:

  • (Integer)


28
29
30
# File 'lib/philiprehberger/counter.rb', line 28

# Look up the count recorded for +key+.
#
# The lookup goes straight to the backing hash, so an unseen key yields that
# hash's default (0 when the hash was created by #initialize) and is not
# inserted.
#
# @param key [Object]
# @return [Integer]
def [](key) = @counts[key]

#decrement(key, n = 1) ⇒ Integer

Decrement count for a key, floored at zero

Parameters:

  • key (Object)
  • n (Integer) (defaults to: 1)

    amount to decrement

Returns:

  • (Integer)

    new count



46
47
48
# File 'lib/philiprehberger/counter.rb', line 46

# Lower the count for +key+ by +n+, never letting it drop below zero.
#
# The result is always written back, so decrementing an unseen key stores
# that key with a count of 0.
#
# @param key [Object]
# @param n [Integer] amount to subtract
# @return [Integer] the count stored after the decrement
def decrement(key, n = 1)
  lowered = @counts[key] - n
  @counts[key] = lowered.negative? ? 0 : lowered
end

#delete(key) ⇒ Integer?

Delete a key entirely from the counter

Parameters:

  • key (Object)

Returns:

  • (Integer, nil)

    the count that was removed, or nil if key not present



174
175
176
# File 'lib/philiprehberger/counter.rb', line 174

# Remove +key+ and its count from the counter.
#
# @param key [Object]
# @return [Integer, nil] the count that was removed, or nil when the key
#   was not present
def delete(key) = @counts.delete(key)

#each {|key, count| ... } ⇒ Object

Iterate over key-count pairs

Yields:

  • (key, count)


261
262
263
# File 'lib/philiprehberger/counter.rb', line 261

# Iterate over key/count pairs.
#
# With a block, every pair is yielded and the counter itself is returned,
# so the internal hash is not handed out to callers. Without a block, the
# backing hash's enumerator is returned.
#
# @yield [key, count] each tracked key with its count
# @return [self] when a block is given
# @return [Enumerator] when no block is given
def each(&block)
  return @counts.each unless block

  @counts.each(&block)
  self
end

#entropy ⇒ Float

Shannon entropy of the count distribution in bits

Returns:

  • (Float)

    entropy in bits, 0.0 for empty or single-key counters



158
159
160
161
162
163
164
165
166
167
168
# File 'lib/philiprehberger/counter.rb', line 158

# Shannon entropy, in bits, of the distribution formed by the counts.
#
# Each count is divided by #total to get its share; zero counts contribute
# nothing to the sum.
#
# @return [Float] 0.0 when the total is zero
def entropy
  grand_total = total.to_f
  return 0.0 if grand_total.zero?

  @counts.sum do |_key, occurrences|
    if occurrences.zero?
      0.0
    else
      share = occurrences / grand_total
      -share * Math.log2(share)
    end
  end
end

#filter_by_count(min: nil, max: nil) ⇒ Counter

Filter entries by count range

Parameters:

  • min (Integer, nil) (defaults to: nil)

    minimum count (inclusive)

  • max (Integer, nil) (defaults to: nil)

    maximum count (inclusive)

Returns:

  • (Counter)

    new counter with filtered entries



83
84
85
86
87
88
89
90
91
92
# File 'lib/philiprehberger/counter.rb', line 83

# Build a new counter holding only the entries whose count lies within the
# given bounds. Both bounds are inclusive; a nil bound is not applied.
#
# @param min [Integer, nil] lowest count to keep
# @param max [Integer, nil] highest count to keep
# @return [Counter] new counter with the surviving entries
def filter_by_count(min: nil, max: nil)
  Counter.new.tap do |kept|
    each do |key, count|
      out_of_range = (min && count < min) || (max && count > max)
      kept.increment(key, count) unless out_of_range
    end
  end
end

#increment(key, n = 1) ⇒ Integer

Increment count for a key

Parameters:

  • key (Object)
  • n (Integer) (defaults to: 1)

    amount to increment

Returns:

  • (Integer)

    new count



37
38
39
# File 'lib/philiprehberger/counter.rb', line 37

# Raise the count for +key+ by +n+.
#
# @param key [Object]
# @param n [Integer] amount to add
# @return [Integer] the count stored after the increment
def increment(key, n = 1)
  @counts[key] = @counts[key] + n
end

#keys ⇒ Array

Return all tracked keys

Returns:

  • (Array)


240
241
242
# File 'lib/philiprehberger/counter.rb', line 240

# List every key currently held in the backing hash.
#
# @return [Array]
def keys = @counts.keys

#least_common(n = nil) ⇒ Array<Array>

Return the n least common elements and their counts

Parameters:

  • n (Integer, nil) (defaults to: nil)

    number of elements to return

Returns:

  • (Array<Array>)

    array of [key, count] pairs



107
108
109
110
# File 'lib/philiprehberger/counter.rb', line 107

# Return [key, count] pairs ordered from lowest count to highest.
#
# @param n [Integer, nil] how many pairs to return; nil returns all of them
# @return [Array<Array>] array of [key, count] pairs
def least_common(n = nil)
  ascending = @counts.sort_by(&:last)
  return ascending unless n

  ascending.first(n)
end

#max_count ⇒ Array?

Return the [key, count] pair with the highest count

Returns:

  • (Array, nil)

    [key, count] pair, or nil if empty



181
182
183
184
185
# File 'lib/philiprehberger/counter.rb', line 181

# Find the entry with the highest count.
#
# @return [Array, nil] the [key, count] pair, or nil when the counter is
#   empty (max_by itself yields nil for an empty hash)
def max_count
  @counts.max_by(&:last)
end

#merge(other) ⇒ Counter

Merge another counter with this one, returning a new counter (neither counter is modified)

Parameters:

  • other (Counter)

    counter whose counts are added

Returns:

  • (Counter)

    new counter with merged counts

Raises:

  • (Error)

    if other is not a Counter



123
124
125
126
127
128
129
130
# File 'lib/philiprehberger/counter.rb', line 123

# Combine this counter with +other+ into a brand-new counter by adding up
# the counts of both. Neither operand is modified.
#
# @param other [Counter]
# @return [Counter] new counter with merged counts
# @raise [Error] if +other+ is not a Counter
def merge(other)
  raise Error, 'argument must be a Counter' unless other.is_a?(Counter)

  [self, other].each_with_object(Counter.new) do |source, combined|
    source.each { |key, count| combined.increment(key, count) }
  end
end

#min_count ⇒ Array?

Return the [key, count] pair with the lowest count

Returns:

  • (Array, nil)

    [key, count] pair, or nil if empty



190
191
192
193
194
# File 'lib/philiprehberger/counter.rb', line 190

# Find the entry with the lowest count.
#
# @return [Array, nil] the [key, count] pair, or nil when the counter is
#   empty (min_by itself yields nil for an empty hash)
def min_count
  @counts.min_by(&:last)
end

#most_common(n = nil) ⇒ Array<Array>

Return the n most common elements and their counts

Parameters:

  • n (Integer, nil) (defaults to: nil)

    number of elements to return

Returns:

  • (Array<Array>)

    array of [key, count] pairs



98
99
100
101
# File 'lib/philiprehberger/counter.rb', line 98

# Return [key, count] pairs ordered from highest count to lowest.
#
# @param n [Integer, nil] how many pairs to return; nil returns all of them
# @return [Array<Array>] array of [key, count] pairs
def most_common(n = nil)
  descending = @counts.sort_by { |pair| -pair.last }
  return descending unless n

  descending.first(n)
end

#percentage(key) ⇒ Float

Get the percentage of a key relative to total

Parameters:

  • key (Object)

Returns:

  • (Float)


149
150
151
152
153
# File 'lib/philiprehberger/counter.rb', line 149

# Share of the overall total held by +key+, expressed as a percentage.
#
# @param key [Object]
# @return [Float] 0.0 when the total is zero
def percentage(key)
  sum_of_counts = total
  return 0.0 if sum_of_counts.zero?

  @counts[key].to_f / sum_of_counts * 100
end

#reset(key = nil) ⇒ void

This method returns an undefined value.

Reset counts — clear a specific key or all counts

Parameters:

  • key (Object, nil) (defaults to: nil)

    key to reset, or nil to clear all



54
55
56
57
58
59
60
# File 'lib/philiprehberger/counter.rb', line 54

# Forget counts: a single key when one is given, otherwise everything.
#
# Because nil selects "clear all", a nil key cannot be reset on its own
# through this method.
#
# @param key [Object, nil] key to drop, or nil to clear all counts
# @return [void]
def reset(key = nil)
  return @counts.clear if key.nil?

  @counts.delete(key)
end

#sample(n = 1) ⇒ Object, Array

Weighted random sample based on counts

Parameters:

  • n (Integer) (defaults to: 1)

    number of samples (default 1)

Returns:

  • (Object, Array)

    single item when n is 1 (nil if nothing can be sampled), otherwise an array of n items (empty if nothing can be sampled)



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/philiprehberger/counter.rb', line 218

# Draw keys at random, each with probability proportional to its count.
#
# Draws are independent of one another (sampling with replacement). Keys
# whose count is zero or negative are never returned. Selection walks the
# running sum of the counts instead of expanding every key into +count+
# array slots, so memory use does not grow with the magnitude of the counts.
#
# NOTE: assumes counts are Integers.
#
# @param n [Integer] number of draws
# @return [Object, nil] a single key when n is 1, or nil if nothing can be drawn
# @return [Array] n keys when n is not 1, or an empty array if nothing can be drawn
def sample(n = 1)
  weighted = @counts.select { |_key, weight| weight.positive? }
  return (n == 1 ? nil : []) if weighted.empty?

  total_weight = weighted.each_value.sum
  draw = lambda do
    # Pick a point in [0, total_weight) and find the key whose cumulative
    # range contains it.
    remaining = rand(total_weight)
    weighted.find { |_key, weight| (remaining -= weight).negative? }&.first
  end

  n == 1 ? draw.call : Array.new(n) { draw.call }
end

#size ⇒ Integer

Number of unique keys

Returns:

  • (Integer)


268
269
270
# File 'lib/philiprehberger/counter.rb', line 268

# Count the distinct keys held in the backing hash.
#
# @return [Integer]
def size
  @counts.length
end

#subtract(other) ⇒ Counter

Subtract another counter's counts from this one, returning a new counter (neither counter is modified; resulting counts may be negative)

Parameters:

  • other (Counter)

    counter whose counts are subtracted

Returns:

  • (Counter)

    new counter with subtracted counts

Raises:

  • (Error)

    if other is not a Counter



136
137
138
139
140
141
142
143
# File 'lib/philiprehberger/counter.rb', line 136

# Build a new counter holding this counter's counts minus those of +other+.
# Neither operand is modified. The subtraction is done by incrementing with
# negated counts, so no flooring is applied here.
#
# @param other [Counter]
# @return [Counter] new counter with subtracted counts
# @raise [Error] if +other+ is not a Counter
def subtract(other)
  raise Error, 'argument must be a Counter' unless other.is_a?(Counter)

  Counter.new.tap do |difference|
    each { |key, count| difference.increment(key, count) }
    other.each { |key, count| difference.increment(key, -count) }
  end
end

#to_h ⇒ Hash

Convert to a plain hash

Returns:

  • (Hash)


254
255
256
# File 'lib/philiprehberger/counter.rb', line 254

# Export the counts as a Hash.
#
# The result is a shallow copy made with dup: changing it does not affect
# the counter, and it keeps the backing hash's default value.
#
# @return [Hash]
def to_h = @counts.dup

#to_json(*_args) ⇒ String

Serialize the counter to a JSON string

Returns:

  • (String)


199
200
201
# File 'lib/philiprehberger/counter.rb', line 199

# Serialize the counts as a JSON object.
#
# Any arguments are forwarded to Hash#to_json. The JSON library passes its
# generator state or options this way, so formatting settings (for example
# pretty-printing, or serializing a counter nested inside another structure)
# are honoured instead of being dropped.
#
# @param args [Array] generator state or options supplied by the JSON library
# @return [String]
def to_json(*args)
  @counts.to_json(*args)
end

#total ⇒ Integer

Total of all counts

Returns:

  • (Integer)


115
116
117
# File 'lib/philiprehberger/counter.rb', line 115

# Add up the counts of every key.
#
# @return [Integer] 0 when nothing is tracked
def total
  @counts.each_value.sum
end

#update(data) ⇒ self

Batch update counts from a Hash or Enumerable

Parameters:

  • data (Hash, Enumerable)

    Hash of key => count, or Enumerable of items to count

Returns:

  • (self)


66
67
68
69
70
71
72
73
74
75
76
# File 'lib/philiprehberger/counter.rb', line 66

# Add many counts at once.
#
# A Hash is read as key => amount and each amount is added to that key's
# count. Any other Enumerable is read as a list of items, each adding 1.
# The Hash check comes first because a Hash is also Enumerable.
#
# @param data [Hash, Enumerable]
# @return [self]
# @raise [Error] if +data+ is neither a Hash nor an Enumerable
def update(data)
  if data.is_a?(Hash)
    data.each { |key, amount| @counts[key] += amount }
  elsif data.is_a?(Enumerable)
    data.each { |element| @counts[element] += 1 }
  else
    raise Error, 'argument must be a Hash or Enumerable'
  end

  self
end

#values ⇒ Array<Integer>

Return all count values

Returns:

  • (Array<Integer>)


247
248
249
# File 'lib/philiprehberger/counter.rb', line 247

# List the count stored for every key, in the backing hash's order.
#
# @return [Array<Integer>]
def values = @counts.values