Class: Iriq::PositionStats

Inherits:
Object
  • Object
show all
Defined in:
lib/iriq/position_stats.rb

Overview

Rolling frequency counts for a single (host, prefix-shape, position). Value cardinality is capped so that a high-entropy position (UUIDs, timestamps) doesn’t grow memory without bound — `total` keeps growing accurately, but only the first `max_values` distinct values are tracked individually.

Constant Summary collapse

DEFAULT_MAX_VALUES =
1_000

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(max_values: DEFAULT_MAX_VALUES) ⇒ PositionStats

Returns a new instance of PositionStats.



11
12
13
14
15
16
# File 'lib/iriq/position_stats.rb', line 11

# Builds an empty stats object.
#
# Both count tables are hashes whose missing keys read as 0, so callers can
# increment a key without initialising it first.
#
# @param max_values [Integer] cap on how many distinct values are tracked
#   individually (defaults to DEFAULT_MAX_VALUES)
def initialize(max_values: DEFAULT_MAX_VALUES)
  # Configuration first, then the running tallies.
  @max_values = max_values
  @total = 0
  @type_counts = Hash.new(0)
  @value_counts = Hash.new(0)
end

Instance Attribute Details

#max_values ⇒ Object (readonly)

Returns the value of attribute max_values.



9
10
11
# File 'lib/iriq/position_stats.rb', line 9

# Cap on how many distinct values are tracked individually, exactly as it was
# supplied to the constructor via `max_values:`.
#
# @return [Object] the stored `@max_values` (presumably an Integer, since it
#   is compared against a hash size — confirm with callers)
def max_values
  @max_values
end

#total ⇒ Object (readonly)

Returns the value of attribute total.



9
10
11
# File 'lib/iriq/position_stats.rb', line 9

# Number of observations recorded so far. Starts at 0 and is incremented by
# one on every `observe` call, including calls whose value is not tracked
# individually because the cap was reached.
#
# @return [Integer] the stored `@total`
def total
  @total
end

#type_counts ⇒ Object (readonly)

Returns the value of attribute type_counts.



9
10
11
# File 'lib/iriq/position_stats.rb', line 9

# Per-type observation counts, keyed by the `type` argument given to
# `observe`. Missing keys read as 0.
#
# Note: this returns the live internal Hash, not a copy.
#
# @return [Hash] the stored `@type_counts`
def type_counts
  @type_counts
end

#value_counts ⇒ Object (readonly)

Returns the value of attribute value_counts.



9
10
11
# File 'lib/iriq/position_stats.rb', line 9

# Per-value observation counts, keyed by the `value` argument given to
# `observe`. Only values admitted under the `max_values` cap appear here.
# Missing keys read as 0.
#
# Note: this returns the live internal Hash, not a copy.
#
# @return [Hash] the stored `@value_counts`
def value_counts
  @value_counts
end

Class Method Details

.from_dump(h) ⇒ Object



54
55
56
57
58
59
60
61
62
# File 'lib/iriq/position_stats.rb', line 54

# Rebuilds a stats object from the string-keyed Hash produced by `#dump`.
#
# Both count tables are rebuilt as fresh hashes whose missing keys read as 0,
# and type keys are converted back from Strings to Symbols.
#
# @param h [Hash] must contain "max_values", "total", "value_counts" and
#   "type_counts"
# @return [Object] a new instance carrying the restored state
def self.from_dump(h)
  stats = new(max_values: h["max_values"])

  # Instance variable => restored state, assigned in one pass below.
  restored = {
    :@total        => h["total"],
    :@value_counts => Hash.new(0).update(h["value_counts"]),
    :@type_counts  => Hash.new(0).update(h["type_counts"].transform_keys(&:to_sym)),
  }
  restored.each { |ivar, state| stats.instance_variable_set(ivar, state) }

  stats
end

Instance Method Details

#cardinality ⇒ Object



26
27
28
# File 'lib/iriq/position_stats.rb', line 26

# Number of distinct values currently tracked individually.
#
# @return [Integer] number of keys in the value-count table
def cardinality
  @value_counts.length
end

#dump ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/iriq/position_stats.rb', line 45

# Serialises the stats into a plain, string-keyed Hash that `.from_dump`
# accepts.
#
# The result is a snapshot: both count tables are copies, so later `observe`
# calls or edits to the returned Hash cannot affect each other.
#
# @return [Hash{String => Object}] with keys "value_counts", "type_counts"
#   (type keys converted to Strings), "total" and "max_values"
def dump
  {
    # dup rather than the live hash; transform_keys below already copies.
    "value_counts" => @value_counts.dup,
    "type_counts"  => @type_counts.transform_keys(&:to_s),
    "total"        => @total,
    "max_values"   => @max_values,
  }
end

#observe(value, type) ⇒ Object



18
19
20
21
22
23
24
# File 'lib/iriq/position_stats.rb', line 18

# Records one observation of `value`, classified as `type`.
#
# The overall total and the per-type tally always advance. The per-value
# tally advances only if the value is already tracked or there is still room
# under the `max_values` cap.
#
# @param value [Object] the observed value
# @param type [Object] the type assigned to the value
# @return [Integer, nil] the value's updated count, or nil when it was not
#   tracked because the cap has been reached
def observe(value, type)
  @total += 1
  @type_counts[type] += 1

  at_capacity = @value_counts.size >= @max_values
  return if at_capacity && !@value_counts.key?(value)

  @value_counts[value] += 1
end

#value_fraction(value) ⇒ Object



39
40
41
42
43
# File 'lib/iriq/position_stats.rb', line 39

# Share of all observations that were exactly `value`.
#
# @param value [Object] the value to look up
# @return [Float] the value's tracked count divided by the total; 0.0 when
#   nothing has been observed yet or the value is not tracked
def value_fraction(value)
  if @total.zero?
    0.0
  else
    seen = @value_counts[value] || 0
    seen.to_f / @total
  end
end

#variable_fraction(classifier) ⇒ Object

Fraction of observations whose type was variable (i.e. classifier said not :literal).



32
33
34
35
36
37
# File 'lib/iriq/position_stats.rb', line 32

# Share of observations whose type the classifier considers variable.
#
# @param classifier [#variable?] called once per recorded type with that type
# @return [Float] summed counts of variable types divided by the total; 0.0
#   when nothing has been observed yet
def variable_fraction(classifier)
  return 0.0 if @total.zero?

  variable_types = @type_counts.select { |type, _count| classifier.variable?(type) }
  variable_types.values.sum.to_f / @total
end