Class: Iriq::Storage::Memory

Inherits:
Object
  • Object
show all
Defined in:
lib/iriq/storage/memory.rb

Overview

Memory is the canonical backend — every other backend either wraps it (Json) or implements the same surface against an external store (Sqlite).

The contract is small enough to enumerate up top:

increment_host(host)
increment_path_length(length)
increment_raw_shape(shape)
increment_fingerprint(shape)
observe_position(position, value, type)        # position is Iriq::Position
add_to_cluster(key, host, scheme, shape, identifier)
record_observation(canonical)                  # append to source-IRI log

host_counts / path_length_counts / raw_shape_counts / fingerprint_counts
position_stats(position)
each_position_stats { |position, stats| ... }
each_observed_iri { |canonical| ... }
clear_materialized_views                       # for reinfer
clusters / cluster_size

transaction { ... }    # backends may batch within
flush                  # commit pending writes (no-op for Memory)
close                  # release resources

Direct Known Subclasses

Json

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(classifier: SegmentClassifier::DEFAULT, max_values_per_position: PositionStats::DEFAULT_MAX_VALUES) ⇒ Memory

Returns a new instance of Memory.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/iriq/storage/memory.rb', line 33

# Builds an empty store: every counter starts at zero, and no positions,
# clusters, observed IRIs or activated recognizers are recorded yet.
#
# @param classifier [Object] kept in @classifier; add_to_cluster passes it
#   to Cluster#add
# @param max_values_per_position [Object] kept in @max_values_per_position;
#   passed as max_values: to each PositionStats and Cluster created here
def initialize(
  classifier: SegmentClassifier::DEFAULT,
  max_values_per_position: PositionStats::DEFAULT_MAX_VALUES
)
  @classifier = classifier
  @max_values_per_position = max_values_per_position

  # Four independent counters; an unseen key reads as 0.
  @host_counts, @path_length_counts, @raw_shape_counts, @fingerprint_counts =
    Array.new(4) { Hash.new(0) }

  @position_stats = {}
  @clusters = {}

  # Source-IRI log. It is persisted next to the materialized views, but
  # it is the source of truth and the views are derived from it:
  # Corpus#reinfer drops the views and replays this log through
  # events + reducers.
  @observed_iris = []

  # Recognizers promoted from RecognizerProposal through
  # Corpus#activate_proposal. Each entry is a {prefix, type, specificity}
  # hash so that a reopen can re-synthesize them onto the corpus's
  # classifier.
  @activated_recognizers = []
end

Instance Attribute Details

#max_values_per_position ⇒ Object (readonly)

Returns the value of attribute max_values_per_position.



27
28
29
# File 'lib/iriq/storage/memory.rb', line 27

# Reader for the max_values_per_position setting held by this store.
#
# @return [Object] the current value of @max_values_per_position
def max_values_per_position; @max_values_per_position; end

Instance Method Details

#activated_recognizer_count ⇒ Object



125
126
127
# File 'lib/iriq/storage/memory.rb', line 125

# Number of entries currently held in the activated-recognizer list.
#
# @return [Integer]
def activated_recognizer_count
  @activated_recognizers.length
end

#add_to_cluster(key, host, scheme, shape, identifier) ⇒ Object



91
92
93
94
95
96
97
98
# File 'lib/iriq/storage/memory.rb', line 91

# Adds an identifier to the cluster stored under +key+, creating that
# cluster first when none exists yet.
#
# host, scheme and shape are only used when the cluster is created; an
# existing cluster is reused as-is.
#
# @param key [Object] lookup key into the cluster table
# @param host [Object] passed to Cluster.new as host:
# @param scheme [Object] passed to Cluster.new as scheme:
# @param shape [Object] passed to Cluster.new as shape:
# @param identifier [Object] handed to Cluster#add together with the
#   store's classifier
# @return [Cluster] the cluster that received the identifier
def add_to_cluster(key, host, scheme, shape, identifier)
  target = @clusters[key]
  unless target
    target = Cluster.new(
      key: key,
      host: host,
      scheme: scheme,
      shape: shape,
      max_values: @max_values_per_position
    )
    @clusters[key] = target
  end

  target.add(identifier, classifier: @classifier)
  target
end

#batch ⇒ Object



58
59
60
# File 'lib/iriq/storage/memory.rb', line 58

# Runs the given block straight away, with no arguments, and returns the
# block's result. This implementation does no batching of its own.
#
# @yield the work to run
# @return [Object] whatever the block returns
def batch; yield; end

#clear_materialized_views ⇒ Object

Drop every materialized view (host_counts, position_stats, clusters, …) without touching the source-IRI log. Corpus#reinfer calls this before replaying the log so views rebuild from scratch.



132
133
134
135
136
137
138
139
# File 'lib/iriq/storage/memory.rb', line 132

# Resets every materialized view to its empty state: the four counters
# become fresh zero-default hashes, and the position-stats and cluster
# tables become empty hashes. The source-IRI log and the activated
# recognizers are not touched.
#
# @return [Hash] the new, empty cluster table (value of the last assignment)
def clear_materialized_views
  @host_counts, @path_length_counts, @raw_shape_counts, @fingerprint_counts =
    Array.new(4) { Hash.new(0) }
  @position_stats = {}
  @clusters = {}
end

#close ⇒ Object



63
# File 'lib/iriq/storage/memory.rb', line 63

# Does nothing in this class; the body is empty.
#
# @return [nil]
def close
end

#cluster_for(key) ⇒ Object

O(1) lookup by cluster key — used by Corpus#normalize to pull the cluster’s param_stats for the URL being normalized. nil if no cluster has been observed under this key yet.



167
168
169
# File 'lib/iriq/storage/memory.rb', line 167

# Looks up a single cluster by its key in the cluster table.
#
# @param key [Object] cluster key
# @return [Object, nil] the stored cluster, or nil when the key is absent
def cluster_for(key); @clusters[key]; end

#cluster_size ⇒ Object



160
161
162
# File 'lib/iriq/storage/memory.rb', line 160

# Number of clusters currently stored.
#
# @return [Integer]
def cluster_size
  @clusters.length
end

#clusters ⇒ Object



156
157
158
# File 'lib/iriq/storage/memory.rb', line 156

# All stored clusters, without their keys, in the table's insertion order.
#
# @return [Array] a new array on every call
def clusters; @clusters.values; end

#each_activated_recognizer(&block) ⇒ Object



121
122
123
# File 'lib/iriq/storage/memory.rb', line 121

# Iterates the activated-recognizer list in insertion order by forwarding
# the block to the list's #each.
#
# @yield [recognizer] one stored entry per iteration
# @return [Object] whatever #each returns (for an Array: the list itself
#   with a block, an Enumerator without one)
def each_activated_recognizer(&block); @activated_recognizers.each(&block); end

#each_observed_iri(&block) ⇒ Object



107
108
109
# File 'lib/iriq/storage/memory.rb', line 107

# Iterates the source-IRI log in the order entries were appended by
# forwarding the block to the log's #each.
#
# @yield [canonical] one logged entry per iteration
# @return [Object] whatever #each returns (for an Array: the log itself
#   with a block, an Enumerator without one)
def each_observed_iri(&block); @observed_iris.each(&block); end

#each_position_stats(&block) ⇒ Object



152
153
154
# File 'lib/iriq/storage/memory.rb', line 152

# Iterates every (position, stats) pair in the position-stats table.
#
# @yield [position, stats] one table entry per iteration
# @return [Object] the table itself when a block is given, otherwise an
#   Enumerator over its pairs
def each_position_stats(&block)
  @position_stats.each_pair(&block)
end

#fingerprint_counts ⇒ Object



146
# File 'lib/iriq/storage/memory.rb', line 146

# Reader for the fingerprint counter table.
#
# @return [Hash] the live table itself, not a copy
def fingerprint_counts
  @fingerprint_counts
end

#flush ⇒ Object



62
# File 'lib/iriq/storage/memory.rb', line 62

# Does nothing in this class; the body is empty.
#
# @return [nil]
def flush
end

#host_counts ⇒ Object

— Reads ————————————————————



143
# File 'lib/iriq/storage/memory.rb', line 143

# Reader for the per-host counter table.
#
# @return [Hash] the live table itself, not a copy
def host_counts
  @host_counts
end

#increment_fingerprint(shape) ⇒ Object



82
83
84
# File 'lib/iriq/storage/memory.rb', line 82

# Bumps the fingerprint counter stored under +shape+ by one.
#
# @param shape [Object] counter key
# @return [Integer] the count after the increment
def increment_fingerprint(shape)
  previous = @fingerprint_counts[shape]
  @fingerprint_counts[shape] = previous + 1
end

#increment_host(host) ⇒ Object

— Increments ——————————————————-



70
71
72
# File 'lib/iriq/storage/memory.rb', line 70

# Bumps the counter stored under +host+ by one. A nil or false host is
# skipped and leaves the table unchanged.
#
# @param host [Object, nil] counter key
# @return [Integer, nil] the count after the increment, or nil when the
#   host was skipped
def increment_host(host)
  return unless host

  @host_counts[host] = @host_counts[host] + 1
end

#increment_path_length(length) ⇒ Object



74
75
76
# File 'lib/iriq/storage/memory.rb', line 74

# Bumps the counter stored under +length+ by one.
#
# @param length [Object] counter key
# @return [Integer] the count after the increment
def increment_path_length(length)
  previous = @path_length_counts[length]
  @path_length_counts[length] = previous + 1
end

#increment_raw_shape(shape) ⇒ Object



78
79
80
# File 'lib/iriq/storage/memory.rb', line 78

# Bumps the raw-shape counter stored under +shape+ by one.
#
# @param shape [Object] counter key
# @return [Integer] the count after the increment
def increment_raw_shape(shape)
  previous = @raw_shape_counts[shape]
  @raw_shape_counts[shape] = previous + 1
end

#load_dump!(h) ⇒ Object

— Bulk load (used by JSON backend) ——————————–



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/iriq/storage/memory.rb', line 173

# Replaces the whole state of this store with the contents of a dump hash
# (string keys, the same layout #to_dump produces).
#
# "host_counts", "path_length_counts", "raw_shape_counts",
# "fingerprint_counts" and "position_stats" are read with [] and must be
# present. "max_values_per_position", "clusterer", "observed_iris" and
# "activated_recognizers" fall back to a default when missing.
#
# @param h [Hash] dump hash with string keys
# @return [self]
def load_dump!(h)
  # Counters are merged into fresh zero-default hashes so unseen keys
  # keep reading as 0 after a load.
  @host_counts        = Hash.new(0).merge(h["host_counts"])
  # Path lengths are dumped with string keys; convert back to integers.
  @path_length_counts = Hash.new(0).merge(h["path_length_counts"].transform_keys(&:to_i))
  @raw_shape_counts   = Hash.new(0).merge(h["raw_shape_counts"])
  @fingerprint_counts = Hash.new(0).merge(h["fingerprint_counts"])
  @max_values_per_position = h.fetch("max_values_per_position", PositionStats::DEFAULT_MAX_VALUES)
  @position_stats = h["position_stats"].each_with_object({}) do |entry, acc|
    position = Position.from_dump(entry["position"])
    acc[position] = PositionStats.from_dump(entry["stats"])
  end
  cdump = h.fetch("clusterer", { "clusters" => {} })
  @clusters = cdump["clusters"].transform_values { |c| Cluster.from_dump(c, max_values: @max_values_per_position) }
  # Copy both logs instead of adopting the caller's arrays: they are
  # appended to in place later (record_observation,
  # record_activated_recognizer), and those appends must not write
  # through into the dump hash or fail on a frozen input array.
  @observed_iris         = h.fetch("observed_iris", []).dup
  @activated_recognizers = h.fetch("activated_recognizers", []).dup
  self
end

#observe_position(position, value, type) ⇒ Object



86
87
88
89
# File 'lib/iriq/storage/memory.rb', line 86

# Records one (value, type) observation against +position+, creating the
# PositionStats entry for that position on first use.
#
# @param position [Object] key into the position-stats table
# @param value [Object] forwarded to PositionStats#observe
# @param type [Object] forwarded to PositionStats#observe
# @return [Object] whatever PositionStats#observe returns
def observe_position(position, value, type)
  entry = @position_stats[position]
  unless entry
    entry = PositionStats.new(max_values: @max_values_per_position)
    @position_stats[position] = entry
  end
  entry.observe(value, type)
end

#observed_iri_count ⇒ Object



111
112
113
# File 'lib/iriq/storage/memory.rb', line 111

# Number of entries currently in the source-IRI log.
#
# @return [Integer]
def observed_iri_count
  @observed_iris.length
end

#path ⇒ Object

Path of the underlying file, if any. Memory backends are unpathed; Json/Sqlite override.



31
# File 'lib/iriq/storage/memory.rb', line 31

# File path backing this store. This implementation always answers nil.
#
# @return [nil]
def path
  nil
end

#path_length_counts ⇒ Object



144
# File 'lib/iriq/storage/memory.rb', line 144

# Reader for the path-length counter table.
#
# @return [Hash] the live table itself, not a copy
def path_length_counts
  @path_length_counts
end

#position_stats(position) ⇒ Object



148
149
150
# File 'lib/iriq/storage/memory.rb', line 148

# Looks up the stats entry recorded for a single position.
#
# @param position [Object] key into the position-stats table
# @return [Object, nil] the stored entry, or nil when the position has
#   no entry
def position_stats(position); @position_stats[position]; end

#raw_shape_counts ⇒ Object



145
# File 'lib/iriq/storage/memory.rb', line 145

# Reader for the raw-shape counter table.
#
# @return [Hash] the live table itself, not a copy
def raw_shape_counts
  @raw_shape_counts
end

#record_activated_recognizer(dump) ⇒ Object

— Activated recognizers (Corpus#activate_proposal) —————–



117
118
119
# File 'lib/iriq/storage/memory.rb', line 117

# Appends one recognizer dump to the end of the activated-recognizer list.
#
# @param dump [Object] entry to store; kept as given, not copied
# @return [Array] the list after the append
def record_activated_recognizer(dump)
  @activated_recognizers.push(dump)
end

#record_observation(canonical) ⇒ Object

Append a canonical IRI to the source-IRI log. Called by Corpus#observe after the event reducers have applied; the log is the source of truth that Corpus#reinfer replays.



103
104
105
# File 'lib/iriq/storage/memory.rb', line 103

# Appends one canonical IRI to the end of the source-IRI log. Nothing is
# deduplicated: recording the same value twice stores it twice.
#
# @param canonical [Object] entry to log; kept as given, not copied
# @return [Array] the log after the append
def record_observation(canonical)
  @observed_iris.push(canonical)
end

#save(path = nil) ⇒ Object

No-op for in-memory; subclasses override.



66
# File 'lib/iriq/storage/memory.rb', line 66

# Does nothing in this class; the argument is accepted and ignored.
#
# @param path [Object, nil] unused here
# @return [nil]
def save(path = nil)
end

#to_dump ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/iriq/storage/memory.rb', line 190

# Builds a string-keyed hash describing the whole store, in the layout
# load_dump! reads back.
#
# The host, raw-shape and fingerprint counters, the source-IRI log and
# the activated-recognizer list are placed in the result as the live
# objects, not copies. Path-length keys are converted with to_s. Position
# stats become an array of {"position", "stats"} hashes, and clusters
# are nested under "clusterer" => "clusters".
#
# @return [Hash]
def to_dump
  stats_rows = @position_stats.each_with_object([]) do |(position, stats), rows|
    rows << { "position" => position.to_dump, "stats" => stats.dump }
  end
  cluster_dumps = @clusters.transform_values { |cluster| cluster.dump }
  stringified_lengths = @path_length_counts.transform_keys { |length| length.to_s }

  {
    "host_counts"             => @host_counts,
    "path_length_counts"      => stringified_lengths,
    "raw_shape_counts"        => @raw_shape_counts,
    "fingerprint_counts"      => @fingerprint_counts,
    "max_values_per_position" => @max_values_per_position,
    "position_stats"          => stats_rows,
    "clusterer"               => { "clusters" => cluster_dumps },
    "observed_iris"           => @observed_iris,
    "activated_recognizers"   => @activated_recognizers
  }
end

#transaction {|_self| ... } ⇒ Object

Yields:

  • (_self)

Yield Parameters:



54
55
56
# File 'lib/iriq/storage/memory.rb', line 54

# Runs the given block straight away, passing this store as the block
# argument, and returns the block's result. This implementation adds no
# transactional behavior of its own.
#
# @yieldparam _self [self] this store
# @return [Object] whatever the block returns
def transaction; yield(self); end