Class: Iriq::Storage::Memory
- Inherits:
-
Object
- Object
- Iriq::Storage::Memory
- Defined in:
- lib/iriq/storage/memory.rb
Overview
Memory is the canonical backend — every other backend either wraps it (Json) or implements the same surface against an external store (Sqlite).
The contract is small enough to enumerate up top:
increment_host(host)
increment_path_length(length)
increment_raw_shape(shape)
increment_fingerprint(shape)
observe_position(position, value, type) # position is Iriq::Position
add_to_cluster(key, host, scheme, shape, identifier)
record_observation(canonical) # append to source-IRI log
host_counts / path_length_counts / raw_shape_counts / fingerprint_counts
position_stats(position)
each_position_stats { |position, stats| ... }
each_observed_iri { |canonical| ... }
clear_materialized_views # for reinfer
clusters / cluster_size
transaction { ... } # backends may batch within
flush # commit pending writes (no-op for Memory)
close # release resources
Direct Known Subclasses
Instance Attribute Summary collapse
-
#max_values_per_position ⇒ Object
readonly
Returns the value of attribute max_values_per_position.
Instance Method Summary collapse
- #activated_recognizer_count ⇒ Object
- #add_to_cluster(key, host, scheme, shape, identifier) ⇒ Object
- #batch ⇒ Object
-
#clear_materialized_views ⇒ Object
Drop every materialized view (host_counts, position_stats, clusters, …) without touching the source-IRI log.
- #close ⇒ Object
-
#cluster_for(key) ⇒ Object
O(1) lookup by cluster key — used by Corpus#normalize to pull the cluster’s param_stats for the URL being normalized.
- #cluster_size ⇒ Object
- #clusters ⇒ Object
- #each_activated_recognizer(&block) ⇒ Object
- #each_observed_iri(&block) ⇒ Object
- #each_position_stats(&block) ⇒ Object
- #fingerprint_counts ⇒ Object
- #flush ⇒ Object
-
#host_counts ⇒ Object
— Reads ————————————————————.
- #increment_fingerprint(shape) ⇒ Object
-
#increment_host(host) ⇒ Object
— Increments ——————————————————-.
- #increment_path_length(length) ⇒ Object
- #increment_raw_shape(shape) ⇒ Object
-
#initialize(classifier: SegmentClassifier::DEFAULT, max_values_per_position: PositionStats::DEFAULT_MAX_VALUES) ⇒ Memory
constructor
A new instance of Memory.
-
#load_dump!(h) ⇒ Object
— Bulk load (used by JSON backend) ——————————–.
- #observe_position(position, value, type) ⇒ Object
- #observed_iri_count ⇒ Object
-
#path ⇒ Object
Path of the underlying file, if any.
- #path_length_counts ⇒ Object
- #position_stats(position) ⇒ Object
- #raw_shape_counts ⇒ Object
-
#record_activated_recognizer(dump) ⇒ Object
— Activated recognizers (Corpus#activate_proposal) —————–.
-
#record_observation(canonical) ⇒ Object
Append a canonical IRI to the source-IRI log.
-
#save(path = nil) ⇒ Object
No-op for in-memory; subclasses override.
- #to_dump ⇒ Object
- #transaction {|_self| ... } ⇒ Object
Constructor Details
#initialize(classifier: SegmentClassifier::DEFAULT, max_values_per_position: PositionStats::DEFAULT_MAX_VALUES) ⇒ Memory
Returns a new instance of Memory.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/iriq/storage/memory.rb', line 33
# Builds an empty in-memory store.
#
# @param classifier the classifier later passed to Cluster#add by #add_to_cluster
# @param max_values_per_position the cap forwarded as `max_values:` to every
#   PositionStats / Cluster this store creates
def initialize(classifier: SegmentClassifier::DEFAULT,
               max_values_per_position: PositionStats::DEFAULT_MAX_VALUES)
  @classifier = classifier
  @max_values_per_position = max_values_per_position

  # Materialized views. The four counters default missing keys to 0 so the
  # increment_* methods never have to seed an entry first.
  @host_counts, @path_length_counts, @raw_shape_counts, @fingerprint_counts =
    Hash.new(0), Hash.new(0), Hash.new(0), Hash.new(0)
  @position_stats = {}
  @clusters = {}

  # The source-IRI log. Persisted alongside materialized views; the
  # log is the source of truth, the views are derived. Corpus#reinfer
  # drops the views and replays the log through events + reducers.
  @observed_iris = []

  # Recognizers promoted from RecognizerProposal via
  # Corpus#activate_proposal. Stored as {prefix, type, specificity}
  # hashes so reopens can re-synthesize them onto the corpus's
  # classifier.
  @activated_recognizers = []
end
Instance Attribute Details
#max_values_per_position ⇒ Object (readonly)
Returns the value of attribute max_values_per_position.
27 28 29 |
# File 'lib/iriq/storage/memory.rb', line 27
# Read-only accessor for the per-position value cap this store was
# configured (or loaded) with.
#
# @return the value of @max_values_per_position
def max_values_per_position
  @max_values_per_position
end
Instance Method Details
#activated_recognizer_count ⇒ Object
125 126 127 |
# File 'lib/iriq/storage/memory.rb', line 125
# Number of activated-recognizer dumps currently recorded.
#
# @return [Integer]
def activated_recognizer_count
  @activated_recognizers.length
end
#add_to_cluster(key, host, scheme, shape, identifier) ⇒ Object
91 92 93 94 95 96 97 98 |
# File 'lib/iriq/storage/memory.rb', line 91
# Adds an identifier to the cluster stored under `key`, creating the
# cluster on first use.
#
# host, scheme and shape are only consulted when the cluster does not exist
# yet; an existing cluster is reused as-is.
#
# @return the cluster the identifier was added to
def add_to_cluster(key, host, scheme, shape, identifier)
  @clusters[key] ||= Cluster.new(
    key: key,
    host: host,
    scheme: scheme,
    shape: shape,
    max_values: @max_values_per_position,
  )
  @clusters[key].tap do |target|
    target.add(identifier, classifier: @classifier)
  end
end
#batch ⇒ Object
58 59 60 |
# File 'lib/iriq/storage/memory.rb', line 58
# Runs the given block immediately, passing it no arguments.
#
# @yield the work to run
# @return whatever the block returns
def batch
  yield
end
#clear_materialized_views ⇒ Object
Drop every materialized view (host_counts, position_stats, clusters, …) without touching the source-IRI log. Corpus#reinfer calls this before replaying the log so views rebuild from scratch.
132 133 134 135 136 137 138 139 |
# File 'lib/iriq/storage/memory.rb', line 132
# Resets every derived view to a fresh empty container: the four counters,
# the position stats and the clusters. @observed_iris and
# @activated_recognizers are not touched.
#
# @return [Hash] the new, empty clusters hash (value of the last assignment)
def clear_materialized_views
  @host_counts, @path_length_counts, @raw_shape_counts, @fingerprint_counts =
    Hash.new(0), Hash.new(0), Hash.new(0), Hash.new(0)
  @position_stats = {}
  @clusters = {}
end
#close ⇒ Object
63 |
# File 'lib/iriq/storage/memory.rb', line 63
# Part of the storage contract ("release resources"); this implementation
# has an empty body.
#
# @return [nil]
def close
end
#cluster_for(key) ⇒ Object
O(1) lookup by cluster key — used by Corpus#normalize to pull the cluster’s param_stats for the URL being normalized. nil if no cluster has been observed under this key yet.
167 168 169 |
# File 'lib/iriq/storage/memory.rb', line 167
# Looks up a single cluster by its key with a direct hash read.
#
# @param key the cluster key
# @return the cluster stored under `key`, or nil when none is stored
def cluster_for(key)
  @clusters[key]
end
#cluster_size ⇒ Object
160 161 162 |
# File 'lib/iriq/storage/memory.rb', line 160
# Number of clusters currently held.
#
# @return [Integer]
def cluster_size
  @clusters.length
end
#clusters ⇒ Object
156 157 158 |
# File 'lib/iriq/storage/memory.rb', line 156
# All clusters, without their keys, in insertion order.
#
# @return [Array] a new array on every call
def clusters
  @clusters.each_value.to_a
end
#each_activated_recognizer(&block) ⇒ Object
121 122 123 |
# File 'lib/iriq/storage/memory.rb', line 121
# Iterates the recorded activated-recognizer dumps in insertion order.
#
# @yield [dump] each recorded dump
# @return the underlying array when a block is given, otherwise an Enumerator
def each_activated_recognizer(&block)
  @activated_recognizers.each(&block)
end
#each_observed_iri(&block) ⇒ Object
107 108 109 |
# File 'lib/iriq/storage/memory.rb', line 107
# Iterates the source-IRI log in the order entries were recorded.
#
# @yield [canonical] each logged IRI
# @return the underlying array when a block is given, otherwise an Enumerator
def each_observed_iri(&block)
  @observed_iris.each(&block)
end
#each_position_stats(&block) ⇒ Object
152 153 154 |
# File 'lib/iriq/storage/memory.rb', line 152
# Iterates every (position, stats) pair held in the position-stats hash.
#
# @yield [position, stats]
# @return the underlying hash when a block is given, otherwise an Enumerator
def each_position_stats(&block)
  @position_stats.each(&block)
end
#fingerprint_counts ⇒ Object
146 |
# File 'lib/iriq/storage/memory.rb', line 146
# Fingerprint-shape counter.
#
# @return [Hash] the live internal hash, not a copy
def fingerprint_counts
  @fingerprint_counts
end
#flush ⇒ Object
62 |
# File 'lib/iriq/storage/memory.rb', line 62
# Part of the storage contract ("commit pending writes"); a no-op for the
# in-memory store.
#
# @return [nil]
def flush
end
#host_counts ⇒ Object
— Reads ————————————————————
143 |
# File 'lib/iriq/storage/memory.rb', line 143
# Host counter.
#
# @return [Hash] the live internal hash, not a copy
def host_counts
  @host_counts
end
#increment_fingerprint(shape) ⇒ Object
82 83 84 |
# File 'lib/iriq/storage/memory.rb', line 82
# Adds one to the counter for the given fingerprint shape.
#
# @param shape the fingerprint shape used as the counter key
# @return [Integer] the updated count
def increment_fingerprint(shape)
  @fingerprint_counts[shape] = @fingerprint_counts[shape] + 1
end
#increment_host(host) ⇒ Object
— Increments ——————————————————-
70 71 72 |
# File 'lib/iriq/storage/memory.rb', line 70
# Adds one to the counter for `host`. A nil or false host is ignored, so
# nothing is counted for it.
#
# @param host the host used as the counter key
# @return [Integer, nil] the updated count, or nil when the host was skipped
def increment_host(host)
  return unless host

  @host_counts[host] += 1
end
#increment_path_length(length) ⇒ Object
74 75 76 |
# File 'lib/iriq/storage/memory.rb', line 74
# Adds one to the counter for the given path length.
#
# @param length the path length used as the counter key
# @return [Integer] the updated count
def increment_path_length(length)
  @path_length_counts.store(length, @path_length_counts[length] + 1)
end
#increment_raw_shape(shape) ⇒ Object
78 79 80 |
# File 'lib/iriq/storage/memory.rb', line 78
# Adds one to the counter for the given raw shape.
#
# @param shape the raw shape used as the counter key
# @return [Integer] the updated count
def increment_raw_shape(shape)
  bumped = @raw_shape_counts[shape] + 1
  @raw_shape_counts[shape] = bumped
end
#load_dump!(h) ⇒ Object
— Bulk load (used by JSON backend) ——————————–
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/iriq/storage/memory.rb', line 173
# Replaces this store's entire state with the contents of a dump hash
# (string keys, the layout emitted by #to_dump).
#
# Any section that is absent from the dump (or nil) loads as empty, so a
# partial dump yields an empty view instead of a NoMethodError/TypeError.
#
# @param h [Hash] the dump to load; it is never mutated
# @return [self]
def load_dump!(h)
  @host_counts = Hash.new(0).merge(h["host_counts"] || {})
  # Path lengths are dumped with string keys; restore the integer keys.
  @path_length_counts = Hash.new(0).merge((h["path_length_counts"] || {}).transform_keys(&:to_i))
  @raw_shape_counts = Hash.new(0).merge(h["raw_shape_counts"] || {})
  @fingerprint_counts = Hash.new(0).merge(h["fingerprint_counts"] || {})

  # Block form: the default constant is only resolved when the key is missing.
  @max_values_per_position = h.fetch("max_values_per_position") { PositionStats::DEFAULT_MAX_VALUES }

  @position_stats = (h["position_stats"] || []).each_with_object({}) do |entry, acc|
    position = Position.from_dump(entry["position"])
    acc[position] = PositionStats.from_dump(entry["stats"])
  end

  cluster_dumps = (h["clusterer"] || {})["clusters"] || {}
  @clusters = cluster_dumps.transform_values do |c|
    Cluster.from_dump(c, max_values: @max_values_per_position)
  end

  # Copy the arrays (shallowly): #record_observation and
  # #record_activated_recognizer append in place, and must not write through
  # into the caller's dump or into another store loaded from the same hash.
  @observed_iris = (h["observed_iris"] || []).dup
  @activated_recognizers = (h["activated_recognizers"] || []).dup
  self
end
#observe_position(position, value, type) ⇒ Object
86 87 88 89 |
# File 'lib/iriq/storage/memory.rb', line 86
# Records one (value, type) observation against `position`, creating the
# PositionStats for that position on first use.
#
# @param position key into the position-stats hash
# @return whatever PositionStats#observe returns
def observe_position(position, value, type)
  @position_stats[position] ||= PositionStats.new(max_values: @max_values_per_position)
  @position_stats[position].observe(value, type)
end
#observed_iri_count ⇒ Object
111 112 113 |
# File 'lib/iriq/storage/memory.rb', line 111
# Number of entries in the source-IRI log.
#
# @return [Integer]
def observed_iri_count
  @observed_iris.length
end
#path ⇒ Object
Path of the underlying file, if any. Memory backends are unpathed; Json/Sqlite override.
31 |
# File 'lib/iriq/storage/memory.rb', line 31
# Path of the underlying file, if any. The in-memory store has none;
# file-backed backends override this.
#
# @return [nil]
def path
  nil
end
#path_length_counts ⇒ Object
144 |
# File 'lib/iriq/storage/memory.rb', line 144
# Path-length counter.
#
# @return [Hash] the live internal hash, not a copy
def path_length_counts
  @path_length_counts
end
#position_stats(position) ⇒ Object
148 149 150 |
# File 'lib/iriq/storage/memory.rb', line 148
# Stats recorded for a single position.
#
# @param position key into the position-stats hash
# @return the stats stored for `position`, or nil when none are stored
def position_stats(position)
  @position_stats[position]
end
#raw_shape_counts ⇒ Object
145 |
# File 'lib/iriq/storage/memory.rb', line 145
# Raw-shape counter.
#
# @return [Hash] the live internal hash, not a copy
def raw_shape_counts
  @raw_shape_counts
end
#record_activated_recognizer(dump) ⇒ Object
— Activated recognizers (Corpus#activate_proposal) —————–
117 118 119 |
# File 'lib/iriq/storage/memory.rb', line 117
# Appends one activated-recognizer dump to the recorded list.
#
# @param dump the recognizer dump to remember
# @return [Array] the internal list, including the new entry
def record_activated_recognizer(dump)
  @activated_recognizers.push(dump)
end
#record_observation(canonical) ⇒ Object
Append a canonical IRI to the source-IRI log. Called by Corpus#observe after the event reducers have applied; the log is the source of truth that Corpus#reinfer replays.
103 104 105 |
# File 'lib/iriq/storage/memory.rb', line 103
# Appends a canonical IRI to the source-IRI log. Entries are kept in call
# order and are not de-duplicated.
#
# @param canonical the canonical IRI to log
# @return [Array] the internal log, including the new entry
def record_observation(canonical)
  @observed_iris.push(canonical)
end
#save(path = nil) ⇒ Object
No-op for in-memory; subclasses override.
66 |
# File 'lib/iriq/storage/memory.rb', line 66
# No-op for the in-memory store; subclasses override.
#
# @param path accepted for interface compatibility and ignored here
# @return [nil]
def save(path = nil)
end
#to_dump ⇒ Object
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# File 'lib/iriq/storage/memory.rb', line 190
# Serializes the whole store into a string-keyed hash, the layout that
# #load_dump! reads back.
#
# Path-length keys are stringified. The other counters, the IRI log and the
# recognizer list are returned as the live internal objects, not copies.
#
# @return [Hash]
def to_dump
  position_entries = @position_stats.map do |position, stats|
    { "position" => position.to_dump, "stats" => stats.dump }
  end
  cluster_dumps = @clusters.transform_values { |cluster| cluster.dump }

  {
    "host_counts" => @host_counts,
    "path_length_counts" => @path_length_counts.transform_keys(&:to_s),
    "raw_shape_counts" => @raw_shape_counts,
    "fingerprint_counts" => @fingerprint_counts,
    "max_values_per_position" => @max_values_per_position,
    "position_stats" => position_entries,
    "clusterer" => { "clusters" => cluster_dumps },
    "observed_iris" => @observed_iris,
    "activated_recognizers" => @activated_recognizers,
  }
end
#transaction {|_self| ... } ⇒ Object
54 55 56 |
# File 'lib/iriq/storage/memory.rb', line 54
# Runs the given block immediately, handing it this store.
#
# @yield [_self] this store
# @return whatever the block returns
def transaction
  yield self
end