Module: Philiprehberger::Mask
- Defined in:
- lib/philiprehberger/mask.rb,
lib/philiprehberger/mask/version.rb,
lib/philiprehberger/mask/detector.rb,
lib/philiprehberger/mask/scrubber.rb,
lib/philiprehberger/mask/configuration.rb,
lib/philiprehberger/mask/deep_scrubber.rb
Defined Under Namespace
Modules: DeepScrubber, Detector, Scrubber. Classes: Configuration, Error.
Constant Summary
- VERSION = '0.6.0'
Class Method Summary
-
.add_locale(locale, patterns) ⇒ Object
Register locale-specific patterns.
-
.batch_scrub(strings, mode: :full, locale: nil) ⇒ Array<String>
Process an array of strings in one call with shared compiled patterns.
-
.configure {|Configuration| ... } ⇒ Object
Configure custom patterns.
-
.configure_priority(detector_order) ⇒ Object
Set detector evaluation priority.
-
.detect(string, locale: nil) ⇒ Array<Hash>
Scan a string for PII without modifying it.
-
.detokenize(string, tokens:) ⇒ String
Reverse tokenization using a token lookup table.
-
.reset_configuration! ⇒ Object
Reset configuration to defaults.
-
.scrub(string, mode: :full) ⇒ String
Detect and redact PII patterns in a string.
-
.scrub_hash(data, keys: nil, mode: :full) ⇒ Hash, Array
Deep-walk a hash/array and redact sensitive values.
-
.scrub_hash_with_audit(data, keys: nil) ⇒ Hash
Deep-walk a hash/array and redact sensitive values with audit trail.
-
.scrub_io(io, mode: :full, locale: nil) ⇒ Array<String>
Read from IO line by line, scrub each line.
-
.scrub_log(path, output: nil, mode: :full, locale: nil) ⇒ Hash
Read a file line by line, scrub each line, and write the result.
-
.scrub_with_audit(string) ⇒ Hash
Scrub a string and return an audit trail of what was masked.
-
.tokenize(string) ⇒ Hash
Replace PII with reversible tokens.
Class Method Details
.add_locale(locale, patterns) ⇒ Object
Register locale-specific patterns
129 130 131 |
# File 'lib/philiprehberger/mask.rb', line 129
#
# Register locale-specific patterns.
#
# Both arguments are handed, unchanged, to the shared Configuration instance.
#
# @param locale [Object] locale identifier, passed through as-is
# @param patterns [Object] pattern definitions for that locale, passed through as-is
# @return [Object] whatever Configuration#add_locale returns
def self.add_locale(locale, patterns)
  config = Configuration.instance
  config.add_locale(locale, patterns)
end
.batch_scrub(strings, mode: :full, locale: nil) ⇒ Array<String>
Process an array of strings in one call with shared compiled patterns
Raises ArgumentError when strings is not an Array. An empty Array returns [].
110 111 112 113 114 115 116 |
# File 'lib/philiprehberger/mask.rb', line 110
#
# Process an array of strings in one call with shared compiled patterns.
#
# The configured patterns are fetched once, each regexp is rebuilt from its
# source and options, and the resulting list is reused for every string.
#
# @param strings [Array<String>] strings to scrub; an empty Array yields []
# @param mode [Symbol] forwarded to Scrubber.call (defaults to :full)
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Array<String>] one scrubbed result per input, in input order
# @raise [ArgumentError] when +strings+ is not an Array
def self.batch_scrub(strings, mode: :full, locale: nil)
  raise ArgumentError, 'strings must be an Array' unless strings.is_a?(Array)

  patterns = Configuration.instance.patterns(locale: locale)
  compiled = patterns.map do |pat|
    regexp = pat[:pattern]
    # Rebuild with the same source AND the same option flags so that
    # case-insensitive / multiline / extended patterns keep their meaning.
    pat.merge(pattern: Regexp.new(regexp.source, regexp.options))
  end
  strings.map { |s| Scrubber.call(s, patterns: compiled, mode: mode) }
end
.configure {|Configuration| ... } ⇒ Object
Configure custom patterns
195 196 197 |
# File 'lib/philiprehberger/mask.rb', line 195
#
# Configure custom patterns.
#
# Invokes the given block with the shared Configuration instance.
#
# @yieldparam config [Configuration] the shared configuration object
# @return [Object] the value returned by the block
def self.configure(&block)
  config = Configuration.instance
  block.call(config)
end
.configure_priority(detector_order) ⇒ Object
Set detector evaluation priority
121 122 123 |
# File 'lib/philiprehberger/mask.rb', line 121
#
# Set detector evaluation priority.
#
# Delegates to Configuration#set_priority on the shared instance.
#
# @param detector_order [Object] desired detector ordering, passed through as-is
# @return [Object] whatever Configuration#set_priority returns
def self.configure_priority(detector_order)
  config = Configuration.instance
  config.set_priority(detector_order)
end
.detect(string, locale: nil) ⇒ Array<Hash>
Scan a string for PII without modifying it
Returns the list of detector matches in detection order. Each entry has :detector, :match, and :position. Useful for “should this be redacted?” checks before the cost of substitution. The input string is not mutated.
40 41 42 |
# File 'lib/philiprehberger/mask.rb', line 40
#
# Scan a string for PII without modifying it.
#
# Looks up the patterns for the requested locale and hands them, together
# with the string, to Scrubber.scan.
#
# @param string [String] text to inspect
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Array<Hash>] detector matches as produced by Scrubber.scan
def self.detect(string, locale: nil)
  active_patterns = Configuration.instance.patterns(locale: locale)
  Scrubber.scan(string, patterns: active_patterns)
end
.detokenize(string, tokens:) ⇒ String
Reverse tokenization using a token lookup table
92 93 94 95 96 |
# File 'lib/philiprehberger/mask.rb', line 92
#
# Reverse tokenization using a token lookup table.
#
# Each token => original pair is applied in the table's iteration order,
# replacing every occurrence of the token in the running result.
#
# @param string [String] text containing tokens; it is not mutated
# @param tokens [Hash{String => String}] token to original-value lookup table
# @return [String] a new string with every token replaced by its original value
def self.detokenize(string, tokens:)
  tokens.reduce(string.dup) do |text, (token, original)|
    # Block form on purpose: a replacement passed as the second argument has
    # its backslash sequences (\0, \1, \\) interpreted by gsub, which would
    # corrupt original values containing a backslash. A block result is
    # inserted literally.
    text.gsub(token) { original }
  end
end
.reset_configuration! ⇒ Object
Reset configuration to defaults
200 201 202 |
# File 'lib/philiprehberger/mask.rb', line 200
#
# Reset configuration to defaults.
#
# Delegates to Configuration.reset!.
#
# @return [Object] whatever Configuration.reset! returns
def self.reset_configuration!
  Configuration.reset!
end
.scrub(string, mode: :full) ⇒ String
Detect and redact PII patterns in a string
22 23 24 |
# File 'lib/philiprehberger/mask.rb', line 22
#
# Detect and redact PII patterns in a string.
#
# Uses the patterns currently held by the shared Configuration instance.
#
# @param string [String] text to scrub
# @param mode [Symbol] forwarded to Scrubber.call (defaults to :full)
# @return [String] the result of Scrubber.call
def self.scrub(string, mode: :full)
  active_patterns = Configuration.instance.patterns
  Scrubber.call(string, patterns: active_patterns, mode: mode)
end
.scrub_hash(data, keys: nil, mode: :full) ⇒ Hash, Array
Deep-walk a hash/array and redact sensitive values
53 54 55 56 |
# File 'lib/philiprehberger/mask.rb', line 53
#
# Deep-walk a hash/array and redact sensitive values.
#
# @param data [Hash, Array] structure handed to DeepScrubber.call
# @param keys [Object, nil] sensitive keys to use; when nil (or false) the
#   configured sensitive_keys are used instead
# @param mode [Symbol] forwarded to DeepScrubber.call (defaults to :full)
# @return [Hash, Array] the result of DeepScrubber.call
def self.scrub_hash(data, keys: nil, mode: :full)
  settings = Configuration.instance
  DeepScrubber.call(
    data,
    patterns: settings.patterns,
    sensitive_keys: keys || settings.sensitive_keys,
    mode: mode
  )
end
.scrub_hash_with_audit(data, keys: nil) ⇒ Hash
Deep-walk a hash/array and redact sensitive values with audit trail
63 64 65 66 |
# File 'lib/philiprehberger/mask.rb', line 63
#
# Deep-walk a hash/array and redact sensitive values with audit trail.
#
# @param data [Hash, Array] structure handed to DeepScrubber.call_with_audit
# @param keys [Object, nil] sensitive keys to use; when nil (or false) the
#   configured sensitive_keys are used instead
# @return [Hash] the result of DeepScrubber.call_with_audit
def self.scrub_hash_with_audit(data, keys: nil)
  settings = Configuration.instance
  DeepScrubber.call_with_audit(
    data,
    patterns: settings.patterns,
    sensitive_keys: keys || settings.sensitive_keys
  )
end
.scrub_io(io, mode: :full, locale: nil) ⇒ Array<String>
Read from IO line by line, scrub each line
Raises ArgumentError when io is nil. An IO that is already at EOF (or empty) returns an empty Array rather than raising.
146 147 148 149 150 151 152 |
# File 'lib/philiprehberger/mask.rb', line 146
#
# Read from IO line by line, scrub each line.
#
# @param io [#each_line] source to read; if it responds to #eof? and is
#   already at EOF, an empty Array is returned without reading
# @param mode [Symbol] forwarded to Scrubber.call (defaults to :full)
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Array<String>] one Scrubber.call result per line read
# @raise [ArgumentError] when +io+ is nil
def self.scrub_io(io, mode: :full, locale: nil)
  raise ArgumentError, 'io is required' if io.nil?

  exhausted = io.respond_to?(:eof?) && io.eof?
  return [] if exhausted

  active_patterns = Configuration.instance.patterns(locale: locale)
  io.each_line.map do |raw_line|
    Scrubber.call(raw_line, patterns: active_patterns, mode: mode)
  end
end
.scrub_log(path, output: nil, mode: :full, locale: nil) ⇒ Hash
Read a file line by line, scrub each line, and write the result
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/philiprehberger/mask.rb', line 161
#
# Read a file line by line, scrub each line, and write the result.
#
# When +output+ is nil the scrubbed text is written to a temp file created in
# the same directory as +path+ and then renamed over +path+; otherwise the
# scrubbed text is written to +output+ and +path+ is left untouched.
#
# @param path [String] file to read
# @param output [String, nil] destination file, or nil to replace +path+
# @param mode [Symbol] forwarded to Scrubber.call (defaults to :full)
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Hash] counters under :lines_processed, :lines_modified, :detections
def self.scrub_log(path, output: nil, mode: :full, locale: nil)
  patterns = Configuration.instance.patterns(locale: locale)
  stats = { lines_processed: 0, lines_modified: 0, detections: 0 }

  scrubbed_lines = File.open(path, 'r') do |file|
    file.each_line.map do |line|
      stats[:lines_processed] += 1
      cleaned = Scrubber.call(line, patterns: patterns, mode: mode)
      unless cleaned == line
        stats[:lines_modified] += 1
        # Second pass over changed lines only, to count individual detections.
        stats[:detections] += Scrubber.call_with_audit(line, patterns: patterns)[:audit].length
      end
      cleaned
    end
  end
  contents = scrubbed_lines.join

  if output.nil?
    # Temp file lives next to the target so the rename stays in one directory.
    Tempfile.open([File.basename(path), '.tmp'], File.dirname(path)) do |tmp|
      tmp.write(contents)
      tmp.flush
      File.rename(tmp.path, path)
    end
  else
    File.write(output, contents)
  end

  stats
end
.scrub_with_audit(string) ⇒ Hash
Scrub a string and return an audit trail of what was masked
72 73 74 |
# File 'lib/philiprehberger/mask.rb', line 72
#
# Scrub a string and return an audit trail of what was masked.
#
# Uses the patterns currently held by the shared Configuration instance.
#
# @param string [String] text to scrub
# @return [Hash] the result of Scrubber.call_with_audit
def self.scrub_with_audit(string)
  active_patterns = Configuration.instance.patterns
  Scrubber.call_with_audit(string, patterns: active_patterns)
end
.tokenize(string) ⇒ Hash
Replace PII with reversible tokens
83 84 85 |
# File 'lib/philiprehberger/mask.rb', line 83
#
# Replace PII with reversible tokens.
#
# Uses the patterns currently held by the shared Configuration instance.
#
# @param string [String] text to tokenize
# @return [Hash] the result of Scrubber.call_with_tokens
def self.tokenize(string)
  active_patterns = Configuration.instance.patterns
  Scrubber.call_with_tokens(string, patterns: active_patterns)
end