Module: Philiprehberger::Mask
- Defined in:
- lib/philiprehberger/mask.rb,
lib/philiprehberger/mask/version.rb,
lib/philiprehberger/mask/detector.rb,
lib/philiprehberger/mask/scrubber.rb,
lib/philiprehberger/mask/configuration.rb,
lib/philiprehberger/mask/deep_scrubber.rb
Defined Under Namespace
Modules: DeepScrubber, Detector, Scrubber. Classes: Configuration, Error.
Constant Summary
- VERSION =
'0.5.1'
Class Method Summary
-
.add_locale(locale, patterns) ⇒ Object
Register locale-specific patterns.
-
.batch_scrub(strings, mode: :full, locale: nil) ⇒ Array<String>
Process an array of strings in one call with shared compiled patterns.
-
.configure {|Configuration| ... } ⇒ Object
Configure custom patterns.
-
.configure_priority(detector_order) ⇒ Object
Set detector evaluation priority.
-
.detokenize(string, tokens:) ⇒ String
Reverse tokenization using a token lookup table.
-
.reset_configuration! ⇒ Object
Reset configuration to defaults.
-
.scrub(string, mode: :full) ⇒ String
Detect and redact PII patterns in a string.
-
.scrub_hash(data, keys: nil, mode: :full) ⇒ Hash, Array
Deep-walk a hash/array and redact sensitive values.
-
.scrub_hash_with_audit(data, keys: nil) ⇒ Hash
Deep-walk a hash/array and redact sensitive values with audit trail.
-
.scrub_io(io, mode: :full, locale: nil) ⇒ Array<String>
Read from IO line by line, scrub each line.
-
.scrub_log(path, output: nil, mode: :full, locale: nil) ⇒ Hash
Read a file line by line, scrub each line, and write the result.
-
.scrub_with_audit(string) ⇒ Hash
Scrub a string and return an audit trail of what was masked.
-
.tokenize(string) ⇒ Hash
Replace PII with reversible tokens.
Class Method Details
.add_locale(locale, patterns) ⇒ Object
Register locale-specific patterns
111 112 113 |
# File 'lib/philiprehberger/mask.rb', line 111
#
# Registers locale-specific patterns by forwarding both arguments to the
# shared Configuration instance.
#
# @param locale [Object] locale identifier, passed through unchanged
# @param patterns [Object] patterns to register for that locale
# @return [Object] whatever Configuration#add_locale returns
def self.add_locale(locale, patterns)
  settings = Configuration.instance
  settings.add_locale(locale, patterns)
end
.batch_scrub(strings, mode: :full, locale: nil) ⇒ Array<String>
Process an array of strings in one call with shared compiled patterns
Raises ArgumentError when strings is not an Array. An empty Array returns [].
92 93 94 95 96 97 98 |
# File 'lib/philiprehberger/mask.rb', line 92
#
# Scrubs every element of +strings+ in one call, using a single set of
# patterns that is rebuilt once and then shared by all elements.
#
# @param strings [Array<String>] strings to scrub
# @param mode [Symbol] forwarded to Scrubber.call
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Array<String>] one Scrubber.call result per input, in input order;
#   an empty Array for empty input
# @raise [ArgumentError] when +strings+ is not an Array
def self.batch_scrub(strings, mode: :full, locale: nil)
  raise ArgumentError, 'strings must be an Array' unless strings.is_a?(Array)
  # Nothing to scrub: skip the pattern lookup and recompilation entirely.
  return [] if strings.empty?

  patterns = Configuration.instance.patterns(locale: locale)
  # Rebuild each Regexp from its source and option flags so the batch works
  # on its own copies of the configured patterns.
  compiled = patterns.map do |pat|
    regexp = pat[:pattern]
    pat.merge(pattern: Regexp.new(regexp.source, regexp.options))
  end

  strings.map { |string| Scrubber.call(string, patterns: compiled, mode: mode) }
end
.configure {|Configuration| ... } ⇒ Object
Configure custom patterns
177 178 179 |
# File 'lib/philiprehberger/mask.rb', line 177
#
# Yields the shared Configuration instance to the given block so the caller
# can register custom patterns.
#
# @yieldparam config [Configuration] the shared configuration instance
# @return [Object] the value returned by the block
# @raise [ArgumentError] when no block is given
def self.configure(&block)
  # Without this guard a missing block surfaces as a NoMethodError on nil,
  # which hides the actual mistake from the caller.
  raise ArgumentError, 'a configuration block is required' unless block

  block.call(Configuration.instance)
end
.configure_priority(detector_order) ⇒ Object
Set detector evaluation priority
103 104 105 |
# File 'lib/philiprehberger/mask.rb', line 103
#
# Sets the detector evaluation priority by handing +detector_order+ to the
# shared Configuration instance.
#
# @param detector_order [Object] desired detector order, passed through unchanged
# @return [Object] whatever Configuration#set_priority returns
def self.configure_priority(detector_order)
  settings = Configuration.instance
  settings.set_priority(detector_order)
end
.detokenize(string, tokens:) ⇒ String
Reverse tokenization using a token lookup table
74 75 76 77 78 |
# File 'lib/philiprehberger/mask.rb', line 74
#
# Reverses tokenization: every occurrence of each token in +string+ is
# replaced by the original value from the lookup table. Pairs are applied in
# the table's iteration order.
#
# @param string [String] text containing tokens
# @param tokens [Hash{String => String}] token => original value pairs
# @return [String] a new string with tokens replaced; +string+ is not mutated
def self.detokenize(string, tokens:)
  tokens.reduce(string.dup) do |result, (token, original)|
    # Block form inserts the original value literally. The two-argument form
    # would interpret backslash sequences inside it (\0, \1, \\) as
    # back-references and corrupt the restored text.
    result.gsub(token) { original }
  end
end
.reset_configuration! ⇒ Object
Reset configuration to defaults
182 183 184 |
# File 'lib/philiprehberger/mask.rb', line 182
#
# Resets the configuration to its defaults by delegating to
# Configuration.reset!.
#
# @return [Object] whatever Configuration.reset! returns
def self.reset_configuration!
  Configuration.reset!
end
.scrub(string, mode: :full) ⇒ String
Detect and redact PII patterns in a string
22 23 24 |
# File 'lib/philiprehberger/mask.rb', line 22
#
# Detects and redacts PII patterns in +string+ using the currently
# configured patterns.
#
# @param string [String] text to scrub
# @param mode [Symbol] forwarded to Scrubber.call
# @return [String] the result of Scrubber.call
def self.scrub(string, mode: :full)
  active_patterns = Configuration.instance.patterns
  Scrubber.call(string, patterns: active_patterns, mode: mode)
end
.scrub_hash(data, keys: nil, mode: :full) ⇒ Hash, Array
Deep-walk a hash/array and redact sensitive values
35 36 37 38 |
# File 'lib/philiprehberger/mask.rb', line 35
#
# Deep-walks +data+ and redacts sensitive values via DeepScrubber.call.
#
# @param data [Hash, Array] structure to scrub
# @param keys [Object, nil] sensitive keys; when nil (or false) the
#   configuration's sensitive_keys are used instead
# @param mode [Symbol] forwarded to DeepScrubber.call
# @return [Hash, Array] the result of DeepScrubber.call
def self.scrub_hash(data, keys: nil, mode: :full)
  settings = Configuration.instance
  active_patterns = settings.patterns
  sensitive = keys || settings.sensitive_keys
  DeepScrubber.call(data, patterns: active_patterns, sensitive_keys: sensitive, mode: mode)
end
.scrub_hash_with_audit(data, keys: nil) ⇒ Hash
Deep-walk a hash/array and redact sensitive values with audit trail
45 46 47 48 |
# File 'lib/philiprehberger/mask.rb', line 45
#
# Deep-walks +data+ and redacts sensitive values, returning the audit-aware
# result of DeepScrubber.call_with_audit.
#
# @param data [Hash, Array] structure to scrub
# @param keys [Object, nil] sensitive keys; when nil (or false) the
#   configuration's sensitive_keys are used instead
# @return [Hash] the result of DeepScrubber.call_with_audit
def self.scrub_hash_with_audit(data, keys: nil)
  settings = Configuration.instance
  active_patterns = settings.patterns
  sensitive = keys || settings.sensitive_keys
  DeepScrubber.call_with_audit(data, patterns: active_patterns, sensitive_keys: sensitive)
end
.scrub_io(io, mode: :full, locale: nil) ⇒ Array<String>
Read from IO line by line, scrub each line
Raises ArgumentError when io is nil. An IO that is already at EOF (or empty) returns an empty Array rather than raising.
128 129 130 131 132 133 134 |
# File 'lib/philiprehberger/mask.rb', line 128
#
# Reads +io+ line by line and scrubs each line.
#
# @param io [#each_line] source to read from
# @param mode [Symbol] forwarded to Scrubber.call
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Array<String>] one Scrubber.call result per line; an empty Array
#   when +io+ responds to eof? and is already at EOF
# @raise [ArgumentError] when +io+ is nil
def self.scrub_io(io, mode: :full, locale: nil)
  raise ArgumentError, 'io is required' if io.nil?

  # Only objects that expose eof? are asked; anything else goes straight to each_line.
  exhausted = io.respond_to?(:eof?) && io.eof?
  return [] if exhausted

  active_patterns = Configuration.instance.patterns(locale: locale)
  io.each_line.map do |line|
    Scrubber.call(line, patterns: active_patterns, mode: mode)
  end
end
.scrub_log(path, output: nil, mode: :full, locale: nil) ⇒ Hash
Read a file line by line, scrub each line, and write the result
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/philiprehberger/mask.rb', line 143
#
# Reads the file at +path+ line by line, scrubs each line, and writes the
# result either back over +path+ (the default) or to +output+.
#
# For an in-place rewrite the scrubbed content is written to a temporary file
# in the same directory and then renamed over +path+. The temporary file first
# receives the permission bits of the original, because Tempfile creates
# owner-only files and the rename would otherwise silently replace the log
# file's permissions.
#
# @param path [String] file to read
# @param output [String, nil] destination file; nil rewrites +path+ in place
# @param mode [Symbol] forwarded to Scrubber.call
# @param locale [Object, nil] forwarded to Configuration#patterns
# @return [Hash] counts under :lines_processed, :lines_modified and :detections
def self.scrub_log(path, output: nil, mode: :full, locale: nil)
  patterns = Configuration.instance.patterns(locale: locale)
  lines_processed = 0
  lines_modified = 0
  detections = 0

  scrubbed_lines = File.open(path, 'r') do |f|
    f.each_line.map do |line|
      lines_processed += 1
      scrubbed = Scrubber.call(line, patterns: patterns, mode: mode)
      if scrubbed != line
        lines_modified += 1
        # Only changed lines are audited; each audit entry counts as one detection.
        detections += Scrubber.call_with_audit(line, patterns: patterns)[:audit].length
      end
      scrubbed
    end
  end

  if output.nil?
    # Keep only the permission bits; the file-type bits are not a chmod argument.
    original_mode = File.stat(path).mode & 0o7777
    Tempfile.open([File.basename(path), '.tmp'], File.dirname(path)) do |tmp|
      tmp.write(scrubbed_lines.join)
      tmp.flush
      tmp.chmod(original_mode)
      File.rename(tmp.path, path)
    end
  else
    File.write(output, scrubbed_lines.join)
  end

  { lines_processed: lines_processed, lines_modified: lines_modified, detections: detections }
end
.scrub_with_audit(string) ⇒ Hash
Scrub a string and return an audit trail of what was masked
54 55 56 |
# File 'lib/philiprehberger/mask.rb', line 54
#
# Scrubs +string+ and returns the audit-aware result produced by
# Scrubber.call_with_audit, using the currently configured patterns.
#
# @param string [String] text to scrub
# @return [Hash] the result of Scrubber.call_with_audit
def self.scrub_with_audit(string)
  active_patterns = Configuration.instance.patterns
  Scrubber.call_with_audit(string, patterns: active_patterns)
end
.tokenize(string) ⇒ Hash
Replace PII with reversible tokens
65 66 67 |
# File 'lib/philiprehberger/mask.rb', line 65
#
# Replaces PII in +string+ with reversible tokens by delegating to
# Scrubber.call_with_tokens with the currently configured patterns.
#
# @param string [String] text to tokenize
# @return [Hash] the result of Scrubber.call_with_tokens
def self.tokenize(string)
  active_patterns = Configuration.instance.patterns
  Scrubber.call_with_tokens(string, patterns: active_patterns)
end