Module: TopSecret

Defined in:
lib/top_secret.rb,
lib/top_secret/text.rb,
lib/top_secret/error.rb,
lib/top_secret/mapping.rb,
lib/top_secret/version.rb,
lib/top_secret/category.rb,
lib/top_secret/constants.rb,
lib/top_secret/null_model.rb,
lib/top_secret/filters/ner.rb,
lib/top_secret/text/result.rb,
lib/top_secret/filtered_text.rb,
lib/top_secret/filters/regex.rb,
lib/top_secret/text/scan_result.rb,
lib/top_secret/text/batch_result.rb,
lib/top_secret/text/global_mapping.rb,
lib/top_secret/text/label_sequence.rb,
lib/top_secret/filtered_text/result.rb

Overview

TopSecret filters sensitive information from free text before it’s sent to external services or APIs, such as chatbots and LLMs.

Defined Under Namespace

Modules: Filters, Mapping

Classes: Category, Error, FilteredText, NullModel, Text

Constant Summary collapse

VERSION =
"1.0.1"
MINIMUM_RAILS_VERSION =
">= 7.0.8"
MAXIMUM_RAILS_VERSION =
"< 9"
MODEL_PATH =

Returns The path to the NER model file.

Returns:

  • (String)

    The path to the NER model file

"ner_model.dat"
CREDIT_CARD_REGEX =

Returns Matches credit card numbers.

Returns:

  • (Regexp)

    Matches credit card numbers

/
  \b[3456]\d{15}\b |
  \b[3456]\d{3}(?:[\s+-]\d{4}){3}\b
/x
EMAIL_REGEX =

Returns Matches valid email addresses.

Returns:

  • (Regexp)

    Matches valid email addresses

%r{
  [a-zA-Z0-9.!\#$%&'*+/=?^_`{|}~-]+@
  [a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
  (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
}x
PHONE_REGEX =

Returns Matches phone numbers with optional country code.

Returns:

  • (Regexp)

    Matches phone numbers with optional country code

/\b(?:\+\d{1,2}\s)?\(?\d{3}\)?[\s+.-]\d{3}[\s+.-]\d{4}\b/
SSN_REGEX =

Returns Matches Social Security Numbers in common formats.

Returns:

  • (Regexp)

    Matches Social Security Numbers in common formats

/\b\d{3}[\s+-]\d{2}[\s+-]\d{4}\b/
MIN_CONFIDENCE_SCORE =

Returns The minimum confidence score for NER filtering.

Returns:

  • (Float)

    The minimum confidence score for NER filtering

0.5
LABEL_DELIMITER =

Returns The delimiter used in label names.

Returns:

  • (String)

    The delimiter used in label names

"_"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Attribute Details

#credit_card_filter ⇒ TopSecret::Filters::Regex

Returns filter for credit card numbers.

Returns:

  • (TopSecret::Filters::Regex)

    filter for credit card numbers



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#custom_filters ⇒ Array

Returns array of custom filters that can be configured.

Returns:

  • (Array)

    array of custom filters that can be configured



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#email_filter ⇒ TopSecret::Filters::Regex

Returns filter for email addresses.

Returns:

  • (TopSecret::Filters::Regex)

    filter for email addresses



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#location_filter ⇒ TopSecret::Filters::NER

Returns filter for location names.

Returns:

  • (TopSecret::Filters::NER)

    filter for location names



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#min_confidence_score ⇒ Float

Returns the minimum confidence score required for NER matches.

Returns:

  • (Float)

    the minimum confidence score required for NER matches



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#model_path ⇒ String

Returns the path to the MITIE NER model.

Returns:

  • (String)

    the path to the MITIE NER model



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#people_filter ⇒ TopSecret::Filters::NER

Returns filter for person names.

Returns:

  • (TopSecret::Filters::NER)

    filter for person names



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#phone_number_filter ⇒ TopSecret::Filters::Regex

Returns filter for phone numbers.

Returns:

  • (TopSecret::Filters::Regex)

    filter for phone numbers



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

#ssn_filter ⇒ TopSecret::Filters::Regex

Returns filter for social security numbers.

Returns:

  • (TopSecret::Filters::Regex)

    filter for social security numbers



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/top_secret.rb', line 46

# Module-level configuration for TopSecret: global settings plus the built-in
# filters, each declared with +mattr_accessor+ and a default value.
module TopSecret
  # Path to the MITIE NER model; defaults to MODEL_PATH.
  mattr_accessor :model_path, default: MODEL_PATH

  # Minimum confidence score required for NER matches; defaults to
  # MIN_CONFIDENCE_SCORE.
  mattr_accessor :min_confidence_score, default: MIN_CONFIDENCE_SCORE

  # Custom filters that can be configured; empty by default.
  mattr_accessor :custom_filters, default: []

  # Built-in regex-backed filters.
  mattr_accessor :credit_card_filter,
    default: TopSecret::Filters::Regex.new(label: "CREDIT_CARD", regex: CREDIT_CARD_REGEX)
  mattr_accessor :email_filter,
    default: TopSecret::Filters::Regex.new(label: "EMAIL", regex: EMAIL_REGEX)
  mattr_accessor :phone_number_filter,
    default: TopSecret::Filters::Regex.new(label: "PHONE_NUMBER", regex: PHONE_REGEX)
  mattr_accessor :ssn_filter,
    default: TopSecret::Filters::Regex.new(label: "SSN", regex: SSN_REGEX)

  # Built-in NER-backed filters.
  mattr_accessor :people_filter,
    default: TopSecret::Filters::NER.new(label: "PERSON", tag: :person)
  mattr_accessor :location_filter,
    default: TopSecret::Filters::NER.new(label: "LOCATION", tag: :location)

  # Yields the TopSecret module itself to the given block so the accessors
  # above can be assigned in one place.
  #
  # @yieldparam _self [TopSecret] the module the method was called on
  # @return [Object] whatever the block returns
  def self.configure
    yield(self)
  end
end

Class Method Details

.configure {|_self| ... } ⇒ Object

Yields:

  • (_self)

Yield Parameters:

  • _self (TopSecret)

    the object that the method was called on



60
61
62
# File 'lib/top_secret.rb', line 60

# Hands the receiver to the caller's block so it can be configured in place.
#
# @yieldparam _self [Object] the object the method was called on
# @return [Object] the value returned by the block
# @raise [LocalJumpError] when called without a block
def configure
  yield(self)
end