Class: Iriq::ProposalStrategy::PrefixUnderscoreId

Inherits:
Object
  • Object
show all
Defined in:
lib/iriq/recognizer_proposal.rb

Overview

Detects `<prefix>_<alphanumeric>` patterns at slug/opaque_id positions — the GitHub PAT (`ghp_…`), Stripe customer ID (`cus_…`), AWS-style (`sk_test_…` — partial match), and Twilio SID-with-letter-prefix family. Restricting the suffix to alphanumeric characters (no further separators) keeps real slugs (`my-cool-post`, `red_team_member`) from triggering false proposals.

Constant Summary collapse

# Whole-string match: a prefix of lowercase ASCII letters, a single
# underscore, then one or more ASCII alphanumerics. Capture 1 is the
# prefix (without the underscore); capture 2 is the suffix.
PATTERN = /\A([a-z]+)_([A-Za-z0-9]+)\z/.freeze
# Identifier passed as the `strategy:` of each proposal built by #propose.
NAME = :prefix_underscore_id

Instance Method Summary collapse

Instance Method Details

#propose(storage, min_observations: DEFAULT_MIN_OBSERVATIONS, min_coverage: DEFAULT_MIN_COVERAGE, min_hosts: DEFAULT_MIN_HOSTS) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/iriq/recognizer_proposal.rb', line 100

# Scans the positions of +storage+ that pass `slug_or_opaque?` for values
# matching PATTERN, groups the hits by "<prefix>_", and builds one
# RecognizerProposal for every prefix that clears all three thresholds.
#
# @param storage [#each_position_stats] yields (position, stats) pairs;
#   stats must respond to `value_counts` and `total`, position to `host`
# @param min_observations [Integer] lower bound on the summed counts of
#   matching values for a prefix
# @param min_coverage [Float] lower bound on matching count divided by the
#   summed `stats.total` of the positions where the prefix was seen
# @param min_hosts [Integer] lower bound on the size of the accumulated
#   host collection (presumably distinct hosts — confirm against
#   `empty_accumulator`)
# @return [Array<RecognizerProposal>] sorted by descending confidence,
#   ties broken by prefix
def propose(storage,
            min_observations: DEFAULT_MIN_OBSERVATIONS,
            min_coverage:     DEFAULT_MIN_COVERAGE,
            min_hosts:        DEFAULT_MIN_HOSTS)
  buckets = Hash.new { |table, key| table[key] = empty_accumulator }

  storage.each_position_stats do |position, stats|
    next unless slug_or_opaque?(stats)

    stats.value_counts.each do |value, count|
      hit = PATTERN.match(value)
      next unless hit

      bucket = buckets["#{hit[1]}_"]
      bucket[:matching_count] += count
      # A position's total is added only the first time this prefix is
      # seen there, so several matching values at one position do not
      # inflate the coverage denominator.
      unless bucket[:positions].include?(position)
        bucket[:position_observations] += stats.total
      end
      bucket[:positions] << position
      bucket[:hosts] << position.host
      # Keep every matching value; the sample is sorted and capped when
      # the proposal is built so the result does not depend on Hash
      # (Ruby) or map (Go) iteration order.
      bucket[:matches] << value
    end
  end

  proposals = buckets.filter_map do |prefix, bucket|
    matched = bucket[:matching_count]
    next if matched < min_observations || bucket[:hosts].size < min_hosts

    coverage = matched.to_f / bucket[:position_observations]
    next if coverage < min_coverage

    # Alphabetical top five: illustrative for humans and identical
    # between the Ruby and Go implementations.
    samples = bucket[:matches].sort.first(5)

    RecognizerProposal.new(
      prefix:            prefix,
      suggested_type:    prefix.delete_suffix("_").to_sym,
      positions:         bucket[:positions].to_a,
      hosts:             bucket[:hosts],
      coverage:          coverage,
      observation_count: matched,
      sample_values:     samples,
      strategy:          NAME,
    )
  end

  proposals.sort_by { |proposal| [-proposal.confidence, proposal.prefix] }
end