199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
|
# File 'lib/heitt.rb', line 199
# Scans +input+ for substrings matching the regex of any +database+ entry and
# returns the scored candidates for every distinct match.
#
# When +input+ names an existing path, the file's contents are scanned;
# otherwise +input+ itself is treated as the text.
#
# @param input [String] a path to an existing file, or the raw text to scan
# @param database [Enumerable] entries from which get_regex / get_modes can
#   extract a pattern and a non-empty list of modes; entries lacking either
#   are skipped
# @param min_entropy [Numeric] threshold handed to
#   HEITT::Analyzer.high_entropy?; only consulted for matches of 8 or more
#   characters (shorter matches bypass the entropy check)
# @return [Array<Hash>] one +{hash:, candidates:}+ per distinct matched
#   string, in order of first appearance; when the same string is matched
#   several times, the candidates of the occurrence whose first candidate has
#   the highest +:score+ are kept (ties keep the earlier occurrence)
def self.scan(input, database: HEITT::DATABASE, min_entropy: 3.5)
  text = File.exist?(input) ? File.read(input) : input
  context_scores = HEITT::Analyzer.analyze(text, database: database)
  found = {} # matched string => result hash
  seen = {}  # matched string => best first-candidate score so far

  database.each do |entry|
    regex = get_regex(entry)
    modes = get_modes(entry)
    next unless regex && modes && !modes.empty?

    pattern = regex.is_a?(Regexp) ? regex : Regexp.new(regex)
    scanner = StringScanner.new(text)

    while scanner.scan_until(pattern)
      matched = scanner.matched

      # A zero-length match leaves the scan pointer where it was, so the loop
      # would never terminate. Skip it and step one character forward.
      if matched.empty?
        break if scanner.eos?

        scanner.getch
        next
      end

      next unless matched.length < 8 || HEITT::Analyzer.high_entropy?(matched, min_entropy)

      # NOTE(review): scanner.pos is a byte offset while matched.length counts
      # characters; the two agree only for single-byte text. Confirm which
      # unit the analyzer expects before scanning multibyte input.
      offset = scanner.pos - matched.length
      # NOTE(review): `Analyzer.(...)` is shorthand for `Analyzer.call(...)`;
      # confirm that is the intended entry point and not a lost method name.
      delim_prefix = HEITT::Analyzer.(text, offset)
      candidates = HEITT::Analyzer.score_candidates(modes, delim_prefix, context_scores)
      score = candidates.first[:score]
      next unless score > (seen[matched] || -1)

      seen[matched] = score
      found[matched] = { hash: matched, candidates: candidates }
    end
  end

  found.values
end
|