Class: CipherStash::Analysis::TokenFilters::NGram

Inherits:
Base
  • Object
show all
Defined in:
lib/cipherstash/analysis/token_filters.rb

Instance Method Summary collapse

Methods inherited from Base

#initialize

Constructor Details

This class inherits a constructor from CipherStash::Analysis::TokenFilters::Base

Instance Method Details

#perform(str_or_array) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/cipherstash/analysis/token_filters.rb', line 17

# Expands each token into its character n-grams.
#
# For every token, emits all contiguous substrings of each length from
# "minLength" to "maxLength" (both read from @opts, defaulting to 3 and 8
# when absent), ordered by length and then by position within the token.
# A token longer than the maximum length is additionally emitted whole,
# after its n-grams. A token shorter than the minimum length yields nothing.
#
# @param str_or_array [String, Array<String>] a single token or a list of
#   tokens; nil is treated as an empty list
# @return [Array<String>] the n-grams of all tokens, flattened, in input order
def perform(str_or_array)
  min_length = @opts["minLength"] || 3
  max_length = @opts["maxLength"] || 8

  Array(str_or_array).flat_map do |token|
    # Split once per token instead of once per n-gram size.
    chars = token.chars

    ngrams = (min_length..max_length).flat_map do |n|
      # each_cons yields nothing when the token has fewer than n characters.
      chars.each_cons(n).map(&:join)
    end

    # No n-gram can equal a token longer than max_length, so append it whole.
    ngrams << token if token.length > max_length
    ngrams
  end
end