Class: TokenKit::ConfigBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/tokenkit/config_builder.rb

Overview

Builder for creating immutable Configuration objects

Constant Summary collapse

DEFAULTS =

Default values

# Applied key-by-key by ConfigBuilder#initialize when no base config is given; the last
# five entries also serve as fallbacks when a base config lacks the corresponding value.
{
  strategy: :unicode,
  lowercase: true,
  remove_punctuation: false,
  preserve_patterns: [], # initialize hands each builder its own copy of this array
  grapheme_extended: true,
  min_gram: 2,
  max_gram: 10,
  delimiter: "/",
  split_on_chars: " \t\n\r" # space, tab, newline, carriage return
}.freeze
# Strategy identifiers recognised by the builder.
# NOTE(review): presumably enforced during validation — confirm against validate!.
VALID_STRATEGIES = %i[
  unicode whitespace pattern sentence grapheme keyword
  edge_ngram ngram path_hierarchy url_email char_group
  letter lowercase
].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(base_config = nil) ⇒ ConfigBuilder

Returns a new instance of ConfigBuilder.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/tokenkit/config_builder.rb', line 31

# Creates a builder seeded either from an existing configuration or from DEFAULTS.
#
# @param base_config [Object, nil] configuration to copy from. It must respond to
#   +strategy+, +lowercase+, +remove_punctuation+ and +preserve_patterns+; the remaining
#   settings are read from its instance variables. When nil, DEFAULTS is used instead.
def initialize(base_config = nil)
  if base_config
    @strategy = base_config.strategy
    @lowercase = base_config.lowercase
    @remove_punctuation = base_config.remove_punctuation
    # Duplicate so that changes made through the builder never touch the source array.
    @preserve_patterns = base_config.preserve_patterns.dup
    # @regex has no default: carry it over only when the source config defines it.
    @regex = base_config.instance_variable_get(:@regex) if base_config.instance_variable_defined?(:@regex)

    # Fall back to the default only when the source holds no value (nil). A plain `||`
    # would silently turn an explicit `false` (grapheme_extended: false) back into `true`.
    %i[grapheme_extended min_gram max_gram delimiter split_on_chars].each do |key|
      inherited = base_config.instance_variable_get("@#{key}")
      instance_variable_set("@#{key}", inherited.nil? ? DEFAULTS[key] : inherited)
    end
  else
    DEFAULTS.each do |key, value|
      # Arrays are copied so builders never share the array stored in DEFAULTS.
      instance_variable_set("@#{key}", value.is_a?(Array) ? value.dup : value)
    end
  end
end

Instance Attribute Details

#delimiter ⇒ Object

Returns the value of attribute delimiter.



10
11
12
# File 'lib/tokenkit/config_builder.rb', line 10

# Reader for the +delimiter+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @delimiter
def delimiter = @delimiter

#grapheme_extended ⇒ Object

Returns the value of attribute grapheme_extended.



9
10
11
# File 'lib/tokenkit/config_builder.rb', line 9

# Reader for the +grapheme_extended+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @grapheme_extended
def grapheme_extended = @grapheme_extended

#lowercase ⇒ Object

Returns the value of attribute lowercase.



8
9
10
# File 'lib/tokenkit/config_builder.rb', line 8

# Reader for the +lowercase+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @lowercase
def lowercase = @lowercase

#max_gram ⇒ Object

Returns the value of attribute max_gram.



9
10
11
# File 'lib/tokenkit/config_builder.rb', line 9

# Reader for the +max_gram+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @max_gram
def max_gram = @max_gram

#min_gram ⇒ Object

Returns the value of attribute min_gram.



9
10
11
# File 'lib/tokenkit/config_builder.rb', line 9

# Reader for the +min_gram+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @min_gram
def min_gram = @min_gram

#preserve_patterns ⇒ Object

Returns the value of attribute preserve_patterns.



8
9
10
# File 'lib/tokenkit/config_builder.rb', line 8

# Reader for the +preserve_patterns+ setting held by this builder.
# The stored object itself is returned, not a copy.
#
# @return [Object] whatever is currently stored in @preserve_patterns
def preserve_patterns = @preserve_patterns

#regex ⇒ Object

Returns the value of attribute regex.



9
10
11
# File 'lib/tokenkit/config_builder.rb', line 9

# Reader for the +regex+ setting held by this builder.
#
# @return [Object, nil] whatever is currently stored in @regex; nil when it was never assigned
def regex = @regex

#remove_punctuation ⇒ Object

Returns the value of attribute remove_punctuation.



8
9
10
# File 'lib/tokenkit/config_builder.rb', line 8

# Reader for the +remove_punctuation+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @remove_punctuation
def remove_punctuation = @remove_punctuation

#split_on_chars ⇒ Object

Returns the value of attribute split_on_chars.



10
11
12
# File 'lib/tokenkit/config_builder.rb', line 10

# Reader for the +split_on_chars+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @split_on_chars
def split_on_chars = @split_on_chars

#strategy ⇒ Object

Returns the value of attribute strategy.



8
9
10
# File 'lib/tokenkit/config_builder.rb', line 8

# Reader for the +strategy+ setting held by this builder.
#
# @return [Object] whatever is currently stored in @strategy
def strategy = @strategy

Instance Method Details

#build ⇒ Configuration

Build an immutable Configuration object

Returns:

  • (Configuration)

    the immutable configuration built from this builder's settings

Raises:

  • (Error)

    if configuration is invalid



55
56
57
58
59
60
# File 'lib/tokenkit/config_builder.rb', line 55

# Validates the builder, then wraps its settings in a new Configuration.
#
# The Configuration receives both the hash from build_config_hash and this builder.
#
# @return [Configuration] the newly constructed configuration
# @raise [Error] if the configuration is invalid (presumably raised by validate! — confirm)
def build
  validate!
  Configuration.new(build_config_hash, self)
end