Class: ParadeDB::Index::TokenizerParser

Inherits:
Object
  • Object
show all
Defined in:
lib/parade_db/index.rb

Constant Summary collapse

# Whole-string pattern (anchored with \A and \z) matching an identifier
# (letter or underscore, then letters, digits, or underscores), optionally
# followed by further identifiers joined with "::" or ".", and optionally
# ending in a parenthesized section restricted to letters, digits,
# underscores, single/double quotes, dots, commas, "=", ":", "-", and
# whitespace.
TOKENIZER_EXPRESSION =
/\A[a-zA-Z_][a-zA-Z0-9_]*(?:(?:::|\.)[a-zA-Z_][a-zA-Z0-9_]*)*(?:\(\s*[a-zA-Z0-9_'".,=\s:-]*\s*\))?\z/.freeze
# Frozen list of option-key symbols.
# NOTE(review): presumably the keys accepted by a single-tokenizer Hash
# definition; its use is not visible in this section -- confirm in index.rb.
TOKENIZER_SINGLE_KEYS =
%i[tokenizer args named_args filters stemmer alias].freeze

Class Method Summary collapse

Class Method Details

.parse(source_name, tokenizer_spec) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/parade_db/index.rb', line 53

# Expands a tokenizer specification for one source into a list of entries
# produced by build_tokenized_entry.
#
# @param source_name [Object] name of the source the tokenizers apply to;
#   passed through to build_tokenized_entry and interpolated into errors
# @param tokenizer_spec [Symbol, String, Hash] either a single tokenizer
#   name, or a Hash mapping tokenizer names to their options (each value
#   being a Hash, Symbol, or String)
# @return [Array] one entry per tokenizer, in the Hash's iteration order
# @raise [InvalidIndexDefinition] when the spec, or any per-tokenizer
#   options value, is of an unsupported type
def parse(source_name, tokenizer_spec)
  # A bare name means a single tokenizer with no options.
  if tokenizer_spec.is_a?(Symbol) || tokenizer_spec.is_a?(String)
    return [build_tokenized_entry(source_name, tokenizer_spec.to_s, {})]
  end

  unless tokenizer_spec.is_a?(Hash)
    raise InvalidIndexDefinition,
          "invalid tokenizer definition for #{source_name}: #{tokenizer_spec.inspect}"
  end

  tokenizer_spec.map do |name, settings|
    tokenizer_name = name.to_s

    # Hash values and scalar values go through different normalizers.
    options =
      if settings.is_a?(Hash)
        normalize_options(settings)
      elsif settings.is_a?(Symbol) || settings.is_a?(String)
        normalize_positional_option(settings)
      else
        raise InvalidIndexDefinition,
              "tokenizer options for #{source_name}.#{name} must be a Hash, Symbol, or String"
      end

    build_tokenized_entry(source_name, tokenizer_name, options)
  end
end