Module: Kotoshu
- Defined in:
- lib/kotoshu/embeddings.rb,
lib/kotoshu.rb,
lib/kotoshu.rb,
lib/kotoshu/cli.rb,
lib/kotoshu/core.rb,
lib/kotoshu/cache.rb,
lib/kotoshu/paths.rb,
lib/kotoshu/grammar.rb,
lib/kotoshu/readers.rb,
lib/kotoshu/version.rb,
lib/kotoshu/defaults.rb,
lib/kotoshu/keyboard.rb,
lib/kotoshu/language.rb,
lib/kotoshu/integrity.rb,
lib/kotoshu/languages.rb,
lib/kotoshu/algorithms.rb,
lib/kotoshu/cli/errors.rb,
lib/kotoshu/debug_mode.rb,
lib/kotoshu/cache/cache.rb,
lib/kotoshu/debug_logger.rb,
lib/kotoshu/grammar/rule.rb,
lib/kotoshu/spellchecker.rb,
lib/kotoshu/configuration.rb,
lib/kotoshu/cli/auto_setup.rb,
lib/kotoshu/core/trie/node.rb,
lib/kotoshu/core/trie/trie.rb,
lib/kotoshu/fluent_checker.rb,
lib/kotoshu/metrics_module.rb,
lib/kotoshu/models/context.rb,
lib/kotoshu/plugins/plugin.rb,
lib/kotoshu/project_config.rb,
lib/kotoshu/results/result.rb,
lib/kotoshu/string_metrics.rb,
lib/kotoshu/core/exceptions.rb,
lib/kotoshu/dictionary/base.rb,
lib/kotoshu/keyboard/layout.rb,
lib/kotoshu/resource_bundle.rb,
lib/kotoshu/source_registry.rb,
lib/kotoshu/cache/base_cache.rb,
lib/kotoshu/core/models/word.rb,
lib/kotoshu/plugins/registry.rb,
lib/kotoshu/readers/aff_data.rb,
lib/kotoshu/resource_manager.rb,
lib/kotoshu/algorithms/lookup.rb,
lib/kotoshu/cache/model_cache.rb,
lib/kotoshu/cli/cache_command.rb,
lib/kotoshu/cli/status_report.rb,
lib/kotoshu/core/trie/builder.rb,
lib/kotoshu/dictionary/cspell.rb,
lib/kotoshu/dictionary/custom.rb,
lib/kotoshu/keyboard/registry.rb,
lib/kotoshu/language/detector.rb,
lib/kotoshu/language/registry.rb,
lib/kotoshu/metrics_collector.rb,
lib/kotoshu/models/onnx_model.rb,
lib/kotoshu/models/suggestion.rb,
lib/kotoshu/algorithms/suggest.rb,
lib/kotoshu/cache/lookup_cache.rb,
lib/kotoshu/cli/batch_reporter.rb,
lib/kotoshu/dictionary/unified.rb,
lib/kotoshu/documents/document.rb,
lib/kotoshu/documents/location.rb,
lib/kotoshu/integrity/manifest.rb,
lib/kotoshu/integrity/net_http.rb,
lib/kotoshu/readers/aff_reader.rb,
lib/kotoshu/readers/dic_reader.rb,
lib/kotoshu/dictionary/hunspell.rb,
lib/kotoshu/grammar/rule_engine.rb,
lib/kotoshu/grammar/rule_loader.rb,
lib/kotoshu/integrity/audit_log.rb,
lib/kotoshu/language/identifier.rb,
lib/kotoshu/personal_dictionary.rb,
lib/kotoshu/readers/file_reader.rb,
lib/kotoshu/suggestions/context.rb,
lib/kotoshu/cache/language_cache.rb,
lib/kotoshu/components/tokenizer.rb,
lib/kotoshu/dictionaries/catalog.rb,
lib/kotoshu/suggestions/pipeline.rb,
lib/kotoshu/cache/frequency_cache.rb,
lib/kotoshu/cli/display_formatter.rb,
lib/kotoshu/cli/language_resolver.rb,
lib/kotoshu/cli/progress_reporter.rb,
lib/kotoshu/components/pos_tagger.rb,
lib/kotoshu/configuration/builder.rb,
lib/kotoshu/dictionary/plain_text.rb,
lib/kotoshu/dictionary/repository.rb,
lib/kotoshu/dictionary/unix_words.rb,
lib/kotoshu/languages/de/language.rb,
lib/kotoshu/languages/en/language.rb,
lib/kotoshu/languages/es/language.rb,
lib/kotoshu/languages/fr/language.rb,
lib/kotoshu/languages/ja/language.rb,
lib/kotoshu/languages/pt/language.rb,
lib/kotoshu/languages/ru/language.rb,
lib/kotoshu/models/fasttext_model.rb,
lib/kotoshu/models/semantic_error.rb,
lib/kotoshu/models/word_embedding.rb,
lib/kotoshu/suggestions/generator.rb,
lib/kotoshu/cache/suggestion_cache.rb,
lib/kotoshu/cli/navigation_manager.rb,
lib/kotoshu/commands/cache_command.rb,
lib/kotoshu/commands/check_command.rb,
lib/kotoshu/commands/model_command.rb,
lib/kotoshu/components/synthesizer.rb,
lib/kotoshu/configuration/resolver.rb,
lib/kotoshu/core/models/affix_rule.rb,
lib/kotoshu/models/embedding_model.rb,
lib/kotoshu/readers/lookup_builder.rb,
lib/kotoshu/suggestions/suggestion.rb,
lib/kotoshu/algorithms/permutations.rb,
lib/kotoshu/core/indexed_dictionary.rb,
lib/kotoshu/keyboard/layouts/azerty.rb,
lib/kotoshu/keyboard/layouts/dvorak.rb,
lib/kotoshu/keyboard/layouts/jcuken.rb,
lib/kotoshu/keyboard/layouts/qwerty.rb,
lib/kotoshu/keyboard/layouts/qwertz.rb,
lib/kotoshu/language/languages/base.rb,
lib/kotoshu/language/tokenizer/base.rb,
lib/kotoshu/models/nearest_neighbor.rb,
lib/kotoshu/algorithms/ngram_suggest.rb,
lib/kotoshu/cli/interactive_reviewer.rb,
lib/kotoshu/components/spell_checker.rb,
lib/kotoshu/data/common_words_loader.rb,
lib/kotoshu/language/normalizer/base.rb,
lib/kotoshu/algorithms/capitalization.rb,
lib/kotoshu/algorithms/phonet_suggest.rb,
lib/kotoshu/readers/condition_checker.rb,
lib/kotoshu/suggestions/suggestion_set.rb,
lib/kotoshu/analyzers/semantic_analyzer.rb,
lib/kotoshu/documents/asciidoc_document.rb,
lib/kotoshu/documents/markdown_document.rb,
lib/kotoshu/data_structures/bloom_filter.rb,
lib/kotoshu/embeddings/similarity_search.rb,
lib/kotoshu/documents/plain_text_document.rb,
lib/kotoshu/spellchecker/parallel_checker.rb,
lib/kotoshu/core/models/result/word_result.rb,
lib/kotoshu/components/whitespace_tokenizer.rb,
lib/kotoshu/core/models/result/document_result.rb,
lib/kotoshu/language/tokenizer/latin_tokenizer.rb,
lib/kotoshu/language/tokenizer/french_tokenizer.rb,
lib/kotoshu/language/tokenizer/german_tokenizer.rb,
lib/kotoshu/components/passthrough_spell_checker.rb,
lib/kotoshu/language/tokenizer/russian_tokenizer.rb,
lib/kotoshu/language/tokenizer/spanish_tokenizer.rb,
lib/kotoshu/suggestions/strategies/base_strategy.rb,
lib/kotoshu/grammar/pattern_matchers/base_matcher.rb,
lib/kotoshu/language/tokenizer/japanese_tokenizer.rb,
lib/kotoshu/suggestions/strategies/ngram_strategy.rb,
lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb,
lib/kotoshu/suggestions/strategies/phonetic_strategy.rb,
lib/kotoshu/suggestions/strategies/semantic_strategy.rb,
lib/kotoshu/suggestions/strategies/symspell_strategy.rb,
lib/kotoshu/suggestions/strategies/composite_strategy.rb,
lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb,
lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb,
lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb,
lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb,
lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb
Overview
Top-level namespace of the Kotoshu spell-checking library. Among its components is the Embeddings module for FastText ONNX integration.
Provides semantic spell checking using FastText word embeddings. Supports 157 languages through pre-converted ONNX models.
Defined Under Namespace
Modules: Algorithms, Analyzers, Cache, Cli, Components, Core, Data, DataStructures, Debug, Defaults, Dictionaries, Dictionary, Documents, Embeddings, Grammar, Integrity, Keyboard, Language, Languages, Metrics, Models, Paths, Plugins, Readers, Results, StringMetrics, Suggestions
Classes: AffixRuleError, CacheCommand, CheckCommand, Configuration, ConfigurationError, DictionaryNotFoundError, Error, IntegrityError, InvalidDictionaryFormatError, ModelCommand, PersonalDictionary, ProjectConfig, ResourceBundle, ResourceManager, ResourceNotCachedError, ResourceNotSetupError, ResourceResolutionError, SourceRegistry, SpellcheckError, Spellchecker
Constant Summary collapse
- VERSION =
"0.3.0"
Class Method Summary collapse
-
.check(text, language: nil, **_options) ⇒ Models::Result::DocumentResult
Check text for spelling errors.
-
.check_file(path, language: nil, **_options) ⇒ Models::Result::DocumentResult
Check a file for spelling errors.
-
.check_files(paths, **options) ⇒ Array<Models::Result::DocumentResult>
Check multiple files for spelling errors.
-
.configuration ⇒ Configuration
Get the global configuration.
-
.configure {|configuration| ... } ⇒ Configuration
Configure Kotoshu: yields the global configuration instance to the given block (if any) and returns it.
-
.correct?(word, language: nil) ⇒ Boolean
Check if a word is spelled correctly.
-
.detect_language(text) ⇒ String?
Detect language of text.
-
.detect_language_with_confidence(text) ⇒ Array<String, Float>
Detect language with confidence score.
-
.dictionary(source = nil) ⇒ Core::IndexedDictionary
Convenience method for creating an indexed dictionary.
-
.get_language(code) ⇒ Class?
Get language class by code.
-
.language ⇒ Module
Access the language module.
-
.language_registered?(code) ⇒ Boolean
Check if a language is registered.
-
.languages_setup ⇒ Array<String>
List languages that have been set up.
-
.misspelled?(word, language: nil) ⇒ Boolean
Check if a word is misspelled.
-
.register_dictionary_type(type, klass) ⇒ Object
Register a custom dictionary type.
-
.register_suggestion_algorithm(name, klass) ⇒ Object
Register a custom suggestion algorithm.
-
.reset_spellchecker ⇒ Object
Reset the spellchecker cache.
-
.resolve(language: nil, want: nil) ⇒ ResourceBundle
Resolve language resources from the cache (no download).
-
.setup(*languages, want: nil, **opts) ⇒ SetupResult, Array<SetupResult>
Set up resources for one or more languages (download or register local files).
-
.setup?(language, resource = nil) ⇒ Boolean
Check if a language (or a specific resource for that language) is set up.
-
.spellchecker ⇒ Spellchecker
Default spellchecker (singleton).
-
.spellchecker_for(language) ⇒ Spellchecker
Get a spellchecker for a specific language (cache-only, raises on miss).
-
.suggest(word, language: nil, **options) ⇒ Suggestions::SuggestionSet
Get spelling suggestions for a word.
-
.suggestion_pipeline(*strategies) ⇒ Suggestions::Strategies::CompositeStrategy
Convenience method for creating a suggestion pipeline.
-
.supported_languages ⇒ Array<String>
Get all supported language codes.
-
.trie(source = nil) ⇒ Core::Trie::Trie
Convenience method for creating a trie.
Class Method Details
.check(text, language: nil, **_options) ⇒ Models::Result::DocumentResult
Check text for spelling errors. Hot path.
322 323 324 325 |
# File 'lib/kotoshu.rb', line 322
# Check text for spelling errors. Hot path.
#
# Uses the language-specific spellchecker when +language+ is given,
# otherwise the default spellchecker. Extra keyword options are accepted
# and ignored.
#
# @param text the text to check
# @param language the language to check against; falls back to the default
#   spellchecker when nil/false
# @return [Models::Result::DocumentResult]
def self.check(text, language: nil, **)
  if language
    spellchecker_for(language).check(text)
  else
    spellchecker.check(text)
  end
end
.check_file(path, language: nil, **_options) ⇒ Models::Result::DocumentResult
Check a file for spelling errors. Hot path.
339 340 341 342 |
# File 'lib/kotoshu.rb', line 339
# Check a file for spelling errors. Hot path.
#
# Extra keyword options are accepted and ignored.
#
# @param path the file to check
# @param language the language to check against; falls back to the default
#   spellchecker when nil/false
# @return [Models::Result::DocumentResult]
def self.check_file(path, language: nil, **)
  if language
    spellchecker_for(language).check_file(path)
  else
    spellchecker.check_file(path)
  end
end
.check_files(paths, **options) ⇒ Array<Models::Result::DocumentResult>
Check multiple files for spelling errors.
353 354 355 |
# File 'lib/kotoshu.rb', line 353
# Check multiple files for spelling errors.
#
# Each path is passed to .check_file together with the given options.
#
# @param paths the files to check; must respond to +map+
# @param options keyword options forwarded unchanged to .check_file
# @return [Array<Models::Result::DocumentResult>] one result per path, in order
def self.check_files(paths, **options)
  # The keyword splat is named on purpose: forwarding an anonymous `**`
  # from inside a block is rejected as a SyntaxError by Ruby 3.3.0.
  paths.map { |path| check_file(path, **options) }
end
.configuration ⇒ Configuration
Get the global configuration.
135 136 137 |
# File 'lib/kotoshu.rb', line 135
# Get the global configuration.
#
# @return [Configuration] the value of Configuration.instance
def self.configuration = Configuration.instance
.configure {|configuration| ... } ⇒ Configuration
Configure Kotoshu: yields the global configuration instance to the given block (if any) and returns it.
124 125 126 127 |
# File 'lib/kotoshu.rb', line 124
# Yield the global configuration to the caller's block, if one is given,
# and return the global configuration.
#
# @yield [configuration] the global configuration
# @return [Configuration]
def self.configure
  if block_given?
    yield configuration
  end

  configuration
end
.correct?(word, language: nil) ⇒ Boolean
Check if a word is spelled correctly. Hot path — cache-only, raises if language not set up.
278 279 280 281 |
# File 'lib/kotoshu.rb', line 278
# Check if a word is spelled correctly. Hot path.
#
# @param word the word to check
# @param language the language to check against; falls back to the default
#   spellchecker when nil/false
# @return [Boolean]
def self.correct?(word, language: nil)
  if language
    spellchecker_for(language).correct?(word)
  else
    spellchecker.correct?(word)
  end
end
.detect_language(text) ⇒ String?
Detect language of text.
445 446 447 |
# File 'lib/kotoshu.rb', line 445
# Detect language of text. Delegates to Language.detect.
#
# @param text the text to inspect
# @return [String, nil]
def self.detect_language(text) = Language.detect(text)
.detect_language_with_confidence(text) ⇒ Array<String, Float>
Detect language with confidence score.
458 459 460 |
# File 'lib/kotoshu.rb', line 458
# Detect language with confidence score. Delegates to
# Language.detect_with_confidence.
#
# @param text the text to inspect
# @return [Array<String, Float>]
def self.detect_language_with_confidence(text) = Language.detect_with_confidence(text)
.dictionary(source = nil) ⇒ Core::IndexedDictionary
Convenience method for creating an indexed dictionary.
361 362 363 364 365 366 367 368 369 370 371 372 |
# File 'lib/kotoshu.rb', line 361
# Convenience method for creating an indexed dictionary.
#
# - nil or Hash: an empty dictionary (a Hash source is not passed on)
# - Array: a dictionary built from the array
# - String: a dictionary loaded via IndexedDictionary.from_file
#
# @param source [Array, String, Hash, nil]
# @return [Core::IndexedDictionary]
# @raise [ArgumentError] for any other kind of source
def self.dictionary(source = nil)
  case source
  in nil | Hash then Core::IndexedDictionary.new
  in Array then Core::IndexedDictionary.new(source)
  in String then Core::IndexedDictionary.from_file(source)
  else raise ArgumentError, "Invalid dictionary source: #{source.inspect}"
  end
end
.get_language(code) ⇒ Class?
Get language class by code.
469 470 471 |
# File 'lib/kotoshu.rb', line 469
# Get language class by code. Delegates to Language.get.
#
# @param code the language code to look up
# @return [Class, nil]
def self.get_language(code) = Language.get(code)
.language ⇒ Module
Access the language module.
433 434 435 |
# File 'lib/kotoshu.rb', line 433
# Access the language module.
#
# @return [Module] the Language constant
def self.language = Language
.language_registered?(code) ⇒ Boolean
Check if a language is registered.
480 481 482 |
# File 'lib/kotoshu.rb', line 480
# Check if a language is registered. Delegates to Language.registered?.
#
# @param code the language code to look up
# @return [Boolean]
def self.language_registered?(code) = Language.registered?(code)
.languages_setup ⇒ Array<String>
List languages that have been set up.
249 250 251 |
# File 'lib/kotoshu.rb', line 249
# List languages that have been set up. Delegates to
# ResourceManager.languages_setup.
#
# @return [Array<String>]
def self.languages_setup = ResourceManager.languages_setup
.misspelled?(word, language: nil) ⇒ Boolean
Check if a word is misspelled. Hot path.
289 290 291 |
# File 'lib/kotoshu.rb', line 289
# Check if a word is misspelled. Hot path. This is the negation of .correct?.
#
# @param word the word to check
# @param language the language to check against, passed through to .correct?
# @return [Boolean]
def self.misspelled?(word, language: nil) = !correct?(word, language: language)
.register_dictionary_type(type, klass) ⇒ Object
Register a custom dictionary type.
412 413 414 |
# File 'lib/kotoshu.rb', line 412
# Register a custom dictionary type. Delegates to Dictionary.register_type.
#
# @param type the identifier to register the dictionary class under
# @param klass the dictionary class
# @return [Object] whatever Dictionary.register_type returns
def self.register_dictionary_type(type, klass) = Dictionary.register_type(type, klass)
.register_suggestion_algorithm(name, klass) ⇒ Object
Register a custom suggestion algorithm.
423 424 425 |
# File 'lib/kotoshu.rb', line 423
# Register a custom suggestion algorithm. Delegates to
# Suggestions::Strategies::BaseStrategy.register_type.
#
# @param name the identifier to register the algorithm under
# @param klass the strategy class
# @return [Object] whatever BaseStrategy.register_type returns
def self.register_suggestion_algorithm(name, klass) =
  Suggestions::Strategies::BaseStrategy.register_type(name, klass)
.reset_spellchecker ⇒ Object
Reset the spellchecker cache. The next call to `spellchecker` or `spellchecker_for` re-resolves from the current configuration.
Does NOT eagerly reload — clearing the cache is enough. This makes the method safe to call between tests even when no language is set up yet (the next call will raise ResourceNotSetupError per the strict two-stage contract).
260 261 262 263 264 |
# File 'lib/kotoshu.rb', line 260
# Reset the spellchecker cache.
#
# Clears both the memoised default spellchecker and the per-language
# spellcheckers; nothing is reloaded eagerly.
#
# @return [nil]
def self.reset_spellchecker
  # Chained assignment clears both caches and evaluates to nil.
  @spellchecker = @spellcheckers = nil
end
.resolve(language: nil, want: nil) ⇒ ResourceBundle
Resolve language resources from the cache (no download).
183 184 185 186 187 188 189 |
# File 'lib/kotoshu.rb', line 183
# Resolve language resources from the cache (no download).
#
# @param language the language to resolve; falls back to the configured
#   default language when nil/false
# @param want the resources to resolve; falls back to
#   ResourceManager::DEFAULT_WANT when nil/false
# @return [ResourceBundle]
# @raise [ResourceNotSetupError] if the effective language is nil or blank
def self.resolve(language: nil, want: nil)
  lang = language || configuration.default_language
  # A blank language is rejected the same way as a missing one, matching the
  # guard in .spellchecker; a nil-only check let "" through.
  raise ResourceNotSetupError.new(lang || "default", "spelling") if lang.nil? || lang.to_s.empty?

  ResourceManager.resolve(language: lang, want: want || ResourceManager::DEFAULT_WANT)
end
.setup(*languages, want: nil, **opts) ⇒ SetupResult, Array<SetupResult>
Set up resources for one or more languages (download or register local files). Idempotent: re-running with the same args is a no-op unless `force: true`.
217 218 219 220 221 222 223 224 225 226 |
# File 'lib/kotoshu.rb', line 217
# Set up resources for one or more languages.
#
# Each language is passed to ResourceManager.setup together with +want+
# (ResourceManager::DEFAULT_WANT when nil/false) and the remaining options.
#
# @param languages one or more languages to set up
# @param want the resources to set up
# @param opts extra keyword options forwarded to ResourceManager.setup
# @return the single setup result when exactly one language is given,
#   otherwise an Array with one result per language
# @raise [ArgumentError] if no language is given
def self.setup(*languages, want: nil, **opts)
  raise ArgumentError, "Kotoshu.setup requires at least one language" if languages.empty?

  wanted = want || ResourceManager::DEFAULT_WANT
  outcomes = languages.map { |code| ResourceManager.setup(code, want: wanted, **opts) }
  # A lone language yields its bare result rather than a one-element array.
  outcomes.size == 1 ? outcomes.first : outcomes
end
.setup?(language, resource = nil) ⇒ Boolean
Check if a language (or a specific resource for that language) is set up.
239 240 241 |
# File 'lib/kotoshu.rb', line 239
# Check if a language (or a specific resource for that language) is set up.
# Delegates to ResourceManager.setup?.
#
# @param language the language to look up
# @param resource optional specific resource, passed on as +resource:+
# @return [Boolean]
def self.setup?(language, resource = nil) = ResourceManager.setup?(language, resource: resource)
.spellchecker ⇒ Spellchecker
Default spellchecker (singleton). Uses the configured default language. Cache-only — raises ResourceNotSetupError if the default language hasn’t been set up via Kotoshu.setup.
145 146 147 148 149 150 151 152 |
# File 'lib/kotoshu.rb', line 145
# Default spellchecker (singleton), built for the configured default
# language and memoised after the first successful call.
#
# @return [Spellchecker]
# @raise [ResourceNotSetupError] if the configured default language is nil
#   or blank
def self.spellchecker
  @spellchecker ||= begin
    default_lang = configuration.default_language
    # nil.to_s is "", so a single emptiness check covers nil and blank.
    raise ResourceNotSetupError.new(default_lang || "default", "spelling") if default_lang.to_s.empty?

    spellchecker_for(default_lang)
  end
end
.spellchecker_for(language) ⇒ Spellchecker
Get a spellchecker for a specific language (cache-only, raises on miss).
163 164 165 166 167 168 169 170 |
# File 'lib/kotoshu.rb', line 163
# Get a spellchecker for a specific language.
#
# Spellcheckers are memoised per language, keyed by +language.to_s+. On a
# miss the resources come from ResourceManager.resolve and a new
# Spellchecker is built with the global configuration.
#
# @param language the language to get a spellchecker for
# @return [Spellchecker]
def self.spellchecker_for(language)
  registry = (@spellcheckers ||= {})
  cache_key = language.to_s
  return registry[cache_key] if registry[cache_key]

  registry[cache_key] = Spellchecker.new(
    resource_bundle: ResourceManager.resolve(language: language),
    config: configuration
  )
end
.suggest(word, language: nil, **options) ⇒ Suggestions::SuggestionSet
Get spelling suggestions for a word. Hot path.
305 306 307 308 |
# File 'lib/kotoshu.rb', line 305
# Get spelling suggestions for a word. Hot path.
#
# Remaining keyword options are forwarded to the spellchecker's +suggest+.
#
# @param word the word to get suggestions for
# @param language the language to use; falls back to the default
#   spellchecker when nil/false
# @return [Suggestions::SuggestionSet]
def self.suggest(word, language: nil, **)
  if language
    spellchecker_for(language).suggest(word, **)
  else
    spellchecker.suggest(word, **)
  end
end
.suggestion_pipeline(*strategies) ⇒ Suggestions::Strategies::CompositeStrategy
Convenience method for creating a suggestion pipeline.
399 400 401 402 403 |
# File 'lib/kotoshu.rb', line 399
# Convenience method for creating a suggestion pipeline.
#
# Builds a CompositeStrategy named +:default+ and adds each given strategy
# to it in argument order.
#
# @param strategies the strategies to add
# @return [Suggestions::Strategies::CompositeStrategy]
def self.suggestion_pipeline(*strategies)
  composite = Suggestions::Strategies::CompositeStrategy.new(name: :default)
  strategies.each_with_object(composite) { |strategy, target| target.add(strategy) }
end
.supported_languages ⇒ Array<String>
Get all supported language codes.
490 491 492 |
# File 'lib/kotoshu.rb', line 490
# Get all supported language codes. Delegates to Language.supported_codes.
#
# @return [Array<String>]
def self.supported_languages = Language.supported_codes
.trie(source = nil) ⇒ Core::Trie::Trie
Convenience method for creating a trie.
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 |
# File 'lib/kotoshu.rb', line 378
# Convenience method for creating a trie.
#
# - nil: an empty trie
# - Array: built via Builder.from_array
# - String: loaded via Builder.from_file when a file exists at that path,
#   otherwise built via Builder.from_string
#
# @param source [Array, String, nil]
# @return [Core::Trie::Trie]
# @raise [ArgumentError] for any other kind of source
def self.trie(source = nil)
  case source
  in nil then Core::Trie::Trie.new
  in Array then Core::Trie::Builder.from_array(source)
  in String
    # An existing path wins over treating the string as literal content.
    File.exist?(source) ? Core::Trie::Builder.from_file(source) : Core::Trie::Builder.from_string(source)
  else raise ArgumentError, "Invalid trie source: #{source.inspect}"
  end
end