Class: Kotoshu::Spellchecker

Inherits:
Object
  • Object
show all
Defined in:
lib/kotoshu/spellchecker.rb,
lib/kotoshu/fluent_checker.rb,
lib/kotoshu/spellchecker/parallel_checker.rb

Overview

Main spellchecker class.

This is the primary facade for spell checking operations, providing methods to check words, text, and files.

Examples:

Creating a spellchecker with a dictionary

dict = Kotoshu::Dictionary::UnixWords.new("/usr/share/dict/words", language_code: "en-US")
spellchecker = Spellchecker.new(dictionary: dict)
spellchecker.correct?("hello")  # => true

Using configuration

spellchecker = Spellchecker.new(
  dictionary_path: "/usr/share/dict/words",
  language: "en-US"
)

Defined Under Namespace

Classes: FluentChecker, ParallelChecker

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dictionary: nil, config: nil, resource_bundle: nil, **kwargs) ⇒ Spellchecker

Create a new spellchecker.

Examples:

With dictionary

spellchecker = Spellchecker.new(dictionary: dict)

With resource bundle (0.2+)

bundle = Kotoshu::ResourceManager.resolve(language: "en")
spellchecker = Spellchecker.new(resource_bundle: bundle)
spellchecker.correct?("hello")  # => true

With configuration hash

spellchecker = Spellchecker.new(
  dictionary_path: "/usr/share/dict/words",
  language: "en-US"
)

With Configuration object

config = Configuration.new(dictionary_path: "words.txt")
spellchecker = Spellchecker.new(config: config)

Parameters:

  • dictionary (Dictionary::Base, nil) (defaults to: nil)

    The dictionary (optional)

  • config (Configuration, Hash) (defaults to: nil)

    Configuration or settings

  • resource_bundle (ResourceBundle, nil) (defaults to: nil)

    Pre-resolved resource bundle

  • kwargs (Hash)

    Additional configuration options



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/kotoshu/spellchecker.rb', line 58

# Create a new spellchecker.
#
# The configuration is resolved as follows: a Configuration object passed as
# +config+ is used as-is; otherwise a Configuration is built from the
# +config+ Hash (when one is given) overlaid with +kwargs+, so explicit
# keyword options take precedence over entries in the Hash.
#
# @param dictionary [Dictionary::Base, nil] dictionary to use; when omitted
#   and a resource bundle is given, the bundle's dictionary is used
# @param config [Configuration, Hash, nil] a ready Configuration, or a Hash
#   of settings
# @param resource_bundle [ResourceBundle, nil] pre-resolved resource bundle
# @param kwargs [Hash] additional configuration options
def initialize(dictionary: nil, config: nil, resource_bundle: nil, **kwargs)
  @resource_bundle = resource_bundle

  # Honor a settings Hash passed via +config+ instead of discarding it.
  # NOTE(review): keys are passed through untouched; confirm whether
  # Configuration.new expects symbol keys only.
  settings = config.is_a?(Hash) ? config.merge(kwargs) : kwargs.dup

  if resource_bundle
    dictionary ||= resource_bundle.dictionary
    # The bundle's language is only a fallback; an explicit setting wins.
    settings[:language] = resource_bundle.language unless settings.key?(:language)
  end

  if config.is_a?(Configuration)
    @config = config
  else
    settings[:dictionary_path] = dictionary.path if dictionary.respond_to?(:path)
    @config = Configuration.new(settings)
  end

  # An explicitly supplied (or bundle-provided) dictionary overrides whatever
  # the configuration would otherwise provide.
  @config.dictionary = dictionary if dictionary

  @generator = Suggestions::Generator.new(
    @config.dictionary,
    max_suggestions: @config.max_suggestions,
    algorithms: @config.suggestion_algorithms
  )
end

Instance Attribute Details

#config ⇒ Configuration (readonly)

Returns the configuration.

Returns:



29
30
31
# File 'lib/kotoshu/spellchecker.rb', line 29

# Read-only accessor for the spellchecker's configuration.
#
# @return [Configuration] the object stored in +@config+ by the constructor
def config
  @config
end

#generator ⇒ Suggestions::Generator (readonly)

Returns the suggestion generator.

Returns:



26
27
28
# File 'lib/kotoshu/spellchecker.rb', line 26

# Read-only accessor for the suggestion generator.
#
# @return [Suggestions::Generator] the generator currently held in
#   +@generator+
def generator
  @generator
end

#resource_bundle ⇒ ResourceBundle? (readonly)

Returns the resource bundle, if one was provided.

Returns:



32
33
34
# File 'lib/kotoshu/spellchecker.rb', line 32

# Read-only accessor for the resource bundle given to the constructor.
#
# @return [ResourceBundle, nil] the bundle, or nil when none was supplied
def resource_bundle
  @resource_bundle
end

Instance Method Details

#check(text) ⇒ Models::Result::DocumentResult

Check text for spelling errors.

Examples:

result = spellchecker.check("Hello wrold")
result.success?    # => false
result.errors.map(&:word)  # => ["wrold"]

Parameters:

  • text (String)

    The text to check

Returns:



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/kotoshu/spellchecker.rb', line 159

# Check text for spelling errors.
#
# The text is split with #tokenize and every token is run through
# #check_word. Each misspelled token is reported as a WordResult carrying
# the token's position in the text.
#
# @param text [String, nil] the text to check
# @return [Models::Result::DocumentResult] a success result for nil or empty
#   text; otherwise a result listing the misspelled words and the total
#   number of tokens examined
def check(text)
  return Models::Result::DocumentResult.success if text.nil? || text.empty?

  words = tokenize(text)

  errors = words.each_with_object([]) do |(word, pos), found|
    result = check_word(word)
    next unless result.incorrect?

    # Re-wrap the result so the error carries its position within the text.
    found << Models::Result::WordResult.new(
      word,
      correct: false,
      suggestions: result.suggestions,
      position: pos
    )
  end

  Models::Result::DocumentResult.new(
    file: nil,
    errors: errors,
    word_count: words.size
  )
end

#check_directory(path, pattern: "*.txt") ⇒ Array<Models::Result::DocumentResult>

Check a directory for spelling errors.

Examples:

results = spellchecker.check_directory("docs/")
results.select(&:failed?).map(&:file)

Parameters:

  • path (String)

    The directory path

  • pattern (String) (defaults to: "*.txt")

    File pattern to match (default: “*.txt”)

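Returns:

  • (Array<Models::Result::DocumentResult>)

    One result per matched file

Raises:

  • (DictionaryNotFoundError)

    If the path is not an existing directory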



220
221
222
223
224
225
# File 'lib/kotoshu/spellchecker.rb', line 220

# Check every file in a directory that matches a glob pattern.
#
# The pattern is expanded relative to +path+ (via Dir.glob's +base:+ option)
# so that glob metacharacters in the directory name itself are taken
# literally. Only regular files are checked; directories that happen to
# match the pattern are skipped. Files are processed in sorted order.
#
# @param path [String] the directory path
# @param pattern [String] glob pattern, relative to +path+
# @return [Array<Models::Result::DocumentResult>] one result per matched file
# @raise [DictionaryNotFoundError] if +path+ is not an existing directory
def check_directory(path, pattern: "*.txt")
  # File.directory? is already false for paths that do not exist.
  raise DictionaryNotFoundError, path unless File.directory?(path)

  Dir.glob(pattern, base: path)
     .sort
     .map { |relative| File.join(path, relative) }
     .select { |file| File.file?(file) }
     .map { |file| check_file(file) }
end

#check_file(path) ⇒ Models::Result::DocumentResult

Check a file for spelling errors.

Examples:

result = spellchecker.check_file("README.md")
result.to_s  # => "File 'README.md': 3 spelling error(s) found"

Parameters:

  • path (String)

    The file path

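Returns:

  • (Models::Result::DocumentResult)

    The result for the file, with its path attached

Raises:

  • (DictionaryNotFoundError)

    If the file does not exist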



197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/kotoshu/spellchecker.rb', line 197

# Spell-check the contents of a single file.
#
# The file is read using the encoding from the configuration, checked with
# #check, and the outcome is re-packaged so that it carries the file path.
#
# @param path [String] the file path
# @return [Models::Result::DocumentResult] errors and word count for the file
# @raise [DictionaryNotFoundError] if nothing exists at +path+
def check_file(path)
  unless File.exist?(path)
    raise DictionaryNotFoundError, path
  end

  checked = check(File.read(path, encoding: @config.encoding))

  # #check does not know the file name, so build a result that includes it.
  Models::Result::DocumentResult.new(
    file: path,
    errors: checked.errors,
    word_count: checked.word_count
  )
end

#check_word(word) ⇒ Models::Result::WordResult

Check a word and return a result object.

Examples:

result = spellchecker.check_word("hello")
result.correct?  # => true

With misspelled word

result = spellchecker.check_word("helo")
result.correct?         # => false
result.suggestions      # => SuggestionSet with suggestions

Parameters:

  • word (String)

    The word to check

Returns:



136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/kotoshu/spellchecker.rb', line 136

# Check a single word and wrap the outcome in a result object.
#
# @param word [String, nil] the word to check
# @return [Models::Result::WordResult] an incorrect result for the empty
#   string (with an empty suggestion set) when +word+ is nil or empty; a
#   correct result when the word is known; otherwise an incorrect result
#   carrying suggestions from #suggest
def check_word(word)
  if word.nil? || word.empty?
    return Models::Result::WordResult.new(
      "",
      correct: false,
      suggestions: Suggestions::SuggestionSet.empty
    )
  end

  return Models::Result::WordResult.correct(word) if correct?(word)

  # Suggestions are only generated for words that failed the lookup.
  Models::Result::WordResult.incorrect(word, suggestions: suggest(word))
end

#correct?(word) ⇒ Boolean

Check if a word is spelled correctly.

Examples:

spellchecker.correct?("hello")  # => true
spellchecker.correct?("helo")   # => false

Parameters:

  • word (String)

    The word to check

Returns:

  • (Boolean)

    True if the word is correct



94
95
96
97
98
# File 'lib/kotoshu/spellchecker.rb', line 94

# Tell whether a word is spelled correctly.
#
# @param word [String, nil] the word to check
# @return [Boolean] false for nil or empty input; otherwise whatever the
#   suggestion generator's +correct?+ reports for the word
def correct?(word)
  if word.nil? || word.empty?
    false
  else
    @generator.correct?(word)
  end
end

#dictionary ⇒ Dictionary::Base

Get the dictionary being used.

Returns:



263
264
265
# File 'lib/kotoshu/spellchecker.rb', line 263

# Get the dictionary being used.
#
# Delegates to the suggestion generator rather than reading the
# configuration directly.
#
# @return [Dictionary::Base] the generator's dictionary
def dictionary
  @generator.dictionary
end

#incorrect?(word) ⇒ Boolean

Check if a word is misspelled.

Parameters:

  • word (String)

    The word to check

Returns:

  • (Boolean)

    True if the word is misspelled



104
105
106
# File 'lib/kotoshu/spellchecker.rb', line 104

# Check if a word is misspelled; the logical negation of #correct?.
#
# @param word [String] the word to check
# @return [Boolean] true when #correct? is falsy for the word
def incorrect?(word)
  !correct?(word)
end

#reload_dictionary ⇒ self

Reload the dictionary.

Returns:

  • (self)

    Self for chaining



270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/kotoshu/spellchecker.rb', line 270

# Reload the dictionary.
#
# Asks the configuration to reset its dictionary, then replaces the
# suggestion generator with a new one built from the configuration's
# current dictionary, suggestion limit and algorithms.
#
# @return [self] self, for chaining
def reload_dictionary
  tap do
    @config.reset_dictionary

    fresh_dictionary = @config.dictionary
    @generator = Suggestions::Generator.new(
      fresh_dictionary,
      max_suggestions: @config.max_suggestions,
      algorithms: @config.suggestion_algorithms
    )
  end
end

#suggest(word, max_suggestions: nil) ⇒ Suggestions::SuggestionSet

Get spelling suggestions for a word.

Examples:

suggestions = spellchecker.suggest("helo")
suggestions.to_words  # => ["hello", "help", "held", ...]

Parameters:

  • word (String)

    The misspelled word

  • max_suggestions (Integer) (defaults to: nil)

    Maximum suggestions (optional)

Returns:



117
118
119
120
121
# File 'lib/kotoshu/spellchecker.rb', line 117

# Get spelling suggestions for a word.
#
# @param word [String, nil] the misspelled word
# @param max_suggestions [Integer, nil] maximum suggestions; passed through
#   to the generator unchanged (nil when not given)
# @return [Suggestions::SuggestionSet] an empty set for nil or empty input;
#   otherwise the generator's suggestions for the word
def suggest(word, max_suggestions: nil)
  if word.nil? || word.empty?
    Suggestions::SuggestionSet.empty
  else
    @generator.generate(word, max_suggestions: max_suggestions)
  end
end

#tokenize(text) ⇒ Array<Array>

Tokenize text into words.

Examples:

spellchecker.tokenize("Hello world!")
# => [["Hello", 0], ["world", 6]]

Parameters:

  • text (String)

    The text to tokenize

Returns:

  • (Array<Array>)

    Array of [word, position] pairs



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/kotoshu/spellchecker.rb', line 235

# Tokenize text into words.
#
# Scans the text character by character, collecting consecutive characters
# accepted by +word_char?+ into words and recording the character index at
# which each word starts.
#
# @param text [String, nil] the text to tokenize
# @return [Array<Array>] array of [word, position] pairs, where word is a
#   frozen String and position is the index of its first character; empty
#   for nil or empty text
def tokenize(text)
  return [] if text.nil? || text.empty?

  tokens = []
  buffer = String.new
  start_index = 0

  text.each_char.with_index do |char, index|
    if word_char?(char)
      # An empty buffer means this character opens a new word.
      start_index = index if buffer.empty?
      buffer << char
    elsif !buffer.empty?
      # Store a frozen copy, because the buffer is reused for the next word.
      tokens << [buffer.dup.freeze, start_index]
      buffer.clear
    end
  end

  # Flush a word that runs up to the end of the text.
  tokens << [buffer.dup.freeze, start_index] unless buffer.empty?

  tokens
end