Class: Kotoshu::Dictionary::PlainText

Inherits:
Base
  • Object
show all
Defined in:
lib/kotoshu/dictionary/plain_text.rb

Overview

Plain text dictionary backend.

This dictionary reads from simple plain text word lists, with support for comments and various formatting options.

File format:

  • One word per line

  • Lines starting with # are comments

  • Empty lines are ignored

  • Supports multi-word phrases (e.g., “New York”)

Examples:

Creating from a file

dict = PlainText.new("words.txt", language_code: "en-US")
dict.lookup?("hello")  # => true

Creating from a URL

dict = PlainText.new("https://raw.githubusercontent.com/kotoshu/dictionaries/main/en_US/words.txt",
                     language_code: "en-US")

Creating from an array

dict = PlainText.from_words(%w[hello world test], language_code: "en")

Instance Attribute Summary collapse

Attributes inherited from Base

#language_code, #locale, #metadata

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#each_word, #empty?, load, #lookup?, register_type, registry, #size, #to_s, #type, #words_matching, #words_with_prefix

Constructor Details

#initialize(path, language_code:, locale: nil, case_sensitive: false, word_pattern: nil, metadata: {}) ⇒ PlainText

Create a new PlainText dictionary.

Parameters:

  • path (String)

    Path to the dictionary file or URL

  • language_code (String)

    The language code

  • locale (String, nil) (defaults to: nil)

    The locale (optional)

  • case_sensitive (Boolean) (defaults to: false)

    Whether lookups are case-sensitive

  • word_pattern (Regexp, nil) (defaults to: nil)

    Pattern to filter words (optional)

  • metadata (Hash) (defaults to: {})

    Additional metadata (optional)



47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/kotoshu/dictionary/plain_text.rb', line 47

# Create a new PlainText dictionary from a word-list file or URL.
#
# @param path [String] Path to the dictionary file or URL
# @param language_code [String] The language code
# @param locale [String, nil] The locale (optional)
# @param case_sensitive [Boolean] Whether lookups are case-sensitive
# @param word_pattern [Regexp, nil] Pattern to filter words (optional)
# @param metadata [Hash] Additional metadata (optional)
def initialize(path, language_code:, locale: nil, case_sensitive: false,
               word_pattern: nil, metadata: {})
  # `metadata: ` is the Ruby 3.1+ shorthand for `metadata: metadata`.
  super(language_code, locale: locale, metadata: )

  # Keep the caller-supplied location as well as the resolved one.
  @original_path = path
  @path = resolve_path(path)
  @case_sensitive = case_sensitive
  @word_pattern = word_pattern
  # NOTE(review): load_words and build_word_set are defined outside this view;
  # presumably they read the options assigned above, so keep this ordering.
  @words = load_words(@path)
  @word_set = build_word_set

  # Register this dictionary type, skipped when :plain_text is already present
  # in the registry.
  self.class.register_type(:plain_text) unless Dictionary.registry.key?(:plain_text)
end

Instance Attribute Details

#case_sensitive ⇒ Boolean (readonly)

Returns whether lookups are case-sensitive.

Returns:

  • (Boolean)

    Whether lookups are case-sensitive



34
35
36
# File 'lib/kotoshu/dictionary/plain_text.rb', line 34

# Whether lookups are case-sensitive.
#
# @return [Boolean]
def case_sensitive = @case_sensitive

#path ⇒ String? (readonly)

Returns the path to the dictionary file (or nil if created from array).

Returns:

  • (String, nil)

    The path to the dictionary file (or nil if created from array)



31
32
33
# File 'lib/kotoshu/dictionary/plain_text.rb', line 31

# The path to the dictionary file, or nil when the dictionary was built from
# an array of words.
#
# @return [String, nil]
def path = @path

#word_pattern ⇒ Regexp? (readonly)

Returns the pattern for word filtering.

Returns:

  • (Regexp, nil)

    Pattern for word filtering



37
38
39
# File 'lib/kotoshu/dictionary/plain_text.rb', line 37

# The pattern used for word filtering, if any.
#
# @return [Regexp, nil]
def word_pattern = @word_pattern

Class Method Details

.from_string(text, language_code:, locale: nil, case_sensitive: false) ⇒ PlainText

Create a dictionary from a string.

Examples:

text = "hello\nworld\ntest"
dict = PlainText.from_string(text, language_code: "en")

Parameters:

  • text (String)

    The text containing words (newline separated)

  • language_code (String)

    The language code

  • locale (String, nil) (defaults to: nil)

    The locale (optional)

  • case_sensitive (Boolean) (defaults to: false)

    Whether lookups are case-sensitive

Returns:

  • (PlainText)



179
180
181
182
183
184
185
# File 'lib/kotoshu/dictionary/plain_text.rb', line 179

# Create a dictionary from newline-separated text.
#
# Every line is stripped of surrounding whitespace before it is inspected, so
# empty lines, whitespace-only lines (including the lone "\r" left behind by
# blank lines in CRLF text) and comment lines starting with "#" are skipped.
#
# @example
#   text = "hello\nworld\ntest"
#   dict = PlainText.from_string(text, language_code: "en")
#
# @param text [String] The text containing words (newline separated)
# @param language_code [String] The language code
# @param locale [String, nil] The locale (optional)
# @param case_sensitive [Boolean] Whether lookups are case-sensitive
# @return [PlainText] the dictionary built by from_words from the extracted words
def self.from_string(text, language_code:, locale: nil, case_sensitive: false)
  # Strip before filtering: filtering first let "   " or "\r" through, and the
  # later strip turned them into an empty-string dictionary word.
  words = text.each_line
              .map(&:strip)
              .reject { |line| line.empty? || line.start_with?("#") }

  from_words(words, language_code: language_code, locale: locale,
                    case_sensitive: case_sensitive)
end

.from_words(words, language_code:, locale: nil, case_sensitive: false) ⇒ PlainText

Create a dictionary from an array of words.

Examples:

dict = PlainText.from_words(%w[hello world test], language_code: "en")

Parameters:

  • words (Array<String>)

    The words

  • language_code (String)

    The language code

  • locale (String, nil) (defaults to: nil)

    The locale (optional)

  • case_sensitive (Boolean) (defaults to: false)

    Whether lookups are case-sensitive

Returns:

  • (PlainText)



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/kotoshu/dictionary/plain_text.rb', line 150

# Create a dictionary from an in-memory array of words.
#
# The instance is built with `allocate` and its instance variables are set
# directly, so #initialize (which loads words from a path) is not run and
# `@path` is nil.
#
# Words are normalised the same way #add_word does it: nil and empty entries
# are skipped, words are downcased unless +case_sensitive+ is true, and only
# the first occurrence of each resulting word is kept. Dropping duplicates
# keeps the word list and the word => index hash in agreement.
#
# @example
#   dict = PlainText.from_words(%w[hello world test], language_code: "en")
#
# @param words [Array<String>] The words
# @param language_code [String] The language code
# @param locale [String, nil] The locale (optional)
# @param case_sensitive [Boolean] Whether lookups are case-sensitive
# @return [PlainText] the new dictionary
def self.from_words(words, language_code:, locale: nil, case_sensitive: false)
  entries = words.reject { |w| w.nil? || w.empty? }
                 .map { |w| case_sensitive ? w : w.downcase }
                 .uniq

  dict = allocate

  dict.instance_variable_set(:@language_code, language_code.dup.freeze)
  dict.instance_variable_set(:@locale, locale&.dup&.freeze)
  dict.instance_variable_set(:@path, nil)
  dict.instance_variable_set(:@case_sensitive, case_sensitive)
  dict.instance_variable_set(:@word_pattern, nil)
  dict.instance_variable_set(:@words, entries)
  # word => position in @words; unique entries make this a one-to-one mapping.
  dict.instance_variable_set(:@word_set, entries.each_with_index.to_h)
  dict.instance_variable_set(:@metadata, {}.freeze)

  # Register this dictionary type (unless already registered)
  register_type(:plain_text) unless Dictionary.registry.key?(:plain_text)

  dict
end

Instance Method Details

#add_word(word, flags: []) ⇒ Boolean

Add a word to the dictionary.

Parameters:

  • word (String)

    The word to add

  • flags (Array<String>) (defaults to: [])

    Flags (ignored for PlainText)

Returns:

  • (Boolean)

    True if added



105
106
107
108
109
110
111
112
113
114
115
# File 'lib/kotoshu/dictionary/plain_text.rb', line 105

# Add a word to the dictionary.
#
# The word is downcased first unless the dictionary is case-sensitive. It is
# appended to the word list and its position is recorded in the lookup hash.
#
# @param word [String] The word to add
# @param flags [Array<String>] Flags (ignored for PlainText)
# @return [Boolean] true if the word was added; false for nil/empty input or
#   a word that is already present
def add_word(word, flags: [])
  return false if word.nil? || word.empty?

  entry = word
  entry = word.downcase unless @case_sensitive

  if @word_set.key?(entry)
    false
  else
    # The new word lands at the current end of the list.
    @word_set[entry] = @words.length
    @words.push(entry)
    true
  end
end

#lookup(word) ⇒ Boolean

Check if a word exists in the dictionary.

Parameters:

  • word (String)

    The word to look up

Returns:

  • (Boolean)

    True if the word exists



66
67
68
69
70
71
# File 'lib/kotoshu/dictionary/plain_text.rb', line 66

# Check if a word exists in the dictionary.
#
# The word is downcased before the check unless the dictionary is
# case-sensitive.
#
# @param word [String] The word to look up
# @return [Boolean] true if the word exists; false for nil or empty input
def lookup(word)
  return false if word.nil? || word.empty?

  needle = word
  needle = word.downcase unless @case_sensitive
  @word_set.key?(needle)
end

#remove_word(word) ⇒ Boolean

Remove a word from the dictionary.

Parameters:

  • word (String)

    The word to remove

Returns:

  • (Boolean)

    True if removed



121
122
123
124
125
126
127
128
129
130
131
# File 'lib/kotoshu/dictionary/plain_text.rb', line 121

# Remove a word from the dictionary.
#
# The word is downcased first unless the dictionary is case-sensitive. After
# the word is taken out of the word list, the stored positions of every word
# that followed it are refreshed. Without that step the lookup hash would keep
# pointing one slot too far, and a later removal would delete the wrong word.
#
# @param word [String] The word to remove
# @return [Boolean] true if removed; false for nil/empty input or a word that
#   is not in the dictionary
def remove_word(word)
  return false if word.nil? || word.empty?

  key = @case_sensitive ? word : word.downcase
  return false unless @word_set.key?(key)

  @word_set.delete(key)

  # Locate the word by value rather than trusting the stored index, then drop
  # every occurrence so the list cannot keep a word the hash no longer knows.
  first_gap = @words.index(key)
  return true if first_gap.nil?

  @words.delete(key)

  # Everything from the first removed slot onward has shifted; re-record it.
  first_gap.upto(@words.length - 1) { |i| @word_set[@words[i]] = i }

  true
end

#suggest(word, max_suggestions: 10) ⇒ Array<String>

Generate spelling suggestions.

Uses edit distance to find similar words in the dictionary.

Parameters:

  • word (String)

    The misspelled word

  • max_suggestions (Integer) (defaults to: 10)

    Maximum suggestions

Returns:

  • (Array<String>)

    List of suggested words



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/kotoshu/dictionary/plain_text.rb', line 80

# Generate spelling suggestions.
#
# Only dictionary words that share a leading stem with the input are scored.
# The stem is the input without its final character, or the whole input when
# it has three characters or fewer. Words at an edit distance of 1 or 2 are
# returned, closest first.
#
# @param word [String] The misspelled word
# @param max_suggestions [Integer] Maximum suggestions
# @return [Array<String>] List of suggested words; empty for nil or empty input
def suggest(word, max_suggestions: 10)
  return [] if word.nil? || word.empty?

  target = @case_sensitive ? word : word.downcase
  stem = target[0...[target.length - 1, 3].max]

  # Score each stem-matching entry, keeping near misses but not exact matches.
  scored = @words.each_with_object([]) do |entry, acc|
    next unless entry.start_with?(stem)

    distance = edit_distance(target, entry)
    acc << [entry, distance] if distance.positive? && distance <= 2
  end

  scored.sort_by { |_, distance| distance }
        .first(max_suggestions)
        .map(&:first)
end

#words ⇒ Array<String>

Get all words in the dictionary.

Returns:

  • (Array<String>)

    All words



136
137
138
# File 'lib/kotoshu/dictionary/plain_text.rb', line 136

# Get all words in the dictionary.
#
# A shallow copy is returned, so changing the returned array does not change
# the dictionary's own list.
#
# @return [Array<String>] All words
def words = @words.dup