Class: Kotoshu::Languages::Russian

Inherits:
Kotoshu::Language::Base show all
Defined in:
lib/kotoshu/languages/ru/language.rb

Overview

Russian language implementation.

Supports multiple dialects: ru-RU, ru-BY, ru-KZ, ru-KG, ru-MD

Provides full Hunspell integration (spell checking, POS tagging, and grammar rules) tailored to the Russian Cyrillic script and case system.

Defined Under Namespace

Modules: GrammarRules
Classes: POSTagger, SpellChecker, Tokenizer

Constant Summary collapse

# Hunspell affix (+:aff+) and dictionary (+:dic+) file paths, keyed by
# language code. Only 'ru-RU' has an entry.
#
# Both the outer hash and every per-code entry are frozen, so the paths
# cannot be altered through this constant at runtime.
#
# NOTE(review): these paths point into the spec fixtures directory --
# confirm that is intended for non-test use.
HUNSPELL_DICTIONARIES = {
  'ru-RU' => {
    aff: 'spec/integrational/fixtures/ru_RU.aff',
    dic: 'spec/integrational/fixtures/ru_RU.dic'
  }.freeze
}.freeze
# Display names keyed by upper-case region code. #description looks the
# variant up here and falls back to the raw variant when it is not listed.
VARIANT_NAMES = {
  'RU' => 'Russian',
  'BY' => 'Belarusian',
  'KZ' => 'Kazakh',
  'KG' => 'Kyrgyz',
  'MD' => 'Moldovan'
}.freeze

Instance Attribute Summary

Attributes inherited from Kotoshu::Language::Base

#code, #name, #region, #variant

Instance Method Summary collapse

Methods inherited from Kotoshu::Language::Base

#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?

Constructor Details

#initialize(code: "ru", name: "Russian", variant: nil) ⇒ Russian

Returns a new instance of Russian.



334
335
336
337
338
# File 'lib/kotoshu/languages/ru/language.rb', line 334

# Builds a Russian language instance.
#
# When no variant is given it is derived from +code+ via
# +extract_region_code+. The Hunspell file paths for +code+ are resolved
# here and stored in +@hunspell_paths+ for the +create_*+ factory methods.
#
# @param code [String] language code; defaults to "ru"
# @param name [String] language name; defaults to "Russian"
# @param variant [String, nil] variant identifier; derived from +code+
#   when nil
def initialize(code: "ru", name: "Russian", variant: nil)
  variant = extract_region_code(code) unless variant
  super(code: code, name: name, variant: variant)
  @hunspell_paths = resolve_hunspell_paths(code)
end

Instance Method Details

#create_pos_tagger ⇒ Object



383
384
385
386
387
388
389
390
# File 'lib/kotoshu/languages/ru/language.rb', line 383

# Builds a POSTagger from this language's Hunspell affix and dictionary
# paths, using the Cyrillic script and the tagger's own FLAG_TO_POS table.
#
# @return [POSTagger] a new tagger on every call (nothing is memoized)
def create_pos_tagger
  tagger_options = {
    aff_path: @hunspell_paths[:aff],
    dic_path: @hunspell_paths[:dic],
    script: :cyrillic,
    flag_mapping: POSTagger::FLAG_TO_POS
  }
  POSTagger.new(**tagger_options)
end

#create_spell_checker ⇒ Object



371
372
373
374
375
376
377
# File 'lib/kotoshu/languages/ru/language.rb', line 371

# Builds a SpellChecker from this language's Hunspell affix and dictionary
# paths, configured for the Cyrillic script.
#
# @return [SpellChecker] a new checker on every call (nothing is memoized)
def create_spell_checker
  checker_options = {
    aff_path: @hunspell_paths[:aff],
    dic_path: @hunspell_paths[:dic],
    script: :cyrillic
  }
  SpellChecker.new(**checker_options)
end

#create_tokenizer ⇒ Object



379
380
381
# File 'lib/kotoshu/languages/ru/language.rb', line 379

# Builds a new Tokenizer with no arguments.
#
# Nothing is cached here: each call constructs another instance.
#
# @return [Tokenizer] a newly constructed tokenizer
def create_tokenizer
  Tokenizer.new
end

#default_dictionary_paths ⇒ Object



358
359
360
361
362
363
364
365
# File 'lib/kotoshu/languages/ru/language.rb', line 358

# Returns the default word-list locations for this language code.
#
# Only the exact code "ru-RU" maps to the Russian word list; every other
# code gets the generic words file.
#
# NOTE(review): the constructor's default code "ru" also falls through to
# the generic path -- confirm that is intended.
#
# @return [Array<String>] a new one-element array of file paths
def default_dictionary_paths
  return ["/usr/share/dict/russian"] if code == "ru-RU"

  ["/usr/share/dict/words"]
end

#description ⇒ Object



340
341
342
343
344
# File 'lib/kotoshu/languages/ru/language.rb', line 340

# Human-readable label for this language.
#
# Without a variant this is just +name+. With a variant, the variant's
# display name from VARIANT_NAMES (or the raw variant when it is not
# listed there) is appended in parentheses.
#
# @return [String] e.g. "Russian" or "Russian (Belarusian)"
def description
  if variant
    label = VARIANT_NAMES.fetch(variant, variant)
    "#{name} (#{label})"
  else
    name
  end
end

#dictionary_class ⇒ Object



354
355
356
# File 'lib/kotoshu/languages/ru/language.rb', line 354

# Names the dictionary implementation used for this language.
#
# @return [Object] the Dictionary::UnixWords constant (presumably a
#   class -- confirm against its definition)
def dictionary_class
  Dictionary::UnixWords
end

#normalizer ⇒ Object



350
351
352
# File 'lib/kotoshu/languages/ru/language.rb', line 350

# Returns this instance's normalizer, creating it on first use.
#
# The Language::Normalizer::Base object is cached in +@normalizer+, so
# later calls return the same object.
#
# @return [Language::Normalizer::Base] the memoized normalizer
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end

#script_type ⇒ Object



367
368
369
# File 'lib/kotoshu/languages/ru/language.rb', line 367

# Identifies the writing system of this language.
#
# @return [Symbol] always +:cyrillic+
def script_type
  :cyrillic
end

#tokenizer ⇒ Object



346
347
348
# File 'lib/kotoshu/languages/ru/language.rb', line 346

# Returns this instance's tokenizer, creating it on first use.
#
# The Tokenizer is cached in +@tokenizer+, so later calls return the same
# object.
#
# @return [Tokenizer] the memoized tokenizer
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = Tokenizer.new
end