Class: Kotoshu::Languages::German

Inherits:
Kotoshu::Language::Base show all
Defined in:
lib/kotoshu/languages/de/language.rb

Overview

German language implementation.

Supports multiple dialects: de-DE, de-AT, de-CH, de-BE, de-IT, de-LI, de-LU

Provides full Hunspell integration for spell checking and POS tagging, plus grammar rules that specifically handle German compound words and capitalization.

Defined Under Namespace

Modules: GrammarRules

Classes: POSTagger, SpellChecker, Tokenizer

Constant Summary collapse

# Hunspell affix (:aff) and dictionary (:dic) file paths, keyed by locale code.
# Entries exist only for de-DE, de-AT and de-CH.
# NOTE(review): the paths are relative and point into the spec fixtures
# directory — confirm this is intended outside of tests and which working
# directory they are resolved against.
HUNSPELL_DICTIONARIES =
{
  'de-DE' => {
    aff: 'spec/integrational/fixtures/de_DE.aff',
    dic: 'spec/integrational/fixtures/de_DE.dic'
  },
  'de-AT' => {
    aff: 'spec/integrational/fixtures/de_AT.aff',
    dic: 'spec/integrational/fixtures/de_AT.dic'
  },
  'de-CH' => {
    aff: 'spec/integrational/fixtures/de_CH.aff',
    dic: 'spec/integrational/fixtures/de_CH.dic'
  }
}.freeze
# Human-readable names for regional variants, keyed by upper-case region code.
# #description looks the variant up here and falls back to the raw variant
# value when no entry exists.
VARIANT_NAMES =
{
  'DE' => 'German',
  'AT' => 'Austrian',
  'CH' => 'Swiss',
  'BE' => 'Belgian',
  'IT' => 'South Tyrolean',
  'LI' => 'Liechtenstein',
  'LU' => 'Luxembourgish'
}.freeze

Instance Attribute Summary

Attributes inherited from Kotoshu::Language::Base

#code, #name, #region, #variant

Instance Method Summary collapse

Methods inherited from Kotoshu::Language::Base

#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?

Constructor Details

#initialize(code: "de", name: "German", variant: nil) ⇒ German

Returns a new instance of German.



474
475
476
477
478
# File 'lib/kotoshu/languages/de/language.rb', line 474

# Builds a German language instance.
#
# When no variant is given, one is derived from +code+ via
# extract_region_code. The Hunspell file paths for +code+ are resolved once
# here and kept in @hunspell_paths for the create_* factory methods.
#
# @param code [String] language code, "de" by default
# @param name [String] language name, "German" by default
# @param variant [String, nil] explicit variant; derived from +code+ when falsy
def initialize(code: "de", name: "German", variant: nil)
  effective_variant = variant || extract_region_code(code)
  super(code: code, name: name, variant: effective_variant)

  @hunspell_paths = resolve_hunspell_paths(code)
end

Instance Method Details

#create_pos_tagger ⇒ Object



525
526
527
528
529
530
531
532
# File 'lib/kotoshu/languages/de/language.rb', line 525

# Builds a new POSTagger from the :aff and :dic entries of @hunspell_paths,
# using the :latin script and POSTagger::FLAG_TO_POS as the flag mapping.
#
# @return [POSTagger] a new tagger on every call
def create_pos_tagger
  paths = @hunspell_paths
  aff_file = paths[:aff]
  dic_file = paths[:dic]

  POSTagger.new(aff_path: aff_file, dic_path: dic_file, script: :latin, flag_mapping: POSTagger::FLAG_TO_POS)
end

#create_spell_checker ⇒ Object



513
514
515
516
517
518
519
# File 'lib/kotoshu/languages/de/language.rb', line 513

# Builds a new SpellChecker from the :aff and :dic entries of
# @hunspell_paths, using the :latin script.
#
# @return [SpellChecker] a new checker on every call
def create_spell_checker
  paths = @hunspell_paths
  aff_file = paths[:aff]
  dic_file = paths[:dic]

  SpellChecker.new(aff_path: aff_file, dic_path: dic_file, script: :latin)
end

#create_tokenizer ⇒ Object



521
522
523
# File 'lib/kotoshu/languages/de/language.rb', line 521

# Builds a new Tokenizer with no arguments; nothing is cached here, so each
# call returns a separate instance.
#
# @return [Tokenizer]
def create_tokenizer
  Tokenizer.new
end

#default_dictionary_paths ⇒ Object



498
499
500
501
502
503
504
505
506
507
# File 'lib/kotoshu/languages/de/language.rb', line 498

# Picks the system word-list file for the current language code.
#
# "de-DE", "de-AT" and "de-BE" use the german list, "de-CH" the swiss-german
# list, and every other code falls back to the generic words list.
# NOTE(review): the fallback also applies to bare "de" and the remaining
# regional codes — confirm that is intended.
#
# @return [Array<String>] a new single-element array on every call
def default_dictionary_paths
  locale = code
  word_list =
    if ["de-DE", "de-AT", "de-BE"].include?(locale)
      "/usr/share/dict/german"
    elsif locale == "de-CH"
      "/usr/share/dict/swiss-german"
    else
      "/usr/share/dict/words"
    end

  [word_list]
end

#description ⇒ Object



480
481
482
483
484
# File 'lib/kotoshu/languages/de/language.rb', line 480

# Human-readable label for this language.
#
# Without a variant this is just the name. Otherwise the variant is appended
# in parentheses, using its VARIANT_NAMES entry when there is one and the
# raw variant value when there is not.
#
# @return [String] e.g. "German (Austrian)" for variant "AT"
def description
  region = variant
  return name unless region

  label = VARIANT_NAMES[region]
  label ||= region
  "#{name} (#{label})"
end

#dictionary_class ⇒ Object



494
495
496
# File 'lib/kotoshu/languages/de/language.rb', line 494

# Returns the Dictionary::UnixWords constant.
# NOTE(review): presumably the dictionary backend that reads the files from
# #default_dictionary_paths — confirm against the caller.
def dictionary_class
  Dictionary::UnixWords
end

#normalizer ⇒ Object



490
491
492
# File 'lib/kotoshu/languages/de/language.rb', line 490

# Returns the normalizer for this language, creating a
# Language::Normalizer::Base on first use and reusing it afterwards.
#
# @return [Language::Normalizer::Base] the memoized instance
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end

#script_type ⇒ Object



509
510
511
# File 'lib/kotoshu/languages/de/language.rb', line 509

# Script identifier for this language; always :latin.
#
# @return [Symbol]
def script_type
  :latin
end

#tokenizer ⇒ Object



486
487
488
# File 'lib/kotoshu/languages/de/language.rb', line 486

# Returns the tokenizer for this language, creating a Tokenizer on first use
# and reusing it afterwards.
#
# @return [Tokenizer] the memoized instance
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = Tokenizer.new
end