Class: Kotoshu::Languages::French

Inherits:
Kotoshu::Language::Base show all
Defined in:
lib/kotoshu/languages/fr/language.rb

Overview

French language implementation.

Supports multiple dialects: fr-FR, fr-CA, fr-BE, fr-CH, fr-LU, fr-MC

Full Hunspell integration with spell checking, POS tagging, and grammar rules.

Defined Under Namespace

Modules: GrammarRules Classes: POSTagger, SpellChecker, Tokenizer

Constant Summary collapse

# Hunspell affix (:aff) and dictionary (:dic) file paths, keyed by dialect code.
# NOTE(review): only 'fr-FR' and 'fr-CA' have entries, and both point at files
# under spec/integrational/fixtures — confirm these are the intended locations.
HUNSPELL_DICTIONARIES =
{
  'fr-FR' => {
    aff: 'spec/integrational/fixtures/fr_FR.aff',
    dic: 'spec/integrational/fixtures/fr_FR.dic'
  },
  'fr-CA' => {
    aff: 'spec/integrational/fixtures/fr_CA.aff',
    dic: 'spec/integrational/fixtures/fr_CA.dic'
  }
}.freeze
# Display labels keyed by upper-case region code. #description looks the
# current variant up here and falls back to the raw variant when it is absent.
VARIANT_NAMES =
{
  'FR' => 'France',
  'CA' => 'Canadian',
  'BE' => 'Belgian',
  'CH' => 'Swiss',
  'LU' => 'Luxembourgish',
  'MC' => 'Monégasque'
}.freeze

Instance Attribute Summary

Attributes inherited from Kotoshu::Language::Base

#code, #name, #region, #variant

Instance Method Summary collapse

Methods inherited from Kotoshu::Language::Base

#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?

Constructor Details

#initialize(code: "fr", name: "French", variant: nil) ⇒ French

Returns a new instance of French.



421
422
423
424
425
# File 'lib/kotoshu/languages/fr/language.rb', line 421

# Builds a French language instance.
#
# When no variant is supplied (nil or false), one is obtained from
# extract_region_code(code) before delegating to the superclass. The Hunspell
# file paths for the code are then resolved and cached on the instance.
#
# @param code [String] language code; defaults to "fr"
# @param name [String] language name; defaults to "French"
# @param variant [String, nil] explicit variant; derived from +code+ when omitted
#   (presumably a region code such as "CA" — confirm against extract_region_code)
def initialize(code: "fr", name: "French", variant: nil)
  variant = extract_region_code(code) unless variant

  super(code: code, name: name, variant: variant)

  # Read later by #create_spell_checker and #create_pos_tagger.
  @hunspell_paths = resolve_hunspell_paths(code)
end

Instance Method Details

#create_pos_taggerObject



472
473
474
475
476
477
478
479
# File 'lib/kotoshu/languages/fr/language.rb', line 472

# Builds a new POSTagger from the Hunspell paths stored on this instance.
#
# The tagger receives the :aff and :dic entries of @hunspell_paths, the
# :latin script marker, and POSTagger::FLAG_TO_POS as its flag mapping.
#
# @return [POSTagger] a freshly constructed tagger (not memoized)
def create_pos_tagger
  paths = @hunspell_paths

  POSTagger.new(aff_path: paths[:aff], dic_path: paths[:dic], script: :latin, flag_mapping: POSTagger::FLAG_TO_POS)
end

#create_spell_checkerObject



460
461
462
463
464
465
466
# File 'lib/kotoshu/languages/fr/language.rb', line 460

# Builds a new SpellChecker from the Hunspell paths stored on this instance.
#
# The checker receives the :aff and :dic entries of @hunspell_paths and the
# :latin script marker.
#
# @return [SpellChecker] a freshly constructed checker (not memoized)
def create_spell_checker
  paths = @hunspell_paths

  SpellChecker.new(aff_path: paths[:aff], dic_path: paths[:dic], script: :latin)
end

#create_tokenizerObject



468
469
470
# File 'lib/kotoshu/languages/fr/language.rb', line 468

# Builds a new Tokenizer on every call; unlike #tokenizer, nothing is cached.
#
# @return [Tokenizer]
def create_tokenizer
  Tokenizer.new
end

#default_dictionary_pathsObject



445
446
447
448
449
450
451
452
453
454
# File 'lib/kotoshu/languages/fr/language.rb', line 445

# Lists the default word-list file for this language's code.
#
# "fr-FR" and "fr-CA" each have a dedicated path; every other code falls
# back to the generic /usr/share/dict/words list.
#
# @return [Array<String>] a new single-element array of file paths
def default_dictionary_paths
  dialect = code

  return ["/usr/share/dict/french"] if dialect == "fr-FR"
  return ["/usr/share/dict/french-CA"] if dialect == "fr-CA"

  ["/usr/share/dict/words"]
end

#descriptionObject



427
428
429
430
431
# File 'lib/kotoshu/languages/fr/language.rb', line 427

# Human-readable label for this language.
#
# Without a variant this is just the name. With a variant, the variant's
# label from VARIANT_NAMES (or the raw variant when it has no entry) is
# appended in parentheses, e.g. "French (Canadian)".
#
# @return [String]
def description
  region = variant

  if region
    label = VARIANT_NAMES.fetch(region, region)
    "#{name} (#{label})"
  else
    name
  end
end

#dictionary_classObject



441
442
443
# File 'lib/kotoshu/languages/fr/language.rb', line 441

# Returns the Dictionary::UnixWords constant.
# NOTE(review): presumably the dictionary backend paired with
# #default_dictionary_paths — confirm against the caller.
#
# @return [Class]
def dictionary_class
  Dictionary::UnixWords
end

#normalizerObject



437
438
439
# File 'lib/kotoshu/languages/fr/language.rb', line 437

# Lazily builds and caches the normalizer for this language.
#
# The first call constructs a Language::Normalizer::Base and stores it in
# @normalizer; later calls return that same object.
#
# @return [Language::Normalizer::Base]
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end

#script_typeObject



456
457
458
# File 'lib/kotoshu/languages/fr/language.rb', line 456

# Identifies the writing script for this language.
#
# @return [Symbol] always :latin
def script_type
  :latin
end

#tokenizerObject



433
434
435
# File 'lib/kotoshu/languages/fr/language.rb', line 433

# Lazily builds and caches the tokenizer for this language.
#
# The first call constructs a Tokenizer and stores it in @tokenizer; later
# calls return that same object. Use #create_tokenizer for a fresh instance.
#
# @return [Tokenizer]
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = Tokenizer.new
end