Class: Kotoshu::Languages::Portuguese

Inherits:
Kotoshu::Language::Base show all
Defined in:
lib/kotoshu/languages/pt/language.rb

Overview

Portuguese language implementation.

Supports multiple dialects: pt-BR, pt-PT, pt-AO, pt-MZ, pt-GW, pt-CV

Full Hunspell integration with spell checking, POS tagging, and grammar rules that specifically handle Portuguese accents and the differences between Brazilian and European Portuguese.

Defined Under Namespace

Modules: GrammarRules

Classes: POSTagger, SpellChecker, Tokenizer

Constant Summary collapse

# Hunspell affix (+:aff+) and word-list (+:dic+) file paths, keyed by dialect
# code. Only 'pt-BR' and 'pt-PT' have entries.
#
# Each per-dialect hash is frozen in addition to the outer table, so code that
# is handed one of them cannot change the paths seen by every other caller.
#
# NOTE(review): the paths are relative and point into the spec fixtures
# directory; presumably they are resolved against the project root. Confirm
# this is intended outside the test suite.
HUNSPELL_DICTIONARIES = {
  'pt-BR' => {
    aff: 'spec/integrational/fixtures/pt_BR.aff',
    dic: 'spec/integrational/fixtures/pt_BR.dic'
  }.freeze,
  'pt-PT' => {
    aff: 'spec/integrational/fixtures/pt_PT.aff',
    dic: 'spec/integrational/fixtures/pt_PT.dic'
  }.freeze
}.freeze
# Display label for each two-letter region code. #description looks the
# variant up here and falls back to the raw variant when there is no entry.
VARIANT_NAMES =
{
  'BR' => 'Brazilian',
  'PT' => 'European',
  'AO' => 'Angolan',
  'MZ' => 'Mozambican',
  'GW' => 'Guinea-Bissau',
  'CV' => 'Cape Verdean'
}.freeze

Instance Attribute Summary

Attributes inherited from Kotoshu::Language::Base

#code, #name, #region, #variant

Instance Method Summary collapse

Methods inherited from Kotoshu::Language::Base

#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?

Constructor Details

#initialize(code: "pt", name: "Portuguese", variant: nil) ⇒ Portuguese

Returns a new instance of Portuguese.



351
352
353
354
355
# File 'lib/kotoshu/languages/pt/language.rb', line 351

# Sets up a Portuguese language instance.
#
# @param code [String] language code; defaults to "pt"
# @param name [String] display name; defaults to "Portuguese"
# @param variant [String, nil] region variant. When nil (or false) it is
#   derived from +code+ through +extract_region_code+.
def initialize(code: "pt", name: "Portuguese", variant: nil)
  resolved_variant = variant || extract_region_code(code)
  super(code: code, name: name, variant: resolved_variant)

  # Looked up once here; the create_* factory methods read this afterwards.
  @hunspell_paths = resolve_hunspell_paths(code)
end

Instance Method Details

#create_pos_tagger ⇒ Object



402
403
404
405
406
407
408
409
# File 'lib/kotoshu/languages/pt/language.rb', line 402

# Builds a new POSTagger from the Hunspell paths stored on this instance.
#
# The tagger receives the +:aff+ and +:dic+ entries of +@hunspell_paths+,
# the +:latin+ script and the POSTagger::FLAG_TO_POS mapping.
#
# @return [POSTagger] a fresh tagger on every call
def create_pos_tagger
  affix_file = @hunspell_paths[:aff]
  word_list = @hunspell_paths[:dic]

  POSTagger.new(aff_path: affix_file, dic_path: word_list,
                script: :latin, flag_mapping: POSTagger::FLAG_TO_POS)
end

#create_spell_checker ⇒ Object



390
391
392
393
394
395
396
# File 'lib/kotoshu/languages/pt/language.rb', line 390

# Builds a new SpellChecker from the Hunspell paths stored on this instance.
#
# The checker receives the +:aff+ and +:dic+ entries of +@hunspell_paths+
# together with the +:latin+ script.
#
# @return [SpellChecker] a fresh checker on every call
def create_spell_checker
  affix_file = @hunspell_paths[:aff]
  word_list = @hunspell_paths[:dic]

  SpellChecker.new(aff_path: affix_file, dic_path: word_list, script: :latin)
end

#create_tokenizer ⇒ Object



398
399
400
# File 'lib/kotoshu/languages/pt/language.rb', line 398

# Builds a new Tokenizer with no arguments. Nothing is cached here; every
# call constructs another instance.
#
# @return [Tokenizer]
def create_tokenizer
  Tokenizer.new
end

#default_dictionary_paths ⇒ Object



375
376
377
378
379
380
381
382
383
384
# File 'lib/kotoshu/languages/pt/language.rb', line 375

# Lists the system word-list file used by default for this language code.
#
# "pt-BR" and "pt-PT" each have a dedicated file; every other code,
# including plain "pt", gets the generic system word list.
#
# @return [Array<String>] a newly built one-element array
def default_dictionary_paths
  dialect = code
  return ["/usr/share/dict/brazilian"] if dialect == "pt-BR"
  return ["/usr/share/dict/portuguese"] if dialect == "pt-PT"

  ["/usr/share/dict/words"]
end

#description ⇒ Object



357
358
359
360
361
# File 'lib/kotoshu/languages/pt/language.rb', line 357

# Human-readable label for this language.
#
# Without a variant this is just +name+. With one, the variant's entry in
# VARIANT_NAMES (or the raw variant when there is no entry) is appended in
# parentheses, e.g. "Portuguese (Brazilian)".
#
# @return [String]
def description
  if variant
    label = VARIANT_NAMES[variant] || variant
    "#{name} (#{label})"
  else
    name
  end
end

#dictionary_class ⇒ Object



371
372
373
# File 'lib/kotoshu/languages/pt/language.rb', line 371

# Returns the Dictionary::UnixWords constant as the dictionary
# implementation for this language.
def dictionary_class
  Dictionary::UnixWords
end

#normalizer ⇒ Object



367
368
369
# File 'lib/kotoshu/languages/pt/language.rb', line 367

# Returns this instance's normalizer, building a
# Language::Normalizer::Base on first use and reusing it afterwards.
#
# @return [Language::Normalizer::Base]
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end

#script_type ⇒ Object



386
387
388
# File 'lib/kotoshu/languages/pt/language.rb', line 386

# Script identifier for this language; always +:latin+.
#
# @return [Symbol]
def script_type
  :latin
end

#tokenizer ⇒ Object



363
364
365
# File 'lib/kotoshu/languages/pt/language.rb', line 363

# Returns this instance's tokenizer, building it on first use and reusing
# the same object afterwards.
#
# Construction goes through #create_tokenizer so the class has a single
# place that decides how a tokenizer is built; an override of that factory
# is therefore honoured here too.
#
# @return [Object] whatever #create_tokenizer returns (a Tokenizer here)
def tokenizer
  @tokenizer ||= create_tokenizer
end