Class: Kotoshu::Languages::Spanish

Inherits:
Kotoshu::Language::Base show all
Defined in:
lib/kotoshu/languages/es/language.rb

Overview

Spanish language implementation.

Supports multiple dialects: es-ES, es-MX, es-AR, es-CO, es-PE, es-VE, es-CL, es-EC

Provides full Hunspell integration, with spell checking, POS tagging, and grammar rules that specifically handle Spanish inverted punctuation (¿, ¡) and diacritics.

Defined Under Namespace

Modules: GrammarRules

Classes: POSTagger, SpellChecker, Tokenizer

Constant Summary collapse

# Hunspell affix (+:aff+) and dictionary (+:dic+) file paths, keyed by locale code.
# Only es-ES and es-MX have entries; the paths are relative and point into
# spec/integrational/fixtures.
#
# Each per-locale hash is frozen in addition to the outer hash: freezing only the
# outer hash would still let callers mutate the nested path hashes through the
# constant.
HUNSPELL_DICTIONARIES =
{
  'es-ES' => {
    aff: 'spec/integrational/fixtures/es_ES.aff',
    dic: 'spec/integrational/fixtures/es_ES.dic'
  }.freeze,
  'es-MX' => {
    aff: 'spec/integrational/fixtures/es_MX.aff',
    dic: 'spec/integrational/fixtures/es_MX.dic'
  }.freeze
}.freeze
# Human-readable variant names keyed by two-letter, upper-case region code.
# #description looks a variant up here and falls back to the raw variant value
# when there is no entry. The hash itself is frozen.
VARIANT_NAMES =
{
  'ES' => 'European',
  'MX' => 'Mexican',
  'AR' => 'Argentinian',
  'CO' => 'Colombian',
  'PE' => 'Peruvian',
  'VE' => 'Venezuelan',
  'CL' => 'Chilean',
  'EC' => 'Ecuadorian',
  'GT' => 'Guatemalan',
  'CU' => 'Cuban',
  'BO' => 'Bolivian',
  'DO' => 'Dominican',
  'HN' => 'Honduran',
  'PY' => 'Paraguayan',
  'SV' => 'Salvadoran',
  'NI' => 'Nicaraguan',
  'CR' => 'Costa Rican',
  'PA' => 'Panamanian',
  'UY' => 'Uruguayan',
  'PR' => 'Puerto Rican'
}.freeze

Instance Attribute Summary

Attributes inherited from Kotoshu::Language::Base

#code, #name, #region, #variant

Instance Method Summary collapse

Methods inherited from Kotoshu::Language::Base

#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?

Constructor Details

#initialize(code: "es", name: "Spanish", variant: nil) ⇒ Spanish

Returns a new instance of Spanish.



387
388
389
390
391
# File 'lib/kotoshu/languages/es/language.rb', line 387

# Builds a Spanish language instance.
#
# When no variant is supplied, one is derived from +code+ via
# +extract_region_code+. After the parent initializer runs, the Hunspell file
# paths resolved from +code+ are stored in +@hunspell_paths+.
#
# @param code [String] language code, "es" by default
# @param name [String] display name, "Spanish" by default
# @param variant [Object, nil] explicit variant; derived from +code+ when nil
def initialize(code: "es", name: "Spanish", variant: nil)
  effective_variant = variant || extract_region_code(code)
  super(code: code, name: name, variant: effective_variant)
  @hunspell_paths = resolve_hunspell_paths(code)
end

Instance Method Details

#create_pos_tagger ⇒ Object



438
439
440
441
442
443
444
445
# File 'lib/kotoshu/languages/es/language.rb', line 438

# Creates a new POS tagger from the Hunspell files stored in +@hunspell_paths+.
#
# The tagger is given the +:aff+ and +:dic+ paths, the +:latin+ script, and
# +POSTagger::FLAG_TO_POS+ as its flag mapping.
#
# @return [POSTagger] a new tagger on every call
def create_pos_tagger
  paths = @hunspell_paths
  tagger_options = {
    aff_path: paths[:aff],
    dic_path: paths[:dic],
    script: :latin,
    flag_mapping: POSTagger::FLAG_TO_POS
  }
  POSTagger.new(**tagger_options)
end

#create_spell_checker ⇒ Object



426
427
428
429
430
431
432
# File 'lib/kotoshu/languages/es/language.rb', line 426

# Creates a new spell checker from the Hunspell files stored in
# +@hunspell_paths+, using the +:latin+ script.
#
# @return [SpellChecker] a new checker on every call
def create_spell_checker
  paths = @hunspell_paths
  SpellChecker.new(aff_path: paths[:aff], dic_path: paths[:dic], script: :latin)
end

#create_tokenizer ⇒ Object



434
435
436
# File 'lib/kotoshu/languages/es/language.rb', line 434

# Creates a new Tokenizer with no arguments.
# Unlike #tokenizer, the result is not memoized: each call builds a new object.
#
# @return [Tokenizer]
def create_tokenizer
  Tokenizer.new
end

#default_dictionary_paths ⇒ Object



411
412
413
414
415
416
417
418
419
420
# File 'lib/kotoshu/languages/es/language.rb', line 411

# Returns the default word-list locations for this language's +code+.
#
# "es-ES" and "es-MX" each have a dedicated file; every other code falls back
# to the generic /usr/share/dict/words list.
#
# @return [Array<String>] a single-element array holding the path
def default_dictionary_paths
  word_lists = {
    "es-ES" => "/usr/share/dict/spanish",
    "es-MX" => "/usr/share/dict/mexican"
  }
  [word_lists.fetch(code, "/usr/share/dict/words")]
end

#description ⇒ Object



393
394
395
396
397
# File 'lib/kotoshu/languages/es/language.rb', line 393

# Builds a display string for this language.
#
# Without a variant this is just +name+. With a variant it is
# "name (label)", where the label comes from VARIANT_NAMES and falls back to
# the raw variant when there is no entry for it.
#
# @return [String]
def description
  region = variant
  if region
    label = VARIANT_NAMES.fetch(region, region)
    "#{name} (#{label})"
  else
    name
  end
end

#dictionary_class ⇒ Object



407
408
409
# File 'lib/kotoshu/languages/es/language.rb', line 407

# Returns the dictionary class associated with this language.
#
# @return [Class] always Dictionary::UnixWords
def dictionary_class
  Dictionary::UnixWords
end

#normalizer ⇒ Object



403
404
405
# File 'lib/kotoshu/languages/es/language.rb', line 403

# Returns this instance's normalizer, building a
# Language::Normalizer::Base on first use and reusing it afterwards.
#
# @return [Language::Normalizer::Base]
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end

#script_type ⇒ Object



422
423
424
# File 'lib/kotoshu/languages/es/language.rb', line 422

# Identifies the writing script of this language.
#
# @return [Symbol] always +:latin+
def script_type
  :latin
end

#tokenizer ⇒ Object



399
400
401
# File 'lib/kotoshu/languages/es/language.rb', line 399

# Returns this instance's tokenizer, building a Tokenizer on first use and
# reusing it afterwards.
#
# @return [Tokenizer]
def tokenizer
  return @tokenizer if @tokenizer

  @tokenizer = Tokenizer.new
end