Class: Kotoshu::Languages::Spanish
Overview
Spanish language implementation.
Supports multiple dialects: es-ES, es-MX, es-AR, es-CO, es-PE, es-VE, es-CL, es-EC
Full Hunspell integration with spell checking, POS tagging, and grammar rules specifically handling Spanish inverted punctuation and diacritics.
Defined Under Namespace
Modules: GrammarRules
Classes: POSTagger, SpellChecker, Tokenizer
Constant Summary
collapse
- HUNSPELL_DICTIONARIES =
# Hunspell affix (:aff) and dictionary (:dic) file paths, keyed by locale code.
# NOTE(review): only es-ES and es-MX have entries here, and the relative paths
# point under spec/integrational/fixtures -- confirm this is intended outside tests.
{
'es-ES' => {
aff: 'spec/integrational/fixtures/es_ES.aff',
dic: 'spec/integrational/fixtures/es_ES.dic'
},
'es-MX' => {
aff: 'spec/integrational/fixtures/es_MX.aff',
dic: 'spec/integrational/fixtures/es_MX.dic'
}
}.freeze
- VARIANT_NAMES =
# Human-readable variant labels keyed by upper-case two-letter region code.
# Read by #description to build the "(<label>)" suffix.
{
'ES' => 'European',
'MX' => 'Mexican',
'AR' => 'Argentinian',
'CO' => 'Colombian',
'PE' => 'Peruvian',
'VE' => 'Venezuelan',
'CL' => 'Chilean',
'EC' => 'Ecuadorian',
'GT' => 'Guatemalan',
'CU' => 'Cuban',
'BO' => 'Bolivian',
'DO' => 'Dominican',
'HN' => 'Honduran',
'PY' => 'Paraguayan',
'SV' => 'Salvadoran',
'NI' => 'Nicaraguan',
'CR' => 'Costa Rican',
'PA' => 'Panamanian',
'UY' => 'Uruguayan',
'PR' => 'Puerto Rican'
}.freeze
Instance Attribute Summary
#code, #name, #region, #variant
Instance Method Summary
collapse
#base_code, #base_language?, #compatible_with?, #encoding, #full_name, #info, instance, #matches_code?, #normalize, #normalize_word, #region_code, register, #rtl?, #tokenize, #valid_word?
Constructor Details
#initialize(code: "es", name: "Spanish", variant: nil) ⇒ Spanish
Returns a new instance of Spanish.
387
388
389
390
391
|
# File 'lib/kotoshu/languages/es/language.rb', line 387
# Builds a Spanish language instance and resolves its Hunspell file paths.
#
# @param code [String] language/locale code; also handed to resolve_hunspell_paths
# @param name [String] display name forwarded to the superclass
# @param variant [String, nil] variant identifier; +code+ is used when this is nil
def initialize(code: "es", name: "Spanish", variant: nil)
  # NOTE(review): the fallback passes the whole +code+ (e.g. "es-MX") as the
  # variant, while VARIANT_NAMES is keyed by region ("MX") -- confirm intent.
  super(code: code, name: name, variant: variant || code)
  @hunspell_paths = resolve_hunspell_paths(code)
end
|
Instance Method Details
#create_pos_tagger ⇒ Object
438
439
440
441
442
443
444
445
|
# File 'lib/kotoshu/languages/es/language.rb', line 438
# Builds a new POS tagger wired to this language's Hunspell files.
#
# Reads the :aff and :dic entries of @hunspell_paths and passes them, together
# with the Latin script marker and POSTagger::FLAG_TO_POS, to POSTagger.new.
#
# @return [POSTagger] a fresh tagger on every call (nothing is memoized here)
def create_pos_tagger
  affix_file = @hunspell_paths[:aff]
  dictionary_file = @hunspell_paths[:dic]

  POSTagger.new(
    aff_path: affix_file,
    dic_path: dictionary_file,
    script: :latin,
    flag_mapping: POSTagger::FLAG_TO_POS
  )
end
|
#create_spell_checker ⇒ Object
426
427
428
429
430
431
432
|
# File 'lib/kotoshu/languages/es/language.rb', line 426
# Builds a new spell checker wired to this language's Hunspell files.
#
# The :aff and :dic entries of @hunspell_paths become the aff_path/dic_path
# keyword arguments; the script is always :latin.
#
# @return [SpellChecker] a fresh checker on every call
def create_spell_checker
  options = {
    aff_path: @hunspell_paths[:aff],
    dic_path: @hunspell_paths[:dic],
    script: :latin
  }
  SpellChecker.new(**options)
end
|
#create_tokenizer ⇒ Object
434
435
436
|
# File 'lib/kotoshu/languages/es/language.rb', line 434
# Factory for the Spanish tokenizer.
#
# @return [Tokenizer] a new Tokenizer instance on every call (no caching here)
def create_tokenizer
Tokenizer.new
end
|
#default_dictionary_paths ⇒ Object
411
412
413
414
415
416
417
418
419
420
|
# File 'lib/kotoshu/languages/es/language.rb', line 411
# Lists the system word-list file to use for this language code.
#
# "es-ES" and "es-MX" have dedicated word lists; every other code falls
# back to the generic /usr/share/dict/words file.
#
# @return [Array<String>] a new single-element array holding the path
def default_dictionary_paths
  dialect_wordlists = {
    "es-ES" => "/usr/share/dict/spanish",
    "es-MX" => "/usr/share/dict/mexican"
  }

  [dialect_wordlists.fetch(code, "/usr/share/dict/words")]
end
|
#description ⇒ Object
393
394
395
396
397
|
# File 'lib/kotoshu/languages/es/language.rb', line 393
# Human-readable description of the language, e.g. "Spanish (Mexican)".
#
# The variant label is looked up in VARIANT_NAMES, first by the variant as
# given and then, if that misses, by a trailing two-letter region suffix
# ("es-MX" / "es_MX" -> "MX"). The second lookup matters because the
# constructor falls back to the full code as the variant, whereas
# VARIANT_NAMES is keyed by region only. Unknown variants are shown verbatim.
#
# @return [String] +name+ alone when there is no variant, otherwise
#   "<name> (<variant label>)"
def description
  return name unless variant

  label = VARIANT_NAMES[variant]
  unless label
    # Only treat the tail as a region when a separator is present, so a bare
    # language code such as "es" is not mistaken for the region "ES".
    region = variant.to_s[/[-_]([A-Za-z]{2})\z/, 1]
    label = VARIANT_NAMES[region.upcase] if region
  end

  "#{name} (#{label || variant})"
end
|
#dictionary_class ⇒ Object
407
408
409
|
# File 'lib/kotoshu/languages/es/language.rb', line 407
# Dictionary implementation associated with this language.
#
# @return [Class] always Dictionary::UnixWords
def dictionary_class
Dictionary::UnixWords
end
|
#normalizer ⇒ Object
403
404
405
|
# File 'lib/kotoshu/languages/es/language.rb', line 403
# Lazily built, memoized normalizer for this language.
#
# @return [Language::Normalizer::Base] the same instance on repeated calls
def normalizer
  return @normalizer if @normalizer

  @normalizer = Language::Normalizer::Base.new
end
|
#script_type ⇒ Object
422
423
424
|
# File 'lib/kotoshu/languages/es/language.rb', line 422
# Writing-system identifier for this language.
#
# @return [Symbol] always :latin
def script_type
:latin
end
|
#tokenizer ⇒ Object
399
400
401
|
# File 'lib/kotoshu/languages/es/language.rb', line 399
# Lazily built, memoized tokenizer for this language.
#
# Construction is delegated to #create_tokenizer so there is a single place
# that decides how a tokenizer is built; overriding that factory is then
# reflected here as well.
#
# @return [Tokenizer] the same instance on repeated calls
def tokenizer
  @tokenizer ||= create_tokenizer
end
|