Class: Kotoshu::Languages::Portuguese::POSTagger
- Inherits: Components::PosTagger
- Inheritance chain: Object → Components::PosTagger → Kotoshu::Languages::Portuguese::POSTagger
- Defined in:
- lib/kotoshu/languages/pt/language.rb
Overview
Portuguese POS tagger.
Constant Summary
# Lookup table from a tag/flag string to the part-of-speech label reported for it.
# Frozen; #initialize uses it as the default value of its flag_mapping: keyword.
# NOTE(review): most multi-letter keys look like Penn Treebank tags (NN, VBD, JJ, ...);
# the origin of the single-letter flags is not visible here — confirm.
FLAG_TO_POS = {
  # nouns
  'N' => 'NOUN',
  'NN' => 'NOUN',
  'NNS' => 'NOUN',
  'NNP' => 'NOUN_PROPER',
  # verbs
  'V' => 'VERB',
  'VB' => 'VERB',
  'VBD' => 'VERB',
  'VBG' => 'VERB',
  'VBN' => 'VERB',
  'VBP' => 'VERB',
  'VBZ' => 'VERB',
  # adjectives
  'A' => 'ADJ',
  'JJ' => 'ADJ',
  'JJR' => 'ADJ',
  'JJS' => 'ADJ',
  # adverbs
  'R' => 'ADV',
  'RB' => 'ADV',
  'RBR' => 'ADV',
  'RBS' => 'ADV',
  # determiners
  'D' => 'DET',
  'DT' => 'DET',
  'PDT' => 'DET',
  # pronouns (plain and possessive)
  'P' => 'PRON',
  'PP' => 'PRON',
  'PRP' => 'PRON',
  'PRP$' => 'PRON_POSS',
  'WP' => 'PRON',
  'WP$' => 'PRON_POSS',
  # prepositions
  'I' => 'PREP',
  'IN' => 'PREP',
  # conjunctions
  'C' => 'CONJ',
  'CC' => 'CONJ',
  # particles
  'U' => 'PART',
  'RP' => 'PART',
  # interjections
  'INTJ' => 'INTJ',
  'UH' => 'INTJ',
  # numerals and foreign/unknown words
  'CD' => 'NUM',
  'FW' => 'X',
  # punctuation
  'PUNCT' => 'PUNCT',
  '.' => 'PUNCT',
  ',' => 'PUNCT',
  '!' => 'PUNCT',
  '?' => 'PUNCT',
  ';' => 'PUNCT',
  ':' => 'PUNCT'
}.freeze
Instance Attribute Summary
-
#aff_path ⇒ Object
readonly
Returns the value of attribute aff_path.
-
#dic_path ⇒ Object
readonly
Returns the value of attribute dic_path.
-
#script ⇒ Object
readonly
Returns the value of attribute script.
Instance Method Summary
- #clear_cache ⇒ Object
- #flag_mapping ⇒ Object
- #flag_mapping=(mapping) ⇒ Object
-
#initialize(aff_path:, dic_path:, script: :latin, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
constructor
A new instance of POSTagger.
- #tag(tokens) ⇒ Object
Methods inherited from Components::PosTagger
Constructor Details
#initialize(aff_path:, dic_path:, script: :latin, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
Returns a new instance of POSTagger.
170 171 172 173 174 175 176 177 178 |
# File 'lib/kotoshu/languages/pt/language.rb', line 170
# Builds a tagger: records the given settings, constructs the lookup object
# through Readers::LookupBuilder and starts with an empty lookup cache.
#
# @param aff_path [Object] first argument handed to Readers::LookupBuilder
#   (presumably the path of an affix file — confirm)
# @param dic_path [Object] second argument handed to Readers::LookupBuilder
#   (presumably the path of a dictionary file — confirm)
# @param script [Symbol] forwarded to the builder as script:, defaults to :latin
# @param encoding [String] forwarded to the builder as encoding:, defaults to 'UTF-8'
# @param flag_mapping [Hash] flag-to-POS table, defaults to FLAG_TO_POS
def initialize(aff_path:, dic_path:, script: :latin, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS)
  @aff_path, @dic_path = aff_path, dic_path
  @script = script
  @encoding = encoding
  @flag_mapping = flag_mapping

  builder = Readers::LookupBuilder.new(aff_path, dic_path, encoding: encoding, script: script)
  @lookuper = builder.build
  @lookup_cache = {}
end
Instance Attribute Details
#aff_path ⇒ Object (readonly)
Returns the value of attribute aff_path.
168 169 170 |
# File 'lib/kotoshu/languages/pt/language.rb', line 168
# Read-only accessor: returns the value held in @aff_path.
# NOTE(review): the listing places this reader on line 168, the same line as the
# dic_path and script readers, so the source presumably declares it through
# attr_reader rather than a hand-written def — confirm.
#
# @return [Object] the stored aff_path
def aff_path
  @aff_path
end
#dic_path ⇒ Object (readonly)
Returns the value of attribute dic_path.
168 169 170 |
# File 'lib/kotoshu/languages/pt/language.rb', line 168
# Read-only accessor: returns the value held in @dic_path.
# NOTE(review): the listing places this reader on line 168, the same line as the
# aff_path and script readers, so the source presumably declares it through
# attr_reader rather than a hand-written def — confirm.
#
# @return [Object] the stored dic_path
def dic_path
  @dic_path
end
#script ⇒ Object (readonly)
Returns the value of attribute script.
168 169 170 |
# File 'lib/kotoshu/languages/pt/language.rb', line 168
# Read-only accessor: returns the value held in @script.
# NOTE(review): the listing places this reader on line 168, the same line as the
# aff_path and dic_path readers, so the source presumably declares it through
# attr_reader rather than a hand-written def — confirm.
#
# @return [Object] the stored script (a Symbol such as :latin by default)
def script
  @script
end
Instance Method Details
#clear_cache ⇒ Object
201 202 203 |
# File 'lib/kotoshu/languages/pt/language.rb', line 201
# Empties the lookup cache in place.
#
# @return [Hash] the same (now empty) cache object, as returned by Hash#clear
def clear_cache
  @lookup_cache.clear
end
#flag_mapping ⇒ Object
193 194 195 |
# File 'lib/kotoshu/languages/pt/language.rb', line 193
# Returns the flag-to-POS table currently held in @flag_mapping.
# The object is returned as-is (no copy is made).
#
# @return [Hash] the active mapping
def flag_mapping
  @flag_mapping
end
#flag_mapping=(mapping) ⇒ Object
197 198 199 |
# File 'lib/kotoshu/languages/pt/language.rb', line 197
# Replaces the flag-to-POS table and drops any cached lookups.
#
# The cache is emptied so results produced under the previous table cannot be
# served after the swap.
# NOTE(review): assumes entries in @lookup_cache depend on the mapping; the code
# that fills the cache is not visible here — confirm. If they do not, the clear
# only costs a re-lookup.
#
# @param mapping [Hash] the new flag-to-POS table
# @return [Hash] the mapping that was assigned
def flag_mapping=(mapping)
  # Safe navigation keeps the setter usable when no cache has been created yet.
  @lookup_cache&.clear
  @flag_mapping = mapping
end
#tag(tokens) ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/kotoshu/languages/pt/language.rb', line 180
# Annotates each token hash with :pos_tag and :lemma.
#
# Every element is read through its :token key. Elements whose :token is nil
# or empty get pos_tag: nil and lemma: nil; all others are passed to
# lookup_with_pos, and the lemma falls back to the word itself when the lookup
# supplies none. Input hashes are not modified (Hash#merge returns new hashes).
#
# @param tokens [Array<Hash>, nil] token hashes carrying a :token entry
# @return [Array<Hash>] one merged hash per input token; [] for nil or empty input
def tag(tokens)
  return [] if tokens.nil? || tokens.empty?

  tokens.map do |entry|
    surface = entry[:token]
    next entry.merge(pos_tag: nil, lemma: nil) if surface.nil? || surface.empty?

    found = lookup_with_pos(surface)
    entry.merge(pos_tag: found[:pos_tag], lemma: found[:lemma] || surface)
  end
end