Class: Kotoshu::Languages::Russian::POSTagger
- Inherits: Components::PosTagger
  - Object
    - Components::PosTagger
      - Kotoshu::Languages::Russian::POSTagger
- Defined in: lib/kotoshu/languages/ru/language.rb
Overview
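Russian part-of-speech (POS) tagger. Its #tag method adds :pos_tag and :lemma entries to each token; the lemma falls back to the token's own text when the lookup returns none.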
Constant Summary
- FLAG_TO_POS =
{ 'N' => 'NOUN', 'NN' => 'NOUN', 'NNS' => 'NOUN', 'NNP' => 'NOUN_PROPER', 'S' => 'NOUN', 'Sub' => 'NOUN', 'V' => 'VERB', 'VB' => 'VERB', 'VBD' => 'VERB', 'VBG' => 'VERB', 'VBN' => 'VERB', 'VBP' => 'VERB', 'VBZ' => 'VERB', 'A' => 'ADJ', 'JJ' => 'ADJ', 'JJR' => 'ADJ', 'JJS' => 'ADJ', 'Adj' => 'ADJ', 'R' => 'ADV', 'RB' => 'ADV', 'RBR' => 'ADV', 'RBS' => 'ADV', 'Adv' => 'ADV', 'D' => 'DET', 'DT' => 'DET', 'PDT' => 'DET', 'P' => 'PRON', 'PP' => 'PRON', 'PRP' => 'PRON', 'PRP$' => 'PRON_POSS', 'WP' => 'PRON', 'WP$' => 'PRON_POSS', 'Pro' => 'PRON', 'I' => 'PREP', 'IN' => 'PREP', 'Präp' => 'PREP', 'C' => 'CONJ', 'CC' => 'CONJ', 'Conj' => 'CONJ', 'U' => 'PART', 'RP' => 'PART', 'Pt' => 'PART', 'INTJ' => 'INTJ', 'UH' => 'INTJ', 'Int' => 'INTJ', 'CD' => 'NUM', 'FW' => 'X', 'PUNCT' => 'PUNCT', '.' => 'PUNCT', ',' => 'PUNCT', '!' => 'PUNCT', '?' => 'PUNCT', ';' => 'PUNCT', ':' => 'PUNCT' }.freeze
Instance Attribute Summary
-
#aff_path ⇒ Object
readonly
Returns the value of attribute aff_path.
-
#dic_path ⇒ Object
readonly
Returns the value of attribute dic_path.
-
#script ⇒ Object
readonly
Returns the value of attribute script.
Instance Method Summary
- #clear_cache ⇒ Object
- #flag_mapping ⇒ Object
- #flag_mapping=(mapping) ⇒ Object
-
#initialize(aff_path:, dic_path:, script: :cyrillic, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
constructor
A new instance of POSTagger.
- #tag(tokens) ⇒ Object
Methods inherited from Components::PosTagger
Constructor Details
#initialize(aff_path:, dic_path:, script: :cyrillic, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
Returns a new instance of POSTagger.
175 176 177 178 179 180 181 182 183 |
# File 'lib/kotoshu/languages/ru/language.rb', line 175 def initialize(aff_path:, dic_path:, script: :cyrillic, encoding: 'UTF-8', flag_mapping: FLAG_TO_POS) @aff_path = aff_path @dic_path = dic_path @script = script @encoding = encoding @flag_mapping = flag_mapping @lookuper = Readers::LookupBuilder.new(aff_path, dic_path, encoding: encoding, script: script).build @lookup_cache = {} end |
Instance Attribute Details
#aff_path ⇒ Object (readonly)
Returns the value of attribute aff_path.
173 174 175 |
# File 'lib/kotoshu/languages/ru/language.rb', line 173 def aff_path @aff_path end |
#dic_path ⇒ Object (readonly)
Returns the value of attribute dic_path.
173 174 175 |
# File 'lib/kotoshu/languages/ru/language.rb', line 173 def dic_path @dic_path end |
#script ⇒ Object (readonly)
Returns the value of attribute script.
173 174 175 |
# File 'lib/kotoshu/languages/ru/language.rb', line 173 def script @script end |
Instance Method Details
#clear_cache ⇒ Object
206 207 208 |
# File 'lib/kotoshu/languages/ru/language.rb', line 206 def clear_cache @lookup_cache.clear end |
#flag_mapping ⇒ Object
198 199 200 |
# File 'lib/kotoshu/languages/ru/language.rb', line 198 def flag_mapping @flag_mapping end |
#flag_mapping=(mapping) ⇒ Object
202 203 204 |
# File 'lib/kotoshu/languages/ru/language.rb', line 202 def flag_mapping=(mapping) @flag_mapping = mapping end |
#tag(tokens) ⇒ Object
185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/kotoshu/languages/ru/language.rb', line 185 def tag(tokens) return [] if tokens.nil? || tokens.empty? tokens.map do |token| word = token[:token] if word.nil? || word.empty? token.merge(pos_tag: nil, lemma: nil) else lookup_result = lookup_with_pos(word) token.merge(pos_tag: lookup_result[:pos_tag], lemma: lookup_result[:lemma] || word) end end end |