Class: Kotoshu::Languages::English::POSTagger
- Inherits:
- Components::PosTagger
- Inheritance chain: Object → Components::PosTagger → Kotoshu::Languages::English::POSTagger
- Defined in:
- lib/kotoshu/languages/en/language.rb
Overview
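English part-of-speech (POS) tagger. Its #tag method returns each token hash with :pos_tag and :lemma entries merged in.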
Constant Summary
- FLAG_TO_POS =
{ 'N' => 'NOUN', 'NN' => 'NOUN', 'NNS' => 'NOUN', 'NNP' => 'NOUN', 'NP' => 'NOUN_PROPER', 'V' => 'VERB', 'VB' => 'VERB', 'VBD' => 'VERB', 'VBG' => 'VERB', 'VBN' => 'VERB', 'VBP' => 'VERB', 'VBZ' => 'VERB', 'MD' => 'VERB_MODAL', 'A' => 'ADJ', 'JJ' => 'ADJ', 'JJR' => 'ADJ', 'JJS' => 'ADJ', 'R' => 'ADV', 'RB' => 'ADV', 'RBR' => 'ADV', 'RBS' => 'ADV', 'D' => 'DET', 'DT' => 'DET', 'PDT' => 'DET', 'P' => 'PRON', 'PP' => 'PRON', 'PRP' => 'PRON', 'PRP$' => 'PRON_POSS', 'WP' => 'PRON', 'WP$' => 'PRON_POSS', 'I' => 'PREP', 'IN' => 'PREP', 'C' => 'CONJ', 'CC' => 'CONJ', 'U' => 'PART', 'RP' => 'PART', 'INTJ' => 'INTJ', 'UH' => 'INTJ', 'CD' => 'NUM', 'FW' => 'X', 'PUNCT' => 'PUNCT', '.' => 'PUNCT', ',' => 'PUNCT', '!' => 'PUNCT', '?' => 'PUNCT', ';' => 'PUNCT', ':' => 'PUNCT' }.freeze
Instance Attribute Summary
-
#aff_path ⇒ Object
readonly
Returns the value of attribute aff_path.
-
#dic_path ⇒ Object
readonly
Returns the value of attribute dic_path.
-
#script ⇒ Object
readonly
Returns the value of attribute script.
Instance Method Summary
- #clear_cache ⇒ Object
- #flag_mapping ⇒ Object
- #flag_mapping=(mapping) ⇒ Object
-
#initialize(aff_path:, dic_path:, script: :latin, encoding: 'ISO-8859-1', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
constructor
A new instance of POSTagger.
- #tag(tokens) ⇒ Object
Methods inherited from Components::PosTagger
Constructor Details
#initialize(aff_path:, dic_path:, script: :latin, encoding: 'ISO-8859-1', flag_mapping: FLAG_TO_POS) ⇒ POSTagger
Returns a new instance of POSTagger.
235 236 237 238 239 240 241 242 243 |
# File 'lib/kotoshu/languages/en/language.rb', line 235

# Sets up the tagger: records the constructor arguments, builds the lookup
# object through Readers::LookupBuilder, and starts with an empty lookup cache.
#
# @param aff_path [Object] forwarded as the first positional argument to
#   Readers::LookupBuilder.new (presumably the affix file path; confirm there)
# @param dic_path [Object] forwarded as the second positional argument to
#   Readers::LookupBuilder.new (presumably the dictionary file path; confirm there)
# @param script [Symbol] forwarded to the builder as `script:`
# @param encoding [String] forwarded to the builder as `encoding:`
# @param flag_mapping [Hash] stored as-is; defaults to the frozen FLAG_TO_POS table
def initialize(aff_path:, dic_path:, script: :latin, encoding: 'ISO-8859-1', flag_mapping: FLAG_TO_POS)
  @aff_path, @dic_path = aff_path, dic_path
  @script = script
  @encoding = encoding
  @flag_mapping = flag_mapping

  builder = Readers::LookupBuilder.new(aff_path, dic_path, encoding: encoding, script: script)
  @lookuper = builder.build
  @lookup_cache = {}
end
Instance Attribute Details
#aff_path ⇒ Object (readonly)
Returns the value of attribute aff_path.
233 234 235 |
# File 'lib/kotoshu/languages/en/language.rb', line 233

# Read-only accessor for the aff_path attribute.
#
# @return [Object] the current value of @aff_path (nil if never assigned)
def aff_path
  @aff_path
end
#dic_path ⇒ Object (readonly)
Returns the value of attribute dic_path.
233 234 235 |
# File 'lib/kotoshu/languages/en/language.rb', line 233

# Read-only accessor for the dic_path attribute.
#
# @return [Object] the current value of @dic_path (nil if never assigned)
def dic_path
  @dic_path
end
#script ⇒ Object (readonly)
Returns the value of attribute script.
233 234 235 |
# File 'lib/kotoshu/languages/en/language.rb', line 233

# Read-only accessor for the script attribute.
#
# @return [Object] the current value of @script (nil if never assigned)
def script
  @script
end
Instance Method Details
#clear_cache ⇒ Object
266 267 268 |
# File 'lib/kotoshu/languages/en/language.rb', line 266

# Empties the lookup cache in place by calling #clear on @lookup_cache.
#
# @return [Object] whatever @lookup_cache.clear returns (for a Hash, the
#   same, now-empty, hash)
def clear_cache
  @lookup_cache.clear
end
#flag_mapping ⇒ Object
258 259 260 |
# File 'lib/kotoshu/languages/en/language.rb', line 258

# Reader for the mapping currently held in @flag_mapping.
#
# @return [Object] the current value of @flag_mapping (nil if never assigned)
def flag_mapping
  @flag_mapping
end
#flag_mapping=(mapping) ⇒ Object
262 263 264 |
# File 'lib/kotoshu/languages/en/language.rb', line 262

# Replaces the mapping held in @flag_mapping. The argument is stored as-is;
# no copy or validation is performed here.
#
# @param mapping [Object] the new mapping
# @return [Object] the assigned mapping
def flag_mapping=(mapping)
  @flag_mapping = mapping
end
#tag(tokens) ⇒ Object
245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'lib/kotoshu/languages/en/language.rb', line 245

# Annotates each token with a POS tag and a lemma.
#
# Every element is read through `[:token]` and answered with `merge`, so the
# result holds new objects carrying two extra keys, :pos_tag and :lemma.
# Elements whose :token is nil or empty get nil for both keys. For the rest,
# the values come from lookup_with_pos (defined outside this excerpt), and the
# lemma falls back to the word itself when the lookup has none.
#
# @param tokens [Array<Hash>, nil] token hashes (assumed Hash-like: must
#   respond to `[]` and `merge`)
# @return [Array<Hash>] one merged hash per input token; [] for nil or empty input
def tag(tokens)
  return [] if tokens.nil? || tokens.empty?

  tokens.map do |entry|
    surface = entry[:token]
    # Nothing to look up: keep the token but mark both annotations as absent.
    next entry.merge(pos_tag: nil, lemma: nil) if surface.nil? || surface.empty?

    found = lookup_with_pos(surface)
    entry.merge(pos_tag: found[:pos_tag], lemma: found[:lemma] || surface)
  end
end