Class: Obp::Access::GrammarParser
- Inherits:
-
Object
- Object
- Obp::Access::GrammarParser
- Defined in:
- lib/obp/access/grammar_parser.rb
Defined Under Namespace
Classes: Result
Constant Summary
# Maps part-of-speech markers, spelled exactly as they appear in the parsed
# text, to their full names.
#
# Lookups are exact-match (BOLD_PATTERNS tests membership with
# POS_MAP.key?), which is why both capitalisations of "adj." are listed.
POS_MAP = {
  "adj." => "adjective",
  "Adj." => "adjective",
  "verb" => "verb"
}.freeze
# Maps single-letter grammatical gender markers to their full names.
#
# Lookups are exact-match (BOLD_PATTERNS tests membership with
# GENDER_MAP.key?), so only the bare lowercase letters are recognised here.
GENDER_MAP = {
  "m" => "masculine",
  "f" => "feminine",
  "n" => "neuter"
}.freeze
# Dispatch table of [predicate, handler_name] pairs.
#
# Each predicate is a lambda taking a text token and returning whether the
# entry applies; the paired symbol names the handler for that token.
#
# NOTE(review): the lookup code (find_handler) is not visible here. Several
# predicates overlap (e.g. "m f" satisfies both the multi-gender and the
# gender-qualifier tests), so the entry order is presumably significant and
# is kept exactly as it was — confirm before reordering.
#
# Both the table and every pair are frozen so the table cannot be altered
# at runtime.
BOLD_PATTERNS = [
  # Exact part-of-speech marker, e.g. "adj."
  [->(token) { POS_MAP.key?(token) }, :handle_pos_marker],
  # Exact single gender letter: "m", "f" or "n"
  [->(token) { GENDER_MAP.key?(token) }, :handle_gender_marker],
  # A single gender letter immediately followed by a comma, e.g. "m,"
  [->(token) { token.match?(/\A[mfn],\z/) }, :handle_gender_with_comma],
  # Two or more gender letters separated by commas and/or whitespace
  [->(token) { token.match?(/\A[mfn][,\s]+[mfn]([,\s]+[mfn])*\z/) }, :handle_multi_gender],
  # A lone comma
  [->(token) { token == "," }, :handle_comma],
  # Opening and closing angle brackets
  [->(token) { token == "〈" }, :handle_enter_bracket],
  [->(token) { token == "〉" }, :handle_exit_bracket],
  # A gender letter followed by whitespace (and anything after it)
  [->(token) { token.match?(/\A[mfn]\s+/) }, :handle_gender_qualifier],
  # Text containing a comma and ending in a gender letter
  [->(token) { token.match?(/,.+[mfn]\z/) }, :handle_term_with_gender]
].map(&:freeze).freeze
Class Method Summary
Class Method Details
.parse(inner_html) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/obp/access/grammar_parser.rb', line 32

# Parses a grammar annotation into a Result.
#
# The input is split into segments by parse_segments. For every segment a
# handler is chosen by find_handler (which is also told whether parsing is
# currently inside a bracket) and invoked with the segment text and the
# shared parse state.
#
# @param inner_html [Object] markup to parse; passed straight to
#   parse_segments (presumably an HTML fragment string — confirm)
# @return [Result] term built by clean_term from the collected term parts,
#   the part of speech ("noun" unless a handler changed it) and the
#   collected genders with duplicates removed
def self.parse(inner_html)
  # Mutable accumulator handed to every handler.
  parse_state = { pos: "noun", genders: [], term_parts: [], in_bracket: false }

  parse_segments(inner_html).each do |segment|
    find_handler(segment, parse_state[:in_bracket]).call(segment[:text], parse_state)
  end

  Result.new(
    term: clean_term(parse_state[:term_parts]),
    pos: parse_state[:pos],
    genders: parse_state[:genders].uniq
  )
end