Class: Obp::Access::GrammarParser

Inherits:
Object
  • Object
show all
Defined in:
lib/obp/access/grammar_parser.rb

Defined Under Namespace

Classes: Result

Constant Summary collapse

# Lookup from an abbreviated part-of-speech marker to its full name.
# Both capitalizations of "adj." resolve to "adjective"; "verb" maps to itself.
POS_MAP = { "adj." => "adjective", "Adj." => "adjective", "verb" => "verb" }.freeze
# Lookup from a single-letter gender abbreviation to the full gender name.
GENDER_MAP = { "m" => "masculine", "f" => "feminine", "n" => "neuter" }.freeze
# Frozen table of [predicate, handler_name] pairs. Each predicate is a lambda
# that takes a single string `t` and returns whether the entry applies; the
# second element is the Symbol name of the handler associated with it.
# NOTE(review): presumably scanned top-to-bottom by find_handler with the first
# matching entry winning, so entry order would matter -- confirm against
# find_handler, which is not shown here.
BOLD_PATTERNS =
[
  # Exact key of POS_MAP (e.g. "adj.", "verb").
  [->(t) { POS_MAP.key?(t) }, :handle_pos_marker],
  # Exact key of GENDER_MAP ("m", "f" or "n").
  [->(t) { GENDER_MAP.key?(t) }, :handle_gender_marker],
  # A single gender letter immediately followed by a comma, e.g. "m,".
  [->(t) { t.match?(/\A[mfn],\z/) }, :handle_gender_with_comma],
  # Two or more gender letters separated by commas and/or whitespace, e.g. "m, f".
  [->(t) { t.match?(/\A[mfn][,\s]+[mfn]([,\s]+[mfn])*\z/) }, :handle_multi_gender],
  # A lone comma.
  [->(t) { t == "," }, :handle_comma],
  # NOTE(review): the next two predicates are identical (both compare against
  # the empty string), so if entries are tried in order the second can never be
  # selected by predicate alone. The literals look like bracket characters that
  # were lost in extraction/encoding -- verify against the real source file.
  [->(t) { t == "" }, :handle_enter_bracket],
  [->(t) { t == "" }, :handle_exit_bracket],
  # Starts with a gender letter followed by whitespace (not anchored at the end).
  [->(t) { t.match?(/\A[mfn]\s+/) }, :handle_gender_qualifier],
  # Contains a comma, then at least one more character, and ends in m, f or n.
  [->(t) { t.match?(/,.+[mfn]\z/) }, :handle_term_with_gender],
].freeze

Class Method Summary collapse

Class Method Details

.parse(inner_html) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
# File 'lib/obp/access/grammar_parser.rb', line 32

# Builds a Result by running every segment of +inner_html+ through its handler.
#
# Parsing starts from a default context (part of speech "noun", no genders,
# no term parts, not inside a bracket). Each segment produced by
# parse_segments is dispatched to the handler chosen by find_handler, which
# receives the segment's :text and the shared, mutable context.
#
# @param inner_html [Object] markup handed to parse_segments
#   (presumably an HTML string -- confirm against callers)
# @return [Result] term cleaned via clean_term, the final part of speech,
#   and the collected genders with duplicates removed
def self.parse(inner_html)
  ctx = { pos: "noun", genders: [], term_parts: [], in_bracket: false }

  parse_segments(inner_html).each do |segment|
    # The bracket flag is re-read for every segment because handlers receive
    # the context and may change it.
    find_handler(segment, ctx[:in_bracket]).call(segment[:text], ctx)
  end

  Result.new(
    term: clean_term(ctx[:term_parts]),
    pos: ctx[:pos],
    genders: ctx[:genders].uniq
  )
end