Class: DwcAgent::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/dwc_agent/parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/dwc_agent/parser.rb', line 11

# Builds the underlying Namae parser and precompiles every regular
# expression used by the cleanup steps, so that work is done once per
# Parser instance.
def initialize
  namae_config = {
    prefer_comma_as_separator: true,
    separator: SPLIT_BY,
    title: TITLE,
    appellation: APPELLATION,
    suffix: SUFFIX
  }
  @namae = Namae::Parser.new(namae_config)

  @strip_out_regex = Regexp.new(STRIP_OUT.to_s)
  @tidy_remains_regex = Regexp.new(POST_STRIP_TIDY.to_s)

  # Array#to_s renders the joined keys as `["..."]`, which Regexp.new then
  # reads as a bracket expression over those characters.
  # NOTE(review): the quote characters produced by the array rendering end up
  # inside that bracket expression too -- confirm this is intended.
  joined_char_keys = CHAR_SUBS.keys.join
  @char_subs_regex = Regexp.new([joined_char_keys].to_s)

  # Alternation of the phrase keys, each escaped so it matches literally.
  escaped_phrases = PHRASE_SUBS.keys.map { |phrase| Regexp.escape(phrase) }
  @phrase_subs_regex = Regexp.new(escaped_phrases.join('|'))

  # The separator pattern followed by optional whitespace at end of string.
  terminator_source = SPLIT_BY.to_s + %r{\s*\z}.to_s
  @residual_terminators_regex = Regexp.new(terminator_source)

  # Pairs of [compiled pattern, replacement] built from SEPARATORS.
  @separators = SEPARATORS.map do |pattern, replacement|
    [Regexp.new(pattern), replacement]
  end
end

Class Method Details

.instance ⇒ Object



6
7
8
# File 'lib/dwc_agent/parser.rb', line 6

# Returns the parser cached in Thread.current[:dwc_agent_parser], creating
# it with +new+ and storing it there the first time it is requested.
def instance
  cached = Thread.current[:dwc_agent_parser]
  return cached if cached

  Thread.current[:dwc_agent_parser] = new
end

Instance Method Details

#parse(name) ⇒ Array

Parses the passed-in string and returns a list of names.

Parameters:

  • name (String)

    the name or names to be parsed

Returns:

  • (Array)

    the list of parsed names



32
33
34
35
36
37
38
39
40
41
42
# File 'lib/dwc_agent/parser.rb', line 32

# Parses the passed-in string and returns a list of names.
#
# All cleanup is performed on a private copy of the input, so the caller's
# string is never modified and frozen strings are accepted.
#
# @param name [String, nil] the name or names to be parsed
# @return [Array] the list of parsed names; empty when +name+ is nil or ""
def parse(name)
  return [] if name.nil? || name == ""

  # Copy first: the bang methods below edit in place, which would otherwise
  # rewrite the caller's string and raise FrozenError on frozen input.
  cleaned = name.dup

  cleaned.gsub!(@strip_out_regex, ' ')
  cleaned.gsub!(@tidy_remains_regex, '')
  # Hash form of gsub!: each match is replaced by the value stored under the
  # matched text in the merged substitution table.
  cleaned.gsub!(Regexp.union(@char_subs_regex, @phrase_subs_regex), CHAR_SUBS.merge(PHRASE_SUBS))
  @separators.each { |pattern, replacement| cleaned.gsub!(pattern, replacement) }
  cleaned.gsub!(@residual_terminators_regex, '')
  cleaned.squeeze!(' ')
  cleaned.strip!
  @namae.parse(cleaned)
end