Class: Dentaku::Tokenizer

Inherits:
Object
Defined in:
lib/dentaku/tokenizer.rb

Constant Summary collapse

LPAREN =
TokenMatcher.new(:grouping, :open)
RPAREN =
TokenMatcher.new(:grouping, :close)

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#aliases ⇒ Object (readonly)

Returns the value of attribute aliases.



8
9
10
# File 'lib/dentaku/tokenizer.rb', line 8

def aliases
  @aliases
end

Instance Method Details

#alias_regex ⇒ Object



97
98
99
100
# File 'lib/dentaku/tokenizer.rb', line 97

# Builds a case-insensitive Regexp that matches any configured alias
# spelling, but only when it stands alone (preceded by punctuation,
# whitespace, or start-of-string) and is used as a function call
# (immediately followed by an opening parenthesis).
#
# Each alias is passed through Regexp.escape so metacharacters such as
# '.' or '+' in an alias name match literally instead of acting as
# regex operators.
def alias_regex
  values = @aliases.values.flatten.map { |v| Regexp.escape(v) }.join('|')
  /(?<=\p{Punct}|[[:space:]]|\A)(#{values})(?=\()/i
end

#last_token ⇒ Object



58
59
60
# File 'lib/dentaku/tokenizer.rb', line 58

# The most recently produced token, or nil when nothing has been
# tokenized yet.
def last_token
  @tokens[-1]
end

#replace_aliases(string) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/dentaku/tokenizer.rb', line 83

# Rewrites every aliased function name found in +string+ with its
# canonical name (the alias hash key). Matching is case-insensitive.
# The substitution mutates +string+ in place via gsub!, and the
# (possibly modified) string is returned either way.
def replace_aliases(string)
  return string if @aliases.none?

  string.gsub!(alias_regex) do |found|
    exact = /^#{Regexp.escape(found)}$/i

    # Locate the alias group containing the matched spelling and emit
    # its canonical key as the replacement text.
    key, _spellings = @aliases.find do |(_canonical, spellings)|
      spellings.grep(exact).any?
    end

    key
  end

  string
end

#strip_comments(input) ⇒ Object



79
80
81
# File 'lib/dentaku/tokenizer.rb', line 79

# Removes C-style /* ... */ block comments (including multi-line ones)
# from +input+, returning a new stripped string; +input+ itself is not
# modified.
def strip_comments(input)
  input.gsub(%r{/\*[^*]*\*+(?:[^*/][^*]*\*+)*/}, '')
end

#tokenize(string, options = {}) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/dentaku/tokenizer.rb', line 13

# Converts +string+ into the Array of tokens stored in (and returned as)
# @tokens.
#
# Supported options:
#   :aliases           — Hash of canonical name => [alias spellings]
#                        (defaults to global_aliases)
#   :case_sensitive    — forwarded to the token scanners (default: false)
#   :raw_date_literals — forwarded to the token scanners (default: true)
#
# Calls fail! with :parse_error when no scanner matches the remaining
# input, and with :too_many_opening_parentheses when @nesting is still
# positive after scanning. NOTE(review): @nesting appears to be updated
# inside handle_token, which is not visible here — confirm.
def tokenize(string, options = {})
  @nesting = 0   # open-parenthesis depth; must be back to 0 at the end
  @tokens  = []  # accumulated output tokens
  @aliases = options.fetch(:aliases, global_aliases)
  # Pre-process: drop /* ... */ comments, then rewrite alias spellings to
  # canonical names (replace_aliases mutates its argument via gsub!, hence
  # the defensive dup of the caller's string).
  input    = strip_comments(string.to_s.dup)
  input    = replace_aliases(input)

  scanner_options = {
    case_sensitive: options.fetch(:case_sensitive, false),
    raw_date_literals: options.fetch(:raw_date_literals, true)
  }

  # Hoist the scanner list out of the per-position loop. The previous code
  # rebuilt this Array (via Hash#select + .values) on every advance, which
  # was a significant chunk of tokenize allocations.
  scanners = TokenScanner.scanners(scanner_options)

  ss = StringScanner.new(input)

  until ss.eos?
    last_token = @tokens.last
    scanned = false

    # First scanner that matches at the current position wins. A scanner
    # may return a single token or an Array of tokens; either way each
    # token is routed through handle_token.
    scanners.each do |scanner|
      result = scanner.scan_at(ss, last_token)
      next unless result

      if result.is_a?(Array)
        result.each { |t| handle_token(t, ss) }
      else
        handle_token(result, ss)
      end

      scanned = true
      break
    end

    # Nothing recognized the input at this position: report the unparsed
    # remainder of the expression.
    fail! :parse_error, at: ss.rest unless scanned
  end

  fail! :too_many_opening_parentheses if @nesting > 0

  @tokens
end