Class: Dentaku::TokenScanner

Inherits:
Object
  • Object
show all
Extended by:
StringCasing
Defined in:
lib/dentaku/token_scanner.rb

Constant Summary collapse

# Matches date literals with a 2- or 4-digit year, optional time (with
# optional fractional seconds) and optional zone offset; the trailing
# (?!\d) keeps the match from ending just before another digit.
# NOTE(review): `[ |T]` is a character class, so it also matches a literal
# '|' between date and time — confirm space-or-T was the intent.
DATE_TIME_REGEXP =
/\d{2}\d{2}?-\d{1,2}-\d{1,2}([ |T]\d{1,2}:\d{1,2}:\d{1,2}(\.\d*)?)? ?(Z|((\+|\-)\d{2}\:?\d{2}))?(?!\d)/.freeze

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from StringCasing

standardize_case

Constructor Details

#initialize(category, regexp, converter = nil, condition = nil) ⇒ TokenScanner

Returns a new instance of TokenScanner.



13
14
15
16
17
18
19
20
21
# File 'lib/dentaku/token_scanner.rb', line 13

# Builds a scanner for one token category.
#
# @param category  [Symbol] category of the tokens this scanner emits
# @param regexp    [String, Regexp] pattern source, interpolated into a
#   case-insensitive regexp
# @param converter [Proc, nil] maps raw matched text to the token value;
#   may return a plain value, a Token, or an array of either
# @param condition [Proc, nil] given the previous token, returns truthy
#   when this scanner is allowed to match (see .negate)
def initialize(category, regexp, converter = nil, condition = nil)
  @category  = category
  # \A-anchored variant used by the String-based #scan path.
  @regexp    = %r{\A(#{ regexp })}i
  # StringScanner anchors implicitly at the current position, so an
  # unanchored copy of the same pattern is what its `scan` wants.
  @ss_regexp = %r{(#{ regexp })}i
  @converter = converter
  @condition = condition
end

Class Attribute Details

.case_sensitiveObject (readonly)

Returns the value of attribute case_sensitive.



62
63
64
# File 'lib/dentaku/token_scanner.rb', line 62

# @return [Boolean, nil] the :case_sensitive flag most recently passed to
#   .scanners (nil until .scanners has been called)
def case_sensitive
  @case_sensitive
end

Class Method Details

.accessObject



191
192
193
194
# File 'lib/dentaku/token_scanner.rb', line 191

# Scanner for index/access brackets (`[` and `]`).
def access
  bracket_names = { '[' => :lbracket, ']' => :rbracket }
  new(:access, '\[|\]', ->(raw) { bracket_names[raw] })
end

.arrayObject



186
187
188
189
# File 'lib/dentaku/token_scanner.rb', line 186

# Scanner for array literal delimiters (`{` and `}`). The pattern also
# matches `,`, which maps to nil here; the grouping scanner (listed
# earlier in .available_scanners) normally claims commas first.
def array
  delimiter_names = { '{' => :array_start, '}' => :array_end }
  new(:array, '\{|\}|,', ->(raw) { delimiter_names[raw] })
end

.available_scannersObject



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/dentaku/token_scanner.rb', line 64

# Ordered ids of the built-in scanners. Order matters: the tokenizer tries
# them front to back, and :datetime precedes :numeric so timestamps are
# not consumed piecemeal as numbers.
def available_scanners
  %i[
    null
    whitespace
    datetime
    numeric
    hexadecimal
    double_quoted_string
    single_quoted_string
    negate
    combinator
    operator
    grouping
    array
    access
    case_statement
    comparator
    boolean
    function
    identifier
    quoted_identifier
  ]
end

.booleanObject



215
216
217
# File 'lib/dentaku/token_scanner.rb', line 215

# Scanner for boolean literals; the token value is true exactly when the
# matched word is "true" (any case).
def boolean
  to_bool = ->(raw) { raw.strip.downcase == 'true' }
  new(:logical, '(true|false)\b', to_bool)
end

.case_statementObject



196
197
198
199
# File 'lib/dentaku/token_scanner.rb', line 196

# Scanner for CASE-expression keywords, normalized to symbolic names.
def case_statement
  keyword_map = {
    'case' => :open, 'end' => :close, 'then' => :then,
    'when' => :when, 'else' => :else
  }
  new(:case, '(case|end|then|when|else)\b', ->(raw) { keyword_map[raw.downcase] })
end

.combinatorObject



207
208
209
210
211
212
213
# File 'lib/dentaku/token_scanner.rb', line 207

# Scanner for logical combinators (and/or and their &&/|| spellings). The
# pattern requires trailing whitespace, which is stripped before lookup;
# word forms fall through to their own symbol (:and / :or).
def combinator
  symbol_for = { '&&' => :and, '||' => :or }
  new(:combinator, '(and|or|&&|\|\|)\s', ->(raw) {
    word = raw.strip.downcase
    symbol_for.fetch(word) { word.to_sym }
  })
end

.comparatorObject



201
202
203
204
205
# File 'lib/dentaku/token_scanner.rb', line 201

# Scanner for comparison operators, accepting both the primary spellings
# and the alternates <> (ne) and == (eq).
def comparator
  primary   = { '<=' => :le, '>=' => :ge, '!=' => :ne, '<' => :lt, '>' => :gt, '=' => :eq }
  alternate = { '<>' => :ne, '==' => :eq }
  new(:comparator, '<=|>=|!=|<>|<|>|==|=', ->(raw) { primary[raw] || alternate[raw] })
end

.datetimeObject

NOTE: Converts to DateTime rather than Time because Array(Time) splats a Time into its component parts, which would break the Array(value) call in #scan.



139
140
141
# File 'lib/dentaku/token_scanner.rb', line 139

# Scanner for date/time literals matched by DATE_TIME_REGEXP.
# Converts to DateTime rather than Time because Array(Time) splats a Time
# into its component parts, which would break the Array(value) call in
# #scan.
def datetime
  new(:datetime, DATE_TIME_REGEXP, lambda { |raw| Time.parse(raw).to_datetime })
end

.double_quoted_stringObject



153
154
155
# File 'lib/dentaku/token_scanner.rb', line 153

# Scanner for double-quoted string literals; the converter strips the
# surrounding quotes from the token value.
def double_quoted_string
  unquote = ->(raw) { raw.gsub(/^"|"$/, '') }
  new(:string, '"[^"]*"', unquote)
end

.functionObject



219
220
221
222
223
224
225
226
227
# File 'lib/dentaku/token_scanner.rb', line 219

# Scanner for function calls: matches the name together with its opening
# paren and emits two tokens — the function, then the open grouping.
def function
  new(:function, '\w+!?\s*\(', lambda { |raw|
    name = raw.gsub('(', '')
    function_token = Token.new(:function, name.strip.downcase.to_sym, name)
    paren_token    = Token.new(:grouping, :open, '(')
    [function_token, paren_token]
  })
end

.groupingObject



181
182
183
184
# File 'lib/dentaku/token_scanner.rb', line 181

# Scanner for parentheses and argument-separating commas.
def grouping
  punctuation = { '(' => :open, ')' => :close, ',' => :comma }
  new(:grouping, '\(|\)|,', ->(raw) { punctuation[raw] })
end

.hexadecimalObject



149
150
151
# File 'lib/dentaku/token_scanner.rb', line 149

# Scanner for 0x-prefixed hexadecimal literals (the pattern is made
# case-insensitive by the flags added in #initialize); the converter drops
# the 0x prefix and parses base 16.
def hexadecimal
  to_int = ->(raw) { raw[2..-1].to_i(16) }
  new(:numeric, '(0x[0-9a-f]+)\b', to_int)
end

.identifierObject



229
230
231
# File 'lib/dentaku/token_scanner.rb', line 229

# Scanner for identifiers (word characters and dots); case handling is
# delegated to StringCasing#standardize_case.
def identifier
  normalize = ->(raw) { standardize_case(raw.strip) }
  new(:identifier, '[[[:word:]]\.]+\b', normalize)
end

.negateObject



161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/dentaku/token_scanner.rb', line 161

# Scanner for unary minus. The condition only lets it match when the
# previous token means a '-' here must be negation (start of input, after
# an operator/comparator/combinator, or after an opener/comma) rather
# than binary subtraction.
def negate
  follows_term_opener = lambda do |last_token|
    return true if last_token.nil?

    last_token.is?(:operator) ||
      last_token.is?(:comparator) ||
      last_token.is?(:combinator) ||
      [:open, :comma, :lbracket, :array_start].include?(last_token.value)
  end
  new(:operator, '-', ->(_raw) { :negate }, follows_term_opener)
end

.nullObject



134
135
136
# File 'lib/dentaku/token_scanner.rb', line 134

# Scanner for the `null` literal; no converter, so the raw matched text is
# the token value.
def null
  new(:null, 'null\b')
end

.numericObject



143
144
145
146
147
# File 'lib/dentaku/token_scanner.rb', line 143

# Scanner for numeric literals, including decimals and scientific
# notation. Pure integers become Integer; anything containing a decimal
# point or exponent becomes BigDecimal (exact decimal arithmetic instead
# of Float rounding).
def numeric
  new(:numeric, '((?:\d+(\.\d+)?|\.\d+)(?:(e|E)(\+|-)?\d+)?)\b', lambda { |raw|
    # match? tests without allocating MatchData or clobbering the $~
    # family of globals, unlike the previous `raw =~ /(\.|e|E)/` form.
    raw.match?(/[.eE]/) ? BigDecimal(raw) : raw.to_i
  })
end

.operatorObject



174
175
176
177
178
179
# File 'lib/dentaku/token_scanner.rb', line 174

# Scanner for arithmetic and bitwise operators.
def operator
  symbol_for = {
    '^' => :pow, '+' => :add, '-' => :subtract, '*' => :multiply,
    '/' => :divide, '%' => :mod, '|' => :bitor, '&' => :bitand,
    '<<' => :bitshiftleft, '>>' => :bitshiftright
  }
  new(:operator, '\^|\+|-|\*|\/|%|\||&|<<|>>', ->(raw) { symbol_for[raw] })
end

.quoted_identifierObject



233
234
235
# File 'lib/dentaku/token_scanner.rb', line 233

# Scanner for backtick-quoted identifiers; the converter strips the
# backticks, and — unlike .identifier — no case standardization is
# applied to the quoted name.
def quoted_identifier
  unquote = ->(raw) { raw.gsub(/^`|`$/, '') }
  new(:identifier, '`[^`]*`', unquote)
end

.register_default_scannersObject



88
89
90
# File 'lib/dentaku/token_scanner.rb', line 88

# Registers every scanner listed by .available_scanners, in order.
def register_default_scanners
  register_scanners(available_scanners)
end

.register_scanner(id, scanner) ⇒ Object



99
100
101
102
# File 'lib/dentaku/token_scanner.rb', line 99

# Registers (or replaces) a single scanner under +id+ and invalidates the
# cached scanner lists so the change takes effect on the next .scanners
# call.
def register_scanner(id, scanner)
  @scanners[id] = scanner
  reset_scanner_cache!
end

.register_scanners(scanner_ids) ⇒ Object



92
93
94
95
96
97
# File 'lib/dentaku/token_scanner.rb', line 92

# Rebuilds the registry from the given ids — each id names a class-method
# factory on this class — then invalidates the cached lists.
def register_scanners(scanner_ids)
  @scanners = scanner_ids.map { |id| [id, send(id)] }.to_h
  reset_scanner_cache!
end

.reset_scanner_cache!Object

Invalidate cached scanner lists; called by register_scanner(s) so tests that swap scanners in and out keep working.



125
126
127
128
# File 'lib/dentaku/token_scanner.rb', line 125

# Invalidate the two cached scanner lists built by .scanners; called by
# the register_* methods so tests that swap scanners in and out keep
# working.
def reset_scanner_cache!
  @cached_full = @cached_no_datetime = nil
end

.scanners(options = {}) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/dentaku/token_scanner.rb', line 109

# Returns the scanner instances to use for a tokenize run.
#
# @param options [Hash]
# @option options [Boolean] :case_sensitive (false) stored on the class —
#   presumably consulted by StringCasing#standardize_case; verify there
# @option options [Boolean] :raw_date_literals (true) when false, the
#   :datetime scanner is excluded from the returned list
# @return [Array<TokenScanner>]
def scanners(options = {})
  @case_sensitive = options.fetch(:case_sensitive, false)
  raw_date_literals = options.fetch(:raw_date_literals, true)

  # Cache the two possible scanner lists so repeated tokenize calls don't
  # rebuild the array each time (and the inner tokenize loop doesn't
  # rebuild it on every iteration).
  if raw_date_literals
    @cached_full ||= @scanners.values
  else
    @cached_no_datetime ||= @scanners.reject { |k, _| k == :datetime }.values
  end
end

.scanners=(scanner_ids) ⇒ Object



104
105
106
107
# File 'lib/dentaku/token_scanner.rb', line 104

# Restricts the registry to the given ids (this can only remove scanners,
# never add them) and invalidates the cached lists.
def scanners=(scanner_ids)
  @scanners.keep_if { |id, _scanner| scanner_ids.include?(id) }
  reset_scanner_cache!
end

.single_quoted_stringObject



157
158
159
# File 'lib/dentaku/token_scanner.rb', line 157

# Scanner for single-quoted string literals; the converter strips the
# surrounding quotes from the token value.
def single_quoted_string
  unquote = ->(raw) { raw.gsub(/^'|'$/, '') }
  new(:string, "'[^']*'", unquote)
end

.whitespaceObject



130
131
132
# File 'lib/dentaku/token_scanner.rb', line 130

# Scanner for runs of whitespace; no converter, so the raw run is the
# token value.
def whitespace
  new(:whitespace, '\s+')
end

Instance Method Details

#scan(string, last_token = nil) ⇒ Object

Legacy entry point: takes a string, returns false or an Array of Tokens.



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dentaku/token_scanner.rb', line 24

# Legacy entry point: tries to match +string+ at its start.
#
# @param string [String] remaining input text
# @param last_token [Token, nil] previous token, passed to the condition
# @return [Array<Token>, false] the produced token(s), or false when the
#   pattern does not match or the condition rejects the previous token
def scan(string, last_token = nil)
  match = @regexp.match(string)
  return false unless match
  return false unless @condition.nil? || @condition.call(last_token)

  raw = match.to_s
  value = @converter ? @converter.call(raw) : raw
  Array(value).map { |v| Token === v ? v : Token.new(@category, v, raw) }
end

#scan_at(strscanner, last_token = nil) ⇒ Object

Fast path used by the Tokenizer: scans directly against a StringScanner without slicing strings, and returns either a single Token, an Array of Tokens, or nil. Avoids the per-call `Array(value).map` allocation in the common (single-token) case.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/dentaku/token_scanner.rb', line 41

# Fast path used by the Tokenizer: scans directly against a StringScanner
# (advancing its position) without slicing strings.
#
# @param strscanner [StringScanner] positioned at the text to scan
# @param last_token [Token, nil] previous token, passed to the condition
# @return [Token, Array<Token>, nil] nil when the condition rejects or the
#   pattern does not match; otherwise the token(s). Returning a bare
#   Token in the common single-token case avoids the per-call
#   Array(value).map allocation of #scan.
def scan_at(strscanner, last_token = nil)
  return nil if @condition && !@condition.call(last_token)

  # StringScanner#scan only matches at the current position, so the
  # unanchored @ss_regexp behaves like the \A-anchored @regexp does
  # against a plain String.
  raw = strscanner.scan(@ss_regexp)
  return nil unless raw

  if @converter
    value = @converter.call(raw)
    if value.is_a?(Array)
      # e.g. the function scanner returns [function token, open paren].
      value.map! { |v| Token === v ? v : Token.new(@category, v, raw) }
    elsif Token === value
      value
    else
      Token.new(@category, value, raw)
    end
  else
    # No converter: the raw text doubles as the token value.
    Token.new(@category, raw, raw)
  end
end