Module: Mailmate::Lexer Private

Defined in:
lib/mailmate/lexer.rb

This module is part of a private API. You should avoid using this module if possible, as it may be removed or be changed in the future.

Defined Under Namespace

Classes: Error

Constant Summary collapse

KEYWORDS =

This constant is part of a private API. You should avoid using this constant if possible, as it may be removed or be changed in the future.

%w[and or not exists].freeze
OP_FLAG_CHARS =

This constant is part of a private API. You should avoid using this constant if possible, as it may be removed or be changed in the future.

Operator-modifier flags, written in square brackets directly after an operator, e.g. =[c], >[f], !=[x].

"cafx"

Class Method Summary collapse

Class Method Details

.lex(input) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/mailmate/lexer.rb', line 16

# Tokenizes +input+ into a flat array of tokens, terminated by +[:eof]+.
#
# Token shapes:
#   [:lparen] / [:rparen] / [:dot]
#   [:var, name]        `$NAME`; name is the run of A-Z/_ after the `$`
#   [:string, text]     `'...'` with `\\` and `\'` unescaped
#   [:op, op, flags]    op is =, !=, ~, !~, <, <=, > or >=; flags is an
#                       Array of one-character Strings from OP_FLAG_CHARS
#   [:number, value]    value is an Integer
#   [:shorthand, text]  text includes the leading `#` characters
#   [:keyword, word]    word is one of KEYWORDS
#   [:ident, word]
#
# @api private
# @param input [String] the text to tokenize
# @return [Array<Array>] the tokens in input order, always ending in [:eof]
# @raise [Error] on an empty `$` or `#` name, an unterminated string, a
#   malformed operator flag list, a `!` not followed by `=` or `~`, or any
#   character that cannot start a token
def self.lex(input)
  tokens = []
  i = 0
  while i < input.length
    c = input[i]

    # Every branch either advances `i` past the token it consumed or raises,
    # so the loop always makes progress.
    case c
    when /\s/
      # Whitespace only separates tokens.
      i += 1

    # Punctuation
    when "("
      tokens << [:lparen]
      i += 1
    when ")"
      tokens << [:rparen]
      i += 1
    when "."
      tokens << [:dot]
      i += 1

    # Variable reference: $SENT, $PERSONAL_INBOX
    when "$"
      j = i + 1
      j += 1 while j < input.length && input[j] =~ /[A-Z_]/
      raise Error, "empty variable name at #{i}" if j == i + 1
      tokens << [:var, input[(i + 1)...j]]
      i = j

    # String: '...'  Only `\\` and `\'` are recognized escapes; other
    # backslashes are preserved literally so IMAP keywords like
    # `\Seen` / `\Flagged` survive intact.
    when "'"
      j = i + 1
      buf = +""
      while j < input.length && input[j] != "'"
        if input[j] == "\\" && j + 1 < input.length && (input[j + 1] == "\\" || input[j + 1] == "'")
          buf << input[j + 1]
          j += 2
        else
          buf << input[j]
          j += 1
        end
      end
      # Running off the end means the closing quote was never seen.
      raise Error, "unterminated string starting at #{i}" if j >= input.length
      tokens << [:string, buf]
      i = j + 1

    # Operator: !=, =, ~, !~, <, <=, >, >=, with optional [flags]
    when "!", "=", "~", "<", ">"
      op = c
      j = i + 1
      if c == "!"
        # `!` is not an operator on its own; it is only valid as the first
        # character of `!=` or `!~`. Reject it here, with its position,
        # instead of emitting a bogus [:op, "!"] token.
        unless j < input.length && (input[j] == "=" || input[j] == "~")
          raise Error, "unexpected char #{c.inspect} at position #{i}"
        end
        op = "!" + input[j]
        j += 1
      elsif (c == "<" || c == ">") && j < input.length && input[j] == "="
        op = c + "="
        j += 1
      end
      # Optional [flags]; the `[` must follow the operator immediately.
      flags = []
      if j < input.length && input[j] == "["
        k = j + 1
        while k < input.length && OP_FLAG_CHARS.include?(input[k])
          flags << input[k]
          k += 1
        end
        raise Error, "expected ] after operator flags at #{j}" if k >= input.length || input[k] != "]"
        j = k + 1
      end
      tokens << [:op, op, flags]
      i = j

    # Number: 0-9+
    when /\d/
      j = i
      j += 1 while j < input.length && input[j] =~ /\d/
      tokens << [:number, input[i...j].to_i]
      i = j

    # Shorthand: one or more `#` followed by a non-empty [a-zA-Z0-9_-]+ name.
    when "#"
      j = i
      j += 1 while j < input.length && input[j] == "#"
      start = j
      j += 1 while j < input.length && input[j] =~ /[a-zA-Z0-9_-]/
      raise Error, "empty shorthand at #{i}" if start == j
      tokens << [:shorthand, input[i...j]]
      i = j

    # Identifier: [a-zA-Z_][a-zA-Z0-9_-]*  (words in KEYWORDS become :keyword)
    when /[a-zA-Z_]/
      j = i
      j += 1 while j < input.length && input[j] =~ /[a-zA-Z0-9_-]/
      word = input[i...j]
      tokens << [KEYWORDS.include?(word) ? :keyword : :ident, word]
      i = j

    else
      raise Error, "unexpected char #{c.inspect} at position #{i}"
    end
  end
  tokens << [:eof]
  tokens
end