Class: Alap::ExpressionParser

Inherits:
Object
  • Object
show all
Defined in:
lib/alap/expression_parser.rb

Overview

Recursive descent parser for Alap’s expression grammar:

query   = segment (',' segment)*
segment = term (op term)* refiner*
op      = '+' | '|' | '-'
term    = '(' segment ')' | atom
atom    = ITEM_ID | CLASS | DOM_REF | REGEX | PROTOCOL
refiner = '*' name (':' arg)* '*'

Supports: item IDs, .tag queries, @macro expansion, /regex/ search, :protocol:args: expressions, *refiner:args* post-processing, parenthesized grouping, and the operators + (AND/intersection), | (OR/union), and - (WITHOUT/subtraction).

Defined Under Namespace

Classes: ParseResult, Token

Constant Summary collapse

MAX_DEPTH =
32
MAX_TOKENS =
1024
MAX_MACRO_EXPANSIONS =
10
MAX_REGEX_QUERIES =
5
MAX_SEARCH_RESULTS =
100
REGEX_TIMEOUT_MS =
20
MAX_REFINERS =
10
REGEX_FIELD_CODES =
"lutdka"
PROTOCOL_DELIMITERS =
" \t\n\r+|,()*/".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ ExpressionParser

Returns a new instance of ExpressionParser.



39
40
41
42
43
# File 'lib/alap/expression_parser.rb', line 39

# Builds a parser around +config+ and zeroes the per-query counters.
#
# @param config [Object] configuration the parser reads from; other methods
#   index it with string keys such as "allLinks" and "searchPatterns"
def initialize(config)
  @config = config
  # Nesting depth and regex-query counters both start from zero.
  @depth = @regex_count = 0
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



37
38
39
# File 'lib/alap/expression_parser.rb', line 37

# Reader for the configuration currently held by the parser.
#
# @return [Object] the value stored in @config
def config
  @config
end

Instance Method Details

#query(expression, anchor_id: nil) ⇒ Object

Parse expression and return matching item IDs (deduplicated, order preserved).



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/alap/expression_parser.rb', line 54

# Evaluates +expression+ against config["allLinks"] and returns the matching
# item IDs with duplicates removed (first occurrence wins, order preserved).
#
# Returns an empty array when the expression is not a String, is blank,
# expands to nothing, produces no tokens, produces more than MAX_TOKENS
# tokens, or when config["allLinks"] is not a non-empty Hash.
#
# @param expression [String] the expression to evaluate
# @param anchor_id [Object, nil] forwarded unchanged to expand_macros
# @return [Array] unique item IDs in the order parse_query produced them
def query(expression, anchor_id: nil)
  return [] unless expression.is_a?(String)

  trimmed = expression.strip
  return [] if trimmed.empty?

  links = @config["allLinks"]
  return [] if !links.is_a?(Hash) || links.empty?

  macro_free = expand_macros(trimmed, anchor_id)
  return [] if macro_free.empty?

  token_list = tokenize(macro_free)
  return [] if token_list.empty?

  token_count = token_list.length
  if token_count > MAX_TOKENS
    warn "[Alap] Expression has #{token_count} tokens (max #{MAX_TOKENS}). " \
         "Ignoring: \"#{expression[0, 60]}...\""
    return []
  end

  # Per-query counters start fresh for every parse.
  @depth = 0
  @regex_count = 0

  # uniq keeps the first occurrence of each ID, preserving order.
  parse_query(token_list).uniq
end

#search_by_class(class_name) ⇒ Object

Return all item IDs carrying class_name as a tag.



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/alap/expression_parser.rb', line 85

# Collects the IDs of every item whose "tags" array contains +class_name+.
#
# Entries that are not Hashes, or whose "tags" value is not an Array, are
# ignored. Returns an empty array when config["allLinks"] is not a Hash.
#
# @param class_name [Object] tag value to look for
# @return [Array] item IDs in allLinks iteration order
def search_by_class(class_name)
  links = @config["allLinks"]
  return [] unless links.is_a?(Hash)

  tagged = links.select do |_item_id, entry|
    next false unless entry.is_a?(Hash)

    labels = entry["tags"]
    labels.is_a?(Array) && labels.include?(class_name)
  end

  tagged.keys
end

#search_by_regex(pattern_key, field_opts = nil) ⇒ Object

Search allLinks using a named regex from config.searchPatterns.



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/alap/expression_parser.rb', line 98

# Search allLinks using a named regex from config.searchPatterns.
#
# A pattern entry is either a String (the regex source) or a Hash with a
# "pattern" key and an optional "options" Hash ("fields", "age", "limit",
# "sort"). Every call counts against MAX_REGEX_QUERIES; once the counter
# exceeds that limit the call is skipped.
#
# Malformed configuration never raises: an entry that is neither a String
# nor a Hash (or whose "pattern" is not a String) is reported with a warning
# and yields no results, a non-Hash "options" value is treated as empty, and
# a "limit" that is missing, nil, negative or non-numeric falls back to
# MAX_SEARCH_RESULTS.
#
# @param pattern_key [String] key into config["searchPatterns"]
# @param field_opts [String, nil] field codes overriding the pattern's own
#   "fields" option; handed to parse_field_codes
# @return [Array] matching item IDs, at most MAX_SEARCH_RESULTS of them
def search_by_regex(pattern_key, field_opts = nil)
  @regex_count += 1
  if @regex_count > MAX_REGEX_QUERIES
    warn "[Alap] Regex query limit exceeded (max #{MAX_REGEX_QUERIES}). Skipping /#{pattern_key}/"
    return []
  end

  patterns = @config["searchPatterns"]
  unless patterns.is_a?(Hash) && patterns.key?(pattern_key)
    warn "[Alap] Search pattern \"#{pattern_key}\" not found in config.searchPatterns"
    return []
  end

  entry = patterns[pattern_key]
  spec = entry.is_a?(String) ? { "pattern" => entry } : entry

  # Reject entries that cannot be interpreted instead of crashing on them.
  pattern_str = spec.is_a?(Hash) ? (spec["pattern"] || "") : nil
  unless pattern_str.is_a?(String)
    warn "[Alap] Search pattern \"#{pattern_key}\" has an invalid definition in config.searchPatterns"
    return []
  end

  validation = ValidateRegex.call(pattern_str)
  unless validation["safe"]
    warn "[Alap] Unsafe regex \"#{pattern_str}\" in searchPatterns[\"#{pattern_key}\"]: #{validation["reason"]}"
    return []
  end

  begin
    compiled = Regexp.new(pattern_str, Regexp::IGNORECASE)
  rescue RegexpError
    warn "[Alap] Invalid regex \"#{pattern_str}\" in searchPatterns[\"#{pattern_key}\"]"
    return []
  end

  opts = spec["options"]
  opts = {} unless opts.is_a?(Hash)
  fields = parse_field_codes(field_opts || opts["fields"] || "a")

  all_links = @config["allLinks"]
  return [] unless all_links.is_a?(Hash)

  now_ms = (Time.now.to_f * 1000).to_i
  max_age = opts["age"] ? parse_age(opts["age"]) : 0

  # Only a non-negative number is a usable limit; anything else (nil from a
  # JSON null, a string, a negative value) uses the global cap.
  raw_limit = opts["limit"]
  raw_limit = raw_limit.to_i if raw_limit.is_a?(Float) && raw_limit.finite?
  limit =
    if raw_limit.is_a?(Integer) && raw_limit >= 0
      [raw_limit, MAX_SEARCH_RESULTS].min
    else
      MAX_SEARCH_RESULTS
    end

  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  results = []

  all_links.each do |item_id, link|
    next unless link.is_a?(Hash)

    # Timeout guard: stop scanning once the time budget is spent.
    elapsed_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000
    if elapsed_ms > REGEX_TIMEOUT_MS
      warn "[Alap] Regex search /#{pattern_key}/ timed out after #{REGEX_TIMEOUT_MS}ms"
      break
    end

    created_at = link["createdAt"]

    # Age filter: skip items with no usable timestamp or older than max_age.
    if max_age > 0
      ts = to_timestamp(created_at)
      next if ts.zero? || (now_ms - ts) > max_age
    end

    # Field matching
    next unless matches_fields?(compiled, item_id, link, fields)

    ts = created_at ? to_timestamp(created_at) : 0
    results << { "id" => item_id, "createdAt" => ts }
    if results.length >= MAX_SEARCH_RESULTS
      warn "[Alap] Regex search /#{pattern_key}/ hit #{MAX_SEARCH_RESULTS} result cap"
      break
    end
  end

  # Sort (any other value keeps allLinks iteration order)
  case opts["sort"]
  when "alpha"
    results.sort_by! { |r| r["id"] }
  when "newest"
    results.sort_by! { |r| -r["createdAt"] }
  when "oldest"
    results.sort_by! { |r| r["createdAt"] }
  end

  results.first(limit).map { |r| r["id"] }
end

#update_config(config) ⇒ Object



45
46
47
# File 'lib/alap/expression_parser.rb', line 45

# Replaces the configuration the parser reads from on later calls.
#
# @param config [Object] the new configuration to store in @config
# @return [Object] the config that was just assigned
def update_config(config)
  @config = config
end