Module: ParadeDB::SearchMethods

Defined in:
lib/parade_db/search_methods.rb

Overview

SearchMethods extends ActiveRecord::Relation to add ParadeDB full-text search capabilities. This module is mixed into relations via .search() to provide chainable query methods.

Defined Under Namespace

Modules: AggregationRelation, FacetRelation, PredicateInspector. Classes: AggregationQuery, FacetQuery.

Constant Summary collapse

# NOTE(review): not referenced by the methods shown on this page; presumably the
# tokenizers whose text fields are safe to aggregate on — confirm against usage.
AGGREGATE_SAFE_TEXT_TOKENIZERS = ["literal", "literal_normalized"].freeze

# Accepted more-like-this option names (short and long spellings), each mapped
# to its canonical long-form key.
MLT_OPTION_ALIASES = {
  min_term_freq: :min_term_frequency, min_term_frequency: :min_term_frequency,
  max_query_terms: :max_query_terms,
  min_doc_freq: :min_doc_frequency, min_doc_frequency: :min_doc_frequency,
  max_term_freq: :max_term_frequency, max_term_frequency: :max_term_frequency,
  max_doc_freq: :max_doc_frequency, max_doc_frequency: :max_doc_frequency,
  min_word_length: :min_word_length,
  max_word_length: :max_word_length,
  stopwords: :stopwords
}.freeze

# Canonical more-like-this keys that, going by the name, carry integer values.
MLT_INTEGER_OPTION_KEYS = [
  :min_term_frequency, :max_query_terms, :min_doc_frequency, :max_term_frequency,
  :max_doc_frequency, :min_word_length, :max_word_length
].freeze

# Canonical ordering of more-like-this options: the integer keys, then :stopwords.
MLT_OPTION_ORDER = [*MLT_INTEGER_OPTION_KEYS, :stopwords].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#_paradedb_current_fieldObject

Internal state tracking



45
46
47
# File 'lib/parade_db/search_methods.rb', line 45

# Internal state: reads the search field stored in @_paradedb_current_field.
#
# @return [Object, nil] the stored field, or nil when none has been assigned
def _paradedb_current_field
  instance_variable_get(:@_paradedb_current_field)
end

#_paradedb_facet_fieldsObject

Returns the value of attribute _paradedb_facet_fields.



46
47
48
# File 'lib/parade_db/search_methods.rb', line 46

# Internal state: reads the facet field list stored in @_paradedb_facet_fields.
#
# @return [Object, nil] the stored value, or nil when none has been assigned
def _paradedb_facet_fields
  instance_variable_get(:@_paradedb_facet_fields)
end

Instance Method Details

#aggregate_by(*group_fields, exact: nil, **named_aggregations) ⇒ Object

Grouped ParadeDB aggregations:

Product.search(:id).match_all.aggregate_by(:rating, agg: ParadeDB::Aggregations.value_count(:id))


473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/parade_db/search_methods.rb', line 473

# Grouped ParadeDB aggregations. Replaces any existing SELECT/GROUP BY with the
# resolved group columns plus one aliased aggregate per named aggregation.
#
#   Product.search(:id).match_all.aggregate_by(:rating, agg: ParadeDB::Aggregations.value_count(:id))
#
# @param group_fields fields to group by (normalized via normalize_group_fields)
# @param exact checked by validate_exact_option! and forwarded to each rendered aggregation
# @param named_aggregations alias => aggregation spec
# @return the grouped relation
def aggregate_by(*group_fields, exact: nil, **named_aggregations)
  ensure_paradedb_runtime!
  validate_exact_option!(exact)
  grouping_columns = normalize_group_fields(group_fields)
  specs_by_alias = normalize_named_aggregation_specs(named_aggregations)

  # Reuse the relation when it already has a ParadeDB predicate; otherwise add one.
  scoped = has_paradedb_predicate? ? self : ensure_paradedb_predicate

  grouping_nodes = grouping_columns.map { |column| resolve_group_field_node(column) }
  aggregation_nodes = specs_by_alias.map do |alias_name, spec|
    rendered = render_aggregation_node(spec, exact: exact)
    rendered.as(alias_name.to_s)
  end

  projected = scoped.except(:select, :group).select(*grouping_nodes, *aggregation_nodes)
  projected.group(*grouping_nodes)
end

#build_facet_query(fields:, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil) ⇒ Object

Internal method to build facet query (for testing)



405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/parade_db/search_methods.rb', line 405

# Internal method to build a facet query (for testing). Normalizes the facet
# inputs and hands them to FacetQuery.build together with this relation, the
# runtime key field, the builder and the connection.
#
# @return the object returned by FacetQuery.build
def build_facet_query(fields:, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil)
  ensure_paradedb_runtime!
  normalized = normalize_facet_inputs(fields: fields, size: size, order: order, missing: missing, agg: agg)

  query_options = {
    relation: self,
    primary_key: paradedb_runtime_key_field,
    builder: builder,
    fields: normalized[:fields],
    size: normalized[:size],
    order: normalized[:order],
    missing: normalized[:missing],
    agg: normalized[:agg],
    exact: exact,
    connection: connection
  }
  FacetQuery.build(**query_options)
end

#builderObject



94
95
96
97
98
99
# File 'lib/parade_db/search_methods.rb', line 94

# Memoized ParadeDB::Arel::Builder for this relation's table. The runtime check
# runs only when the builder is first created.
#
# @return [ParadeDB::Arel::Builder]
def builder
  return @_paradedb_builder if @_paradedb_builder

  ensure_paradedb_runtime!
  @_paradedb_builder = ParadeDB::Arel::Builder.new(table_name)
end

#ensure_paradedb_predicateObject



494
495
496
497
# File 'lib/parade_db/search_methods.rb', line 494

# Adds a grouped match-all predicate on the runtime key field: the pdb.all()
# sentinel that forces aggregate pushdown.
#
# @return the relation returned by `where`
def ensure_paradedb_predicate
  sentinel = builder.match_all(paradedb_runtime_key_field)
  where(grouped(sentinel))
end

#excluding(*terms) ⇒ Object



171
172
173
174
175
176
# File 'lib/parade_db/search_methods.rb', line 171

# Adds a negated match predicate for the given terms on the current search field.
# Calls require_search_field! first.
#
# @param terms terms forwarded to `builder.match`
# @return the relation returned by `where`
def excluding(*terms)
  require_search_field!

  negated = builder.match(_paradedb_current_field, *terms).not
  where(grouped(negated))
end

#exists(boost: nil, constant_score: nil) ⇒ Object

Exists wrapper to match rows where the indexed field has a value. Use with `.search(:id)` (or another exists-compatible indexed field).



286
287
288
289
290
# File 'lib/parade_db/search_methods.rb', line 286

# Exists wrapper: adds a grouped `builder.exists` predicate on the current search
# field. Calls require_search_field! first.
#
# @param boost forwarded unchanged to the builder
# @param constant_score forwarded unchanged to the builder
# @return the relation returned by `where`
def exists(boost: nil, constant_score: nil)
  require_search_field!

  scoring = { boost: boost, constant_score: constant_score }
  predicate = builder.exists(_paradedb_current_field, **scoring)
  where(grouped(predicate))
end

#facets(*fields, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil) ⇒ Object

---- Facets ----



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
# File 'lib/parade_db/search_methods.rb', line 381

# Builds a facet query for the given fields and executes it immediately.
#
# @param fields facet fields, forwarded to build_facet_query
# @param exact checked by validate_exact_option!; `false` is rejected here
# @raise [ArgumentError] when `exact` is `false`
# @return the result of executing the facet query
def facets(*fields, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil)
  ensure_paradedb_runtime!
  validate_exact_option!(exact)

  # Only an explicit `false` is rejected; `nil` (the default) passes through.
  inexact_requested = exact == false
  if inexact_requested
    raise ArgumentError, "facets(exact: false) requires with_facets so aggregation runs as a window function"
  end

  query = build_facet_query(fields: fields, size: size, order: order, missing: missing, agg: agg, exact: exact)
  query.execute
end

#facets_agg(exact: nil, **named_aggregations) ⇒ Object



398
399
400
401
402
# File 'lib/parade_db/search_methods.rb', line 398

# Normalizes the named aggregations, builds an aggregation query and executes it.
#
# @param exact checked by validate_exact_option! and forwarded to the query
# @param named_aggregations alias => aggregation spec
# @return the result of executing the aggregation query
def facets_agg(exact: nil, **named_aggregations)
  validate_exact_option!(exact)
  specs = normalize_named_aggregation_specs(named_aggregations)
  query = build_aggregation_query(specs, exact: exact)
  query.execute
end

#has_paradedb_predicate?Boolean

Returns:

  • (Boolean)


490
491
492
# File 'lib/parade_db/search_methods.rb', line 490

# Asks PredicateInspector whether this relation has a ParadeDB predicate.
#
# @return [Boolean]
def has_paradedb_predicate?
  inspected_relation = self
  PredicateInspector.relation_has_paradedb_predicate?(inspected_relation)
end

#match_all(boost: nil, constant_score: nil) ⇒ Object

Match-all wrapper for APIs that need an explicit ParadeDB predicate. Use with `.search(:id)` (or any indexed field): `Product.search(:id).match_all`.



278
279
280
281
282
# File 'lib/parade_db/search_methods.rb', line 278

# Match-all wrapper: adds a grouped `builder.match_all` predicate on the current
# search field. Calls require_search_field! first.
#
# @param boost forwarded unchanged to the builder
# @param constant_score forwarded unchanged to the builder
# @return the relation returned by `where`
def match_all(boost: nil, constant_score: nil)
  require_search_field!

  scoring = { boost: boost, constant_score: constant_score }
  predicate = builder.match_all(_paradedb_current_field, **scoring)
  where(grouped(predicate))
end

#matching_all(*terms, tokenizer: nil, distance: nil, prefix: nil, transposition_cost_one: nil, boost: nil, constant_score: nil) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/parade_db/search_methods.rb', line 123

# Adds a grouped `builder.match` predicate for the given terms on the current
# search field. Calls require_search_field! first; every keyword option is
# forwarded unchanged to the builder.
#
# @param terms terms forwarded to `builder.match`
# @return the relation returned by `where`
def matching_all(
  *terms,
  tokenizer: nil,
  distance: nil,
  prefix: nil,
  transposition_cost_one: nil,
  boost: nil,
  constant_score: nil
)
  require_search_field!

  match_options = {
    tokenizer: tokenizer,
    distance: distance,
    prefix: prefix,
    transposition_cost_one: transposition_cost_one,
    boost: boost,
    constant_score: constant_score
  }
  where(grouped(builder.match(_paradedb_current_field, *terms, **match_options)))
end

#matching_any(*terms, tokenizer: nil, distance: nil, prefix: nil, transposition_cost_one: nil, boost: nil, constant_score: nil) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/parade_db/search_methods.rb', line 147

# Adds a grouped `builder.match_any` predicate for the given terms on the current
# search field. Calls require_search_field! first; every keyword option is
# forwarded unchanged to the builder.
#
# @param terms terms forwarded to `builder.match_any`
# @return the relation returned by `where`
def matching_any(
  *terms,
  tokenizer: nil,
  distance: nil,
  prefix: nil,
  transposition_cost_one: nil,
  boost: nil,
  constant_score: nil
)
  require_search_field!

  match_options = {
    tokenizer: tokenizer,
    distance: distance,
    prefix: prefix,
    transposition_cost_one: transposition_cost_one,
    boost: boost,
    constant_score: constant_score
  }
  where(grouped(builder.match_any(_paradedb_current_field, *terms, **match_options)))
end

#more_like_this(key, fields: nil, **options) ⇒ Object



319
320
321
322
323
324
325
326
327
# File 'lib/parade_db/search_methods.rb', line 319

# Adds a grouped `builder.more_like_this` predicate keyed on the runtime key field.
#
# @param key source key, resolved through more_like_this_key_value
# @param fields forwarded unchanged to the builder
# @param options extra options, normalized via normalize_more_like_this_options
# @return the relation returned by `where`
def more_like_this(key, fields: nil, **options)
  ensure_paradedb_runtime!
  key_field = paradedb_runtime_key_field
  source_key = more_like_this_key_value(key, key_field)
  key_node = builder[key_field]
  normalized_options = normalize_more_like_this_options(options)

  where(grouped(builder.more_like_this(key_node, source_key, fields: fields, options: normalized_options)))
end

#near(proximity, boost: nil, const: nil) ⇒ Object



242
243
244
245
246
247
# File 'lib/parade_db/search_methods.rb', line 242

# Adds a grouped `builder.near` predicate on the current search field.
# Calls require_search_field! first.
#
# @param proximity forwarded unchanged to the builder
# @param boost forwarded unchanged to the builder
# @param const forwarded unchanged to the builder
# @return the relation returned by `where`
def near(proximity, boost: nil, const: nil)
  require_search_field!

  scoring = { boost: boost, const: const }
  where(grouped(builder.near(_paradedb_current_field, proximity, **scoring)))
end

#parse(query, lenient: nil, conjunction_mode: nil, boost: nil, constant_score: nil) ⇒ Object

Parse query-string syntax into ParadeDB query AST (e.g. “running AND shoes”).



263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/parade_db/search_methods.rb', line 263

# Parse query-string syntax (e.g. "running AND shoes") through `builder.parse`
# and add the grouped result as a predicate on the current search field.
# Calls require_search_field! first.
#
# @param query query string forwarded to the builder
# @return the relation returned by `where`
def parse(query, lenient: nil, conjunction_mode: nil, boost: nil, constant_score: nil)
  require_search_field!

  parse_options = {
    lenient: lenient,
    conjunction_mode: conjunction_mode,
    boost: boost,
    constant_score: constant_score
  }
  where(grouped(builder.parse(_paradedb_current_field, query, **parse_options)))
end

#phrase(text, slop: nil, tokenizer: nil, boost: nil, constant_score: nil) ⇒ Object



178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/parade_db/search_methods.rb', line 178

# Adds a grouped `builder.phrase` predicate on the current search field.
# Calls require_search_field! first; keyword options go to the builder unchanged.
#
# @param text phrase text forwarded to the builder
# @return the relation returned by `where`
def phrase(text, slop: nil, tokenizer: nil, boost: nil, constant_score: nil)
  require_search_field!

  phrase_options = { slop: slop, tokenizer: tokenizer, boost: boost, constant_score: constant_score }
  where(grouped(builder.phrase(_paradedb_current_field, text, **phrase_options)))
end

#phrase_prefix(*terms, max_expansion: nil, boost: nil, constant_score: nil) ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/parade_db/search_methods.rb', line 249

# Adds a grouped `builder.phrase_prefix` predicate on the current search field.
# Calls require_search_field! first; keyword options go to the builder unchanged.
#
# @param terms terms forwarded to the builder
# @return the relation returned by `where`
def phrase_prefix(*terms, max_expansion: nil, boost: nil, constant_score: nil)
  require_search_field!

  prefix_options = { max_expansion: max_expansion, boost: boost, constant_score: constant_score }
  where(grouped(builder.phrase_prefix(_paradedb_current_field, *terms, **prefix_options)))
end

#primary_keyObject



105
106
107
# File 'lib/parade_db/search_methods.rb', line 105

# The model's primary key, falling back to :id when the model reports none.
#
# @return the value of `klass.primary_key`, or :id
def primary_key
  declared_key = klass.primary_key
  return :id unless declared_key

  declared_key
end

#range(value = nil, gte: nil, gt: nil, lte: nil, lt: nil, type: nil, boost: nil, constant_score: nil) ⇒ Object

Range wrapper for numeric/date/timestamp fields in ParadeDB query context. Examples:

Product.search(:rating).range(3..5)
Product.search(:rating).range(gte: 3, lt: 5)


296
297
298
299
300
301
302
# File 'lib/parade_db/search_methods.rb', line 296

# Range wrapper for numeric/date/timestamp fields in ParadeDB query context.
#
#   Product.search(:rating).range(3..5)
#   Product.search(:rating).range(gte: 3, lt: 5)
#
# Calls require_search_field! first. When `type` is not given, the range type
# comes from default_range_type_for_field for the current field.
#
# @return the relation returned by `where`
def range(value = nil, gte: nil, gt: nil, lte: nil, lt: nil, type: nil, boost: nil, constant_score: nil)
  require_search_field!

  range_kind = type || default_range_type_for_field(_paradedb_current_field)
  range_options = {
    gte: gte, gt: gt, lte: lte, lt: lt,
    type: range_kind, boost: boost, constant_score: constant_score
  }
  where(grouped(builder.range(_paradedb_current_field, value, **range_options)))
end

#range_term(value, relation: nil, range_type: nil, boost: nil, constant_score: nil) ⇒ Object



304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/parade_db/search_methods.rb', line 304

# Adds a grouped `builder.range_term` predicate on the current search field.
# Calls require_search_field! first. An explicit `range_type` wins; otherwise the
# type is inferred via infer_range_type_for_field, but only when `relation` is given.
#
# @return the relation returned by `where`
def range_term(value, relation: nil, range_type: nil, boost: nil, constant_score: nil)
  require_search_field!

  resolved_type = range_type
  resolved_type ||= relation && infer_range_type_for_field(_paradedb_current_field)

  term_options = {
    relation: relation,
    range_type: resolved_type,
    boost: boost,
    constant_score: constant_score
  }
  where(grouped(builder.range_term(_paradedb_current_field, value, **term_options)))
end

#regex(pattern, boost: nil, constant_score: nil) ⇒ Object



192
193
194
195
196
197
# File 'lib/parade_db/search_methods.rb', line 192

# Adds a grouped `builder.regex` predicate on the current search field.
# Calls require_search_field! first.
#
# @param pattern forwarded unchanged to the builder
# @return the relation returned by `where`
def regex(pattern, boost: nil, constant_score: nil)
  require_search_field!

  scoring = { boost: boost, constant_score: constant_score }
  where(grouped(builder.regex(_paradedb_current_field, pattern, **scoring)))
end

#regex_phrase(*patterns, slop: nil, max_expansions: nil, boost: nil, constant_score: nil) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/parade_db/search_methods.rb', line 199

# Adds a grouped `builder.regex_phrase` predicate on the current search field.
# Calls require_search_field! first; keyword options go to the builder unchanged.
#
# @param patterns patterns forwarded to the builder
# @return the relation returned by `where`
def regex_phrase(*patterns, slop: nil, max_expansions: nil, boost: nil, constant_score: nil)
  require_search_field!

  phrase_options = { slop: slop, max_expansions: max_expansions, boost: boost, constant_score: constant_score }
  where(grouped(builder.regex_phrase(_paradedb_current_field, *patterns, **phrase_options)))
end

#search(column) ⇒ Object

---- ParadeDB search entrypoints ----



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/parade_db/search_methods.rb', line 111

# ParadeDB search entrypoint: returns a relation extended with SearchMethods whose
# current field is set to `column`.
#
# Symbols and plain Strings are passed through the model's
# `paradedb_normalize_search_column` hook when the model defines one (private
# hooks included). Anything else, String subclasses included, is stored as given.
#
# @param column the field to search on
# @return the extended relation
def search(column)
  ensure_paradedb_runtime!

  # `instance_of?` rather than `is_a?`: only exact String instances are normalized.
  plain_name = column.is_a?(Symbol) || column.instance_of?(String)
  normalizable = plain_name && klass.respond_to?(:paradedb_normalize_search_column, true)
  field = normalizable ? klass.send(:paradedb_normalize_search_column, column) : column

  scoped = extending(SearchMethods)
  scoped._paradedb_current_field = field
  scoped
end

#table_nameObject



101
102
103
# File 'lib/parade_db/search_methods.rb', line 101

# Table name of the relation's model class.
#
# @return the value of `klass.table_name`
def table_name
  model = klass
  model.table_name
end

#term(value, distance: nil, prefix: nil, transposition_cost_one: nil, boost: nil, constant_score: nil) ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/parade_db/search_methods.rb', line 213

# Adds a grouped `builder.term` predicate on the current search field.
# Calls require_search_field! first; keyword options go to the builder unchanged.
#
# @param value term value forwarded to the builder
# @return the relation returned by `where`
def term(
  value,
  distance: nil,
  prefix: nil,
  transposition_cost_one: nil,
  boost: nil,
  constant_score: nil
)
  require_search_field!

  term_options = {
    distance: distance,
    prefix: prefix,
    transposition_cost_one: transposition_cost_one,
    boost: boost,
    constant_score: constant_score
  }
  where(grouped(builder.term(_paradedb_current_field, value, **term_options)))
end

#term_set(*values, boost: nil, constant_score: nil) ⇒ Object



235
236
237
238
239
240
# File 'lib/parade_db/search_methods.rb', line 235

# Adds a grouped `builder.term_set` predicate on the current search field.
# Calls require_search_field! first.
#
# @param values values forwarded to the builder
# @return the relation returned by `where`
def term_set(*values, boost: nil, constant_score: nil)
  require_search_field!

  scoring = { boost: boost, constant_score: constant_score }
  where(grouped(builder.term_set(_paradedb_current_field, *values, **scoring)))
end

#with_agg(exact: nil, **named_aggregations) ⇒ Object



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
# File 'lib/parade_db/search_methods.rb', line 452

# Adds one window aggregate column ("_<alias>_facet") per named aggregation to
# the relation's SELECT.
#
# The relation is extended with FacetRelation and AggregationRelation, records
# the aggregation aliases as its facet fields, and gets a ParadeDB predicate if
# it has none. Selects all table columns first when nothing is selected yet.
#
# @param exact checked by validate_exact_option! and forwarded to each rendered aggregation
# @param named_aggregations alias => aggregation spec
# @return the relation with the aggregate columns selected
def with_agg(exact: nil, **named_aggregations)
  ensure_paradedb_runtime!
  validate_exact_option!(exact)
  specs_by_alias = normalize_named_aggregation_specs(named_aggregations)

  scoped = extending(FacetRelation, AggregationRelation)
  scoped._paradedb_facet_fields = specs_by_alias.keys
  scoped = scoped.ensure_paradedb_predicate unless scoped.has_paradedb_predicate?

  window_columns = specs_by_alias.map do |alias_name, spec|
    windowed = render_aggregation_node(spec, exact: exact).over
    windowed.as("_#{alias_name}_facet")
  end

  scoped = scoped.select(klass.arel_table[::Arel.star]) if scoped.select_values.empty?
  scoped.select(*window_columns)
end

#with_facets(*fields, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
# File 'lib/parade_db/search_methods.rb', line 422

# Adds one window aggregate column ("_<field>_facet") per facet field to the
# relation's SELECT.
#
# With a custom `agg`, a single "_agg_facet" column is produced from it. Otherwise
# each field gets the JSON from facet_json. The relation is extended with
# FacetRelation, records the facet fields, and gets a ParadeDB predicate if it has
# none. Selects all table columns first when nothing is selected yet.
#
# @param fields facet fields (normalized via normalize_facet_inputs)
# @param exact checked by validate_exact_option! and forwarded to `builder.agg`
# @return the relation with the facet columns selected
def with_facets(*fields, size: 10, order: :count_desc, missing: nil, agg: nil, exact: nil)
  ensure_paradedb_runtime!
  validate_exact_option!(exact)
  normalized = normalize_facet_inputs(fields: fields, size: size, order: order, missing: missing, agg: agg)
  custom_agg = normalized[:agg]
  json_options = {
    size: normalized[:size],
    order: normalized[:order],
    missing: normalized[:missing],
    agg: custom_agg
  }
  facet_names = custom_agg.nil? ? normalized[:fields] : [:agg]

  scoped = extending(FacetRelation)
  scoped._paradedb_facet_fields = facet_names

  # Add pdb.all() if no ParadeDB predicates exist (for aggregate pushdown).
  scoped = scoped.ensure_paradedb_predicate unless scoped.has_paradedb_predicate?

  # Window aggregates are added to SELECT as Arel nodes.
  window_columns = facet_names.map do |name|
    facet_definition = custom_agg || facet_json(name, json_options)
    builder.agg(facet_definition, exact: exact).over.as("_#{name}_facet")
  end

  scoped = scoped.select(klass.arel_table[::Arel.star]) if scoped.select_values.empty?
  scoped.select(*window_columns)
end

#with_scoreObject

---- Decorators ----



331
332
333
# File 'lib/parade_db/search_methods.rb', line 331

# Projects `builder.score` for the runtime key field under the alias "search_score".
#
# @return the result of `with_projection`
def with_score
  score_node = builder.score(paradedb_runtime_key_field)
  with_projection(score_node.as("search_score"))
end

#with_snippet(column, start_tag: nil, end_tag: nil, max_chars: nil) ⇒ Object



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/parade_db/search_methods.rb', line 335

# Projects a highlighted snippet of `column` under the alias "<column>_snippet".
#
# The formatting options are passed to `builder.snippet` positionally, in the
# order start_tag, end_tag, max_chars. Trailing options that were not given are
# omitted from the call. An option skipped in front of a supplied one is filled
# with a default tag ("<b>" / "</b>") so that later options cannot shift into the
# wrong position (for example `end_tag:` alone being sent as the start tag).
# NOTE(review): the fallback tags mirror ParadeDB's documented snippet defaults —
# confirm against the ParadeDB version in use.
#
# @param column column to build the snippet from
# @param start_tag [String, nil] opening highlight tag
# @param end_tag [String, nil] closing highlight tag
# @param max_chars [Integer, String, nil] converted with Integer() when given
# @raise [ArgumentError, TypeError] when max_chars cannot be converted by Integer()
# @return the result of `with_projection`
def with_snippet(column, start_tag: nil, end_tag: nil, max_chars: nil)
  char_limit = Integer(max_chars) unless max_chars.nil?

  positional = [start_tag, end_tag, char_limit]
  # Drop trailing options the caller did not supply.
  positional.pop while !positional.empty? && positional.last.nil?

  # Any nil left is a skipped tag in front of a supplied option (index 0 or 1).
  fallback_tags = ["<b>", "</b>"]
  positional = positional.each_with_index.map do |value, index|
    value.nil? ? fallback_tags[index] : value
  end

  with_projection(builder.snippet(column, *positional).as("#{column}_snippet"))
end

#with_snippet_positions(column, as: nil) ⇒ Object



374
375
376
377
# File 'lib/parade_db/search_methods.rb', line 374

# Projects `builder.snippet_positions` for `column`. The alias comes from
# normalize_projection_alias, with "<column>_snippet_positions" as the default.
#
# @param column column to compute snippet positions for
# @param as optional projection alias
# @return the result of `with_projection`
def with_snippet_positions(column, as: nil)
  positions_node = builder.snippet_positions(column)
  projection_name = normalize_projection_alias(as, "#{column}_snippet_positions")
  with_projection(positions_node.as(projection_name))
end

#with_snippets(column, start_tag: nil, end_tag: nil, max_chars: nil, limit: nil, offset: nil, sort_by: nil, as: nil) ⇒ Object



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/parade_db/search_methods.rb', line 351

# Projects `builder.snippets` for `column`. The alias comes from
# normalize_projection_alias, with "<column>_snippets" as the default.
#
# max_chars, limit and offset pass through normalize_integer_option!, and sort_by
# through normalize_snippets_sort_by, before reaching the builder. The tags are
# forwarded unchanged.
#
# @param column column to build snippets from
# @param as optional projection alias
# @return the result of `with_projection`
def with_snippets(
  column,
  start_tag: nil,
  end_tag: nil,
  max_chars: nil,
  limit: nil,
  offset: nil,
  sort_by: nil,
  as: nil
)
  # Resolve the builder before normalizing, matching the original evaluation order.
  snippet_builder = builder
  snippet_options = {
    start_tag: start_tag,
    end_tag: end_tag,
    max_num_chars: normalize_integer_option!(max_chars, "max_chars"),
    limit: normalize_integer_option!(limit, "limit"),
    offset: normalize_integer_option!(offset, "offset"),
    sort_by: normalize_snippets_sort_by(sort_by)
  }
  snippets_node = snippet_builder.snippets(column, **snippet_options)

  projection_name = normalize_projection_alias(as, "#{column}_snippets")
  with_projection(snippets_node.as(projection_name))
end