Class: Noiseless::QueryBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/noiseless/query_builder.rb

Instance Method Summary collapse

Constructor Details

#initialize(model) ⇒ QueryBuilder

Returns a new instance of QueryBuilder.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/noiseless/query_builder.rb', line 5

# Builds an empty query for +model+.
#
# @param model [Object] the searchable model; it is stored as-is and also
#   handed to +determine_indexes+ (defined elsewhere in this class) to
#   compute the initial index list
def initialize(model)
  @model = model
  @indexes = determine_indexes(model)

  # Collections that builder calls append to.
  @nodes = []
  @aggregations = []
  @joins = []

  # Optional single-valued settings; nil until the matching builder
  # method is called.
  @collapse = nil
  @search_after = nil
  @hybrid = nil
  @pipeline = nil
  @image_query = nil
  @conversation = nil
  @remove_duplicates = nil
  @facet_sample_slope = nil
  @pinned_hits = nil
end

Instance Method Details

#aggregation(name, type, field: nil) ⇒ Object Also known as: agg



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/noiseless/query_builder.rb', line 86

# Registers an aggregation on the query.
#
# When a block is given it is evaluated against a fresh
# AST::AggregationBuilder, and whatever that builder collected becomes the
# sub-aggregations of the new node. Any extra keyword arguments are passed
# straight through to AST::Aggregation.
#
# @param name [Object] aggregation name
# @param type [Object] aggregation type
# @param field [Object, nil] field the aggregation runs on
# @return [self] for chaining
def aggregation(name, type, field: nil, **, &)
  builder = AST::AggregationBuilder.new if block_given?
  builder&.instance_eval(&)
  nested = builder ? builder.aggregations : []

  node = AST::Aggregation.new(name, type, field: field, sub_aggregations: nested, **)
  @aggregations.push(node)
  self
end

#collapse(field, inner_hits: nil, max_concurrent_group_searches: nil) ⇒ Object



100
101
102
103
104
# File 'lib/noiseless/query_builder.rb', line 100

# Sets the collapse clause for the query, replacing any previous one.
#
# @param field [Object] field to collapse results on
# @param inner_hits [Object, nil] forwarded to AST::Collapse
# @param max_concurrent_group_searches [Object, nil] forwarded to AST::Collapse
# @return [self] for chaining
def collapse(field, inner_hits: nil, max_concurrent_group_searches: nil)
  settings = {
    inner_hits: inner_hits,
    max_concurrent_group_searches: max_concurrent_group_searches
  }
  @collapse = AST::Collapse.new(field, **settings)
  self
end

#combined_fields(query, fields, operator: nil, minimum_should_match: nil) ⇒ Object



111
112
113
114
115
# File 'lib/noiseless/query_builder.rb', line 111

# Appends a combined-fields query node.
#
# Extra keyword arguments are forwarded unchanged to AST::CombinedFields.
#
# @param query [Object] the query text
# @param fields [Object] the fields to search across
# @param operator [Object, nil] forwarded to the node
# @param minimum_should_match [Object, nil] forwarded to the node
# @return [self] for chaining
def combined_fields(query, fields, operator: nil, minimum_should_match: nil, **)
  node = AST::CombinedFields.new(query, fields,
                                 operator: operator,
                                 minimum_should_match: minimum_should_match,
                                 **)
  @nodes.push(node)
  self
end

#conversational(model_id:, conversation_id: nil, system_prompt: nil) ⇒ Object Also known as: rag

Conversational/RAG search (Typesense and Elasticsearch)

Parameters:

  • model_id (String)

    The LLM model identifier

  • conversation_id (String, nil) (defaults to: nil)

    ID for multi-turn conversations

  • system_prompt (String, nil) (defaults to: nil)

    Custom system prompt



175
176
177
178
179
180
181
182
# File 'lib/noiseless/query_builder.rb', line 175

# Configures conversational (RAG) search for this query, replacing any
# previous conversation settings.
#
# @param model_id [String] the LLM model identifier
# @param conversation_id [String, nil] ID for multi-turn conversations
# @param system_prompt [String, nil] custom system prompt
# @return [self] for chaining
def conversational(model_id:, conversation_id: nil, system_prompt: nil)
  settings = {
    model_id: model_id,
    conversation_id: conversation_id,
    system_prompt: system_prompt
  }
  @conversation = AST::Conversation.new(**settings)
  self
end

#facet_sample_slope(value) ⇒ Object

Controls dynamic facet sampling behavior in Typesense.



207
208
209
210
# File 'lib/noiseless/query_builder.rb', line 207

# Stores the facet sampling slope (a Typesense setting) unchanged; it is
# emitted later by #to_ast.
#
# @param value [Object] the slope value, stored without validation
# @return [self] for chaining
def facet_sample_slope(value)
  tap { @facet_sample_slope = value }
end

#filter(field, value) ⇒ Object Also known as: where



52
53
54
55
# File 'lib/noiseless/query_builder.rb', line 52

# Appends a filter node. #to_ast places filter nodes in the bool query's
# +filter+ list rather than in +must+.
#
# @param field [Object] field to filter on
# @param value [Object] value the field is filtered by
# @return [self] for chaining
def filter(field, value)
  node = AST::Filter.new(field, value)
  @nodes.push(node)
  self
end

#geo_distance(field, lat:, lon:, distance:, **options) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
# File 'lib/noiseless/query_builder.rb', line 117

# Appends a geo-distance filter, expressed as an AST::Filter whose value is
# a +geo_distance+ hash.
#
# The hash holds +distance+, the point keyed by the field name (as a
# symbol), and then any extra options merged on top, so an option with a
# clashing key overrides the built-in entry.
#
# @param field [Object] the geo field; its string form becomes the point key
# @param lat [Object] latitude of the centre point
# @param lon [Object] longitude of the centre point
# @param distance [Object] radius around the centre point
# @param options [Hash] extra entries merged into the +geo_distance+ hash
# @return [self] for chaining
def geo_distance(field, lat:, lon:, distance:, **options)
  centre = { lat: lat, lon: lon }
  clause = { distance: distance, :"#{field}" => centre }.merge(options)

  @nodes.push(AST::Filter.new(field, { geo_distance: clause }))
  self
end

#hybrid(text_query, embedding, field:, text_weight: 0.5, vector_weight: 0.5, k: 10) ⇒ Object

Hybrid search combining text query with vector search

Parameters:

  • text_query (String)

    The text query for BM25 matching

  • embedding (Array<Float>)

    The query embedding vector

  • field (Symbol)

    The embedding field name

  • text_weight (Float) (defaults to: 0.5)

    Weight for text search score (default: 0.5)

  • vector_weight (Float) (defaults to: 0.5)

    Weight for vector search score (default: 0.5)

  • k (Integer) (defaults to: 10)

    Number of nearest neighbors (default: 10)



149
150
151
152
153
# File 'lib/noiseless/query_builder.rb', line 149

# Configures hybrid search: a text query combined with a vector search,
# each with its own weight. Replaces any previous hybrid settings.
#
# @param text_query [String] the text query for BM25 matching
# @param embedding [Array<Float>] the query embedding vector
# @param field [Symbol] the embedding field name
# @param text_weight [Float] weight for the text search score
# @param vector_weight [Float] weight for the vector search score
# @param k [Integer] number of nearest neighbors
# @return [self] for chaining
def hybrid(text_query, embedding, field:, text_weight: 0.5, vector_weight: 0.5, k: 10)
  knn_part = AST::Vector.new(field, embedding, k: k)
  weights = { text_weight: text_weight, vector_weight: vector_weight }

  @hybrid = AST::Hybrid.new(text_query, knn_part, **weights)
  self
end

#image_search(field, image_data, k: 10) ⇒ Object

Image search using visual similarity (Typesense only)

Parameters:

  • field (Symbol)

    The image embedding field name

  • image_data (String)

    Image URL or base64 encoded image

  • k (Integer) (defaults to: 10)

    Number of nearest neighbors (default: 10)



166
167
168
169
# File 'lib/noiseless/query_builder.rb', line 166

# Configures an image-similarity query, replacing any previous one.
#
# @param field [Symbol] the image embedding field name
# @param image_data [String] image URL or base64 encoded image
# @param k [Integer] number of nearest neighbors
# @return [self] for chaining
def image_search(field, image_data, k: 10)
  query = AST::ImageQuery.new(field, image_data, k: k)
  @image_query = query
  self
end

#indexes(names) ⇒ Object



22
23
24
25
# File 'lib/noiseless/query_builder.rb', line 22

# Overrides the list of indexes the query targets.
#
# @param names [Object] a single name or a collection of names; it is
#   wrapped with +Array()+ and every entry is converted with +to_s+
# @return [self] for chaining
def indexes(names)
  index_names = Array(names).map(&:to_s)
  @indexes = index_names
  self
end

#join(collection, on:, include_fields: [], strategy: :left) ⇒ Object

Join with another collection (Typesense only)

Parameters:

  • collection (String, Symbol)

    The collection to join

  • on (Hash)

    Join conditions

  • include_fields (Array) (defaults to: [])

    Fields to include from joined collection

  • strategy (Symbol) (defaults to: :left)

    Join strategy :left or :inner



191
192
193
194
# File 'lib/noiseless/query_builder.rb', line 191

# Adds a join with another collection. Joins accumulate: each call appends
# one AST::Join.
#
# @param collection [String, Symbol] the collection to join
# @param on [Hash] join conditions
# @param include_fields [Array] fields to include from the joined collection
# @param strategy [Symbol] join strategy, :left or :inner
# @return [self] for chaining
def join(collection, on:, include_fields: [], strategy: :left)
  settings = { on: on, include_fields: include_fields, strategy: strategy }
  @joins.push(AST::Join.new(collection, **settings))
  self
end

#limit(size) ⇒ Object



71
72
73
74
# File 'lib/noiseless/query_builder.rb', line 71

# Sets the page size of the query.
#
# #to_ast only reads the first AST::Paginate node, so simply appending a
# second node (the previous behaviour) meant a #limit called after
# #paginate or #offset was silently ignored. The existing node is now
# replaced. When one exists, the page number is recomputed so the query
# keeps starting at the same row (rounded down to a page boundary of the
# new size).
#
# NOTE(review): this reads AST::Paginate#page. Only #per_page is visibly
# used elsewhere in this class, so confirm the +page+ reader exists.
#
# @param size [Integer] number of results per page
# @return [self] for chaining
def limit(size)
  current = @nodes.find { |n| n.is_a?(AST::Paginate) }

  page = 1
  # Only translate the previous position when the new size can be divided
  # by; limit(0) and non-integer sizes keep page 1 as before.
  if current && size.is_a?(Integer) && size.positive?
    skipped = [(current.page.to_i - 1) * current.per_page.to_i, 0].max
    page = (skipped / size) + 1
  end

  @nodes.reject! { |n| n.is_a?(AST::Paginate) }
  @nodes << AST::Paginate.new(page, size)
  self
end

#match(field, value) ⇒ Object



27
28
29
30
# File 'lib/noiseless/query_builder.rb', line 27

# Appends a match query node for +field+.
#
# @param field [Object] field to match against
# @param value [Object] value to match
# @return [self] for chaining
def match(field, value)
  node = AST::Match.new(field, value)
  @nodes.push(node)
  self
end

#multi_match(query, fields) ⇒ Object



32
33
34
35
# File 'lib/noiseless/query_builder.rb', line 32

# Appends a multi-field match node. Any keyword arguments are forwarded
# unchanged to AST::MultiMatch.
#
# @param query [Object] the query text
# @param fields [Object] the fields to search
# @return [self] for chaining
def multi_match(query, fields, **)
  node = AST::MultiMatch.new(query, fields, **)
  @nodes.push(node)
  self
end

#offset(from) ⇒ Object



76
77
78
79
80
81
82
83
84
# File 'lib/noiseless/query_builder.rb', line 76

# Positions the query at a row offset by converting it to a page number.
#
# The page size is taken from the first existing AST::Paginate node, or 20
# when there is none. All existing Paginate nodes are then replaced by a
# single node for the computed page. With integer arguments the division
# truncates, so an offset that is not a multiple of the page size lands on
# the start of the page containing it.
#
# A page size of zero (e.g. after +limit(0)+) used to raise
# ZeroDivisionError; it now resolves to page 1.
#
# @param from [Integer] number of rows to skip
# @return [self] for chaining
def offset(from)
  existing_paginate = @nodes.find { |n| n.is_a?(AST::Paginate) }
  per_page = existing_paginate&.per_page || 20

  # No rows per page means there is no meaningful page to jump to.
  page = per_page.zero? ? 1 : (from / per_page) + 1

  @nodes.reject! { |n| n.is_a?(AST::Paginate) }
  @nodes << AST::Paginate.new(page, per_page)
  self
end

#paginate(page: 1, per_page: 20) ⇒ Object



66
67
68
69
# File 'lib/noiseless/query_builder.rb', line 66

# Sets the page and page size of the query.
#
# #to_ast only reads the first AST::Paginate node, so appending another
# node (the previous behaviour) meant a repeated call was silently
# ignored. Existing Paginate nodes are now dropped first so the most
# recent call wins, matching how #offset already replaces them.
#
# @param page [Integer] 1-based page number
# @param per_page [Integer] number of results per page
# @return [self] for chaining
def paginate(page: 1, per_page: 20)
  @nodes.reject! { |n| n.is_a?(AST::Paginate) }
  @nodes << AST::Paginate.new(page, per_page)
  self
end

#pinned_hits(value) ⇒ Object

Pin specific document IDs to fixed result positions in Typesense.

Supported formats:

  • String: "id1:1,id2:2"

  • Hash: { "id1" => 1, "id2" => 2 }

  • Array of pairs: [["id1", 1], ["id2", 2]]



218
219
220
221
# File 'lib/noiseless/query_builder.rb', line 218

# Pins specific document IDs to fixed result positions.
#
# The value is passed through +normalize_pinned_hits+ (defined elsewhere
# in this class) before being stored. The rendered docs list String, Hash
# and array-of-pairs inputs as supported.
#
# @param value [String, Hash, Array] the pin specification
# @return [self] for chaining
def pinned_hits(value)
  tap { @pinned_hits = normalize_pinned_hits(value) }
end

#pipeline(pipeline_name) ⇒ Object

Apply a search pipeline (OpenSearch only)

Parameters:

  • pipeline_name (String)

    Name of the search pipeline to use



157
158
159
160
# File 'lib/noiseless/query_builder.rb', line 157

# Selects the search pipeline to apply to the query; the name is stored
# unchanged and emitted by #to_ast.
#
# @param pipeline_name [String] name of the search pipeline to use
# @return [self] for chaining
def pipeline(pipeline_name)
  tap { @pipeline = pipeline_name }
end

#prefix(field, value) ⇒ Object



47
48
49
50
# File 'lib/noiseless/query_builder.rb', line 47

# Appends a prefix query node for +field+.
#
# @param field [Object] field to search
# @param value [Object] prefix to look for
# @return [self] for chaining
def prefix(field, value)
  node = AST::Prefix.new(field, value)
  @nodes.push(node)
  self
end

#range(field, gte: nil, lte: nil, gt: nil, lt: nil) ⇒ Object



42
43
44
45
# File 'lib/noiseless/query_builder.rb', line 42

# Appends a range query node for +field+. Bounds that are not given are
# passed on as nil.
#
# @param field [Object] field to constrain
# @param gte [Object, nil] inclusive lower bound
# @param lte [Object, nil] inclusive upper bound
# @param gt [Object, nil] exclusive lower bound
# @param lt [Object, nil] exclusive upper bound
# @return [self] for chaining
def range(field, gte: nil, lte: nil, gt: nil, lt: nil)
  bounds = { gte: gte, lte: lte, gt: gt, lt: lt }
  @nodes.push(AST::Range.new(field, **bounds))
  self
end

#remove_duplicates(value: true) ⇒ Object

Remove duplicate documents in Typesense union search results.



197
198
199
200
201
202
203
204
# File 'lib/noiseless/query_builder.rb', line 197

# Controls removal of duplicate documents (a Typesense union-search
# setting).
#
# The stored flag is tri-state: nil stays nil (setting left unset), false
# stays false, and any other value is coerced to true.
#
# @param value [Object, nil] desired setting
# @return [self] for chaining
def remove_duplicates(value: true)
  @remove_duplicates =
    case value
    when nil then nil
    when false then false
    else true
    end
  self
end

#search_after(values) ⇒ Object



106
107
108
109
# File 'lib/noiseless/query_builder.rb', line 106

# Sets the search-after cursor for the query, replacing any previous one.
#
# @param values [Object] cursor values, wrapped in an AST::SearchAfter
# @return [self] for chaining
def search_after(values)
  cursor = AST::SearchAfter.new(values)
  @search_after = cursor
  self
end

#sort(field, dir = :asc) ⇒ Object Also known as: order



59
60
61
62
# File 'lib/noiseless/query_builder.rb', line 59

# Appends a sort node. Sort nodes are collected, in call order, into the
# +sort+ list by #to_ast.
#
# @param field [Object] field to sort by
# @param dir [Symbol] sort direction, +:asc+ by default
# @return [self] for chaining
def sort(field, dir = :asc)
  node = AST::Sort.new(field, dir)
  @nodes.push(node)
  self
end

#to_ast ⇒ Object



223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/noiseless/query_builder.rb', line 223

# Assembles everything collected so far into an AST::Root.
#
# The accumulated nodes are split by type:
# * Filter nodes go into the bool query's +filter+ list;
# * Sort nodes become the +sort+ list;
# * the first Paginate node and the first Vector node are used (later ones
#   are ignored);
# * every remaining node goes into the bool query's +must+ list.
#
# All other settings are passed through from their instance variables.
#
# @return [AST::Root]
def to_ast
  # Node types that are routed elsewhere and therefore excluded from +must+.
  routed_kinds = [AST::Filter, AST::Sort, AST::Paginate, AST::Vector]
  query_clauses = @nodes.reject { |node| routed_kinds.any? { |kind| node.is_a?(kind) } }

  AST::Root.new(
    indexes: @indexes,
    bool: AST::Bool.new(must: query_clauses, filter: @nodes.grep(AST::Filter)),
    sort: @nodes.grep(AST::Sort),
    paginate: @nodes.find { |node| node.is_a?(AST::Paginate) },
    # Only one vector search per query is supported for now.
    vector: @nodes.find { |node| node.is_a?(AST::Vector) },
    collapse: @collapse,
    search_after: @search_after,
    aggregations: @aggregations,
    hybrid: @hybrid,
    pipeline: @pipeline,
    image_query: @image_query,
    conversation: @conversation,
    joins: @joins,
    remove_duplicates: @remove_duplicates,
    facet_sample_slope: @facet_sample_slope,
    pinned_hits: @pinned_hits
  )
end

#vector(field, embedding, k: 10, distance_metric: :cosine) ⇒ Object Also known as: knn, semantic_search

Vector/semantic search using embeddings (pgvector or OpenSearch knn)

Parameters:

  • field (Symbol)

    The embedding column/field

  • embedding (Array<Float>)

    The query embedding vector

  • k (Integer) (defaults to: 10)

    Number of nearest neighbors (default: 10)

  • distance_metric (Symbol) (defaults to: :cosine)

    :cosine, :l2, or :inner_product



134
135
136
137
# File 'lib/noiseless/query_builder.rb', line 134

# Appends a vector (semantic) search node. #to_ast only uses the first
# vector node of a query.
#
# @param field [Symbol] the embedding column/field
# @param embedding [Array<Float>] the query embedding vector
# @param k [Integer] number of nearest neighbors
# @param distance_metric [Symbol] :cosine, :l2, or :inner_product
# @return [self] for chaining
def vector(field, embedding, k: 10, distance_metric: :cosine)
  settings = { k: k, distance_metric: distance_metric }
  @nodes.push(AST::Vector.new(field, embedding, **settings))
  self
end

#wildcard(field, value) ⇒ Object



37
38
39
40
# File 'lib/noiseless/query_builder.rb', line 37

# Appends a wildcard query node for +field+.
#
# @param field [Object] field to search
# @param value [Object] wildcard pattern
# @return [self] for chaining
def wildcard(field, value)
  node = AST::Wildcard.new(field, value)
  @nodes.push(node)
  self
end