Module: HTM::LongTermMemory::RelevanceScorer
- Included in:
- HTM::LongTermMemory
- Defined in:
- lib/htm/long_term_memory/relevance_scorer.rb
Overview
Relevance scoring for search results
Combines multiple signals to calculate dynamic relevance:
- Vector similarity (semantic match) - config.relevance_semantic_weight (default: 0.5)
- Tag overlap (categorical match) - config.relevance_tag_weight (default: 0.3)
- Recency (freshness) - config.relevance_recency_weight (default: 0.1)
- Access frequency (popularity/utility) - config.relevance_access_weight (default: 0.1)
Recency decay uses configurable half-life: config.relevance_recency_half_life_hours (default: 168 = 1 week)
Also provides tag similarity calculations using hierarchical Jaccard.
Constant Summary collapse
- DEFAULT_NEUTRAL_SCORE =
Default score when signal is unavailable
0.5
- ACCESS_SCORE_NORMALIZER =
Access frequency normalization
10.0
- RELEVANCE_SCALE =
Final score scaling
10.0
- RELEVANCE_MIN =
0.0
- RELEVANCE_MAX =
10.0
Instance Method Summary collapse
-
#calculate_relevance(node:, query_tags: [], vector_similarity: nil, node_tags: nil) ⇒ Float
Calculate dynamic relevance score for a node given query context.
-
#fetch_candidates_by_timeframe(timeframe:, metadata:, limit:) ⇒ Array<Hash>
Fetch candidates by timeframe using raw SQL (avoids ORM overhead).
- #recency_half_life_hours ⇒ Object
-
#search_by_tags(tags:, match_all: false, timeframe: nil, limit: 20) ⇒ Array<Hash>
Search nodes by tags.
-
#search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, metadata: {}) ⇒ Array<Hash>
Search with dynamic relevance scoring.
- #weight_access ⇒ Object
- #weight_recency ⇒ Object
-
#weight_semantic ⇒ Object
Configurable scoring weights (via HTM.configuration).
- #weight_tag ⇒ Object
Instance Method Details
#calculate_relevance(node:, query_tags: [], vector_similarity: nil, node_tags: nil) ⇒ Float
Calculate dynamic relevance score for a node given query context
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 58

# Calculate dynamic relevance score for a node given query context.
#
# Blends four signals, each multiplied by its configured weight, then scales
# the sum by RELEVANCE_SCALE and clamps it to RELEVANCE_MIN..RELEVANCE_MAX:
#
# 1. semantic - the supplied vector similarity, else node['similarity'],
#    else DEFAULT_NEUTRAL_SCORE
# 2. tag      - weighted_hierarchical_jaccard(query_tags, node_tags) when both
#    lists are non-empty, else DEFAULT_NEUTRAL_SCORE
# 3. recency  - true half-life decay: 1.0 for a brand-new node, 0.5 once the
#    node is recency_half_life_hours old, 0.25 at twice that, and so on
# 4. access   - log(1 + access_count) / ACCESS_SCORE_NORMALIZER
#
# @param node [Hash] node row with string keys; reads 'similarity', 'id',
#   'created_at' and 'access_count'
# @param query_tags [Array] tags describing the query
# @param vector_similarity [Float, nil] pre-computed similarity; takes
#   precedence over node['similarity'] when given
# @param node_tags [Array, nil] pre-loaded tags for the node; looked up by
#   node id when nil
# @return [Float] relevance within RELEVANCE_MIN..RELEVANCE_MAX
def calculate_relevance(node:, query_tags: [], vector_similarity: nil, node_tags: nil)
  # 1. Vector similarity (semantic match)
  semantic_score =
    if vector_similarity
      vector_similarity
    elsif node['similarity']
      node['similarity'].to_f
    else
      DEFAULT_NEUTRAL_SCORE # Neutral if no embedding
    end

  # 2. Tag overlap (categorical relevance)
  # Use pre-loaded tags if provided, otherwise fetch (for backward compatibility).
  # NOTE(review): the fallback helper's name was missing from the rendered
  # listing; `get_node_tags` is a reconstruction - confirm against the module
  # that provides per-node tag lookup.
  node_tags ||= get_node_tags(node['id'])
  tag_score =
    if query_tags.any? && node_tags.any?
      weighted_hierarchical_jaccard(query_tags, node_tags)
    else
      DEFAULT_NEUTRAL_SCORE # Neutral if no tags
    end

  # 3. Recency (temporal relevance) - exponential decay with a real half-life.
  # A missing or unparseable timestamp is treated as "signal unavailable"
  # instead of raising out of the scorer.
  created_at = node['created_at']
  created_time =
    if created_at.is_a?(Time)
      created_at
    else
      begin
        Time.parse(created_at.to_s)
      rescue ArgumentError
        nil
      end
    end

  recency_score =
    if created_time
      # Clamp at zero so clock skew / future timestamps cannot score above 1.0.
      age_hours = [(Time.now - created_time) / 3600.0, 0.0].max
      # 0.5 ** (age / half_life) halves the score every half-life;
      # Math.exp(-age / half_life) would only reach ~0.37 at that point.
      0.5**(age_hours / recency_half_life_hours)
    else
      DEFAULT_NEUTRAL_SCORE
    end

  # 4. Access frequency (behavioral signal) - log-normalized.
  # to_i tolerates nil and numeric strings; the floor keeps Math.log in its domain.
  access_count = [node['access_count'].to_i, 0].max
  access_score = Math.log(1 + access_count) / ACCESS_SCORE_NORMALIZER

  # Weighted composite with final scaling
  relevance = (
    (semantic_score * weight_semantic) +
    (tag_score * weight_tag) +
    (recency_score * weight_recency) +
    (access_score * weight_access)
  ) * RELEVANCE_SCALE

  relevance.clamp(RELEVANCE_MIN, RELEVANCE_MAX)
end
#fetch_candidates_by_timeframe(timeframe:, metadata:, limit:) ⇒ Array<Hash>
Fetch candidates by timeframe using raw SQL (avoids ORM overhead)
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# Loads scoring candidates straight from the `nodes` table with a raw SQL query.
#
# Visible behavior: starts from the condition `deleted_at IS NULL`, adds the
# fragment returned by HTM::SqlBuilder.timeframe_condition(timeframe) when that
# call returns something truthy, selects id, content, access_count, created_at
# and token_count, orders by created_at DESC, binds `limit` to the `LIMIT ?`
# placeholder, and returns every row with its keys converted to strings.
#
# @param timeframe passed unchanged to HTM::SqlBuilder.timeframe_condition
#   (accepted shapes are not visible in this listing)
# @param metadata presumably a metadata filter turned into a SQL condition -
#   its use is not recoverable from this listing (see note below)
# @param limit bound to the `LIMIT ?` placeholder
# @return [Array<Hash>] one string-keyed hash per matching row
#
# NOTE(review): the rendered listing below has lost identifiers: the local
# assigned from `HTM::SqlBuilder.()`, that method's name and argument, and the
# operand and condition of the second `conditions << if`. They look like the
# metadata counterpart of the timeframe condition - confirm against
# lib/htm/long_term_memory/relevance_scorer.rb before relying on this text.
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 162 def fetch_candidates_by_timeframe(timeframe:, metadata:, limit:) timeframe_condition = HTM::SqlBuilder.timeframe_condition(timeframe) = HTM::SqlBuilder.() conditions = ['deleted_at IS NULL'] conditions << timeframe_condition if timeframe_condition conditions << if sql = <<~SQL SELECT id, content, access_count, created_at, token_count FROM nodes WHERE #{conditions.join(' AND ')} ORDER BY created_at DESC LIMIT ? SQL HTM.db.fetch(sql, limit).all.map { |r| r.transform_keys(&:to_s) } end |
#recency_half_life_hours ⇒ Object
46 47 48 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 46

# Half-life, in hours, used by the recency component of the relevance score.
#
# @return [Object] the value of HTM.configuration.relevance_recency_half_life_hours
#   (documented default: 168, i.e. one week)
def recency_half_life_hours
  settings = HTM.configuration
  settings.relevance_recency_half_life_hours
end
#search_by_tags(tags:, match_all: false, timeframe: nil, limit: 20) ⇒ Array<Hash>
Search nodes by tags
189 190 191 192 193 194 |
# Search nodes by tags and attach relevance information to the matches.
#
# Visible behavior: returns an empty array immediately when the guarded
# collection is empty; otherwise delegates the lookup to a helper, forwarding
# match_all, timeframe and limit unchanged, and passes the resulting nodes to
# enrich_nodes_with_relevance together with a `query_tags:` argument.
#
# @param tags tags to search for (per the signature in the page heading)
# @param match_all forwarded to the lookup helper; defaults to false
# @param timeframe forwarded to the lookup helper; defaults to nil
# @param limit forwarded to the lookup helper; defaults to 20
# @return [Array<Hash>] whatever enrich_nodes_with_relevance returns, or []
#
# NOTE(review): the rendered listing below has lost identifiers: the method
# name after `def`, the receiver of `.empty?`, the lookup helper's name and
# first argument, and the value given to `query_tags:`. The heading shows the
# method is `search_by_tags` and the blanks are presumably `tags`; the helper's
# name cannot be recovered from this page - confirm against
# lib/htm/long_term_memory/relevance_scorer.rb.
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 189 def (tags:, match_all: false, timeframe: nil, limit: 20) return [] if .empty? nodes = (, match_all: match_all, timeframe: timeframe, limit: limit) enrich_nodes_with_relevance(nodes, query_tags: ) end |
#search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, metadata: {}) ⇒ Array<Hash>
Search with dynamic relevance scoring
Returns nodes with calculated relevance scores based on query context
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# Search with dynamic relevance scoring and return the top `limit` nodes.
#
# Visible behavior:
# 1. Picks a candidate source: search_uncached when `query` and a second
#    (stripped) operand are both truthy, search_fulltext_uncached when only
#    `query` is given, otherwise fetch_candidates_by_timeframe. Every source is
#    asked for `limit * 2` rows so re-ranking has more to choose from than it returns.
# 2. Calls normalize_scores_batch(candidates) before scoring.
# 3. Loads tags for all candidate ids in one call rather than per node.
# 4. Scores each candidate with calculate_relevance and writes 'relevance' and
#    'tags' into the candidate hash itself (the hashes are mutated in place).
# 5. Sorts by descending 'relevance' and keeps the first `limit` entries.
#
# @param timeframe forwarded to whichever candidate source is used
# @param query search text; nil selects the time-range-only path
# @param query_tags tags for the query; defaults to []
# @param limit number of results returned; defaults to 20
# @param embedding_service defaults to nil; presumably the stripped operand
#   that enables the vector-search branch - confirm
# @param metadata defaults to {}; forwarded to the candidate source
# @return [Array<Hash>] candidate hashes, each with 'relevance' and 'tags' set
#
# NOTE(review): the rendered listing below has lost identifiers: the second
# operand of `if query &&`, the values after `embedding_service:` and
# `metadata:`, the batch tag-loading call and the local it assigns, the
# per-node tag lookup, and the values after `query_tags:`, `node_tags:` and
# `node['tags'] =`. Confirm each against
# lib/htm/long_term_memory/relevance_scorer.rb before relying on this text.
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 108 def search_with_relevance(timeframe:, query: nil, query_tags: [], limit: 20, embedding_service: nil, metadata: {}) # Get candidates from appropriate search method candidates = if query && # Vector search (returns hashes directly) search_uncached(timeframe: timeframe, query: query, limit: limit * 2, embedding_service: , metadata: ) elsif query # Full-text search (returns hashes directly) search_fulltext_uncached(timeframe: timeframe, query: query, limit: limit * 2, metadata: ) else # Time-range only - use raw SQL to avoid ORM object instantiation # This is more efficient than .map(&:attributes) which creates intermediate objects fetch_candidates_by_timeframe(timeframe: timeframe, metadata: , limit: limit * 2) end # Normalize similarity and text_rank to [0,1] across all candidates # before scoring so weighted sum is unbiased (ts_rank is unbounded, # similarity is already [0,1] but may be narrow) normalize_scores_batch(candidates) # Batch load all tags for candidates (fixes N+1 query) node_ids = candidates.map { |n| n['id'] } = (node_ids) # Calculate relevance for each candidate, building final hash in-place scored_nodes = candidates.map do |node| = [node['id']] || [] relevance = calculate_relevance( node: node, query_tags: , vector_similarity: node['similarity']&.to_f, node_tags: ) # Modify in-place to avoid creating new Hash node['relevance'] = relevance node['tags'] = node end # Sort by relevance and return top K scored_nodes .sort_by { |n| -n['relevance'] } .take(limit) end |
#weight_access ⇒ Object
42 43 44 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 42

# Weight applied to the access-frequency signal in the relevance composite.
#
# @return [Object] the value of HTM.configuration.relevance_access_weight
#   (documented default: 0.1)
def weight_access
  settings = HTM.configuration
  settings.relevance_access_weight
end
#weight_recency ⇒ Object
38 39 40 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 38

# Weight applied to the recency signal in the relevance composite.
#
# @return [Object] the value of HTM.configuration.relevance_recency_weight
#   (documented default: 0.1)
def weight_recency
  settings = HTM.configuration
  settings.relevance_recency_weight
end
#weight_semantic ⇒ Object
Configurable scoring weights (via HTM.configuration)
30 31 32 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 30

# Weight applied to the vector-similarity (semantic) signal in the relevance
# composite. Like the other scoring weights, it is read from HTM.configuration
# on every call rather than cached.
#
# @return [Object] the value of HTM.configuration.relevance_semantic_weight
#   (documented default: 0.5)
def weight_semantic
  settings = HTM.configuration
  settings.relevance_semantic_weight
end
#weight_tag ⇒ Object
34 35 36 |
# File 'lib/htm/long_term_memory/relevance_scorer.rb', line 34

# Weight applied to the tag-overlap signal in the relevance composite.
#
# @return [Object] the value of HTM.configuration.relevance_tag_weight
#   (documented default: 0.3)
def weight_tag
  settings = HTM.configuration
  settings.relevance_tag_weight
end