Class: Legion::LLM::Context::Curator

Inherits:
Object
  • Object
show all
Includes:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/curator.rb

Constant Summary collapse

CURATED_KEY =
:__curated__
THINKING_TAG_PAIRS =

All known provider thinking tag variants. Anthropic: <thinking>…</thinking> DeepSeek / Qwen / Ollama / vLLM inline: <think>…</think>

[
  ['<thinking>', '</thinking>'],
  ['<think>',    '</think>']
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(conversation_id:) ⇒ Curator

Returns a new instance of Curator.



22
23
24
25
26
# File 'lib/legion/llm/context/curator.rb', line 22

# Builds a curator bound to one conversation.
#
# @param conversation_id [Object] identifier of the conversation; stored as given
def initialize(conversation_id:)
  # Lock used by the other methods of this class whenever @curated_messages is read or reset.
  @curation_mutex = Mutex.new
  # Memoized curated history; nil means "nothing cached yet".
  @curated_messages = nil
  @conversation_id = conversation_id
end

Instance Method Details

#curate_turn(turn_messages:, assistant_response:) ⇒ Object

Called async after each turn completes — zero latency impact.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/legion/llm/context/curator.rb', line 29

# Schedules background curation of the history that precedes the turn which
# just finished.
#
# The work is posted to Inference::Executor::ASYNC_THREAD_POOL. Inside the job,
# every stored message except the trailing ones that belong to the current turn
# is passed through curate_message and persisted with store_curated; afterwards
# the memoized @curated_messages is reset under @curation_mutex. Any
# StandardError raised inside the job is routed to handle_exception (:warn).
#
# @param turn_messages [Array, Object, nil] messages of the current turn (coerced with Array())
# @param assistant_response [Object, nil] counted as one extra message when truthy;
#   also forwarded to curate_message
# @return [Object, nil] nil when curation is disabled, otherwise whatever the pool's #post returns
def curate_turn(turn_messages:, assistant_response:)
  return unless enabled?

  response_count = assistant_response ? 1 : 0
  current_turn_count = Array(turn_messages).size + response_count
  log.debug "[llm][curator] action=curate_turn conversation_id=#{@conversation_id} turn_messages=#{current_turn_count}"

  Inference::Executor::ASYNC_THREAD_POOL.post do
    history = Inference::Conversation.messages(@conversation_id)
    # Only split when there is something older than the current turn.
    splittable = current_turn_count.positive? && history.size > current_turn_count
    older = splittable ? history[0...-current_turn_count] : []

    store_curated(@conversation_id, older.map { |msg| curate_message(msg, assistant_response) }) if older.any?
    # Drop the memoized copy so the next read reloads it.
    @curation_mutex.synchronize { @curated_messages = nil }
  rescue StandardError => e
    handle_exception(e, level: :warn, operation: 'llm.context_curator.curate_turn')
  end
end

#curated_messagesObject

Called sync when building next API request. Returns curated messages when available; nil means use raw history.



55
56
57
58
59
60
61
# File 'lib/legion/llm/context/curator.rb', line 55

# Returns the curated history for this conversation, loading it on first use.
#
# The value is memoized in @curated_messages under @curation_mutex. A falsy
# result from load_curated is not cached, so the next call loads again.
#
# @return [Object, nil] the curated messages, or nil when curation is disabled
#   or load_curated returned nothing
def curated_messages
  return unless enabled?

  @curation_mutex.synchronize do
    @curated_messages = load_curated(@conversation_id) unless @curated_messages
    @curated_messages
  end
end

#dedup_similar(messages, threshold: nil) ⇒ Object

Heuristic: deduplicate near-identical messages using Jaccard similarity.



181
182
183
184
185
186
187
188
189
190
191
# File 'lib/legion/llm/context/curator.rb', line 181

# Removes near-duplicate messages by delegating to
# Context::Compressor.deduplicate_messages.
#
# @param messages [Array] messages to deduplicate
# @param threshold [Numeric, nil] similarity cut-off; when nil the
#   :dedup_threshold setting (default 0.85) is used
# @return [Array] the input untouched when :dedup_enabled is off, otherwise the
#   :messages entry of the compressor result
def dedup_similar(messages, threshold: nil)
  return messages unless setting(:dedup_enabled, true)

  cutoff = threshold || setting(:dedup_threshold, 0.85)
  outcome = Context::Compressor.deduplicate_messages(messages, threshold: cutoff)
  removed = outcome[:removed]
  # Only report when something was actually dropped.
  if removed.positive?
    log.info "[llm][curator] action=dedup_similar conversation_id=#{@conversation_id} " \
             "removed=#{removed} original_count=#{outcome[:original_count]}"
  end
  outcome[:messages]
end

#distill_tool_result(msg, _assistant_context = nil) ⇒ Object

Heuristic: distill a single tool-result message to a compact summary.



95
96
97
98
99
100
101
102
103
104
# File 'lib/legion/llm/context/curator.rb', line 95

# Replaces an oversized tool-result message body with a heuristic summary.
#
# Messages whose content (as a String) is no longer than the
# :tool_result_max_chars setting (default 2000) are returned unchanged.
#
# @param msg [Hash] message with a :content entry
# @param _assistant_context [Object, nil] accepted but not used here
# @return [Hash] msg itself, or a copy with :content replaced by the summary,
#   :curated set to true and the full text kept under :original_content
def distill_tool_result(msg, _assistant_context = nil)
  original = msg[:content].to_s
  limit = setting(:tool_result_max_chars, 2000)
  return msg unless original.length > limit

  tool = tool_name_from(msg)
  condensed = heuristic_tool_summary(original, tool)
  log.info "[llm][curator] action=distill_tool_result conversation_id=#{@conversation_id} " \
           "original_chars=#{original.length} summary_chars=#{condensed.length}"
  msg.merge(content: condensed, curated: true, original_content: original)
end

#drop_and_archive(messages, conversation_id:) ⇒ Object

Drops older conversation turns from the prompt window after archiving them into Apollo for scoped retrieval on future turns.



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/legion/llm/context/curator.rb', line 65

# Trims the message list down to its most recent entries once the estimated
# token count exceeds the :target_context_tokens setting (default 40_000),
# but only after the removed entries were archived via
# archive_conversation_history.
#
# The input is returned unchanged when archiving is disabled, the input is not
# a non-empty Array, the estimate is within budget, there is nothing beyond
# the preserved tail, the archive call returns a falsy value, or any
# StandardError is raised (which is reported through handle_exception).
#
# @param messages [Array] conversation messages, oldest first
# @param conversation_id [Object] forwarded to the archive call and the logs
# @return [Array, Object] the retained tail, or messages as given
def drop_and_archive(messages, conversation_id:)
  return messages unless archive_dropped_turns?
  return messages unless messages.is_a?(Array) && messages.any?

  budget = setting(:target_context_tokens, 40_000)
  tokens_before = Context::Compressor.estimate_tokens(messages)
  return messages if tokens_before <= budget

  # :archive_preserve_recent falls back to :preserve_recent, then to 10; never keep fewer than one.
  keep = setting(:archive_preserve_recent, setting(:preserve_recent, 10)).to_i
  keep = 1 unless keep.positive?
  return messages if messages.size <= keep

  retained = messages.last(keep)
  dropped = messages.first(messages.size - keep)
  return messages if dropped.empty?

  # Never shrink the window unless the archive step reported success.
  return messages unless archive_conversation_history(dropped, conversation_id: conversation_id)

  tokens_freed = tokens_before - Context::Compressor.estimate_tokens(retained)
  log.warn("[llm][curator] action=drop_and_archive conversation_id=#{conversation_id} " \
           "messages_dropped=#{dropped.size} retained=#{retained.size} tokens_freed=#{tokens_freed}")
  retained
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.context_curator.drop_and_archive',
                      conversation_id: conversation_id)
  messages
end

#evict_superseded(messages) ⇒ Object

Heuristic: if same file was read multiple times, keep only the latest read.



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/legion/llm/context/curator.rb', line 158

# Keeps only the most recent message for every file path that
# extract_file_path finds in the message contents; earlier messages that
# resolve to the same path are removed. Messages for which no path is found
# are always kept, and relative order is preserved.
#
# Each message is scanned for a path exactly once; the result is reused for
# both the "last occurrence" index and the filtering pass.
#
# @param messages [Array<Hash>] messages with a :content entry
# @return [Array<Hash>] the input untouched when the :superseded_eviction
#   setting is off, otherwise a new array without the superseded messages
def evict_superseded(messages)
  return messages unless setting(:superseded_eviction, true)

  paths = messages.map { |msg| extract_file_path(msg[:content].to_s) }

  # Later indexes overwrite earlier ones, leaving the last occurrence per path.
  last_index_for = {}
  paths.each_with_index { |path, idx| last_index_for[path] = idx if path }

  result = messages.reject.with_index do |_msg, idx|
    path = paths[idx]
    path && last_index_for[path] != idx
  end

  evicted_count = messages.size - result.size
  if evicted_count.positive?
    log.info "[llm][curator] action=evict_superseded conversation_id=#{@conversation_id} " \
             "files_evicted=#{evicted_count} messages_before=#{messages.size} messages_after=#{result.size}"
  end
  result
end

#fold_resolved_exchanges(messages) ⇒ Object

Heuristic: detect multi-turn clarification that reached agreement; fold to single system note.



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/legion/llm/context/curator.rb', line 126

# Collapses runs of messages that resolved_exchange? recognises into a single
# system note.
#
# The list is scanned left to right using a window of up to four messages
# starting at the current position. A matching window is replaced by one note
# holding the first 300 characters of the window's last message, and the scan
# jumps past the whole window; otherwise the current message is kept and the
# scan advances by one.
#
# @param messages [Array<Hash>] messages with a :content entry
# @return [Array<Hash>] the input untouched when the :exchange_folding setting
#   is off, otherwise a new array with folded notes in place of matched windows
def fold_resolved_exchanges(messages)
  return messages unless setting(:exchange_folding, true)

  folded = []
  messages_folded = 0
  cursor = 0
  until cursor >= messages.length
    candidate = messages[cursor, 4]
    unless resolved_exchange?(candidate)
      folded << messages[cursor]
      cursor += 1
      next
    end

    # Keep the full text of the exchange on the note under :original_content.
    folded << {
      role:             :system,
      content:          "[Exchange resolved: #{candidate.last[:content].to_s[0, 300]}]",
      curated:          true,
      original_content: candidate.map { |m| m[:content] }.join("\n")
    }
    messages_folded += candidate.length
    cursor += candidate.length
  end

  if messages_folded.positive?
    log.info "[llm][curator] action=fold_resolved_exchanges conversation_id=#{@conversation_id} " \
             "messages_folded=#{messages_folded} result_size=#{folded.size}"
  end
  folded
end

#llm_distill_tool_result(msg, assistant_response = nil) ⇒ Object

LLM-assisted distillation: uses small/fast model to summarize tool results. Falls back to heuristic on any error.



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/legion/llm/context/curator.rb', line 195

# Summarises an oversized tool-result message with llm_summarize_tool_result,
# falling back to the heuristic distill_tool_result whenever LLM assistance is
# off, no summary comes back, or a StandardError is raised.
#
# Messages whose content (as a String) is no longer than the
# :tool_result_max_chars setting (default 2000) are returned unchanged.
#
# @param msg [Hash] message with a :content entry
# @param assistant_response [Object, nil] forwarded to distill_tool_result on fallback
# @return [Hash] msg itself, an LLM-summarised copy (with :curated and
#   :original_content set), or the result of distill_tool_result
def llm_distill_tool_result(msg, assistant_response = nil)
  return distill_tool_result(msg, assistant_response) unless llm_assisted?

  content = msg[:content].to_s
  max_chars = setting(:tool_result_max_chars, 2000)
  return msg if content.length <= max_chars

  summary = llm_summarize_tool_result(content, tool_name_from(msg))
  return distill_tool_result(msg, assistant_response) unless summary

  msg.merge(content: summary, curated: true, original_content: content)
rescue StandardError => e
  # Tag the report with an operation name, as the other rescue handlers in this class do.
  handle_exception(e, level: :warn, operation: 'llm.context_curator.llm_distill_tool_result')
  distill_tool_result(msg, assistant_response)
end

#strip_thinking(msg) ⇒ Object

Heuristic: remove extended thinking blocks, keep conclusions.



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/legion/llm/context/curator.rb', line 107

# Removes model "thinking" content from a message: tagged blocks (via
# strip_thinking_tags) and Markdown sections introduced by a heading that
# starts with "Thinking"/"thinking".
#
# The message is returned unchanged when the :thinking_eviction setting is off,
# when nothing but surrounding whitespace would change, or when stripping would
# leave an empty body.
#
# @param msg [Hash] message with a :content entry
# @return [Hash] msg itself, or a copy with the stripped :content, :curated set
#   to true and the full text kept under :original_content
def strip_thinking(msg)
  return msg unless setting(:thinking_eviction, true)

  content = msg[:content].to_s
  stripped = strip_thinking_tags(content)
  # Remove a "# Thinking…" heading line plus the lines directly below it, up to
  # (not including) the next blank line or line starting with '#'. Every piece
  # of the pattern is line-anchored or a negated character class, which keeps
  # matching free of nested ambiguous repetition.
  stripped = stripped.gsub(/^#+\s*[Tt]hinking[^\n]*\n(?!#+\s*[Tt]hinking[^\n]*\n)[^\n]*(?:\n(?![#\n])[^\n]*)*\n?/m, '').strip

  # Compare against the trimmed original: the trailing .strip above must not by
  # itself make a message without any thinking content count as curated.
  return msg if stripped.empty? || stripped == content.strip

  chars_removed = content.length - stripped.length
  log.info "[llm][curator] action=strip_thinking conversation_id=#{@conversation_id} " \
           "chars_removed=#{chars_removed} original_chars=#{content.length} stripped_chars=#{stripped.length}"
  msg.merge(content: stripped, curated: true, original_content: content)
end