Class: Legion::LLM::Context::Curator

Inherits:
Object
  • Object
show all
Includes:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/curator.rb

Constant Summary collapse

CURATED_KEY =
:__curated__
THINKING_TAG_PAIRS =

All known provider thinking tag variants. Anthropic: <thinking>…</thinking> DeepSeek / Qwen / Ollama / vLLM inline: <think>…</think>

[
  ['<thinking>', '</thinking>'],
  ['<think>',    '</think>']
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(conversation_id:) ⇒ Curator

Returns a new instance of Curator.



22
23
24
25
26
# File 'lib/legion/llm/context/curator.rb', line 22

# Builds a curator bound to one conversation.
#
# @param conversation_id [Object] identifier kept on the instance and handed
#   to the conversation/curation helpers by the other methods
def initialize(conversation_id:)
  # Memoized curated history; nil means it has not been loaded (or was reset).
  @curated_messages = nil
  @conversation_id = conversation_id
  # Serializes reads and resets of @curated_messages.
  @curation_mutex = Mutex.new
end

Instance Method Details

#curate_turn(turn_messages:, assistant_response:) ⇒ Object

Called async after each turn completes — zero latency impact.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/legion/llm/context/curator.rb', line 29

# Schedules curation of the conversation's older messages on
# Inference::Executor::ASYNC_THREAD_POOL. Returns immediately (nil) when
# curation is disabled.
#
# Inside the posted block the full history is loaded, the messages that fall
# before the preserved window are passed through +curate_message+ and saved
# with +store_curated+, and the memoized curated list is cleared so the next
# +curated_messages+ call reloads it. Any StandardError raised in the block
# is handed to +handle_exception+ at warn level.
#
# @param turn_messages [Array, Object, nil] messages of the turn that just
#   finished; only their count is used
# @param assistant_response [Object, nil] counted as one extra message when
#   truthy and forwarded to +curate_message+
# @return [Object, nil] whatever the pool's +post+ returns, or nil when disabled
def curate_turn(turn_messages:, assistant_response:)
  return unless enabled?

  response_count = assistant_response ? 1 : 0
  turn_size = Array(turn_messages).size + response_count
  log.debug "[llm][curator] action=curate_turn conversation_id=#{@conversation_id} turn_messages=#{turn_size}"

  Inference::Executor::ASYNC_THREAD_POOL.post do
    history = Inference::Conversation.messages(@conversation_id)

    # Index of the first message of the current turn. Stays 0 when the turn
    # is empty or the history is not longer than the turn itself.
    turn_offset = 0
    turn_offset = history.size - turn_size if turn_size.positive? && history.size > turn_size

    # preserve_recent_turns counts the current turn too, so only
    # (preserve_recent_turns - 1) earlier turns need protecting here.
    # Non-positive settings are treated as 1.
    configured_turns = setting(:preserve_recent_turns, 2).to_i
    keep_turns = configured_turns.positive? ? configured_turns : 1
    extra_turns = [keep_turns - 1, 0].max

    # Everything before `boundary` is eligible for curation; messages from
    # `boundary` onward are left untouched.
    boundary = find_preserve_split(history[0...turn_offset], extra_turns)
    distillable = history[0...boundary]

    if distillable.any?
      store_curated(@conversation_id, distillable.map { |message| curate_message(message, assistant_response) })
    end
    @curation_mutex.synchronize { @curated_messages = nil }
  rescue StandardError => e
    handle_exception(e, level: :warn, operation: 'llm.context_curator.curate_turn')
  end
end

#curated_messagesObject

Called sync when building next API request. Returns curated messages when available; nil means use raw history.



72
73
74
75
76
77
78
# File 'lib/legion/llm/context/curator.rb', line 72

# Returns the curated message list for this conversation, loading it through
# +load_curated+ on first use and memoizing it under the curation mutex.
#
# @return [Object, nil] the memoized/loaded curated messages, or nil when
#   curation is disabled (or when +load_curated+ returned nothing)
def curated_messages
  return unless enabled?

  @curation_mutex.synchronize do
    # Reuse the cached value when present; otherwise load and remember it.
    @curated_messages || (@curated_messages = load_curated(@conversation_id))
  end
end

#dedup_similar(messages, threshold: nil) ⇒ Object

Heuristic: deduplicate near-identical messages using Jaccard similarity.



198
199
200
201
202
203
204
205
206
207
208
# File 'lib/legion/llm/context/curator.rb', line 198

# Removes near-duplicate messages by delegating to
# Context::Compressor.deduplicate_messages. Skipped entirely (input returned
# as-is) when the :dedup_enabled setting is falsy.
#
# @param messages [Array] messages to deduplicate
# @param threshold [Numeric, nil] similarity threshold; when nil the
#   :dedup_threshold setting (default 0.85) is used
# @return [Array] the :messages entry of the compressor result
def dedup_similar(messages, threshold: nil)
  return messages unless setting(:dedup_enabled, true)

  effective_threshold = threshold || setting(:dedup_threshold, 0.85)
  outcome = Context::Compressor.deduplicate_messages(messages, threshold: effective_threshold)

  # Only report when something was actually removed.
  if outcome[:removed].positive?
    log.info "[llm][curator] action=dedup_similar conversation_id=#{@conversation_id} " \
             "removed=#{outcome[:removed]} original_count=#{outcome[:original_count]}"
  end

  outcome[:messages]
end

#distill_tool_result(msg, _assistant_context = nil) ⇒ Object

Heuristic: distill a single tool-result message to a compact summary.



112
113
114
115
116
117
118
119
120
121
# File 'lib/legion/llm/context/curator.rb', line 112

# Replaces an oversized tool-result message with a heuristic summary.
#
# Messages whose stringified content is at most :tool_result_max_chars
# (default 2000) characters are returned untouched. Longer ones come back as
# a copy whose :content is the summary, flagged :curated, with the full text
# kept under :original_content.
#
# @param msg [Hash] message with a :content entry
# @param _assistant_context [Object, nil] accepted but not used here
# @return [Hash] the original message or its distilled copy
def distill_tool_result(msg, _assistant_context = nil)
  original = msg[:content].to_s
  limit = setting(:tool_result_max_chars, 2000)

  if original.length > limit
    condensed = heuristic_tool_summary(original, tool_name_from(msg))
    log.info "[llm][curator] action=distill_tool_result conversation_id=#{@conversation_id} " \
             "original_chars=#{original.length} summary_chars=#{condensed.length}"
    msg.merge(content: condensed, curated: true, original_content: original)
  else
    msg
  end
end

#drop_and_archive(messages, conversation_id:) ⇒ Object

Drops older conversation turns from the prompt window after archiving them into Apollo for scoped retrieval on future turns.



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/legion/llm/context/curator.rb', line 82

# Trims the prompt window to the most recent messages once the estimated
# token count exceeds :target_context_tokens (default 40_000), but only after
# the dropped messages were archived successfully.
#
# The input is returned unchanged when archiving is switched off, when
# +messages+ is not a non-empty Array, when the estimate is within budget,
# when there are no more messages than the preserve window, when archiving
# reports failure, or when any StandardError is raised (which is passed to
# +handle_exception+ at warn level).
#
# @param messages [Array<Hash>] conversation messages, oldest first
# @param conversation_id [Object] id forwarded to the archiver and the log
# @return [Array<Hash>, Object] the retained tail, or +messages+ itself
def drop_and_archive(messages, conversation_id:)
  return messages if !archive_dropped_turns? || !messages.is_a?(Array) || messages.none?

  budget = setting(:target_context_tokens, 40_000)
  tokens_before = Context::Compressor.estimate_tokens(messages)
  return messages if tokens_before <= budget

  # :archive_preserve_recent wins; :preserve_recent (default 10) is the
  # fallback. Non-positive values collapse to keeping a single message.
  fallback_keep = setting(:preserve_recent, 10)
  keep = setting(:archive_preserve_recent, fallback_keep).to_i
  keep = 1 unless keep.positive?
  return messages if messages.size <= keep

  cutoff = messages.size - keep
  retained = messages.last(keep)
  dropped = messages[0...cutoff]
  return messages if dropped.empty?

  # Never drop anything that could not be archived first.
  return messages unless archive_conversation_history(dropped, conversation_id: conversation_id)

  tokens_freed = tokens_before - Context::Compressor.estimate_tokens(retained)
  log.warn("[llm][curator] action=drop_and_archive conversation_id=#{conversation_id} " \
           "messages_dropped=#{dropped.size} retained=#{retained.size} tokens_freed=#{tokens_freed}")
  retained
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.context_curator.drop_and_archive',
                      conversation_id: conversation_id)
  messages
end

#evict_superseded(messages) ⇒ Object

Heuristic: if same file was read multiple times, keep only the latest read.



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/legion/llm/context/curator.rb', line 175

# Drops earlier messages that reference a file path which a later message
# references again, keeping only the last message per path. Messages for
# which +extract_file_path+ yields nothing are always kept. Skipped entirely
# (input returned as-is) when the :superseded_eviction setting is falsy.
#
# The path of each message is extracted exactly once and reused for both the
# "last seen" index and the filtering pass.
#
# @param messages [Array<Hash>] messages in conversation order
# @return [Array<Hash>] messages with superseded entries removed
def evict_superseded(messages)
  return messages unless setting(:superseded_eviction, true)

  # One extraction per message; nil/false entries mean "no file path".
  paths = messages.map { |msg| extract_file_path(msg[:content].to_s) }

  # Later occurrences overwrite earlier ones, leaving the final index per path.
  last_index_for = {}
  paths.each_with_index { |path, idx| last_index_for[path] = idx if path }

  result = messages.reject.with_index do |_msg, idx|
    path = paths[idx]
    path && last_index_for[path] != idx
  end

  # NOTE: the `files_evicted` log field counts removed messages, not distinct files.
  evicted_count = messages.size - result.size
  if evicted_count.positive?
    log.info "[llm][curator] action=evict_superseded conversation_id=#{@conversation_id} " \
             "files_evicted=#{evicted_count} messages_before=#{messages.size} messages_after=#{result.size}"
  end
  result
end

#fold_resolved_exchanges(messages) ⇒ Object

Heuristic: detect multi-turn clarification that reached agreement; fold to single system note.



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/legion/llm/context/curator.rb', line 143

# Collapses runs of messages that +resolved_exchange?+ recognises into a
# single system note. Skipped entirely (input returned as-is) when the
# :exchange_folding setting is falsy.
#
# The list is scanned left to right with a window of up to four messages.
# A recognised window is replaced by one note carrying the first 300
# characters of the window's last message, with every original content
# joined by newlines under :original_content; scanning resumes after the
# window. Otherwise the current message is copied through and the window
# advances by one.
#
# @param messages [Array<Hash>] messages in conversation order
# @return [Array<Hash>] messages with resolved exchanges folded
def fold_resolved_exchanges(messages)
  return messages unless setting(:exchange_folding, true)

  output = []
  folded = 0
  cursor = 0

  until cursor >= messages.length
    candidate = messages[cursor, 4]

    unless resolved_exchange?(candidate)
      output << messages[cursor]
      cursor += 1
      next
    end

    outcome = candidate.last[:content].to_s[0, 300]
    output << {
      role:             :system,
      content:          "[Exchange resolved: #{outcome}]",
      curated:          true,
      original_content: candidate.map { |m| m[:content] }.join("\n")
    }
    folded += candidate.length
    cursor += candidate.length
  end

  if folded.positive?
    log.info "[llm][curator] action=fold_resolved_exchanges conversation_id=#{@conversation_id} " \
             "messages_folded=#{folded} result_size=#{output.size}"
  end
  output
end

#llm_distill_tool_result(msg, assistant_response = nil) ⇒ Object

LLM-assisted distillation: uses small/fast model to summarize tool results. Falls back to heuristic on any error.



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/legion/llm/context/curator.rb', line 212

# Distills an oversized tool-result message using +llm_summarize_tool_result+,
# falling back to the heuristic +distill_tool_result+ when LLM assistance is
# off, when no summary comes back, or when any StandardError is raised.
#
# Messages whose stringified content is at most :tool_result_max_chars
# (default 2000) characters are returned untouched on the LLM path.
#
# @param msg [Hash] message with a :content entry
# @param assistant_response [Object, nil] forwarded to +distill_tool_result+
#   on every fallback path
# @return [Hash] the original message, or a copy with :content replaced by
#   the summary, :curated set, and the full text under :original_content
def llm_distill_tool_result(msg, assistant_response = nil)
  return distill_tool_result(msg, assistant_response) unless llm_assisted?

  content = msg[:content].to_s
  max_chars = setting(:tool_result_max_chars, 2000)
  return msg if content.length <= max_chars

  summary = llm_summarize_tool_result(content, tool_name_from(msg))
  return distill_tool_result(msg, assistant_response) unless summary

  msg.merge(content: summary, curated: true, original_content: content)
rescue StandardError => e
  # Tag the failure with an operation name, matching the other rescue sites
  # in this class, so it can be attributed to this method.
  handle_exception(e, level: :warn, operation: 'llm.context_curator.llm_distill_tool_result')
  distill_tool_result(msg, assistant_response)
end

#strip_thinking(msg) ⇒ Object

Heuristic: remove extended thinking blocks, keep conclusions.



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/legion/llm/context/curator.rb', line 124

# Removes "thinking" content from a message: tagged blocks (via
# +strip_thinking_tags+) and markdown sections introduced by a heading whose
# text starts with "Thinking"/"thinking". Skipped entirely (input returned
# as-is) when the :thinking_eviction setting is falsy.
#
# The message is returned untouched when nothing but surrounding whitespace
# would change, or when stripping would leave no content at all. Otherwise a
# copy is returned with the stripped :content, :curated set, and the full
# text under :original_content.
#
# @param msg [Hash] message with a :content entry
# @return [Hash] the original message or its stripped copy
def strip_thinking(msg)
  return msg unless setting(:thinking_eviction, true)

  content = msg[:content].to_s
  stripped = strip_thinking_tags(content)
  # Matches a line-anchored markdown heading ("#", "##", ...) whose text
  # begins with "Thinking"/"thinking", the line right after it (unless that
  # line is itself such a heading), and the following lines up to the next
  # blank line or line starting with "#". Anchoring at line start and using
  # only newline-excluding character classes keeps matching linear.
  stripped = stripped.gsub(/^#+\s*[Tt]hinking[^\n]*\n(?!#+\s*[Tt]hinking[^\n]*\n)[^\n]*(?:\n(?![#\n])[^\n]*)*\n?/m, '').strip

  # `stripped` is always whitespace-trimmed, so compare against the trimmed
  # original: a message that merely has leading/trailing whitespace must not
  # be reported or flagged as curated.
  return msg if stripped.empty? || stripped == content.strip

  chars_removed = content.length - stripped.length
  log.info "[llm][curator] action=strip_thinking conversation_id=#{@conversation_id} " \
           "chars_removed=#{chars_removed} original_chars=#{content.length} stripped_chars=#{stripped.length}"
  msg.merge(content: stripped, curated: true, original_content: content)
end