Class: Legion::LLM::Context::Curator

Inherits:
Object
  • Object
show all
Includes:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/curator.rb

Constant Summary collapse

CURATED_KEY =
:__curated__
THINKING_TAG_PAIRS =

All known provider thinking tag variants. Anthropic: <thinking>…</thinking> DeepSeek / Qwen / Ollama / vLLM inline: <think>…</think>

[
  ['<thinking>', '</thinking>'],
  ['<think>',    '</think>']
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(conversation_id:) ⇒ Curator

Returns a new instance of Curator.



22
23
24
25
# File 'lib/legion/llm/context/curator.rb', line 22

# Builds a curator bound to a single conversation.
#
# @param conversation_id [Object] identifier kept on the instance and handed to
#   the conversation/curation helpers by the other methods
def initialize(conversation_id:)
  # Start with an empty cache; #curated_messages fills it on first use.
  @curated_messages = nil
  @conversation_id = conversation_id
end

Instance Method Details

#curate_turn(turn_messages:, assistant_response:) ⇒ Object

Called async after each turn completes — zero latency impact.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/legion/llm/context/curator.rb', line 28

# Curates everything that precedes the just-finished turn on a background thread.
#
# The number of messages belonging to the current turn is computed up front
# (turn messages plus one when an assistant response is present); only the
# messages before that tail are curated and stored.
#
# @param turn_messages [Array, Object, nil] messages of the current turn (coerced with Array())
# @param assistant_response [Object, nil] the assistant reply, forwarded to curate_message
# @return [Thread, nil] the worker thread, or nil when curation is disabled
def curate_turn(turn_messages:, assistant_response:)
  return unless enabled?

  turn_size = Array(turn_messages).size
  turn_size += 1 if assistant_response

  Thread.new do
    history = Inference::Conversation.messages(@conversation_id)

    # Everything except the trailing `turn_size` messages counts as older history.
    prior = []
    prior = history[0...-turn_size] if turn_size.positive? && history.size > turn_size

    if prior.any?
      rewritten = prior.map { |message| curate_message(message, assistant_response) }
      store_curated(@conversation_id, rewritten)
    end

    # Drop the memoized copy so the next read reloads it.
    @curated_messages = nil
  rescue StandardError => e
    handle_exception(e, level: :warn, operation: 'llm.context_curator.curate_turn')
  end
end

#curated_messagesObject

Called sync when building next API request. Returns curated messages when available; nil means use raw history.



53
54
55
56
57
# File 'lib/legion/llm/context/curator.rb', line 53

# Returns the memoized curated history for this conversation.
#
# The value is loaded via load_curated on first access and cached on the
# instance; a falsy cached value triggers another load.
#
# @return [Object, nil] whatever load_curated returns, or nil when curation is disabled
def curated_messages
  if enabled?
    @curated_messages || (@curated_messages = load_curated(@conversation_id))
  end
end

#dedup_similar(messages, threshold: nil) ⇒ Object

Heuristic: deduplicate near-identical messages using Jaccard similarity.



163
164
165
166
167
168
169
# File 'lib/legion/llm/context/curator.rb', line 163

# Removes near-duplicate messages by delegating to Context::Compressor.
#
# @param messages [Array] messages to deduplicate
# @param threshold [Numeric, nil] similarity cutoff; when nil the
#   :dedup_threshold setting (default 0.85) is used
# @return [Array] the :messages entry of the compressor result, or the input
#   unchanged when the :dedup_enabled setting is off
def dedup_similar(messages, threshold: nil)
  if setting(:dedup_enabled, true)
    cutoff = threshold || setting(:dedup_threshold, 0.85)
    Context::Compressor.deduplicate_messages(messages, threshold: cutoff)[:messages]
  else
    messages
  end
end

#distill_tool_result(msg, _assistant_context = nil) ⇒ Object

Heuristic: distill a single tool-result message to a compact summary.



90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/legion/llm/context/curator.rb', line 90

# Replaces an oversized tool-result message body with a heuristic summary.
#
# Messages whose content length is within the :tool_result_max_chars setting
# (default 2000) are returned untouched.
#
# @param msg [Hash] message with a :content entry
# @param _assistant_context [Object, nil] accepted for signature parity; unused
# @return [Hash] the original message, or a merged copy carrying the summary,
#   a curated flag and the original content
def distill_tool_result(msg, _assistant_context = nil)
  original = msg[:content].to_s
  limit = setting(:tool_result_max_chars, 2000)
  return msg if original.length <= limit

  tool = tool_name_from(msg)
  condensed = heuristic_tool_summary(original, tool)

  log.debug "[llm][curator] action=distill_tool_result conversation_id=#{@conversation_id} " \
            "original_chars=#{original.length} summary_chars=#{condensed.length}\n  " \
            "BEFORE: #{original}\n  " \
            "AFTER:  #{condensed}"

  msg.merge(content: condensed, curated: true, original_content: original)
end

#drop_and_archive(messages, conversation_id:) ⇒ Object

Drops older conversation turns from the prompt window after archiving them into Apollo for scoped retrieval on future turns.



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/legion/llm/context/curator.rb', line 61

# Trims the prompt window to its most recent messages once the estimated token
# count exceeds the :target_context_tokens setting, archiving the removed
# prefix first.
#
# The input is returned unchanged when archiving is disabled, the input is not
# a non-empty Array, the estimate is within budget, there is nothing to drop,
# the archive call reports failure, or any StandardError is raised.
#
# @param messages [Array] full message list
# @param conversation_id [Object] forwarded to the archive helper and log line
# @return [Array] the retained tail, or +messages+ as described above
def drop_and_archive(messages, conversation_id:)
  return messages unless archive_dropped_turns? && messages.is_a?(Array) && messages.any?

  token_budget = setting(:target_context_tokens, 40_000)
  token_estimate = Context::Compressor.estimate_tokens(messages)
  return messages if token_estimate <= token_budget

  # :archive_preserve_recent wins; :preserve_recent (default 10) is the fallback.
  fallback_keep = setting(:preserve_recent, 10)
  keep_count = setting(:archive_preserve_recent, fallback_keep).to_i
  keep_count = 1 if keep_count <= 0 # always keep at least one message
  return messages unless messages.size > keep_count

  tail = messages.last(keep_count)
  head = messages.first(messages.size - keep_count)
  return messages if head.empty?

  # Only shrink the window when the dropped prefix was archived successfully.
  return messages unless archive_conversation_history(head, conversation_id: conversation_id)

  log.info("[llm][context_curator] action=drop_and_archive conversation_id=#{conversation_id} " \
           "dropped=#{head.size} retained=#{tail.size} estimated_tokens=#{token_estimate}")
  tail
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.context_curator.drop_and_archive',
                      conversation_id: conversation_id)
  messages
end

#evict_superseded(messages) ⇒ Object

Heuristic: if same file was read multiple times, keep only the latest read.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/legion/llm/context/curator.rb', line 147

# Keeps only the last message associated with each file path.
#
# A path is obtained for every message by passing its content (as a String) to
# extract_file_path. Messages for which no path is returned are always kept;
# among messages sharing a path, only the one with the highest index survives.
# Relative order of the surviving messages is preserved.
#
# @param messages [Array<Hash>] messages with a :content entry
# @return [Array<Hash>] filtered messages, or the input unchanged when the
#   :superseded_eviction setting is off
def evict_superseded(messages)
  return messages unless setting(:superseded_eviction, true)

  # Extract each message's path exactly once and reuse it in both passes.
  paths = messages.map { |msg| extract_file_path(msg[:content].to_s) }

  # Later indices overwrite earlier ones, leaving the final occurrence per path.
  latest_index = {}
  paths.each_with_index { |path, idx| latest_index[path] = idx if path }

  messages.select.with_index do |_msg, idx|
    path = paths[idx]
    !path || latest_index[path] == idx
  end
end

#fold_resolved_exchanges(messages) ⇒ Object

Heuristic: detect multi-turn clarification that reached agreement; fold to single system note.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/legion/llm/context/curator.rb', line 121

# Collapses resolved exchanges into a single system note.
#
# Scans the list with a sliding window of up to four messages. When
# resolved_exchange? accepts the window, the whole window is replaced by one
# :system message quoting the first 300 characters of the window's last
# message and retaining the joined original contents; scanning resumes after
# the window. Otherwise the current message is kept and the window advances by one.
#
# @param messages [Array<Hash>] messages with a :content entry
# @return [Array<Hash>] folded list, or the input unchanged when the
#   :exchange_folding setting is off
def fold_resolved_exchanges(messages)
  return messages unless setting(:exchange_folding, true)

  folded = []
  cursor = 0
  until cursor >= messages.length
    candidate = messages[cursor, 4]

    unless resolved_exchange?(candidate)
      folded.push(messages[cursor])
      cursor += 1
      next
    end

    outcome = candidate.last[:content].to_s[0, 300]
    transcript = candidate.map { |entry| entry[:content] }.join("\n")
    folded.push({
                  role:             :system,
                  content:          "[Exchange resolved: #{outcome}]",
                  curated:          true,
                  original_content: transcript
                })
    cursor += candidate.length
  end
  folded
end

#llm_distill_tool_result(msg, assistant_response = nil) ⇒ Object

LLM-assisted distillation: uses small/fast model to summarize tool results. Falls back to heuristic on any error.



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/legion/llm/context/curator.rb', line 173

# Summarizes an oversized tool-result message via llm_summarize_tool_result,
# falling back to distill_tool_result whenever that is not possible.
#
# The fallback is used when llm_assisted? is false, when the summarizer
# returns nil or a blank string (so a tool result is never replaced by empty
# content), and when any StandardError is raised. Messages whose content
# length is within the :tool_result_max_chars setting (default 2000) are
# returned untouched.
#
# @param msg [Hash] message with a :content entry
# @param assistant_response [Object, nil] forwarded to distill_tool_result
# @return [Hash] the original message or a merged copy carrying the summary,
#   a curated flag and the original content
def llm_distill_tool_result(msg, assistant_response = nil)
  return distill_tool_result(msg, assistant_response) unless llm_assisted?

  content = msg[:content].to_s
  max_chars = setting(:tool_result_max_chars, 2000)
  return msg if content.length <= max_chars

  summary = llm_summarize_tool_result(content, tool_name_from(msg))
  # A blank summary carries no information; treat it like a missing one.
  return distill_tool_result(msg, assistant_response) if summary.to_s.strip.empty?

  msg.merge(content: summary, curated: true, original_content: content)
rescue StandardError => e
  # Tag the failure with an operation name, matching the other rescue sites in this class.
  handle_exception(e, level: :warn, operation: 'llm.context_curator.llm_distill_tool_result')
  distill_tool_result(msg, assistant_response)
end

#strip_thinking(msg) ⇒ Object

Heuristic: remove extended thinking blocks, keep conclusions.



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/legion/llm/context/curator.rb', line 104

# Removes thinking sections from a message body.
#
# Two passes are applied to the content: strip_thinking_tags, then removal of
# markdown sections whose heading line starts with "Thinking"/"thinking"
# (the heading plus following lines up to the next heading or blank line).
# The result is trimmed of surrounding whitespace.
#
# The message is returned untouched when the :thinking_eviction setting is
# off, when nothing but surrounding whitespace would change, or when stripping
# would leave no content at all.
#
# @param msg [Hash] message with a :content entry
# @return [Hash] the original message, or a merged copy carrying the stripped
#   content, a curated flag and the original content
def strip_thinking(msg)
  return msg unless setting(:thinking_eviction, true)

  content = msg[:content].to_s
  stripped = strip_thinking_tags(content)
  stripped = stripped.gsub(/^#+\s*[Tt]hinking[^\n]*\n(?:[^#\n][^\n]*\n)*/m, '').strip

  # `stripped` is trimmed, so compare against the trimmed original; otherwise a
  # message with only leading/trailing whitespace is wrongly reported as curated.
  return msg if stripped.empty? || stripped == content.strip

  log.debug "[llm][curator] action=strip_thinking conversation_id=#{@conversation_id} " \
            "original_chars=#{content.length} stripped_chars=#{stripped.length}\n  " \
            "BEFORE: #{content}\n  " \
            "AFTER:  #{stripped}"
  msg.merge(content: stripped, curated: true, original_content: content)
end