Class: Legion::LLM::Context::Curator

Inherits:
Object
  • Object
show all
Includes:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/curator.rb

Constant Summary collapse

CURATED_KEY =
:__curated__
THINKING_TAG_PAIRS =

All known provider thinking tag variants. Anthropic: <thinking>…</thinking> DeepSeek / Qwen / Ollama / vLLM inline: <think>…</think>

[
  ['<thinking>', '</thinking>'],
  ['<think>',    '</think>']
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(conversation_id:) ⇒ Curator

Returns a new instance of Curator.



22
23
24
25
# File 'lib/legion/llm/context/curator.rb', line 22

def initialize(conversation_id:)
  @conversation_id = conversation_id
  @curated_messages = nil
end

Instance Method Details

#curate_turn(turn_messages:, assistant_response:) ⇒ Object

Called async after each turn completes — zero latency impact.



28
29
30
31
32
33
34
35
36
37
38
# File 'lib/legion/llm/context/curator.rb', line 28

def curate_turn(turn_messages:, assistant_response:)
  return unless enabled?

  Thread.new do
    curated = turn_messages.map { |msg| curate_message(msg, assistant_response) }
    store_curated(@conversation_id, curated)
    @curated_messages = nil
  rescue StandardError => e
    handle_exception(e, level: :warn)
  end
end

#curated_messagesObject

Called sync when building next API request. Returns curated messages when available; nil means use raw history.



42
43
44
45
46
# File 'lib/legion/llm/context/curator.rb', line 42

def curated_messages
  return nil unless enabled?

  @curated_messages ||= load_curated(@conversation_id)
end

#dedup_similar(messages, threshold: nil) ⇒ Object

Heuristic: deduplicate near-identical messages using Jaccard similarity.



148
149
150
151
152
153
154
# File 'lib/legion/llm/context/curator.rb', line 148

def dedup_similar(messages, threshold: nil)
  return messages unless setting(:dedup_enabled, true)

  threshold ||= setting(:dedup_threshold, 0.85)
  result = Context::Compressor.deduplicate_messages(messages, threshold: threshold)
  result[:messages]
end

#distill_tool_result(msg, _assistant_context = nil) ⇒ Object

Heuristic: distill a single tool-result message to a compact summary.



79
80
81
82
83
84
85
86
87
88
# File 'lib/legion/llm/context/curator.rb', line 79

def distill_tool_result(msg, _assistant_context = nil)
  content = msg[:content].to_s
  max_chars = setting(:tool_result_max_chars, 2000)
  return msg if content.length <= max_chars

  summary = heuristic_tool_summary(content, tool_name_from(msg))
  log.debug "[llm][curator] action=distill_tool_result conversation_id=#{@conversation_id} " \
            "original_chars=#{content.length} summary_chars=#{summary.length}"
  msg.merge(content: summary, curated: true, original_content: content)
end

#drop_and_archive(messages, conversation_id:) ⇒ Object

Drops older conversation turns from the prompt window after archiving them into Apollo for scoped retrieval on future turns.



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/legion/llm/context/curator.rb', line 50

def drop_and_archive(messages, conversation_id:)
  return messages unless archive_dropped_turns?
  return messages unless messages.is_a?(Array) && messages.any?

  target_tokens = setting(:target_context_tokens, 40_000)
  estimated = Context::Compressor.estimate_tokens(messages)
  return messages if estimated <= target_tokens

  preserve_recent = setting(:archive_preserve_recent, setting(:preserve_recent, 10)).to_i
  preserve_recent = 1 unless preserve_recent.positive?
  return messages if messages.size <= preserve_recent

  retained = messages.last(preserve_recent)
  dropped = messages[0...-preserve_recent]
  return messages if dropped.empty?

  archived = archive_conversation_history(dropped, conversation_id: conversation_id)
  return messages unless archived

  log.info("[llm][context_curator] action=drop_and_archive conversation_id=#{conversation_id} " \
           "dropped=#{dropped.size} retained=#{retained.size} estimated_tokens=#{estimated}")
  retained
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.context_curator.drop_and_archive',
                      conversation_id: conversation_id)
  messages
end

#evict_superseded(messages) ⇒ Object

Heuristic: if same file was read multiple times, keep only the latest read.



132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/legion/llm/context/curator.rb', line 132

def evict_superseded(messages)
  return messages unless setting(:superseded_eviction, true)

  file_last_seen = {}
  messages.each_with_index do |msg, idx|
    path = extract_file_path(msg[:content].to_s)
    file_last_seen[path] = idx if path
  end

  messages.each_with_index.reject do |msg, idx|
    path = extract_file_path(msg[:content].to_s)
    path && file_last_seen[path] != idx
  end.map(&:first)
end

#fold_resolved_exchanges(messages) ⇒ Object

Heuristic: detect multi-turn clarification that reached agreement; fold to single system note.



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/legion/llm/context/curator.rb', line 106

def fold_resolved_exchanges(messages)
  return messages unless setting(:exchange_folding, true)

  result = []
  i = 0
  while i < messages.length
    window = messages[i, 4]
    if resolved_exchange?(window)
      conclusion = window.last[:content].to_s[0, 300]
      note = {
        role:             :system,
        content:          "[Exchange resolved: #{conclusion}]",
        curated:          true,
        original_content: window.map { |m| m[:content] }.join("\n")
      }
      result << note
      i += window.length
    else
      result << messages[i]
      i += 1
    end
  end
  result
end

#llm_distill_tool_result(msg, assistant_response = nil) ⇒ Object

LLM-assisted distillation: uses small/fast model to summarize tool results. Falls back to heuristic on any error.



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/legion/llm/context/curator.rb', line 158

def llm_distill_tool_result(msg, assistant_response = nil)
  return distill_tool_result(msg, assistant_response) unless llm_assisted?

  content = msg[:content].to_s
  max_chars = setting(:tool_result_max_chars, 2000)
  return msg if content.length <= max_chars

  summary = llm_summarize_tool_result(content, tool_name_from(msg))
  if summary
    msg.merge(content: summary, curated: true, original_content: content)
  else
    distill_tool_result(msg, assistant_response)
  end
rescue StandardError => e
  handle_exception(e, level: :warn)
  distill_tool_result(msg, assistant_response)
end

#strip_thinking(msg) ⇒ Object

Heuristic: remove extended thinking blocks, keep conclusions.



91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/legion/llm/context/curator.rb', line 91

def strip_thinking(msg)
  return msg unless setting(:thinking_eviction, true)

  content = msg[:content].to_s
  stripped = strip_thinking_tags(content)
  stripped = stripped.gsub(/^#+\s*[Tt]hinking[^\n]*\n(?:[^#\n][^\n]*\n)*/m, '').strip

  return msg if stripped == content || stripped.empty?

  log.debug "[llm][curator] action=strip_thinking conversation_id=#{@conversation_id} " \
            "original_chars=#{content.length} stripped_chars=#{stripped.length}"
  msg.merge(content: stripped, curated: true, original_content: content)
end