Class: Legion::LLM::Context::Curator

Inherits:
Object
  • Object
show all
Includes:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/curator.rb

Constant Summary collapse

CURATED_KEY =
:__curated__
THINKING_OPEN =
'<thinking>'
THINKING_CLOSE =
'</thinking>'

Instance Method Summary collapse

Constructor Details

#initialize(conversation_id:) ⇒ Curator

Returns a new instance of Curator.



16
17
18
19
# File 'lib/legion/llm/context/curator.rb', line 16

# Creates a curator bound to a single conversation.
#
# @param conversation_id [Object] identifier kept on the instance; the other
#   methods pass it to the store/load helpers for curated messages
def initialize(conversation_id:)
  # Nothing is cached until #curated_messages performs its first load.
  @curated_messages = nil
  @conversation_id = conversation_id
end

Instance Method Details

#curate_turn(turn_messages:, assistant_response:) ⇒ Object

Called async after each turn completes — zero latency impact.



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/legion/llm/context/curator.rb', line 22

# Curates the messages of a finished turn on a background thread so the
# caller is not blocked.
#
# Each message is passed through +curate_message+ together with the
# assistant response, the results are handed to +store_curated+, and the
# in-memory cache is cleared so the next #curated_messages call reloads.
# Any StandardError raised inside the thread is reported through
# +handle_exception+ at warn level instead of propagating.
#
# @param turn_messages [#map] messages belonging to the completed turn
# @param assistant_response [Object] response forwarded to +curate_message+
# @return [Thread, nil] the worker thread, or nil when curation is disabled
def curate_turn(turn_messages:, assistant_response:)
  return unless enabled?

  Thread.new do
    begin
      distilled = turn_messages.map { |message| curate_message(message, assistant_response) }
      store_curated(@conversation_id, distilled)
      # Drop the memoized copy; it no longer reflects what was just stored.
      @curated_messages = nil
    rescue StandardError => error
      handle_exception(error, level: :warn)
    end
  end
end

#curated_messages ⇒ Object

Called sync when building next API request. Returns curated messages when available; nil means use raw history.



36
37
38
39
40
# File 'lib/legion/llm/context/curator.rb', line 36

# Returns the curated message list for this conversation, loading it through
# +load_curated+ on first use and memoizing any truthy result.
#
# @return [Object, nil] the curated messages, or nil when curation is
#   disabled or +load_curated+ returned nothing (a nil/false result is not
#   cached, so the next call loads again)
def curated_messages
  return nil unless enabled?
  return @curated_messages if @curated_messages

  @curated_messages = load_curated(@conversation_id)
end

#dedup_similar(messages, threshold: nil) ⇒ Object

Heuristic: deduplicate near-identical messages using Jaccard similarity.



138
139
140
141
142
143
144
# File 'lib/legion/llm/context/curator.rb', line 138

# Removes near-duplicate messages by delegating to
# +Context::Compressor.deduplicate_messages+.
#
# @param messages [Object] messages to deduplicate
# @param threshold [Numeric, nil] similarity cutoff; when nil the
#   +:dedup_threshold+ setting (default 0.85) is used
# @return [Object] +messages+ unchanged when the +:dedup_enabled+ setting is
#   falsy, otherwise the +:messages+ entry of the compressor's result
def dedup_similar(messages, threshold: nil)
  return messages unless setting(:dedup_enabled, true)

  cutoff = threshold || setting(:dedup_threshold, 0.85)
  Context::Compressor.deduplicate_messages(messages, threshold: cutoff)[:messages]
end

#distill_tool_result(msg, _assistant_context = nil) ⇒ Object

Heuristic: distill a single tool-result message to a compact summary.



73
74
75
76
77
78
79
80
# File 'lib/legion/llm/context/curator.rb', line 73

# Replaces an oversized tool-result message body with a heuristic summary.
#
# A message whose stringified content fits within the
# +:tool_result_max_chars+ setting (default 2000) is returned untouched.
#
# @param msg [Hash] message with a +:content+ entry
# @param _assistant_context [Object, nil] accepted for signature parity; unused
# @return [Hash] +msg+ itself, or a merged copy carrying the summary as
#   +:content+, +curated: true+, and the full text under +:original_content+
def distill_tool_result(msg, _assistant_context = nil)
  original = msg[:content].to_s
  limit = setting(:tool_result_max_chars, 2000)

  if original.length > limit
    condensed = heuristic_tool_summary(original, tool_name_from(msg))
    msg.merge(content: condensed, curated: true, original_content: original)
  else
    msg
  end
end

#drop_and_archive(messages, conversation_id:) ⇒ Object

Drops older conversation turns from the prompt window after archiving them into Apollo for scoped retrieval on future turns.



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/legion/llm/context/curator.rb', line 44

# Trims older messages from the prompt window once the estimated token count
# exceeds the target, but only after they were archived successfully.
#
# The input is returned untouched when archiving is switched off, the input
# is not a non-empty Array, the estimate is within +:target_context_tokens+
# (default 40_000), there are no more messages than the number to preserve,
# or +archive_conversation_history+ returns a falsy value. The preserve count
# comes from +:archive_preserve_recent+, falling back to +:preserve_recent+
# (default 10), and is never less than 1.
#
# Any StandardError is reported via +handle_exception+ and the original
# messages are returned.
#
# @param messages [Array] full message history
# @param conversation_id [Object] conversation the archive is filed under
# @return [Array, Object] the retained tail, or +messages+ unchanged
def drop_and_archive(messages, conversation_id:)
  return messages unless archive_dropped_turns? && messages.is_a?(Array) && messages.any?

  token_budget = setting(:target_context_tokens, 40_000)
  token_estimate = Context::Compressor.estimate_tokens(messages)
  return messages if token_estimate <= token_budget

  # Always keep at least the most recent message.
  keep_count = [setting(:archive_preserve_recent, setting(:preserve_recent, 10)).to_i, 1].max
  return messages if messages.size <= keep_count

  split_at = messages.size - keep_count
  recent = messages.drop(split_at)
  older = messages.first(split_at)
  return messages if older.empty?

  # Only shrink the window when the archive call reports success.
  return messages unless archive_conversation_history(older, conversation_id: conversation_id)

  log.info("[llm][context_curator] action=drop_and_archive conversation_id=#{conversation_id} " \
           "dropped=#{older.size} retained=#{recent.size} estimated_tokens=#{token_estimate}")
  recent
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'llm.context_curator.drop_and_archive',
                      conversation_id: conversation_id)
  messages
end

#evict_superseded(messages) ⇒ Object

Heuristic: if same file was read multiple times, keep only the latest read.



122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/legion/llm/context/curator.rb', line 122

# Drops messages whose file path reappears in a later message, keeping only
# the last message that mentions each path. Messages for which
# +extract_file_path+ yields nothing are always kept, and relative order is
# preserved.
#
# Each message's path is extracted exactly once and reused for both the
# "last seen" index and the filtering step.
# NOTE(review): this assumes +extract_file_path+ is deterministic for a given
# string — confirm against its definition.
#
# @param messages [Array<Hash>] messages with a +:content+ entry
# @return [Array<Hash>] +messages+ itself when the +:superseded_eviction+
#   setting is falsy, otherwise a new array without superseded entries
def evict_superseded(messages)
  return messages unless setting(:superseded_eviction, true)

  paths = messages.map { |msg| extract_file_path(msg[:content].to_s) }

  # Later occurrences overwrite earlier ones, leaving the final index per path.
  latest_index = {}
  paths.each_with_index { |path, idx| latest_index[path] = idx if path }

  messages.select.with_index do |_msg, idx|
    path = paths[idx]
    !path || latest_index[path] == idx
  end
end

#fold_resolved_exchanges(messages) ⇒ Object

Heuristic: detect multi-turn clarification that reached agreement; fold to single system note.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/legion/llm/context/curator.rb', line 96

# Collapses resolved multi-message exchanges into a single system note.
#
# Walks the list with a window of up to four messages. When
# +resolved_exchange?+ accepts the window, the whole window is replaced by
# one note quoting the first 300 characters of its last message, and the
# joined original contents are kept under +:original_content+. Otherwise the
# current message is copied through and the window advances by one.
#
# @param messages [Array<Hash>] messages with a +:content+ entry
# @return [Array<Hash>] +messages+ itself when the +:exchange_folding+
#   setting is falsy, otherwise a new folded array
def fold_resolved_exchanges(messages)
  return messages unless setting(:exchange_folding, true)

  folded = []
  cursor = 0
  until cursor >= messages.length
    slice = messages[cursor, 4]

    unless resolved_exchange?(slice)
      folded << messages[cursor]
      cursor += 1
      next
    end

    outcome = slice.last[:content].to_s[0, 300]
    transcript = slice.map { |entry| entry[:content] }.join("\n")
    folded << {
      role:             :system,
      content:          "[Exchange resolved: #{outcome}]",
      curated:          true,
      original_content: transcript
    }
    # Skip everything the note now stands in for.
    cursor += slice.length
  end
  folded
end

#llm_distill_tool_result(msg, assistant_response = nil) ⇒ Object

LLM-assisted distillation: uses small/fast model to summarize tool results. Falls back to heuristic on any error.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/legion/llm/context/curator.rb', line 148

# Summarises an oversized tool-result message with an LLM, falling back to
# the heuristic #distill_tool_result when LLM assistance is off, the LLM
# yields no summary, or any StandardError is raised (the error is reported
# through +handle_exception+ at warn level first).
#
# A message whose stringified content fits within the
# +:tool_result_max_chars+ setting (default 2000) is returned untouched when
# LLM assistance is on.
#
# @param msg [Hash] message with a +:content+ entry
# @param assistant_response [Object, nil] forwarded to the heuristic fallback
# @return [Hash] +msg+, an LLM-summarised copy, or the heuristic result
def llm_distill_tool_result(msg, assistant_response = nil)
  if llm_assisted?
    raw = msg[:content].to_s
    limit = setting(:tool_result_max_chars, 2000)
    return msg if raw.length <= limit

    condensed = llm_summarize_tool_result(raw, tool_name_from(msg))
    return msg.merge(content: condensed, curated: true, original_content: raw) if condensed
  end

  # Reached when assistance is disabled or the LLM produced no summary.
  distill_tool_result(msg, assistant_response)
rescue StandardError => e
  handle_exception(e, level: :warn)
  distill_tool_result(msg, assistant_response)
end

#strip_thinking(msg) ⇒ Object

Heuristic: remove extended thinking blocks, keep conclusions.



83
84
85
86
87
88
89
90
91
92
93
# File 'lib/legion/llm/context/curator.rb', line 83

# Removes thinking material from a message body: first whatever
# +strip_thinking_tags+ removes, then any markdown heading that starts with
# "Thinking"/"thinking" together with the non-heading lines directly below
# it. Surrounding whitespace is trimmed from the result.
#
# @param msg [Hash] message with a +:content+ entry
# @return [Hash] +msg+ itself when the +:thinking_eviction+ setting is falsy,
#   when the cleaned text equals the original, or when nothing would remain;
#   otherwise a merged copy with the cleaned +:content+, +curated: true+, and
#   the full text under +:original_content+
def strip_thinking(msg)
  return msg unless setting(:thinking_eviction, true)

  original = msg[:content].to_s
  without_tags = strip_thinking_tags(original)
  cleaned = without_tags.gsub(/^#+\s*[Tt]hinking[^\n]*\n(?:[^#\n][^\n]*\n)*/m, '').strip

  # Leave the message alone if nothing changed or nothing would be left.
  if cleaned == original || cleaned.empty?
    msg
  else
    msg.merge(content: cleaned, curated: true, original_content: original)
  end
end