Module: Legion::LLM::Context::Compressor

Extended by:
Legion::Logging::Helper
Defined in:
lib/legion/llm/context/compressor.rb

Constant Summary collapse

# Compression levels, in increasing order of aggressiveness.
# .compress returns its input untouched for any level <= NONE.
NONE =
0
# Lowest active level; the default level for .compress.
LIGHT =
1
# Middle level; .stopwords_for_level(MODERATE) combines the level 1 and 2 lists.
MODERATE =
2
# Highest level: .compress additionally collapses whitespace at or above it,
# and .stopwords_for_level caps any larger value at AGGRESSIVE.
AGGRESSIVE =
3
# Word lists keyed by compression level (1 = LIGHT, 2 = MODERATE,
# 3 = AGGRESSIVE). .stopwords_for_level concatenates the lists for every
# level from 1 up to the requested one, so the lists are cumulative.
#
# Hash#freeze is shallow, so each nested array is frozen as well; otherwise
# the shared word lists could be mutated in place through the constant.
LEVEL_WORDS =
{
  1 => %w[a an the just very really basically actually simply quite rather somewhat].freeze,
  2 => %w[however moreover furthermore additionally consequently therefore thus hence
          meanwhile nevertheless nonetheless accordingly indeed certainly].freeze,
  3 => %w[also then still even already yet again please note that].freeze
}.freeze
# Instruction text asking for a concise summary of a conversation and listing
# what the summary must preserve and omit.
# NOTE(review): presumably the prompt used by llm_summarize, which is not
# visible in this listing -- confirm against the source file.
SUMMARIZE_PROMPT =
<<~PROMPT
  Summarize this conversation concisely. Preserve:
  - Key decisions and conclusions
  - Code snippets and file paths
  - Action items and next steps
  - Technical details that would be needed to continue the conversation

  Omit pleasantries, repetition, and verbose explanations.
  Return only the summary, no preamble.
PROMPT
DEDUP_WINDOW =

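Removes near-duplicate messages from a conversation history. Uses Jaccard similarity on word sets to detect duplicates and keeps the last occurrence of similar messages. (This description documents .deduplicate_messages; the DEDUP_WINDOW constant itself is the number of already-kept messages each candidate message is compared against.)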

Returns:

  • (Hash)

    { messages: Array, removed: Integer, original_count: Integer }

20

Class Method Summary collapse

Class Method Details

.auto_compact(messages, target_tokens:, preserve_recent: 10) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/legion/llm/context/compressor.rb', line 106

# Compacts a conversation by replacing everything except the most recent
# messages with a compaction notice and a summary.
#
# The result is laid out as:
#   [compaction notice (system), summary (system), *most recent messages]
#
# Older messages are summarized via summarize_messages with a budget of half
# of +target_tokens+. The result is not re-checked against +target_tokens+.
#
# @param messages [Array<Hash>, nil] messages with :role and :content keys
# @param target_tokens [Integer] token budget; half of it is handed to
#   summarize_messages as max_tokens
# @param preserve_recent [Integer, #to_i] number of trailing messages kept
#   verbatim; coerced with #to_i, and anything not positive falls back to 1
# @return [Array<Hash>] the compacted list; the input itself when it has no
#   more than +preserve_recent+ messages; an empty array when +messages+ is nil
def auto_compact(messages, target_tokens:, preserve_recent: 10)
  # Tolerate nil like the sibling methods in this module do.
  return [] if messages.nil?

  # Use the coerced integer everywhere. Checking `to_i` but keeping the raw
  # value would let e.g. "5" pass validation and then fail on comparison.
  keep = preserve_recent.to_i
  keep = 1 unless keep.positive?
  return messages if messages.size <= keep

  recent = messages.last(keep)
  older  = messages[0...-keep]

  tokens_before = estimate_tokens(messages)
  summarized = summarize_messages(older, max_tokens: target_tokens / 2)

  compaction_msg = {
    role:     'system',
    content:  "[Conversation compacted: #{older.size} turns summarized]",
    metadata: {
      compacted_at:   Time.now.utc.iso8601,
      original_count: messages.size,
      preserved:      recent.size
    }
  }

  summary_msg = {
    role:    'system',
    content: summarized[:summary]
  }

  result = [compaction_msg, summary_msg, *recent].flatten
  tokens_after = estimate_tokens(result)
  log.info("[llm][compressor] action=auto_compact messages_before=#{messages.size} " \
           "messages_after=#{result.size} tokens_before=#{tokens_before} " \
           "tokens_after=#{tokens_after} tokens_saved=#{tokens_before - tokens_after}")
  result
end

.compress(text, level: LIGHT) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/legion/llm/context/compressor.rb', line 34

# Compresses +text+ at the requested level.
#
# The text is split with split_segments; segments flagged :protected are
# passed through verbatim and every other segment goes through
# compress_prose. At AGGRESSIVE and above the joined result is additionally
# run through collapse_whitespace.
#
# @param text [String, nil] input text
# @param level [Integer] compression level (NONE..AGGRESSIVE)
# @return [String, nil] compressed text; +text+ itself when it is nil, empty,
#   or +level+ is NONE or lower
def compress(text, level: LIGHT)
  return text if text.nil? || text.empty? || level <= NONE

  input_size = text.length
  pieces = split_segments(text).map do |segment|
    next segment[:text] if segment[:protected]

    compress_prose(segment[:text], level)
  end

  squeezed = pieces.join
  squeezed = collapse_whitespace(squeezed) if level >= AGGRESSIVE
  log.debug("Compressor applied level=#{level} original=#{input_size} compressed=#{squeezed.length}")
  squeezed
end

.deduplicate_messages(messages, threshold: 0.85) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/legion/llm/context/compressor.rb', line 75

# Drops near-duplicate messages, keeping the last occurrence of each.
#
# Messages are scanned from newest to oldest. A message is dropped when any
# of the first DEDUP_WINDOW messages already kept has the same :role and a
# jaccard_similarity with it of at least +threshold+. Messages whose content
# is shorter than 20 characters are always kept.
#
# @param messages [Array<Hash>, nil] messages with :role and :content keys
# @param threshold [Float] minimum similarity that counts as a duplicate
# @return [Hash] { messages: Array, removed: Integer, original_count: Integer }
def deduplicate_messages(messages, threshold: 0.85)
  return { messages: [], removed: 0, original_count: 0 } if messages.nil? || messages.empty?

  survivors = []
  dropped = 0

  messages.reverse_each do |message|
    body = message[:content].to_s

    if body.length >= 20
      near_copy = survivors.first(DEDUP_WINDOW).any? do |later|
        later[:role] == message[:role] &&
          jaccard_similarity(body, later[:content].to_s) >= threshold
      end

      if near_copy
        dropped += 1
        next
      end
    end

    # Prepending restores chronological order while iterating in reverse.
    survivors.unshift(message)
  end

  unless dropped.zero?
    log.info("[llm][compressor] action=deduplicate_messages removed=#{dropped} " \
             "original_count=#{messages.size} kept=#{survivors.size}")
  end

  { messages: survivors, removed: dropped, original_count: messages.size }
end

.estimate_tokens(messages) ⇒ Object



139
140
141
142
143
144
# File 'lib/legion/llm/context/compressor.rb', line 139

# Rough token estimate for a list of messages: the combined character length
# of every message's :content (via #to_s), integer-divided by 4.
#
# @param messages [Array<Hash>, nil] messages with a :content key
# @return [Integer] estimated token count; 0 for nil or empty input
def estimate_tokens(messages)
  return 0 if messages.nil? || messages.empty?

  char_count = messages.inject(0) do |running, message|
    running + message[:content].to_s.length
  end
  char_count.div(4)
end

.stopwords_for_level(level) ⇒ Object



146
147
148
149
150
# File 'lib/legion/llm/context/compressor.rb', line 146

# Collects the LEVEL_WORDS lists for every level from 1 up to +level+,
# capping +level+ at AGGRESSIVE.
#
# @param level [Integer] compression level
# @return [Array<String>] a new array of words; empty when +level+ is NONE
#   or lower
def stopwords_for_level(level)
  return [] if level <= NONE

  highest = [level, AGGRESSIVE].min
  (1..highest).each_with_object([]) do |tier, words|
    words.concat(LEVEL_WORDS.fetch(tier, []))
  end
end

.summarize_messages(messages, max_tokens: 2000) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/legion/llm/context/compressor.rb', line 46

# Produces a summary of +messages+.
#
# The messages are rendered as "role: content" paragraphs separated by blank
# lines. If that transcript is shorter than max_tokens * 4 characters it is
# returned unchanged. Otherwise llm_summarize is tried first; when it yields
# nothing truthy, the transcript is run through compress at AGGRESSIVE.
#
# @param messages [Array<Hash>, nil] messages with :role and :content keys
# @param max_tokens [Integer] summary budget; compared as max_tokens * 4 chars
# @return [Hash] always :summary and :original_count; :compressed unless the
#   input was nil/empty; method: :stopword when the fallback was used
def summarize_messages(messages, max_tokens: 2000)
  return { summary: '', original_count: 0 } if messages.nil? || messages.empty?

  transcript = messages.map { |m| "#{m[:role]}: #{m[:content]}" }.join("\n\n")
  char_budget = max_tokens * 4
  if transcript.length < char_budget
    return { summary: transcript, original_count: messages.size, compressed: false }
  end

  llm_summary = llm_summarize(transcript, max_tokens)
  unless llm_summary
    # No usable LLM summary: fall back to stopword-based compression.
    squeezed = compress(transcript, level: AGGRESSIVE)
    log.info(
      "[llm][compressor] fallback_compress messages=#{messages.size} " \
      "input_chars=#{transcript.length} summary_chars=#{squeezed.length}"
    )
    return { summary: squeezed, original_count: messages.size, compressed: true, method: :stopword }
  end

  log.info("[llm][compressor] summarized messages=#{messages.size} summary_chars=#{llm_summary.length}")
  { summary: llm_summary, original_count: messages.size, compressed: true }
end