Class: OllamaAgent::Context::Manager

Inherits:
Object
  • Object
show all
Defined in:
lib/ollama_agent/context/manager.rb

Overview

Trims the messages array to fit within a token budget before each chat call. Never mutates the input. Never removes the system message or the last user message.

Constant Summary collapse

DEFAULT_MAX_TOKENS =
32_768
TRIM_THRESHOLD =
0.85

Instance Method Summary collapse

Constructor Details

#initialize(max_tokens: nil, context_summarize: false) ⇒ Manager

Returns a new instance of Manager.



13
14
15
16
# File 'lib/ollama_agent/context/manager.rb', line 13

# Builds a context manager with a token budget and a summarization flag.
#
# @param max_tokens [#to_i, nil] token budget; when nil (or false) the value
#   returned by +env_max_tokens+ is used instead. The chosen value is coerced
#   with +to_i+.
# @param context_summarize [Boolean] stored as-is; +trim+ consults it to
#   decide whether dropped messages are collected and summarized.
def initialize(max_tokens: nil, context_summarize: false)
  budget = max_tokens || env_max_tokens
  @context_summarize = context_summarize
  @max_tokens = budget.to_i
end

Instance Method Details

#trim(messages) ⇒ Object

Returns a (possibly shorter) copy of messages that fits within the token budget.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/ollama_agent/context/manager.rb', line 20

# Returns a copy of +messages+ (keys symbolized) reduced until its estimated
# token total is no longer over budget, or until nothing more can be dropped.
#
# The input array and its hashes are not modified: every message is first
# copied via +transform_keys+, and all deletions happen on that copy.
#
# @param messages [Array<Hash>] chat messages keyed by String or Symbol
# @return [Array<Hash>] the symbol-keyed messages, possibly with entries
#   removed and, when summarization is enabled and something was dropped,
#   whatever +inject_summary+ adds to the list
def trim(messages)
  symbolized = messages.map { |msg| msg.transform_keys(&:to_sym) }
  token_estimates = symbolized.map { |msg| TokenCounter.estimate(msg[:content].to_s) }
  # Fast path: nothing to drop, hand back the normalized copy untouched.
  return symbolized unless over_budget_sum?(token_estimates)

  kept = symbolized.dup
  kept_estimates = token_estimates.dup
  # Dropped messages are only collected when a summary may be injected later.
  removed = @context_summarize ? [] : nil

  while over_budget_sum?(kept_estimates)
    # Recomputed every pass because deletions shift positions.
    latest_user_idx = kept.rindex { |msg| msg[:role] == "user" }
    victim_idx = find_droppable_index(kept, latest_user_idx)
    # No candidate left: return what remains even if still over budget.
    break if victim_idx.nil?

    if assistant_with_tool_calls?(kept[victim_idx])
      # Helper removes entries from both arrays itself and returns them
      # (presumably the assistant message plus its tool results).
      batch = drop_assistant_and_tool_results(kept, kept_estimates, victim_idx)
    else
      kept_estimates.delete_at(victim_idx)
      batch = [kept.delete_at(victim_idx)]
    end
    removed&.concat(batch)
  end

  inject_summary(kept, removed) if @context_summarize && removed&.any?
  kept
end