Module: Legion::LLM::Pipeline::Steps::PromptCache

Extended by:
PromptCache, Legion::Logging::Helper
Included in:
Executor, PromptCache
Defined in:
lib/legion/llm/pipeline/steps/prompt_cache.rb

Instance Method Summary collapse

Instance Method Details

#apply_cache_control(system_blocks) ⇒ Array<Hash>

Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.

Parameters:

  • system_blocks (Array<Hash>)

    array of system message hashes

Returns:

  • (Array<Hash>)

    system blocks, possibly with cache_control on last entry



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 18

# Tags the final system block with a cache_control marker so the provider
# can cache the (large, stable) system prompt. Skipped entirely when caching
# is disabled, the input is blank, or the prompt is too small to be worth
# caching (min_tokens * ~4 chars/token heuristic).
#
# @param system_blocks [Array<Hash>] system message hashes with :content
# @return [Array<Hash>] blocks, last one possibly carrying :cache_control
def apply_cache_control(system_blocks)
  return system_blocks unless caching_enabled? && cache_system_prompt?
  return system_blocks if system_blocks.nil? || system_blocks.empty?

  char_count = system_blocks.reduce(0) { |sum, block| sum + block[:content].to_s.length }
  threshold  = prompt_caching_settings.fetch(:min_tokens, 1024) * 4

  return system_blocks if char_count < threshold

  cache_scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  log.info("[llm][prompt_cache] cache_control scope=#{cache_scope} total_chars=#{char_count}")
  head = system_blocks[0...-1]
  tail = system_blocks.last.merge(cache_control: { type: cache_scope })
  head + [tail]
end

#apply_conversation_breakpoint(messages) ⇒ Array<Hash>

Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.

Parameters:

  • messages (Array<Hash>)

    ordered list of conversation messages

Returns:

  • (Array<Hash>)

    messages, possibly with cache_control on the last stable one



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 50

# Places a cache breakpoint on the most recent prior message that does not
# already carry one, so the provider can cache the conversation prefix up to
# that point. The newest message is always left untouched. No-op when caching
# is off, the history is too short, or every prior message is already marked.
#
# @param messages [Array<Hash>] ordered conversation messages
# @return [Array<Hash>] messages, one prior entry possibly tagged
def apply_conversation_breakpoint(messages)
  return messages unless caching_enabled? && cache_conversation?
  return messages if messages.nil? || messages.size < 2

  breakpoint_scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  stable = messages[0...-1]
  newest = messages.last

  target = stable.rindex { |msg| !msg[:cache_control] }
  return messages unless target

  log.info("[llm][prompt_cache] conversation_breakpoint scope=#{breakpoint_scope} index=#{target}")
  marked = stable.each_with_index.map do |msg, idx|
    idx == target ? msg.merge(cache_control: { type: breakpoint_scope }) : msg
  end
  marked + [newest]
end

#sort_tools_deterministically(tools) ⇒ Array<Hash>

Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in different order.

Parameters:

  • tools (Array<Hash>)

    array of tool definition hashes with :name key

Returns:

  • (Array<Hash>)

    tools sorted by name



37
38
39
40
41
42
43
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 37

# Orders tool schemas by :name so an identical tool set always serializes the
# same way, keeping the provider's prompt-cache key stable regardless of the
# order tools were registered in. No-op when caching or sorting is disabled
# or there is nothing to sort.
#
# @param tools [Array<Hash>] tool definition hashes with a :name key
# @return [Array<Hash>] tools sorted by name
def sort_tools_deterministically(tools)
  return tools unless caching_enabled? && sort_tools?
  return tools if tools.nil? || tools.empty?

  log.debug("[llm][prompt_cache] sort_tools count=#{tools.size}")
  tools.sort { |left, right| left[:name].to_s <=> right[:name].to_s }
end