Module: Legion::LLM::Inference::Steps::PromptCache
- Extended by: PromptCache, Legion::Logging::Helper
- Included in: Executor, PromptCache
- Defined in: lib/legion/llm/inference/steps/prompt_cache.rb
Instance Method Summary
- #apply_cache_control(system_blocks) ⇒ Array<Hash>
  Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.
- #apply_conversation_breakpoint(messages) ⇒ Array<Hash>
  Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.
- #sort_tools_deterministically(tools) ⇒ Array<Hash>
  Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in a different order.
Instance Method Details
#apply_cache_control(system_blocks) ⇒ Array<Hash>
Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.
# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 18

def apply_cache_control(system_blocks)
  unless caching_enabled? && cache_system_prompt?
    log.debug('[llm][prompt_cache] cache_control skipped=disabled')
    return system_blocks
  end

  if system_blocks.nil? || system_blocks.empty?
    log.debug('[llm][prompt_cache] cache_control skipped=empty_system')
    return system_blocks
  end

  total_chars = system_blocks.sum { |b| b[:content].to_s.length }
  min_chars = prompt_caching_value(:min_tokens, 1024) * 4

  if total_chars < min_chars
    log.debug("[llm][prompt_cache] cache_control skipped=below_threshold total_chars=#{total_chars} min_chars=#{min_chars}")
    return system_blocks
  end

  scope = prompt_caching_value(:scope, 'ephemeral')
  log.info("[llm][prompt_cache] cache_control scope=#{scope} total_chars=#{total_chars}")

  system_blocks[0..-2] + [system_blocks.last.merge(cache_control: { type: scope })]
end
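The min_tokens setting is applied as an approximate character threshold (4 characters per token), so the default of 1024 tokens corresponds to roughly 4096 characters. Below is a minimal sketch of the resulting block shape, using hypothetical system content and the default 'ephemeral' scope; it is plain Ruby that mirrors the transformation, not part of the gem itself.

# Hypothetical system blocks as an executor might build them.
system_blocks = [
  { type: 'text', content: 'You are a helpful assistant.' },
  { type: 'text', content: 'Long shared instructions... ' * 200 } # well past ~4096 chars
]

# Same transformation apply_cache_control performs once the threshold is met:
# only the last block gains a cache_control marker.
scope = 'ephemeral'
cached = system_blocks[0..-2] +
         [system_blocks.last.merge(cache_control: { type: scope })]

cached.last[:cache_control] # => { type: "ephemeral" }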
#apply_conversation_breakpoint(messages) ⇒ Array<Hash>
Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.
# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 65

def apply_conversation_breakpoint(messages)
  unless caching_enabled? && cache_conversation?
    log.debug('[llm][prompt_cache] conversation_breakpoint skipped=disabled')
    return messages
  end

  if messages.nil? || messages.size < 2
    log.debug("[llm][prompt_cache] conversation_breakpoint skipped=too_few_messages count=#{messages&.size || 0}")
    return messages
  end

  scope = prompt_caching_value(:scope, 'ephemeral')
  prior = messages[0..-2]
  current = messages.last

  last_stable_idx = prior.rindex { |m| !m[:cache_control] }

  unless last_stable_idx
    log.debug('[llm][prompt_cache] conversation_breakpoint skipped=no_stable_message')
    return messages
  end

  updated_prior = prior.dup
  updated_prior[last_stable_idx] = prior[last_stable_idx].merge(cache_control: { type: scope })

  log.info("[llm][prompt_cache] conversation_breakpoint scope=#{scope} index=#{last_stable_idx}")

  updated_prior + [current]
end
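A sketch of where the breakpoint lands, using a hypothetical three-turn conversation in plain Ruby: the newest message is left untouched and the last prior message without an existing cache_control marker is tagged.

# Hypothetical conversation; the final user turn is the "new" message.
messages = [
  { role: 'user',      content: 'Summarise the report.' },
  { role: 'assistant', content: 'Here is the summary...' },
  { role: 'user',      content: 'Now list the action items.' }
]

scope = 'ephemeral'
prior   = messages[0..-2]
current = messages.last

# Same rule as apply_conversation_breakpoint: pick the last prior message
# that does not already carry a cache_control marker.
idx = prior.rindex { |m| !m[:cache_control] }
prior[idx] = prior[idx].merge(cache_control: { type: scope })

(prior + [current]).map { |m| m.key?(:cache_control) }
# => [false, true, false]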
#sort_tools_deterministically(tools) ⇒ Array<Hash>
Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in a different order.
# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 46

def sort_tools_deterministically(tools)
  unless caching_enabled? && sort_tools?
    log.debug('[llm][prompt_cache] sort_tools skipped=disabled')
    return tools
  end

  if tools.nil? || tools.empty?
    log.debug('[llm][prompt_cache] sort_tools skipped=empty_tools')
    return tools
  end

  log.debug("[llm][prompt_cache] sort_tools count=#{tools.size}")

  tools.sort_by { |t| t[:name].to_s }
end
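A quick illustration with hypothetical tool schemas: the same tool set supplied in different orders sorts to the same sequence, which is what keeps the serialized prompt prefix, and therefore the cache key, stable.

# Two calls supplying the same tools in different orders (hypothetical schemas).
tools_a = [{ name: 'search_web' }, { name: 'get_weather' }]
tools_b = [{ name: 'get_weather' }, { name: 'search_web' }]

sort = ->(tools) { tools.sort_by { |t| t[:name].to_s } }

sort.call(tools_a) == sort.call(tools_b) # => true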