Module: Legion::LLM::Pipeline::Steps::PromptCache

Extended by:
PromptCache, Legion::Logging::Helper
Included in:
Executor, PromptCache
Defined in:
lib/legion/llm/pipeline/steps/prompt_cache.rb

Instance Method Summary collapse

Instance Method Details

#apply_cache_control(system_blocks) ⇒ Array<Hash>

Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.

Parameters:

  • system_blocks (Array<Hash>)

    array of system message hashes

Returns:

  • (Array<Hash>)

    system blocks, possibly with cache_control on last entry



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 18

# Tags the final system block with a cache_control marker so the provider
# can cache the (large, stable) system prompt. Skipped entirely when caching
# is disabled, the input is blank, or the prompt is too small to be worth
# caching (min_tokens * ~4 chars/token heuristic).
#
# @param system_blocks [Array<Hash>] system message hashes with :content
# @return [Array<Hash>] blocks, last one possibly carrying :cache_control
def apply_cache_control(system_blocks)
  return system_blocks unless caching_enabled? && cache_system_prompt?
  return system_blocks if system_blocks.nil? || system_blocks.empty?

  char_count = system_blocks.reduce(0) { |sum, block| sum + block[:content].to_s.length }
  threshold  = prompt_caching_settings.fetch(:min_tokens, 1024) * 4

  return system_blocks if char_count < threshold

  cache_scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  log.info("[llm][prompt_cache] cache_control scope=#{cache_scope} total_chars=#{char_count}")
  head = system_blocks[0...-1]
  tail = system_blocks.last.merge(cache_control: { type: cache_scope })
  head + [tail]
end

#apply_conversation_breakpoint(messages) ⇒ Array<Hash>

Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.

Parameters:

  • messages (Array<Hash>)

    ordered list of conversation messages

Returns:

  • (Array<Hash>)

    messages, possibly with cache_control on the last stable one



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 50

# Places a cache breakpoint on the most recent prior message that does not
# already carry one, so the provider can cache the conversation prefix up to
# that point. The newest message is always left untouched. No-op when caching
# is off, the history is too short, or every prior message is already marked.
#
# @param messages [Array<Hash>] ordered conversation messages
# @return [Array<Hash>] messages, one prior entry possibly tagged
def apply_conversation_breakpoint(messages)
  return messages unless caching_enabled? && cache_conversation?
  return messages if messages.nil? || messages.size < 2

  breakpoint_scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  stable = messages[0...-1]
  newest = messages.last

  target = stable.rindex { |msg| !msg[:cache_control] }
  return messages unless target

  log.info("[llm][prompt_cache] conversation_breakpoint scope=#{breakpoint_scope} index=#{target}")
  marked = stable.each_with_index.map do |msg, idx|
    idx == target ? msg.merge(cache_control: { type: breakpoint_scope }) : msg
  end
  marked + [newest]
end

#sort_tools_deterministically(tools) ⇒ Array<Hash>

Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in different order.

Parameters:

  • tools (Array<Hash>)

    array of tool definition hashes with :name key

Returns:

  • (Array<Hash>)

    tools sorted by name



37
38
39
40
41
42
43
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 37

# Orders tool schemas by :name so an identical tool set always serializes the
# same way, keeping the provider's prompt-cache key stable regardless of the
# order tools were registered in. No-op when caching or sorting is disabled
# or there is nothing to sort.
#
# @param tools [Array<Hash>] tool definition hashes with a :name key
# @return [Array<Hash>] tools sorted by name
def sort_tools_deterministically(tools)
  return tools unless caching_enabled? && sort_tools?
  return tools if tools.nil? || tools.empty?

  log.debug("[llm][prompt_cache] sort_tools count=#{tools.size}")
  tools.sort { |left, right| left[:name].to_s <=> right[:name].to_s }
end