Module: Legion::LLM::Inference::Steps::PromptCache

Extended by:
PromptCache, Legion::Logging::Helper
Included in:
Executor, PromptCache
Defined in:
lib/legion/llm/inference/steps/prompt_cache.rb

Instance Method Summary

  • #apply_cache_control(system_blocks) ⇒ Array<Hash>

    Adds cache_control to the last system block when prompt caching is enabled.

  • #apply_conversation_breakpoint(messages) ⇒ Array<Hash>

    Marks the last stable message with a cache breakpoint so the conversation prefix can be cached.

  • #sort_tools_deterministically(tools) ⇒ Array<Hash>

    Sorts tool schemas deterministically by name for a stable cache key.

Instance Method Details

#apply_cache_control(system_blocks) ⇒ Array<Hash>

Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold (approximated in the code as min_tokens * 4 characters).

Parameters:

  • system_blocks (Array<Hash>)

    array of system message hashes

Returns:

  • (Array<Hash>)

    the system blocks, possibly with cache_control on the last entry



# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 18

def apply_cache_control(system_blocks)
  unless caching_enabled? && cache_system_prompt?
    log.debug('[llm][prompt_cache] cache_control skipped=disabled')
    return system_blocks
  end
  if system_blocks.nil? || system_blocks.empty?
    log.debug('[llm][prompt_cache] cache_control skipped=empty_system')
    return system_blocks
  end

  total_chars = system_blocks.sum { |b| b[:content].to_s.length }
  min_chars   = prompt_caching_value(:min_tokens, 1024) * 4

  if total_chars < min_chars
    log.debug("[llm][prompt_cache] cache_control skipped=below_threshold total_chars=#{total_chars} min_chars=#{min_chars}")
    return system_blocks
  end

  scope = prompt_caching_value(:scope, 'ephemeral')
  log.info("[llm][prompt_cache] cache_control scope=#{scope} total_chars=#{total_chars}")
  system_blocks[0..-2] + [system_blocks.last.merge(cache_control: { type: scope })]
end
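
A minimal usage sketch. The step expects its host to provide caching_enabled?, cache_system_prompt?, prompt_caching_value, and log; the stubs below are hypothetical stand-ins for illustration, not the real Legion configuration:

require 'logger'

class CacheControlDemo
  include Legion::LLM::Inference::Steps::PromptCache

  # Hypothetical stubs; real values come from Legion's configuration.
  def caching_enabled?     = true
  def cache_system_prompt? = true

  def prompt_caching_value(key, default)
    { min_tokens: 1, scope: 'ephemeral' }.fetch(key, default)
  end

  def log
    @log ||= Logger.new($stdout)
  end
end

blocks = [
  { type: 'text', content: 'You are a careful assistant.' },
  { type: 'text', content: 'Always answer in JSON.' }
]

CacheControlDemo.new.apply_cache_control(blocks)
# => the last block gains cache_control: { type: 'ephemeral' };
#    with the default min_tokens of 1024, the combined content would
#    need roughly 4096 characters to cross the threshold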

#apply_conversation_breakpoint(messages) ⇒ Array<Hash>

Marks the last stable prior message, i.e. the most recent one not already carrying cache_control, with a cache breakpoint so the provider can cache the conversation prefix up to that point.

Parameters:

  • messages (Array<Hash>)

    ordered list of conversation messages

Returns:

  • (Array<Hash>)

    messages, possibly with cache_control on the last stable one



# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 65

def apply_conversation_breakpoint(messages)
  unless caching_enabled? && cache_conversation?
    log.debug('[llm][prompt_cache] conversation_breakpoint skipped=disabled')
    return messages
  end
  if messages.nil? || messages.size < 2
    log.debug("[llm][prompt_cache] conversation_breakpoint skipped=too_few_messages count=#{messages&.size || 0}")
    return messages
  end

  scope   = prompt_caching_value(:scope, 'ephemeral')
  prior   = messages[0..-2]
  current = messages.last

  last_stable_idx = prior.rindex { |m| !m[:cache_control] }
  unless last_stable_idx
    log.debug('[llm][prompt_cache] conversation_breakpoint skipped=no_stable_message')
    return messages
  end

  updated_prior = prior.dup
  updated_prior[last_stable_idx] = prior[last_stable_idx].merge(cache_control: { type: scope })
  log.info("[llm][prompt_cache] conversation_breakpoint scope=#{scope} index=#{last_stable_idx}")
  updated_prior + [current]
end
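
A sketch of where the breakpoint lands, again with hypothetical stubs standing in for the host's configuration helpers (here cache_conversation? rather than cache_system_prompt?):

require 'logger'

class BreakpointDemo
  include Legion::LLM::Inference::Steps::PromptCache

  # Hypothetical stubs for illustration only.
  def caching_enabled?    = true
  def cache_conversation? = true

  def prompt_caching_value(key, default)
    key == :scope ? 'ephemeral' : default
  end

  def log
    @log ||= Logger.new($stdout)
  end
end

messages = [
  { role: 'user',      content: 'Summarise this document.' },
  { role: 'assistant', content: 'Here is the summary ...' },
  { role: 'user',      content: 'Now translate it to French.' }
]

BreakpointDemo.new.apply_conversation_breakpoint(messages)
# => the assistant turn (index 1, the last prior message without
#    cache_control) gains cache_control: { type: 'ephemeral' };
#    the new user turn stays unmarked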

#sort_tools_deterministically(tools) ⇒ Array<Hash>

Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in a different order.

Parameters:

  • tools (Array<Hash>)

    array of tool definition hashes, each with a :name key

Returns:

  • (Array<Hash>)

    tools sorted by name



# File 'lib/legion/llm/inference/steps/prompt_cache.rb', line 46

def sort_tools_deterministically(tools)
  unless caching_enabled? && sort_tools?
    log.debug('[llm][prompt_cache] sort_tools skipped=disabled')
    return tools
  end
  if tools.nil? || tools.empty?
    log.debug('[llm][prompt_cache] sort_tools skipped=empty_tools')
    return tools
  end

  log.debug("[llm][prompt_cache] sort_tools count=#{tools.size}")
  tools.sort_by { |t| t[:name].to_s }
end
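
A sketch of the sort, with the same kind of hypothetical host stubs; only the relative order of the tool hashes changes:

require 'logger'

class SortDemo
  include Legion::LLM::Inference::Steps::PromptCache

  # Hypothetical stubs for illustration only.
  def caching_enabled? = true
  def sort_tools?      = true

  def log
    @log ||= Logger.new($stdout)
  end
end

tools = [
  { name: 'web_search', input_schema: { type: 'object' } },
  { name: 'calculator', input_schema: { type: 'object' } }
]

SortDemo.new.sort_tools_deterministically(tools)
# => calculator first, then web_search, regardless of input order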