Module: Legion::LLM::Pipeline::Steps::PromptCache
- Extended by:
- PromptCache, Legion::Logging::Helper
- Included in:
- Executor, PromptCache
- Defined in:
- lib/legion/llm/pipeline/steps/prompt_cache.rb
Instance Method Summary collapse
-
#apply_cache_control(system_blocks) ⇒ Array<Hash>
Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.
-
#apply_conversation_breakpoint(messages) ⇒ Array<Hash>
Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.
-
#sort_tools_deterministically(tools) ⇒ Array<Hash>
Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in different order.
Instance Method Details
#apply_cache_control(system_blocks) ⇒ Array<Hash>
Adds cache_control to the last system block when prompt caching is enabled and the combined content exceeds the configured min_tokens threshold.
18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 18

# Adds cache_control to the last system block when prompt caching is enabled
# and the combined content exceeds the configured min_tokens threshold
# (approximated as min_tokens * 4 characters).
#
# @param system_blocks [Array<Hash>] system prompt blocks with :content keys
# @return [Array<Hash>] blocks with cache_control merged onto the final block,
#   or the input unchanged when caching does not apply
def apply_cache_control(system_blocks)
  return system_blocks unless caching_enabled? && cache_system_prompt?
  return system_blocks if system_blocks.nil? || system_blocks.empty?

  total_chars = system_blocks.sum { |block| block[:content].to_s.length }
  # Roughly 4 characters per token, so the token threshold maps to a char count.
  threshold_chars = prompt_caching_settings.fetch(:min_tokens, 1024) * 4
  return system_blocks if total_chars < threshold_chars

  scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  log.info("[llm][prompt_cache] cache_control scope=#{scope} total_chars=#{total_chars}")

  leading = system_blocks[0..-2]
  tagged_last = system_blocks.last.merge(cache_control: { type: scope })
  leading + [tagged_last]
end
#apply_conversation_breakpoint(messages) ⇒ Array<Hash>
Marks the last stable (non-new) message with a cache breakpoint so the provider can cache the conversation prefix up to that point.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 50

# Marks the last stable (non-new) message with a cache breakpoint so the
# provider can cache the conversation prefix up to that point.
#
# NOTE(review): the extracted source had every bare `messages` identifier
# stripped; restored here from the documented signature
# apply_conversation_breakpoint(messages) and the sibling methods'
# return-the-input guard-clause pattern — confirm against the repo file.
#
# @param messages [Array<Hash>] conversation messages, oldest first; the last
#   entry is treated as the new (uncacheable) message
# @return [Array<Hash>] messages with cache_control merged onto the last
#   prior message lacking one, or the input unchanged when not applicable
def apply_conversation_breakpoint(messages)
  return messages unless caching_enabled? && cache_conversation?
  return messages if messages.nil? || messages.size < 2

  scope = prompt_caching_settings.fetch(:scope, 'ephemeral')
  prior = messages[0..-2]
  current = messages.last

  # Last prior message that does not already carry a cache breakpoint.
  last_stable_idx = prior.rindex { |m| !m[:cache_control] }
  return messages unless last_stable_idx

  updated_prior = prior.dup
  updated_prior[last_stable_idx] = prior[last_stable_idx].merge(cache_control: { type: scope })
  log.info("[llm][prompt_cache] conversation_breakpoint scope=#{scope} index=#{last_stable_idx}")
  updated_prior + [current]
end
#sort_tools_deterministically(tools) ⇒ Array<Hash>
Sorts tool schemas deterministically by name so the cache key is stable across calls with the same tool set in different order.
37 38 39 40 41 42 43 |
# File 'lib/legion/llm/pipeline/steps/prompt_cache.rb', line 37

# Sorts tool schemas deterministically by name so the cache key is stable
# across calls with the same tool set in a different order.
#
# @param tools [Array<Hash>] tool schemas with :name keys
# @return [Array<Hash>] a name-sorted copy, or the input unchanged when
#   sorting is disabled or there is nothing to sort
def sort_tools_deterministically(tools)
  return tools unless caching_enabled? && sort_tools?
  return tools if tools.nil? || tools.empty?

  log.debug("[llm][prompt_cache] sort_tools count=#{tools.size}")
  # sort_by keeps the key extraction cheap and the ordering deterministic.
  tools.sort_by { |tool| tool[:name].to_s }
end