Module: LlmOptimizer

Extended by:
Pipeline
Defined in:
lib/llm_optimizer.rb,
lib/llm_optimizer/railtie.rb,
lib/llm_optimizer/version.rb,
lib/llm_optimizer/pipeline.rb,
lib/llm_optimizer/compressor.rb,
lib/llm_optimizer/model_router.rb,
lib/llm_optimizer/configuration.rb,
lib/llm_optimizer/semantic_cache.rb,
lib/llm_optimizer/history_manager.rb,
lib/llm_optimizer/optimize_result.rb,
lib/llm_optimizer/embedding_client.rb,
lib/llm_optimizer/conversation_store.rb,
lib/generators/llm_optimizer/install_generator.rb

Defined Under Namespace

Modules: Generators, Pipeline, WrapperModule
Classes: Compressor, Configuration, ConfigurationError, ConversationStore, EmbeddingClient, EmbeddingError, Error, HistoryManager, ModelRouter, OptimizeResult, Railtie, SemanticCache, TimeoutError

Constant Summary collapse

VERSION =
"0.1.5"

Class Method Summary collapse

Class Method Details

.clear_conversation(conversation_id) ⇒ Object



51
52
53
54
55
56
57
58
59
60
# File 'lib/llm_optimizer.rb', line 51

# Deletes any stored conversation history for +conversation_id+.
#
# @param conversation_id [String] identifier the conversation was stored under
# @return [Boolean] true when a stored conversation existed and was removed
# @raise [ConfigurationError] when no redis_url is configured
# @raise [LlmOptimizer::Error] when the underlying Redis operation fails
def self.clear_conversation(conversation_id)
  unless configuration.redis_url
    raise ConfigurationError, "redis_url must be configured to use clear_conversation"
  end

  connection = build_redis(configuration.redis_url)
  removed    = connection.del("#{ConversationStore::KEY_NAMESPACE}#{conversation_id}")
  removed.positive?
rescue ::Redis::BaseError => e
  raise LlmOptimizer::Error, "Redis error in clear_conversation: #{e.message}"
end

.configuration ⇒ Object



43
44
45
# File 'lib/llm_optimizer.rb', line 43

# Lazily builds and memoizes the module-wide configuration object.
#
# @return [Configuration] the shared configuration instance
def self.configuration
  @configuration = Configuration.new unless @configuration
  @configuration
end

.configure {|temp| ... } ⇒ Object

Yields:

  • (temp)


26
27
28
29
30
31
# File 'lib/llm_optimizer.rb', line 26

# Yields a scratch Configuration for the caller to populate, merges it into
# the global configuration, then validates the merged result.
#
# @yield [staged] a temporary Configuration to set options on
# @return [Object] the result of validate_configuration!
def self.configure
  staged = Configuration.new
  yield staged
  configuration.merge!(staged)
  validate_configuration!(configuration)
end

.optimize(prompt, options = {}) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/llm_optimizer.rb', line 88

# Full optimization pipeline for one prompt. Order matters:
# 1. compress the prompt, 2. route it to a model tier, 3. try the semantic
# cache, 4. load and trim conversation history, 5. call the LLM, 6. persist
# the exchange and cache the response, 7. log and build the result.
#
# The anonymous block (`&`, Ruby 3.1+) is forwarded into build_call_config;
# presumably it supplies the raw LLM caller — confirm against build_call_config.
#
# @param prompt [String] the prompt to optimize and send
# @param options [Hash] per-call options; :conversation_id opts into
#   conversation persistence (checked by validate_conversation_options!)
# @return [Object] the normal/cached result, or a fallback result when a
#   non-configuration error occurs
def self.optimize(prompt, options = {}, &)
  # Monotonic clock: latency measurement is immune to wall-clock adjustments.
  start           = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  call_config     = build_call_config(options, &)
  conversation_id = options[:conversation_id]
  validate_conversation_options!(conversation_id, options, call_config)

  # Keep the pre-compression prompt and token count for logging and fallbacks.
  original_prompt           = prompt
  original_tokens           = Compressor.new.estimate_tokens(prompt)
  prompt, compressed_tokens = compress(prompt, call_config)
  model_tier, model         = route(prompt, call_config)

  # A semantic-cache hit short-circuits the entire LLM call path.
  embedding, cached_result = semantic_cache_lookup(prompt, model, model_tier,
                                                   original_tokens, compressed_tokens,
                                                   original_prompt, start, call_config)
  return cached_result if cached_result

  messages, store = load_conversation(conversation_id, options, call_config)
  messages        = apply_history_manager(messages, call_config)
  response        = raw_llm_call(prompt, messages: messages, model: model, config: call_config)
  messages        = persist_conversation(store, conversation_id, messages, prompt, response)
  # Reuses the embedding computed during lookup so it is not paid for twice.
  store_in_cache(embedding, response, call_config)

  latency_ms = elapsed_ms(start)
  emit_log(call_config.logger, call_config,
           cache_status: :miss, model_tier: model_tier,
           original_tokens: original_tokens, compressed_tokens: compressed_tokens,
           latency_ms: latency_ms, prompt: original_prompt, response: response)
  build_result(response, model, model_tier, :miss, original_tokens, compressed_tokens,
               latency_ms, messages)
rescue EmbeddingError => e
  # Embedding failures degrade gracefully: warn, then answer without the cache.
  configuration.logger.warn("[llm_optimizer] EmbeddingError (outer rescue): #{e.message}")
  fallback_result(original_prompt, original_tokens, options, start)
rescue ConfigurationError
  # Configuration problems are programmer errors — never swallow them.
  raise
rescue LlmOptimizer::Error, StandardError => e
  # NOTE(review): LlmOptimizer::Error is redundant here if it subclasses
  # StandardError (the usual custom-error convention) — confirm and simplify.
  # NOTE(review): if an error is raised before original_prompt/original_tokens
  # are assigned, fallback_result receives nils — verify it tolerates that.
  configuration.logger.error("[llm_optimizer] #{e.class}: #{e.message}\n#{e.backtrace&.first(5)&.join("\n")}")
  fallback_result(original_prompt, original_tokens, options, start)
end

.optimize_post_call(pre_call_result, response, config = configuration) ⇒ Object



148
149
150
# File 'lib/llm_optimizer.rb', line 148

# Second half of the split pipeline: writes +response+ into the semantic
# cache using the embedding captured by optimize_pre_call.
#
# @param pre_call_result [Hash] the hash returned by optimize_pre_call
# @param response [Object] the LLM response to cache
# @param config [Configuration] defaults to the global configuration
def self.optimize_post_call(pre_call_result, response, config = configuration)
  embedding = pre_call_result[:embedding]
  store_in_cache(embedding, response, config)
end

.optimize_pre_call(prompt, config = configuration) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/llm_optimizer.rb', line 127

# First half of the split pipeline: compresses the prompt (when enabled),
# routes it to a model tier, and consults the semantic cache when configured.
#
# @param prompt [String] the prompt to prepare
# @param config [Configuration] defaults to the global configuration
# @return [Hash] keys :prompt, :model, :model_tier, :embedding,
#   :cache_status (:hit or :miss) and :response (nil on a miss)
def self.optimize_pre_call(prompt, config = configuration)
  prompt = Compressor.new.compress(prompt) if config.compress_prompt
  tier   = ModelRouter.new(config).route(prompt)
  model  = tier == :simple ? config.simple_model : config.complex_model

  base = { prompt: prompt, model: model, model_tier: tier,
           embedding: nil, cache_status: :miss, response: nil }

  # Cache lookup needs both the feature flag and a Redis endpoint.
  return base unless config.use_semantic_cache && config.redis_url

  started          = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  embedding, found = semantic_cache_lookup(prompt, model, tier, nil, nil,
                                           prompt, started, config)

  return base.merge(embedding: embedding) unless found

  base.merge(embedding: embedding, cache_status: :hit, response: found.response)
end

.reset_configuration! ⇒ Object



47
48
49
# File 'lib/llm_optimizer.rb', line 47

# Throws away the memoized configuration and rebuilds it from defaults.
#
# @return [Configuration] a brand-new configuration object
def self.reset_configuration!
  @configuration = nil
  configuration
end

.validate_configuration!(config) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/llm_optimizer.rb', line 33

# Downgrades use_semantic_cache to false (with a warning) when it is enabled
# without an embedding_caller, since the cache cannot work without embeddings.
#
# @param config [Configuration] the configuration to sanity-check
def self.validate_configuration!(config)
  misconfigured = config.use_semantic_cache && config.embedding_caller.nil?
  return unless misconfigured

  config.logger.warn(
    "[llm_optimizer] use_semantic_cache is true but no embedding_caller is configured. " \
    "Semantic caching will be skipped. Set config.embedding_caller to enable it."
  )
  config.use_semantic_cache = false
end

.wrap_client(client_class) ⇒ Object



82
83
84
85
86
# File 'lib/llm_optimizer.rb', line 82

# Idempotently prepends WrapperModule onto +client_class+ so its calls are
# routed through the optimizer. Calling it twice is a no-op.
#
# @param client_class [Class] the LLM client class to instrument
# @return [Class, nil] the class when newly wrapped, nil when already wrapped
def self.wrap_client(client_class)
  already_wrapped = client_class.ancestors.include?(WrapperModule)
  client_class.prepend(WrapperModule) unless already_wrapped
end