Class: Clacky::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =
10
RETRY_DELAY =
5  # seconds

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false) ⇒ Client

Returns a new instance of Client.



11
12
13
14
15
16
17
18
# File 'lib/clacky/client.rb', line 11

# Build a client bound to one API endpoint, model and wire format.
def initialize(api_key, base_url:, model:, anthropic_format: false)
  @api_key   = api_key
  @model     = model
  @base_url  = base_url
  # Whether requests use Anthropic's native /v1/messages protocol.
  @use_anthropic_format = anthropic_format
  # Bedrock detection: ABSK key prefix (native AWS) or abs- model prefix (Clacky AI proxy).
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message’s content with a cache_control marker.



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/clacky/client.rb', line 217

# Attach an ephemeral cache_control marker to a message's content.
# String content is wrapped into a single marked text block; array content
# gets the marker on its final block; anything else is returned untouched.
def add_cache_control_to_message(msg)
  body = msg[:content]

  if body.is_a?(String)
    wrapped = [{ type: "text", text: body, cache_control: { type: "ephemeral" } }]
    msg.merge(content: wrapped)
  elsif body.is_a?(Array)
    last = body.size - 1
    marked = body.each_with_index.map do |block, i|
      i == last ? block.merge(cache_control: { type: "ephemeral" }) : block
    end
    msg.merge(content: marked)
  else
    msg
  end
end

#anthropic_connectionObject



267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/clacky/client.rb', line 267

# Memoized Faraday connection configured for Anthropic's native API.
def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |f|
    f.headers["Content-Type"]      = "application/json"
    f.headers["x-api-key"]         = @api_key
    f.headers["anthropic-version"] = "2023-06-01"
    f.headers["anthropic-dangerous-direct-browser-access"] = "true"
    f.options.timeout      = 300 # long read timeout for slow generations
    f.options.open_timeout = 10
    # NOTE(review): TLS verification is disabled — confirm this is intentional.
    f.ssl.verify = false
    f.adapter Faraday.default_adapter
  end
end

#anthropic_format?(model = nil) ⇒ Boolean

Returns true when the client is talking directly to the Anthropic API (determined at construction time via the anthropic_format flag).

Returns:

  • (Boolean)


27
28
29
# File 'lib/clacky/client.rb', line 27

# True when talking directly to the Anthropic API (decided at construction).
# The model argument is unused; kept for interface compatibility with callers.
def anthropic_format?(model = nil)
  return false if @use_bedrock

  @use_anthropic_format
end

#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
           messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks only messages[-1]; in Turn N+1 that same message is now [-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/clacky/client.rb', line 200

# Mark the last two non-injected messages with cache_control so consecutive
# turns share a cached prefix (two markers keep the previous breakpoint alive
# on the next turn; one marker alone would miss the cache every turn).
# Compression instructions (system_injected) are never cached.
def apply_message_caching(messages)
  return messages if messages.empty?

  # Walk backwards collecting up to two cacheable message indices.
  targets = []
  idx = messages.length - 1
  while idx >= 0 && targets.size < 2
    targets << idx unless is_compression_instruction?(messages[idx])
    idx -= 1
  end

  messages.each_with_index.map do |msg, i|
    targets.include?(i) ? add_cache_control_to_message(msg) : msg
  end
end

#bedrock?Boolean

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

  • (Boolean)


21
22
23
# File 'lib/clacky/client.rb', line 21

# True when this client targets the AWS Bedrock Converse API
# (flag is set once in #initialize from the api_key/model shape).
def bedrock?
  @use_bedrock
end

#bedrock_connectionObject



245
246
247
248
249
250
251
252
253
254
# File 'lib/clacky/client.rb', line 245

# Memoized Faraday connection for the Bedrock Converse endpoint
# (bearer-token auth, same timeouts as the other connections).
def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |f|
    f.headers["Content-Type"]  = "application/json"
    f.headers["Authorization"] = "Bearer #{@api_key}"
    f.options.timeout      = 300
    f.options.open_timeout = 10
    # NOTE(review): TLS verification is disabled — confirm this is intentional.
    f.ssl.verify = false
    f.adapter Faraday.default_adapter
  end
end

#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.



241
242
243
# File 'lib/clacky/client.rb', line 241

# Converse API path for a given Bedrock model ID.
def bedrock_endpoint(model)
  format("/model/%s/converse", model)
end

#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)



317
318
319
320
321
322
# File 'lib/clacky/client.rb', line 317

# Detect gateways that return an HTML error page with a 200 status and
# convert that into a retryable failure instead of a JSON parse crash.
def check_html_response(response)
  text = response.body.to_s.lstrip
  looks_like_html = %w[<!DOCTYPE <!doctype <html <HTML].any? { |prefix| text.start_with?(prefix) }
  raise RetryableError, "LLM service temporarily unavailable (received HTML error page), retrying..." if looks_like_html
end

#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────



339
340
341
342
343
344
345
# File 'lib/clacky/client.rb', line 339

# Recursively copy nested hashes and arrays so mutations of the copy can
# never leak into the original. Scalars (and any other object) are shared.
def deep_clone(obj)
  if obj.is_a?(Hash)
    obj.transform_values { |v| deep_clone(v) }
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ Object



324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/clacky/client.rb', line 324

# Pull the most specific human-readable error message out of an API error
# payload, falling back to the (truncated) raw body.
#
# error_body — parsed JSON body (Hash) or nil/anything else when unparseable.
# raw_body   — the raw response body string.
#
# Fix: the HTML prefix list now matches check_html_response exactly
# (previously "<!doctype" and "<HTML" variants slipped through).
def extract_error_message(error_body, raw_body)
  if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  # Without a parsed hash there is nothing more specific than the raw body.
  return raw_body unless error_body.is_a?(Hash)

  # Probe known fields from most to least specific; each &.then short-circuits.
  error_body["upstreamMessage"]&.then { |m| return m unless m.empty? }
  error_body.dig("error", "message")&.then { |m| return m } if error_body["error"].is_a?(Hash)
  error_body["message"]&.then             { |m| return m }
  # Last resorts: a string "error" field, else the raw body truncated to ~200 chars.
  error_body["error"].is_a?(String) ? error_body["error"] : (raw_body.to_s[0..200] + (raw_body.to_s.length > 200 ? "..." : ""))
end

#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: “tool”) regardless of API type — conversion to API-native happens inside each send_*_request.



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/clacky/client.rb', line 103

# Convert tool results into canonical messages (role: "tool") regardless of
# API flavor; per-API conversion happens later inside each send_*_request.
# The model keyword is unused here; kept for interface compatibility.
def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end
  formatter.format_tool_results(response, tool_results)
end

#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────



282
283
284
285
286
287
# File 'lib/clacky/client.rb', line 282

# Turn a test-connection HTTP response into { success: } result hash.
def handle_test_response(response)
  return { success: true } if response.status == 200

  parsed =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil # body was not JSON; extract_error_message falls back to the raw body
    end
  { success: false, error: extract_error_message(parsed, response.body) }
end

#is_compression_instruction?(message) ⇒ Boolean

Returns:

  • (Boolean)


234
235
236
# File 'lib/clacky/client.rb', line 234

# True only for hash messages explicitly flagged as system-injected
# compression instructions (those must never receive cache markers).
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end

#openai_connectionObject



256
257
258
259
260
261
262
263
264
265
# File 'lib/clacky/client.rb', line 256

# Memoized Faraday connection for OpenAI-compatible chat endpoints
# (bearer-token auth, same timeouts as the other connections).
def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |f|
    f.headers["Content-Type"]  = "application/json"
    f.headers["Authorization"] = "Bearer #{@api_key}"
    f.options.timeout      = 300
    f.options.open_timeout = 10
    # NOTE(review): TLS verification is disabled — confirm this is intentional.
    f.ssl.verify = false
    f.adapter Faraday.default_adapter
  end
end

#parse_simple_anthropic_response(response) ⇒ Object



160
161
162
163
164
# File 'lib/clacky/client.rb', line 160

# Concatenate all text blocks from an Anthropic /v1/messages response body.
# Non-200 statuses are routed through raise_error.
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = JSON.parse(response.body)
  blocks  = payload["content"] || []
  blocks.filter_map { |b| b["text"] if b["type"] == "text" }.join
end

#parse_simple_bedrock_response(response) ⇒ Object



137
138
139
140
141
142
143
144
# File 'lib/clacky/client.rb', line 137

# Concatenate the text fragments from a Bedrock Converse response body.
# Non-200 statuses are routed through raise_error.
def parse_simple_bedrock_response(response)
  raise_error(response) if response.status != 200

  payload = JSON.parse(response.body)
  blocks  = payload.dig("output", "message", "content") || []
  blocks.filter_map { |b| b["text"] }.join
end

#parse_simple_openai_response(response) ⇒ Object



181
182
183
184
# File 'lib/clacky/client.rb', line 181

# Extract the first choice's message content from an OpenAI-style response.
# Non-200 statuses are routed through raise_error.
def parse_simple_openai_response(response)
  raise_error(response) if response.status != 200

  payload = JSON.parse(response.body)
  payload["choices"].first["message"]["content"]
end

#raise_error(response) ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# File 'lib/clacky/client.rb', line 289

# Map an HTTP error response onto the client's exception hierarchy:
# RetryableError for transient issues, BadRequestError for malformed
# requests (history should be rolled back), AgentError for everything else.
def raise_error(response)
  # Best-effort decode; extract_error_message falls back to the raw body.
  parsed  = JSON.parse(response.body) rescue nil
  message = extract_error_message(parsed, response.body)
  status  = response.status

  if status == 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues
    # in 400, but some proxy/relay providers do — inspect the message first.
    # Bedrock also returns ThrottlingException as 400 instead of 429.
    if message.match?(/ThrottlingException|unavailable|quota/i)
      hint = message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "Rate limit or service issue (400): #{message}#{hint}"
    end
    # True bad request — our payload was malformed; caller rolls back history.
    raise BadRequestError, "API request failed (400): #{message}"
  elsif status == 401
    raise AgentError, "Invalid API key"
  elsif status == 402
    raise AgentError, "Billing or payment issue (possibly out of credits): #{message}"
  elsif status == 403
    raise AgentError, "Access denied: #{message}"
  elsif status == 404
    raise AgentError, "API endpoint not found: #{message}"
  elsif status == 429
    raise RetryableError, "Rate limit exceeded, please wait a moment"
  elsif (500..599).cover?(status)
    raise RetryableError, "LLM service temporarily unavailable (#{status}), retrying..."
  else
    raise AgentError, "Unexpected error (#{status}): #{message}"
  end
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────



148
149
150
151
152
153
154
155
156
157
158
# File 'lib/clacky/client.rb', line 148

# POST a tool-enabled conversation to Anthropic's /v1/messages and return
# the canonical parsed response hash.
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
  # Mark cache breakpoints before serializing when caching is on.
  messages = apply_message_caching(messages) if caching_enabled

  payload  = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled)
  response = anthropic_connection.post("v1/messages") { |req| req.body = payload.to_json }

  raise_error(response) if response.status != 200
  check_html_response(response)
  MessageFormat::Anthropic.parse_response(JSON.parse(response.body))
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────



128
129
130
131
132
133
134
135
# File 'lib/clacky/client.rb', line 128

# POST a tool-enabled conversation to the Bedrock Converse endpoint and
# return the canonical parsed response hash. Cache markers are handled
# inside MessageFormat::Bedrock.build_request_body, not here.
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
  payload  = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
  response = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }

  raise_error(response) if response.status != 200
  check_html_response(response)
  MessageFormat::Bedrock.parse_response(JSON.parse(response.body))
end

#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.



61
62
63
64
# File 'lib/clacky/client.rb', line 61

# Convenience wrapper: send one user-role string and return the reply text.
def send_message(content, model:, max_tokens:)
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/clacky/client.rb', line 67

# Send a plain (tool-free) messages array and return the reply text,
# dispatching on the wire format chosen at construction time.
def send_messages(messages, model:, max_tokens:)
  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply   = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }
    parse_simple_bedrock_response(reply)
  elsif anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    reply   = anthropic_connection.post("v1/messages") { |req| req.body = payload.to_json }
    parse_simple_anthropic_response(reply)
  else
    payload = { model: model, max_tokens: max_tokens, messages: messages }
    reply   = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }
    parse_simple_openai_response(reply)
  end
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false) ⇒ Object

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }



87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/clacky/client.rb', line 87

# Send messages with tool-calling support; returns the canonical response
# hash { content:, tool_calls:, finish_reason:, usage: }.
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
  # Caching only applies to models that honor cache_control markers.
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  # Deep-copy so cache markers added downstream never leak into caller state.
  working = deep_clone(messages)

  sender =
    if bedrock?
      :send_bedrock_request
    elsif anthropic_format?
      :send_anthropic_request
    else
      :send_openai_request
    end
  send(sender, working, model, tools, max_tokens, caching_enabled)
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────



168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/clacky/client.rb', line 168

# POST a tool-enabled conversation to an OpenAI-compatible endpoint and
# return the canonical parsed response hash.
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
  # OpenRouter proxies Claude with the same cache_control field convention as
  # Anthropic direct, so markers are applied here too when caching is enabled.
  messages = apply_message_caching(messages) if caching_enabled

  payload  = MessageFormat::OpenAI.build_request_body(messages, model, tools, max_tokens, caching_enabled)
  response = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }

  raise_error(response) if response.status != 200
  check_html_response(response)
  MessageFormat::OpenAI.parse_response(JSON.parse(response.body))
end

#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude 3.5+ models that support prompt caching.

Returns:

  • (Boolean)


118
119
120
121
122
123
# File 'lib/clacky/client.rb', line 118

# True for Claude model IDs the project treats as supporting prompt caching
# (3.5+, 4+, and sonnet-3/4 naming variants).
def supports_prompt_caching?(model)
  name = model.to_s.downcase
  name.include?("claude") && name.match?(/claude(?:-3[-.]?[5-9]|-[4-9]|-sonnet-[34])/)
end

#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: “…” }.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/clacky/client.rb', line 35

# Probe the configured API with a minimal one-token request.
# Returns { success: true } or { success: false, error: "…" }; never raises.
def test_connection(model:)
  response =
    if bedrock?
      payload = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload }
    elsif anthropic_format?
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      anthropic_connection.post("v1/messages") { |req| req.body = payload }
    else
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      openai_connection.post("chat/completions") { |req| req.body = payload }
    end
  handle_test_response(response)
rescue Faraday::Error => e
  # Transport-level failure (DNS, refused connection, timeout).
  { success: false, error: "Connection error: #{e.message}" }
rescue => e
  Clacky::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end