Class: Clacky::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =
10
RETRY_DELAY =
5 # seconds

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false) ⇒ Client

Returns a new instance of Client.



11
12
13
14
15
16
17
18
# File 'lib/clacky/client.rb', line 11

# Build a client bound to a single provider endpoint.
#
# @param api_key [String] credential sent as Bearer token or x-api-key
# @param base_url [String] API root URL for the provider
# @param model [String] default model identifier
# @param anthropic_format [Boolean] request the Anthropic-native wire format
def initialize(api_key, base_url:, model:, anthropic_format: false)
  @api_key  = api_key
  @model    = model
  @base_url = base_url
  @use_anthropic_format = anthropic_format
  # Bedrock detection wins over anthropic_format: ABSK key prefix
  # (native AWS) or abs- model prefix (Clacky AI proxy).
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message’s content with a cache_control marker.



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/clacky/client.rb', line 235

# Attach an ephemeral cache_control marker to a message's content.
#
# A plain String content is wrapped into a single text block carrying the
# marker; an Array content gets the marker merged onto its final block only.
# Any other content shape is returned untouched.
#
# @param msg [Hash] canonical message with a :content key
# @return [Hash] a new message hash (original is not mutated), or +msg+ itself
#   when the content shape is unsupported
def add_cache_control_to_message(msg)
  body = msg[:content]

  if body.is_a?(String)
    wrapped = [{ type: "text", text: body, cache_control: { type: "ephemeral" } }]
    msg.merge(content: wrapped)
  elsif body.is_a?(Array)
    last = body.length - 1
    marked = body.each_with_index.map do |block, i|
      i == last ? block.merge(cache_control: { type: "ephemeral" }) : block
    end
    msg.merge(content: marked)
  else
    # Unknown content shape — leave the message exactly as given.
    msg
  end
end

#anthropic_connectionObject



285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/clacky/client.rb', line 285

# Memoized Faraday connection for the Anthropic Messages API.
# Authenticates via the x-api-key header and pins the API version header.
#
# @return [Faraday::Connection]
def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]   = "application/json"
    conn.headers["x-api-key"]      = @api_key
    conn.headers["anthropic-version"] = "2023-06-01"
    # NOTE(review): this header is normally for browser-side clients;
    # presumably required by the upstream/proxy here — confirm.
    conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
    conn.options.timeout      = 300   # long read timeout for slow generations
    conn.options.open_timeout = 10
    # NOTE(review): TLS certificate verification is disabled — exposes traffic
    # to MITM. Confirm this is intentional (e.g. self-signed internal proxy).
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#anthropic_format?(model = nil) ⇒ Boolean

Returns true when the client is talking directly to the Anthropic API (determined at construction time via the anthropic_format flag).

Returns:

  • (Boolean)


27
28
29
# File 'lib/clacky/client.rb', line 27

# Whether the client talks directly to the Anthropic Messages API.
# Bedrock detection takes precedence over the anthropic_format flag
# set at construction time.
#
# @param model [String, nil] accepted for call-site compatibility; unused here
# @return [Boolean]
def anthropic_format?(model = nil)
  @use_anthropic_format && !@use_bedrock
end

#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
           messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks only messages[-1]; in Turn N+1 that same message is now [-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/clacky/client.rb', line 218

# Mark the last two cacheable messages with cache_control markers.
#
# Walks backwards from the tail, skipping compression-instruction messages,
# and collects up to two indices; those messages are then rewritten with an
# ephemeral cache_control marker. Two markers keep the cached prefix stable
# across consecutive turns (see class docs).
#
# @param messages [Array<Hash>] canonical message list
# @return [Array<Hash>] a new array with markers applied (input untouched)
def apply_message_caching(messages)
  return messages if messages.empty?

  marked = []
  cursor = messages.length - 1
  while cursor >= 0 && marked.length < 2
    marked << cursor unless is_compression_instruction?(messages[cursor])
    cursor -= 1
  end

  messages.each_with_index.map do |msg, i|
    marked.include?(i) ? add_cache_control_to_message(msg) : msg
  end
end

#bedrock?Boolean

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

  • (Boolean)


21
22
23
# File 'lib/clacky/client.rb', line 21

# @return [Boolean] true when the client targets the AWS Bedrock Converse API
#   (detected from the API key / model prefix at construction time)
def bedrock?
  @use_bedrock
end

#bedrock_connectionObject



263
264
265
266
267
268
269
270
271
272
# File 'lib/clacky/client.rb', line 263

# Memoized Faraday connection for the Bedrock Converse endpoint.
# Authenticates with a Bearer token (proxy-style, not SigV4).
#
# @return [Faraday::Connection]
def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = 300   # long read timeout for slow generations
    conn.options.open_timeout = 10
    # NOTE(review): TLS certificate verification is disabled — exposes traffic
    # to MITM. Confirm this is intentional (e.g. self-signed internal proxy).
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.



259
260
261
# File 'lib/clacky/client.rb', line 259

# Bedrock Converse API path for the given model ID.
#
# @param model [String] Bedrock model identifier
# @return [String] request path, e.g. "/model/<id>/converse"
def bedrock_endpoint(model)
  format("/model/%s/converse", model)
end

#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)



335
336
337
338
339
340
# File 'lib/clacky/client.rb', line 335

# Detect an HTML error page delivered with a 200 status (gateways sometimes
# do this) and convert it into a retryable failure instead of a JSON parse
# crash further downstream.
#
# @param response [#body] HTTP response
# @raise [RetryableError] when the body looks like an HTML document
# @return [nil]
def check_html_response(response)
  stripped = response.body.to_s.lstrip
  looks_like_html = ["<!DOCTYPE", "<!doctype", "<html", "<HTML"].any? do |prefix|
    stripped.start_with?(prefix)
  end
  raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..." if looks_like_html
end

#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────



383
384
385
386
387
388
389
# File 'lib/clacky/client.rb', line 383

# Recursively copy nested Hash/Array structures so callers can mutate the
# clone without touching the original. Leaf values (strings, numbers, etc.)
# are shared, not duplicated.
#
# @param obj [Object] value to clone
# @return [Object] a structurally independent copy
def deep_clone(obj)
  if obj.is_a?(Hash)
    copy = {}
    obj.each { |key, value| copy[key] = deep_clone(value) }
    copy
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/clacky/client.rb', line 342

# Produce the best human-readable error message from a parsed error body
# and/or the raw response body.
#
# Lookup order: upstreamMessage → error.message → message → error (string)
# → truncated raw body.
#
# @param error_body [Hash, nil] JSON-parsed body (nil when parsing failed)
# @param raw_body [String, Object] raw response body, used as fallback
# @return [String, Object] error description (raw_body when nothing better)
def extract_error_message(error_body, raw_body)
  # Match the same HTML prefixes as check_html_response (the original missed
  # the lowercase "<!doctype" and uppercase "<HTML" variants).
  if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  return raw_body unless error_body.is_a?(Hash)

  upstream = error_body["upstreamMessage"]
  return upstream if !upstream.nil? && !upstream.empty?

  if error_body["error"].is_a?(Hash)
    nested = error_body.dig("error", "message")
    return nested unless nested.nil?
  end

  plain = error_body["message"]
  return plain unless plain.nil?

  if error_body["error"].is_a?(String)
    error_body["error"]
  else
    # Fall back to the raw body, truncated to keep logs readable.
    text = raw_body.to_s
    text.length > 200 ? "#{text[0..200]}..." : text[0..200]
  end
end

#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: “tool”) regardless of API type — conversion to API-native happens inside each send_*_request.



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/clacky/client.rb', line 103

# Format tool results into canonical messages ready to append to the
# conversation history. Delegates to the formatter matching the active
# wire format; returns [] immediately when there are no results.
#
# @param response [Object] the prior model response the tools answered
# @param tool_results [Array] tool execution results
# @param model [String] model identifier (part of the public signature)
# @return [Array<Hash>] canonical-format messages
def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end
  formatter.format_tool_results(response, tool_results)
end

#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────



300
301
302
303
304
305
# File 'lib/clacky/client.rb', line 300

# Convert a connectivity-test HTTP response into a result hash.
#
# @param response [#status, #body] HTTP response
# @return [Hash] { success: true } on 200, otherwise
#   { success: false, error: <message> }
def handle_test_response(response)
  return { success: true } if response.status == 200

  parsed =
    begin
      JSON.parse(response.body)
    rescue StandardError
      # Unparseable body — fall back to the raw body for the message.
      nil
    end
  { success: false, error: extract_error_message(parsed, response.body) }
end

#is_compression_instruction?(message) ⇒ Boolean

Returns:

  • (Boolean)


252
253
254
# File 'lib/clacky/client.rb', line 252

# True for ephemeral compression-instruction messages (flagged with
# system_injected: true) — these must never receive cache markers.
#
# @param message [Object] candidate message
# @return [Boolean]
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end

#openai_connectionObject



274
275
276
277
278
279
280
281
282
283
# File 'lib/clacky/client.rb', line 274

# Memoized Faraday connection for OpenAI-compatible chat/completions
# endpoints. Authenticates with a Bearer token.
#
# @return [Faraday::Connection]
def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = 300   # long read timeout for slow generations
    conn.options.open_timeout = 10
    # NOTE(review): TLS certificate verification is disabled — exposes traffic
    # to MITM. Confirm this is intentional (e.g. self-signed internal proxy).
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#parse_simple_anthropic_response(response) ⇒ Object



175
176
177
178
179
# File 'lib/clacky/client.rb', line 175

# Extract the concatenated text from a simple (tool-free) Anthropic response.
#
# @param response [#status, #body] HTTP response
# @return [String] all text blocks joined together
# @raise [via raise_error] on non-200 status
def parse_simple_anthropic_response(response)
  raise_error(response) unless response.status == 200
  parsed = safe_json_parse(response.body, context: "LLM response")
  blocks = parsed["content"] || []
  text_blocks = blocks.select { |block| block["type"] == "text" }
  text_blocks.map { |block| block["text"] }.join
end

#parse_simple_bedrock_response(response) ⇒ Object



151
152
153
154
155
156
157
158
# File 'lib/clacky/client.rb', line 151

# Extract the concatenated text from a simple (tool-free) Bedrock Converse
# response.
#
# @param response [#status, #body] HTTP response
# @return [String] all text fragments joined together
# @raise [via raise_error] on non-200 status
def parse_simple_bedrock_response(response)
  raise_error(response) unless response.status == 200
  parsed = safe_json_parse(response.body, context: "LLM response")
  blocks = parsed.dig("output", "message", "content") || []
  texts = []
  blocks.each { |block| texts << block["text"] if block["text"] }
  texts.join
end

#parse_simple_openai_response(response) ⇒ Object



198
199
200
201
202
# File 'lib/clacky/client.rb', line 198

# Extract the assistant message content from a simple (tool-free)
# OpenAI-compatible response.
#
# @param response [#status, #body] HTTP response
# @return [String, nil] the message content
# @raise [RetryableError] when the 200 body lacks the choices/message shape
# @raise [via raise_error] on non-200 status
def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200
  parsed_body = safe_json_parse(response.body, context: "LLM response")
  # The original chained ["choices"].first["message"]["content"] and crashed
  # with NoMethodError on a malformed-but-JSON 200 body (e.g. empty choices).
  # Treat that as a retryable upstream fault instead.
  message = parsed_body.dig("choices", 0, "message")
  if message.nil?
    raise RetryableError, "[LLM] Failed to parse LLM response: missing choices/message structure"
  end
  message["content"]
end

#raise_error(response) ⇒ Object



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# File 'lib/clacky/client.rb', line 307

# Map an HTTP error response onto the client's exception hierarchy.
# RetryableError → transient, retry; BadRequestError → our request was
# malformed; AgentError → permanent configuration/access problem.
#
# @param response [#status, #body] HTTP response
# @raise [RetryableError, BadRequestError, AgentError] always raises
def raise_error(response)
  parsed =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  message = extract_error_message(parsed, response.body)
  status  = response.status

  case status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability
    # issues in 400, but some proxy/relay providers do — inspect the message.
    # Bedrock also returns ThrottlingException as 400 instead of 429.
    if message.match?(/ThrottlingException|unavailable|quota/i)
      hint = message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{message}#{hint}"
    end

    # Genuinely malformed request from our side — caller rolls back history
    # so the broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{message}"
  when 401
    raise AgentError, "[LLM] Invalid API key"
  when 402
    raise AgentError, "[LLM] Billing or payment issue (possibly out of credits): #{message}"
  when 403
    raise AgentError, "[LLM] Access denied: #{message}"
  when 404
    raise AgentError, "[LLM] API endpoint not found: #{message}"
  when 429
    raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599
    raise RetryableError, "[LLM] Service temporarily unavailable (#{status}), retrying..."
  else
    raise AgentError, "[LLM] Unexpected error (#{status}): #{message}"
  end
end

#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

Parse JSON with user-friendly error messages.

Parameters:

  • json_string (String)

    the JSON string to parse

  • context (String) (defaults to: "response")

    a description of what’s being parsed (e.g., “LLM response”)

Returns:

  • (Hash, Array)

    the parsed JSON

Raises:

  • (RetryableError)

    if parsing fails (indicates a malformed LLM response)



360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/clacky/client.rb', line 360

# Parse JSON, converting parser failures into user-friendly retryable errors.
# Malformed bodies usually mean a truncated LLM response, corrupted upstream
# data, or a proxy/gateway mangling the payload.
#
# @param json_string [String] the JSON string to parse
# @param context [String] description of what is being parsed
# @return [Hash, Array] the parsed JSON
# @raise [RetryableError] when parsing fails
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError
  text = json_string.to_s
  error_detail =
    if text.strip.empty?
      "received empty response"
    elsif text.bytesize > 500
      # A large unparseable body is most likely a truncated stream.
      "response was truncated or malformed (#{text.bytesize} bytes received)"
    else
      "response format is invalid"
    end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                       "This usually means the AI service returned incomplete or corrupted data. " \
                       "The request will be retried automatically."
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────



162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/clacky/client.rb', line 162

# POST a tool-capable request to the Anthropic Messages API and return the
# canonical parsed response.
#
# @param messages [Array<Hash>] canonical messages
# @param model [String] model identifier
# @param tools [Array] tool definitions
# @param max_tokens [Integer] response token budget
# @param caching_enabled [Boolean] whether to mark cache breakpoints
# @return [Hash] canonical response from MessageFormat::Anthropic
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled)
  # Mark cache breakpoints before serializing when caching is on.
  payload_messages = caching_enabled ? apply_message_caching(messages) : messages

  payload  = MessageFormat::Anthropic.build_request_body(payload_messages, model, tools, max_tokens, caching_enabled)
  response = anthropic_connection.post("v1/messages") { |req| req.body = payload.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)
  MessageFormat::Anthropic.parse_response(safe_json_parse(response.body, context: "LLM response"))
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────



141
142
143
144
145
146
147
148
149
# File 'lib/clacky/client.rb', line 141

# POST a tool-capable request to the Bedrock Converse API and return the
# canonical parsed response.
#
# @param messages [Array<Hash>] canonical messages
# @param model [String] Bedrock model identifier
# @param tools [Array] tool definitions
# @param max_tokens [Integer] response token budget
# @param caching_enabled [Boolean] forwarded to the request builder
# @return [Hash] canonical response from MessageFormat::Bedrock
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled)
  payload  = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)
  response = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)
  MessageFormat::Bedrock.parse_response(safe_json_parse(response.body, context: "LLM response"))
end

#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.



61
62
63
64
# File 'lib/clacky/client.rb', line 61

# Convenience wrapper: send a single user string and return the reply text.
#
# @param content [String] the user message
# @param model [String] model identifier
# @param max_tokens [Integer] response token budget
# @return [String] reply text
def send_message(content, model:, max_tokens:)
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/clacky/client.rb', line 67

# Send a messages array (no tools, no caching) and return the reply text,
# dispatching on the active wire format.
#
# @param messages [Array<Hash>] canonical messages
# @param model [String] model identifier
# @param max_tokens [Integer] response token budget
# @return [String] reply text
def send_messages(messages, model:, max_tokens:)
  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply   = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }
    parse_simple_bedrock_response(reply)
  elsif anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    reply   = anthropic_connection.post("v1/messages") { |req| req.body = payload.to_json }
    parse_simple_anthropic_response(reply)
  else
    payload = { model: model, max_tokens: max_tokens, messages: messages }
    reply   = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }
    parse_simple_openai_response(reply)
  end
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false) ⇒ Object

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage: }



87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/clacky/client.rb', line 87

# Send messages with tool-calling support, dispatching on the active wire
# format. Caching is only applied when requested AND the model supports it.
#
# @param messages [Array<Hash>] canonical messages (deep-cloned before use so
#   per-request mutations never leak into the caller's history)
# @param model [String] model identifier
# @param tools [Array] tool definitions
# @param max_tokens [Integer] response token budget
# @param enable_caching [Boolean] request prompt caching
# @return [Hash] canonical response { content:, tool_calls:, finish_reason:, usage: }
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false)
  caching  = enable_caching && supports_prompt_caching?(model)
  snapshot = deep_clone(messages)

  if bedrock?
    send_bedrock_request(snapshot, model, tools, max_tokens, caching)
  elsif anthropic_format?
    send_anthropic_request(snapshot, model, tools, max_tokens, caching)
  else
    send_openai_request(snapshot, model, tools, max_tokens, caching)
  end
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────



183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/clacky/client.rb', line 183

# POST a tool-capable request to an OpenAI-compatible endpoint and return the
# canonical parsed response.
#
# @param messages [Array<Hash>] canonical messages
# @param model [String] model identifier
# @param tools [Array] tool definitions
# @param max_tokens [Integer] response token budget
# @param caching_enabled [Boolean] whether to mark cache breakpoints
#   (OpenRouter proxies Claude using the same cache_control convention
#   as Anthropic direct)
# @return [Hash] canonical response from MessageFormat::OpenAI
def send_openai_request(messages, model, tools, max_tokens, caching_enabled)
  payload_messages = caching_enabled ? apply_message_caching(messages) : messages

  payload  = MessageFormat::OpenAI.build_request_body(payload_messages, model, tools, max_tokens, caching_enabled)
  response = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)
  MessageFormat::OpenAI.parse_response(safe_json_parse(response.body, context: "LLM response"))
end

#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).

Handles both direct model names (e.g. “claude-haiku-4-5”) and Clacky AI Bedrock proxy names with “abs-” prefix (e.g. “abs-claude-haiku-4-5”).

Why only Claude models:

- MiniMax uses automatic server-side caching (no cache_control needed from client)
- Kimi uses a proprietary prompt_cache_key param, not cache_control
- MiMo has no documented caching API
- Only Claude (direct, OpenRouter, or ClackyAI Bedrock proxy) consumes our
  cache_control / cachePoint markers

Returns:

  • (Boolean)


128
129
130
131
132
133
134
135
136
# File 'lib/clacky/client.rb', line 128

# True for Claude models that support prompt caching (gen 3.5+ or gen 4+).
# Handles both direct names ("claude-haiku-4-5") and Clacky AI Bedrock proxy
# names with an "abs-" prefix ("abs-claude-haiku-4-5").
#
# Only Claude consumes our cache_control/cachePoint markers: MiniMax caches
# server-side automatically, Kimi uses a proprietary prompt_cache_key param,
# and MiMo has no documented caching API.
#
# @param model [String, #to_s] model identifier
# @return [Boolean]
def supports_prompt_caching?(model)
  normalized = model.to_s.downcase.sub(/^abs-/, "")
  return false unless normalized.include?("claude")

  # Claude gen 3.5+ (3.5/3.6/3.7…) or gen 4+ in any naming format:
  #   claude-3.5-sonnet-…  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
  normalized.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end

#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: “…” }.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/clacky/client.rb', line 35

# Probe the configured endpoint with a minimal one-message request.
#
# @param model [String] model identifier to test with
# @return [Hash] { success: true } or { success: false, error: "…" }
def test_connection(model:)
  probe = [{ role: "user", content: "hi" }]

  response =
    if bedrock?
      body = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body }
    elsif anthropic_format?
      body = { model: model, max_tokens: 16, messages: probe }.to_json
      anthropic_connection.post("v1/messages") { |r| r.body = body }
    else
      body = { model: model, max_tokens: 16, messages: probe }.to_json
      openai_connection.post("chat/completions") { |r| r.body = body }
    end

  handle_test_response(response)
rescue Faraday::Error => e
  # Transport-level failure (DNS, refused connection, timeout).
  { success: false, error: "Connection error: #{e.message}" }
rescue StandardError => e
  Clacky::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end