Class: Clacky::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =
10
RETRY_DELAY =

Delay between retry attempts, in seconds.

5

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false) ⇒ Client

Returns a new instance of Client.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/clacky/client.rb', line 11

# Builds a client bound to one API key, base URL and model, and precomputes
# the routing decisions (Bedrock / Anthropic format / OpenAI format) and the
# vision capability that later requests consult.
#
# @param api_key credential sent with every request
# @param base_url root URL handed to every connection
# @param model model name used for provider and capability lookups
# @param anthropic_format [Boolean] explicit fallback, used when the provider
#   lookup does not itself select the Anthropic format
def initialize(api_key, base_url:, model:, anthropic_format: false)
  @api_key  = api_key
  @base_url = base_url
  @model    = model

  # Bedrock is recognised from the key/model pair: ABSK key prefix (native
  # AWS) or abs- model prefix (Clacky AI proxy).
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # A single provider lookup feeds the format decision, the per-provider
  # connection headers and the capability check.
  resolved = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # A known provider may select the Anthropic format per model (e.g.
  # OpenRouter's Claude models go to the native /v1/messages endpoint so
  # cache_control is preserved byte-for-byte, with no change to user YAML).
  # Unknown providers / custom base_urls fall back to the constructor flag.
  auto_anthropic = resolved ? Providers.anthropic_format_for_model?(resolved, @model) : resolved
  @use_anthropic_format = auto_anthropic || anthropic_format

  # Kept so the connection builders can apply provider-specific headers.
  @provider_id = resolved

  # Vision support is decided once here. Non-vision models (DeepSeek, Kimi,
  # MiniMax, etc.) reject image_url content blocks; the conversion layer
  # strips them when this is false.
  @vision_supported = Providers.supports?(resolved, :vision, model_name: @model)
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message’s content with a cache_control marker.



385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/clacky/client.rb', line 385

# Returns a copy of +msg+ whose content carries an ephemeral cache_control
# marker. The input message and its blocks are never mutated.
#
# - String content is wrapped into a single text block carrying the marker.
# - Array content gets the marker merged into its last block only.
# - Any other content (including a missing :content key) returns +msg+ itself.
#
# @param msg [Hash] message with a symbol :content key
# @return [Hash] marked copy, or +msg+ unchanged
def add_cache_control_to_message(msg)
  payload = msg[:content]
  return msg unless payload.is_a?(String) || payload.is_a?(Array)

  marker = { cache_control: { type: "ephemeral" } }
  blocks = payload.is_a?(String) ? [{ type: "text", text: payload }] : payload

  # Only the final block is tagged; earlier blocks are passed through as-is.
  tagged = blocks.empty? ? [] : blocks[0...-1] + [blocks.last.merge(marker)]

  msg.merge(content: tagged)
end

#anthropic_connection ⇒ Object



462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
# File 'lib/clacky/client.rb', line 462

# Returns the Faraday connection used for Anthropic-format requests.
# The connection is built on first use against @base_url and memoized in
# @anthropic_connection, so headers are fixed for the client's lifetime.
# It authenticates with x-api-key, pins anthropic-version, and adds
# provider-specific headers based on @provider_id.
def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]   = "application/json"
    conn.headers["x-api-key"]      = @api_key
    conn.headers["anthropic-version"] = "2023-06-01"
    conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
    # OpenRouter's /v1/messages endpoint authenticates with a Bearer
    # token (the OpenRouter API key), not Anthropic's x-api-key. We send
    # both so the same connection code works for direct Anthropic and
    # for OpenRouter-proxied Claude — each endpoint ignores the header
    # it doesn't recognise.
    if @provider_id == "openrouter"
      conn.headers["Authorization"] = "Bearer #{@api_key}"
    end
    # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
    # prefix whitelist limited to first-party coding agents (Kimi CLI,
    # Claude Code, Roo Code, Kilo Code, ...). Requests with the default
    # Faraday UA are rejected with HTTP 403 access_terminated_error,
    # despite a valid API key. We send a Claude Code-shaped UA here
    # because openclacky talks to this endpoint over the same Anthropic
    # /v1/messages protocol that Claude Code uses, so the UA matches the
    # wire-level behaviour. Hardcoding rather than exposing as a config
    # field is intentional: the only UAs known to pass the gate are the
    # whitelisted-client formats, and the project's preset registry is
    # the single source of truth for provider-specific quirks (mirroring
    # how the openrouter Bearer-fallback above is hardcoded).
    if @provider_id == "kimi-coding"
      conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
    end
    conn.options.timeout      = 300
    conn.options.open_timeout = 10
    # NOTE(review): this turns off TLS certificate verification for requests
    # that carry the API key in their headers — confirm this is intentional,
    # since an intercepting host could read the key.
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#anthropic_format?(model = nil) ⇒ Boolean

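Returns true when the client sends requests in the Anthropic format and is not using Bedrock. Both facts are decided at construction time: the format comes from the provider/model lookup, falling back to the anthropic_format flag. The model argument is accepted but not used.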

Returns:

  • (Boolean)


48
49
50
# File 'lib/clacky/client.rb', line 48

# Tells whether requests should use the Anthropic format. Bedrock takes
# precedence: when @use_bedrock is set this is never true.
#
# @param model accepted for call-site compatibility; not read here
# @return [Boolean, nil] the falsy @use_anthropic_format value as-is when the
#   format is off, otherwise the negation of @use_bedrock
def anthropic_format?(model = nil)
  return @use_anthropic_format unless @use_anthropic_format

  !@use_bedrock
end

#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
           messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks messages[-1]; in Turn N+1 that same message is now [-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
# File 'lib/clacky/client.rb', line 368

# Returns a new array in which up to two messages, chosen from the tail,
# have been passed through add_cache_control_to_message. Messages flagged by
# is_compression_instruction? are skipped when picking the two candidates.
# An empty input is returned as-is.
#
# @param messages [Array] conversation messages, oldest first
# @return [Array] same length and order, with the selected messages replaced
def apply_message_caching(messages)
  return messages if messages.empty?

  # Walk backwards lazily so scanning stops once two candidates are found.
  marked = (messages.length - 1).downto(0).lazy
                                .reject { |pos| is_compression_instruction?(messages[pos]) }
                                .first(2)

  messages.each_with_index.map do |message, pos|
    marked.include?(pos) ? add_cache_control_to_message(message) : message
  end
end

#bedrock? ⇒ Boolean

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

  • (Boolean)


42
43
44
# File 'lib/clacky/client.rb', line 42

# Reports whether this client takes the Bedrock code paths.
# Exposes the @use_bedrock value as stored; nothing is recomputed per call,
# so the result is whatever truthy/falsy value was assigned to it.
def bedrock?
  @use_bedrock
end

#bedrock_connection ⇒ Object



440
441
442
443
444
445
446
447
448
449
# File 'lib/clacky/client.rb', line 440

# Returns the Faraday connection used for Bedrock requests.
# Built on first use against @base_url and memoized in @bedrock_connection.
# Sends JSON with the API key as a Bearer token.
def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = 300
    conn.options.open_timeout = 10
    # NOTE(review): this turns off TLS certificate verification for requests
    # that carry the Bearer token — confirm this is intentional, since an
    # intercepting host could read the key.
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.



409
410
411
# File 'lib/clacky/client.rb', line 409

# Builds the request path used for Bedrock Converse calls.
# The model is inserted verbatim (no escaping is applied here).
#
# @param model model identifier placed in the path
# @return [String] "/model/<model>/converse"
def bedrock_endpoint(model)
  format("/model/%s/converse", model)
end

#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)



551
552
553
554
555
556
# File 'lib/clacky/client.rb', line 551

# Guards against an HTML page arriving where JSON was expected (e.g. a
# gateway error page returned with status 200).
#
# @param response object responding to #body
# @return [nil] when the body does not look like HTML
# @raise [RetryableError] when the body, ignoring leading whitespace, starts
#   with one of the recognised HTML prefixes
def check_html_response(response)
  leading = response.body.to_s.lstrip
  return unless %w[<!DOCTYPE <!doctype <html <HTML].any? { |prefix| leading.start_with?(prefix) }

  raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..."
end

#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────



611
612
613
614
615
616
617
# File 'lib/clacky/client.rb', line 611

# Recursively copies the Hash/Array structure of +obj+.
# Hashes and Arrays are rebuilt as new containers at every level; any other
# value (strings included) is returned as the same object, not duplicated.
#
# @param obj value to copy
# @return a structurally independent copy sharing its leaf objects
def deep_clone(obj)
  if obj.is_a?(Hash)
    copy = {}
    obj.each { |key, value| copy[key] = deep_clone(value) }
    copy
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ Object



558
559
560
561
562
563
564
565
566
567
568
569
# File 'lib/clacky/client.rb', line 558

# Picks the most useful human-readable message out of an error response.
#
# Lookup order:
#   1. an HTML body (any letter case, leading whitespace ignored) yields a
#      fixed "received HTML" message;
#   2. when +error_body+ is not a Hash, +raw_body+ is returned untouched;
#   3. "upstreamMessage" (skipped when nil or empty);
#   4. "error" => { "message" => ... };
#   5. top-level "message";
#   6. "error" when it is a String;
#   7. otherwise the raw body, cut to 200 characters with a trailing "..."
#      only when something was actually cut.
#
# @param error_body [Hash, Object, nil] parsed JSON body, or nil if parsing failed
# @param raw_body [String, nil] unparsed response body
# @return the selected message (usually a String; JSON values are returned as found)
def extract_error_message(error_body, raw_body)
  # Case-insensitive so "<!doctype html>" and "<HTML>" are recognised too,
  # matching what check_html_response treats as an HTML page.
  if raw_body.is_a?(String) && raw_body.strip.match?(/\A<(?:!doctype|html)/i)
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  return raw_body unless error_body.is_a?(Hash)

  # Guard #empty? so a non-String upstreamMessage (e.g. a number) cannot raise.
  upstream = error_body["upstreamMessage"]
  return upstream unless upstream.nil? || (upstream.respond_to?(:empty?) && upstream.empty?)

  nested = error_body["error"]
  return nested["message"] if nested.is_a?(Hash) && !nested["message"].nil?

  top_level = error_body["message"]
  return top_level unless top_level.nil?

  return nested if nested.is_a?(String)

  # Keep exactly 200 characters; the ellipsis appears only when truncating.
  text = raw_body.to_s
  text.length > 200 ? "#{text[0, 200]}..." : text
end

#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: “tool”) regardless of API type — conversion to API-native happens inside each send_*_request.



182
183
184
185
186
187
188
189
190
191
192
# File 'lib/clacky/client.rb', line 182

# Turns tool results into messages by delegating to the formatter that
# matches the active API: Bedrock first, then Anthropic format, else OpenAI.
#
# @param response response passed through to the formatter
# @param tool_results [Array] results to format; an empty list short-circuits
# @param model accepted for call-site compatibility; not read here
# @return [Array] [] for no results, otherwise whatever the formatter returns
def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end

  formatter.format_tool_results(response, tool_results)
end

#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────



516
517
518
519
520
521
# File 'lib/clacky/client.rb', line 516

# Converts a connection-test HTTP response into a result hash.
#
# @param response object responding to #status and #body
# @return [Hash] { success: true } for status 200, otherwise
#   { success: false, error: <message from extract_error_message> }
def handle_test_response(response)
  if response.status == 200
    { success: true }
  else
    # Best effort: a body that is not valid JSON is passed on as nil.
    parsed = begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
    { success: false, error: extract_error_message(parsed, response.body) }
  end
end

#is_compression_instruction?(message) ⇒ Boolean

Returns:

  • (Boolean)


402
403
404
# File 'lib/clacky/client.rb', line 402

# Tells whether +message+ is flagged as system-injected, i.e. it is a Hash
# whose symbol key :system_injected equals true.
#
# @param message value to inspect
# @return [Boolean]
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end

#openai_connection ⇒ Object



451
452
453
454
455
456
457
458
459
460
# File 'lib/clacky/client.rb', line 451

# Returns the Faraday connection used for OpenAI-format requests.
# Built on first use against @base_url and memoized in @openai_connection.
# Sends JSON with the API key as a Bearer token.
def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = 300
    conn.options.open_timeout = 10
    # NOTE(review): this turns off TLS certificate verification for requests
    # that carry the Bearer token — confirm this is intentional, since an
    # intercepting host could read the key.
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#parse_simple_anthropic_response(response) ⇒ Object



299
300
301
302
303
# File 'lib/clacky/client.rb', line 299

# Extracts the reply text from an Anthropic-format response.
# Non-200 responses are handed to raise_error. From the parsed body, the
# "text" of every content block whose "type" is "text" is concatenated in
# order; a missing "content" key yields "".
#
# @param response object responding to #status and #body
# @return [String] concatenated text
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  blocks  = payload["content"] || []
  pieces  = blocks.each_with_object([]) do |block, texts|
    texts << block["text"] if block["type"] == "text"
  end
  pieces.join("")
end

#parse_simple_bedrock_response(response) ⇒ Object



255
256
257
258
259
260
261
262
# File 'lib/clacky/client.rb', line 255

# Extracts the reply text from a Bedrock response.
# Non-200 responses are handed to raise_error. From the parsed body, the
# "text" of every block under output → message → content that has one is
# concatenated in order; a missing path yields "".
#
# @param response object responding to #status and #body
# @return [String] concatenated text
def parse_simple_bedrock_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  blocks  = payload.dig("output", "message", "content") || []
  pieces  = blocks.each_with_object([]) do |block, texts|
    texts << block["text"] if block["text"]
  end
  pieces.join("")
end

#parse_simple_openai_response(response) ⇒ Object



348
349
350
351
352
# File 'lib/clacky/client.rb', line 348

# Extracts the reply text from an OpenAI-format response.
# Non-200 responses are handed to raise_error. The content of the first
# choice's message is returned.
#
# A 200 body without a usable choices[0].message (empty "choices", an error
# object, a non-object JSON value) returns nil instead of failing with a
# NoMethodError on nil, so callers see "no content" rather than an opaque
# crash.
#
# @param response object responding to #status and #body
# @return [String, nil] the message content, or nil when it is absent
def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200

  parsed_body = safe_json_parse(response.body, context: "LLM response")

  # Step through the structure, checking each level's type before indexing.
  choices      = parsed_body.is_a?(Hash) ? parsed_body["choices"] : nil
  first_choice = choices.is_a?(Array) ? choices.first : nil
  message      = first_choice.is_a?(Hash) ? first_choice["message"] : nil
  message.is_a?(Hash) ? message["content"] : nil
end

#raise_error(response) ⇒ Object



523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
# File 'lib/clacky/client.rb', line 523

# Translates a non-success HTTP response into the matching error class.
# Always raises; never returns.
#
# The extracted message is coerced to a String before it is inspected, so a
# missing body or a non-String JSON "message" value still produces the
# intended error class instead of a NoMethodError from the 400 branch.
#
# @param response object responding to #status and #body
# @raise [RetryableError] 429, 5xx, and 400s whose message mentions
#   throttling, unavailability or quota
# @raise [BadRequestError] any other 400
# @raise [AgentError] 401, 402, 403, 404 and any other status
def raise_error(response)
  # Best effort: a body that is not valid JSON is passed on as nil.
  error_body =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  error_message = extract_error_message(error_body, response.body).to_s

  case response.status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues in 400.
    # However, some proxy/relay providers do — so we inspect the message first.
    # Also, Bedrock returns ThrottlingException as 400 instead of 429.
    if error_message.match?(/ThrottlingException|unavailable|quota/i)
      hint = error_message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{error_message}#{hint}"
    end

    # True bad request — our message was malformed. Roll back history so the
    # broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{error_message}"
  when 401 then raise AgentError, "[LLM] Invalid API key"
  when 402 then raise AgentError, "[LLM] Billing or payment issue (possibly out of credits): #{error_message}"
  when 403 then raise AgentError, "[LLM] Access denied: #{error_message}"
  when 404 then raise AgentError, "[LLM] API endpoint not found: #{error_message}"
  when 429 then raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599 then raise RetryableError, "[LLM] Service temporarily unavailable (#{response.status}), retrying..."
  else raise AgentError, "[LLM] Unexpected error (#{response.status}): #{error_message}"
  end
end

#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

Parse JSON with user-friendly error messages.

Parameters:

  • json_string (String)

    the JSON string to parse

  • context (String) (defaults to: "response")

    a description of what’s being parsed (e.g., “LLM response”)

Returns:

  • (Hash, Array)

    the parsed JSON

Raises:

  • (RetryableError)

    if parsing fails (indicates a malformed LLM response)



576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/clacky/client.rb', line 576

# Parses JSON, converting parse failures into a user-friendly, retryable error.
#
# A nil (or otherwise non-String) input makes JSON.parse raise TypeError
# rather than JSON::ParserError; both are handled here so that a missing
# body is reported as "received empty response" instead of leaking a raw
# TypeError to the caller.
#
# @param json_string [String, nil] the JSON text to parse
# @param context [String] what is being parsed (e.g. "LLM response")
# @return [Hash, Array] the parsed JSON
# @raise [RetryableError] if the input cannot be parsed
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError, TypeError
  # Transform technical JSON parsing errors into user-friendly messages.
  # These are usually caused by:
  #   1. Incomplete/truncated LLM response (network issue, timeout)
  #   2. LLM service returned malformed data
  #   3. Proxy/gateway corruption
  raw = json_string.to_s
  error_detail = if raw.strip.empty?
    "received empty response"
  elsif raw.bytesize > 500
    "response was truncated or malformed (#{raw.bytesize} bytes received)"
  else
    "response format is invalid"
  end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                       "This usually means the AI service returned incomplete or corrupted data. " \
                       "The request will be retried automatically."
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────



266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/clacky/client.rb', line 266

# Sends one tool-capable request in the Anthropic format.
# With caching enabled, cache_control markers are applied to the messages
# before the body is built. When +on_chunk+ is given the built body is handed
# to the streaming sender; otherwise a plain POST is made and the response is
# validated, parsed and converted by MessageFormat::Anthropic.
#
# @param on_chunk optional callback; its presence selects the streaming path
# @return the streaming sender's result, or the parsed response
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages
  payload  = MessageFormat::Anthropic.build_request_body(prepared, model, tools, max_tokens, caching_enabled)

  if on_chunk
    send_anthropic_stream_request(payload, on_chunk)
  else
    http_response = anthropic_connection.post(anthropic_messages_path) do |request|
      request.body = payload.to_json
    end

    # Status first, then the HTML guard, then JSON parsing.
    raise_error(http_response) unless http_response.status == 200
    check_html_response(http_response)
    MessageFormat::Anthropic.parse_response(safe_json_parse(http_response.body, context: "LLM response"))
  end
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────



220
221
222
223
224
225
226
227
228
229
230
# File 'lib/clacky/client.rb', line 220

# Sends one tool-capable request through the Bedrock path.
# The caching flag is passed to the body builder; no markers are applied to
# the messages in this method. When +on_chunk+ is given the built body is
# handed to the streaming sender; otherwise a plain POST is made and the
# response is validated, parsed and converted by MessageFormat::Bedrock.
#
# @param on_chunk optional callback; its presence selects the streaming path
# @return the streaming sender's result, or the parsed response
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
  payload = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled)

  if on_chunk
    send_bedrock_stream_request(payload, model, on_chunk)
  else
    http_response = bedrock_connection.post(bedrock_endpoint(model)) do |request|
      request.body = payload.to_json
    end

    # Status first, then the HTML guard, then JSON parsing.
    raise_error(http_response) unless http_response.status == 200
    check_html_response(http_response)
    MessageFormat::Bedrock.parse_response(safe_json_parse(http_response.body, context: "LLM response"))
  end
end

#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.



82
83
84
85
# File 'lib/clacky/client.rb', line 82

# Convenience wrapper: sends +content+ as a single user message via
# send_messages and returns whatever that returns.
#
# @param content body of the user message
# @param model forwarded to send_messages
# @param max_tokens forwarded to send_messages
def send_message(content, model:, max_tokens:)
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/clacky/client.rb', line 88

# Sends a messages array without tools and returns the reply text.
# The route is chosen in this order: Bedrock, Anthropic format, OpenAI
# format. Each route builds its own body, POSTs it, and delegates status
# handling and text extraction to the matching parse_simple_* helper.
#
# @param messages [Array] messages to send
# @param model model identifier
# @param max_tokens token limit placed in the request body
# @return the text extracted by the route's parser
def send_messages(messages, model:, max_tokens:)
  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply   = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload.to_json }
    return parse_simple_bedrock_response(reply)
  end

  if anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    reply   = anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload.to_json }
    return parse_simple_anthropic_response(reply)
  end

  # OpenAI route: the messages are sent as given, without a format builder.
  payload = { model: model, max_tokens: max_tokens, messages: messages }
  reply   = openai_connection.post("chat/completions") { |req| req.body = payload.to_json }
  parse_simple_openai_response(reply)
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil) ⇒ Object

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }

Latency measurement:

When no `on_chunk` callback is given, the HTTP path is *non-streaming*
(plain POST, response body read in one shot), so TTFB (time to response
headers) is not exposed by Faraday's default adapter without extra
plumbing. What we CAN measure cheaply — and what users actually feel — is
total request duration, which for a non-streaming call equals the time
from "hit Enter" to "first token visible" (since we receive everything at once).

So we always record `duration_ms`, and on the non-streaming path `ttft_ms`
is simply an alias for it (status bar uses ttft_ms as its signal metric —
see docs). When an `on_chunk` callback is given, `ttft_ms` instead measures
the time until the first chunk callback fires, so downstream consumers get
the *actual* first-token latency without any schema change.

Parameters:

  • on_chunk (Proc, nil) (defaults to: nil)

    optional streaming progress callback. Receives keyword args { input_tokens:, output_tokens: } with cumulative token counts. When nil, behaves exactly as the historical non-streaming path. When given but streaming is not yet wired for the active provider, a single synthetic invocation is fired after the response is received, so UI plumbing can be exercised end-to-end without the proxy work.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/clacky/client.rb', line 128

# Sends messages with tool-calling support and annotates the provider
# response with latency figures under :latency.
#
# The messages are deep-cloned before being handed to the provider-specific
# sender, and caching is used only when requested AND supported by the model.
#
# @param messages [Array] conversation messages
# @param model model identifier (also echoed in the latency hash)
# @param tools tool definitions forwarded to the sender
# @param max_tokens token limit forwarded to the sender
# @param enable_caching [Boolean] request prompt caching
# @param on_chunk [Proc, nil] progress callback invoked with keyword args
# @return [Hash] the sender's response with :latency added
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, on_chunk: nil)
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  cloned = deep_clone(messages)

  # Every provider branch below streams whenever a callback is supplied, so
  # the flag depends only on the callback's presence.
  streaming_used = !on_chunk.nil?
  first_chunk_at = nil
  # Wrapper that timestamps the first chunk before forwarding it.
  timed_on_chunk = on_chunk && ->(**kwargs) {
    first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
    on_chunk.call(**kwargs)
  }

  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  response =
    if bedrock?
      send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: timed_on_chunk)
    elsif anthropic_format?
      send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: timed_on_chunk)
    else
      send_openai_request(cloned, model, tools, max_tokens, caching_enabled, on_chunk: timed_on_chunk)
    end
  finished_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Synthetic single callback for a provider that did not stream. With the
  # three branches above this condition is currently never met.
  if on_chunk && !streaming_used
    token_usage = response[:usage] || {}
    safe_invoke_on_chunk(
      on_chunk,
      input_tokens:  token_usage[:prompt_tokens].to_i,
      output_tokens: token_usage[:completion_tokens].to_i
    )
  end

  elapsed_ms = ((finished_at - started_at) * 1000).round
  # Without a first-chunk timestamp, first-token latency equals total duration.
  first_token_ms = first_chunk_at ? ((first_chunk_at - started_at) * 1000).round : elapsed_ms
  completion_tokens = response[:usage]&.dig(:completion_tokens).to_i

  # Throughput is reported only for replies of at least 10 tokens.
  throughput = nil
  if completion_tokens >= 10 && elapsed_ms > 0
    throughput = (completion_tokens * 1000.0 / elapsed_ms).round(1)
  end

  response[:latency] = {
    ttft_ms:     first_token_ms,
    duration_ms: elapsed_ms,
    output_tokens: completion_tokens,
    tps:         throughput,
    model:       model,
    measured_at: Time.now.to_f,
    streaming:   streaming_used
  }
  response
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/clacky/client.rb', line 307

# Sends one tool-capable request in the OpenAI format.
# With caching enabled, cache_control markers are applied to the messages
# first (OpenRouter proxies Claude with the same cache_control field
# convention as Anthropic direct). The body builder also receives the
# client's vision flag. When +on_chunk+ is given the built body is handed to
# the streaming sender; otherwise a plain POST is made and the response is
# validated, parsed and converted by MessageFormat::OpenAI.
#
# @param on_chunk optional callback; its presence selects the streaming path
# @return the streaming sender's result, or the parsed response
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages
  payload  = MessageFormat::OpenAI.build_request_body(
    prepared, model, tools, max_tokens, caching_enabled,
    vision_supported: @vision_supported
  )

  if on_chunk
    send_openai_stream_request(payload, on_chunk)
  else
    http_response = openai_connection.post("chat/completions") do |request|
      request.body = payload.to_json
    end

    # Status first, then the HTML guard, then JSON parsing.
    raise_error(http_response) unless http_response.status == 200
    check_html_response(http_response)
    MessageFormat::OpenAI.parse_response(safe_json_parse(http_response.body, context: "LLM response"))
  end
end

#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).

Handles both direct model names (e.g. “claude-haiku-4-5”) and Clacky AI Bedrock proxy names with “abs-” prefix (e.g. “abs-claude-haiku-4-5”).

Why only Claude models:

- MiniMax uses automatic server-side caching (no cache_control needed from client)
- Kimi uses a proprietary prompt_cache_key param, not cache_control
- MiMo has no documented caching API
- Only Claude (direct, OpenRouter, or ClackyAI Bedrock proxy) consumes our
  cache_control / cachePoint markers

Returns:

  • (Boolean)


207
208
209
210
211
212
213
214
215
# File 'lib/clacky/client.rb', line 207

# Decides from the model name alone whether prompt caching markers should be
# used. The name is lower-cased and a leading "abs-" (ClackyAI Bedrock proxy
# prefix) is dropped; the result must mention "claude" and match the
# generation pattern below.
#
# @param model model name (anything responding to #to_s)
# @return [Boolean]
def supports_prompt_caching?(model)
  normalized = model.to_s.downcase.sub(/^abs-/, "")

  # Claude gen 3.5+ (3.5/3.6/3.7…) or gen 4+ in any name format:
  #   claude-3.5-sonnet-...  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
  normalized.include?("claude") &&
    normalized.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end

#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: “…” }.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/clacky/client.rb', line 56

# Probes the API with a minimal "hi" request (max 16 tokens) on whichever
# route the client uses: Bedrock, Anthropic format, or OpenAI format.
# Never raises for StandardError: failures are reported in the result hash.
#
# @param model model identifier placed in the probe request
# @return [Hash] { success: true } or { success: false, error: "…" }
def test_connection(model:)
  reply =
    if bedrock?
      payload = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload }
    else
      # The Anthropic-format and OpenAI-format probes share the same body.
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      if anthropic_format?
        anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload }
      else
        openai_connection.post("chat/completions") { |req| req.body = payload }
      end
    end

  handle_test_response(reply)
rescue Faraday::Error => err
  { success: false, error: "Connection error: #{err.message}" }
rescue => err
  # Anything else is logged and reported without the "Connection error" prefix.
  Clacky::Logger.error("[test_connection] #{err.class}: #{err.message}", error: err)
  { success: false, error: err.message }
end