Class: Clacky::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =
10
RETRY_DELAY =
5 (seconds)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ Client

Returns a new instance of Client.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/clacky/client.rb', line 13

# Configures a client for one API key / base URL / model combination.
#
# All routing decisions are taken here, once:
# * Bedrock mode       — decided by MessageFormat::Bedrock.bedrock_api_key?
#                        (ABSK key prefix, or abs- model prefix via the proxy).
# * provider id        — resolved from base_url + api_key; kept so connection
#                        headers can be tuned per provider later on.
# * Anthropic format   — the provider/model preference wins; the explicit
#                        +anthropic_format+ flag is the fallback for unknown
#                        providers and custom base URLs.
# * vision support     — non-vision models reject image_url blocks, so the
#                        conversion layer strips them when this is false.
#
# @param api_key [String] credential sent to the provider
# @param base_url [String] API base URL
# @param model [String] model name
# @param anthropic_format [Boolean] fallback flag when the provider has no preference
# @param read_timeout [Numeric, nil] optional timeout override; nil keeps the
#   connection default (300s)
def initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil)
  # Plain configuration first.
  @api_key      = api_key
  @base_url     = base_url
  @model        = model
  @read_timeout = read_timeout

  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # Resolved once and reused for the capability and api-type lookups below.
  @provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # Lets e.g. OpenRouter's Claude models auto-route to the native
  # /v1/messages endpoint without any change to user configuration.
  provider_wants_anthropic = @provider_id &&
                             Providers.anthropic_format_for_model?(@provider_id, @model)
  @use_anthropic_format = provider_wants_anthropic || anthropic_format

  @vision_supported = Providers.supports?(@provider_id, :vision, model_name: @model)
end

Instance Attribute Details

#provider_id ⇒ Object (readonly)

Returns the value of attribute provider_id.



11
12
13
# File 'lib/clacky/client.rb', line 11

# Read-only accessor for the provider identifier held in @provider_id.
# The constructor assigns it from Providers.resolve_provider; the
# constructor guards against a nil id, so callers should expect nil
# to be possible here.
#
# @return [Object, nil] the stored provider id
def provider_id
  @provider_id
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message’s content with a cache_control marker.



395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
# File 'lib/clacky/client.rb', line 395

# Returns a copy of +msg+ whose content carries an ephemeral cache_control marker.
#
# * String content is wrapped into a single text block that holds the marker.
# * Array content gets the marker merged into its last block only; the other
#   blocks are passed through untouched.
# * Any other content (e.g. nil) leaves the message as-is: +msg+ itself is returned.
#
# The input message and its blocks are never mutated.
#
# @param msg [Hash] message with a :content key
# @return [Hash] marked copy, or +msg+ when the content type is not supported
def add_cache_control_to_message(msg)
  marker = { cache_control: { type: "ephemeral" } }
  body = msg[:content]

  if body.is_a?(String)
    msg.merge(content: [{ type: "text", text: body }.merge(marker)])
  elsif body.is_a?(Array)
    final_index = body.length - 1
    marked_blocks = body.each_with_index.map do |block, position|
      position == final_index ? block.merge(marker) : block
    end
    msg.merge(content: marked_blocks)
  else
    msg
  end
end

#anthropic_connection ⇒ Object



472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
# File 'lib/clacky/client.rb', line 472

# Builds (once) and returns the Faraday connection used for Anthropic-format
# /v1/messages requests. The instance is memoized in @anthropic_connection.
#
# NOTE(review): ssl.verify is switched off below, so the server certificate is
# not validated while the API key travels in the request headers. Confirm this
# is intentional before relying on it outside trusted networks.
#
# @return [Object] the memoized connection returned by Faraday.new
def anthropic_connection
  return @anthropic_connection if @anthropic_connection

  @anthropic_connection = Faraday.new(url: @base_url) do |http|
    http.headers["Content-Type"]      = "application/json"
    http.headers["x-api-key"]         = @api_key
    http.headers["anthropic-version"] = "2023-06-01"
    http.headers["anthropic-dangerous-direct-browser-access"] = "true"

    case @provider_id
    when "openrouter"
      # OpenRouter's /v1/messages endpoint authenticates with a Bearer token
      # (the OpenRouter API key) rather than Anthropic's x-api-key. Both are
      # sent so the same connection code serves direct Anthropic and
      # OpenRouter-proxied Claude; each endpoint ignores the header it does
      # not recognise.
      http.headers["Authorization"] = "Bearer #{@api_key}"
    when "kimi-coding"
      # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
      # prefix whitelist limited to first-party coding agents; the default
      # Faraday UA is rejected with HTTP 403 access_terminated_error even
      # with a valid key. A Claude Code-shaped UA is sent because this
      # endpoint is spoken to over the same Anthropic /v1/messages protocol.
      # Hardcoded on purpose: only whitelisted client formats pass the gate,
      # mirroring how the OpenRouter Bearer header above is hardcoded.
      http.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
    end

    http.options.timeout      = @read_timeout || 300
    http.options.open_timeout = 10
    http.ssl.verify           = false
    http.adapter Faraday.default_adapter
  end
end

#anthropic_format?(model = nil) ⇒ Boolean

Returns true when the client uses the Anthropic Messages wire format — chosen at construction time from the provider/model preference or the explicit anthropic_format flag — and is not running in Bedrock mode.

Returns:

  • (Boolean)


54
55
56
# File 'lib/clacky/client.rb', line 54

# Whether requests should use the Anthropic message format.
#
# True only when the Anthropic format was selected at construction time
# (@use_anthropic_format) and the client is not in Bedrock mode
# (@use_bedrock). The +model+ argument is accepted but not consulted.
#
# @param model [Object, nil] ignored
# @return [Boolean, nil] falsy when the Anthropic format is off or Bedrock is on
def anthropic_format?(model = nil)
  return @use_anthropic_format unless @use_anthropic_format

  !@use_bedrock
end

#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
           messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks messages[-1]; in Turn N+1 that same message is now messages[-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
# File 'lib/clacky/client.rb', line 378

# Marks the last two cacheable messages with cache_control.
#
# Messages flagged as compression instructions are never marked; they are
# skipped when picking the two tail candidates. An empty input is returned
# unchanged (same object); otherwise a new array is built in which the
# selected messages are replaced by their marked copies.
#
# @param messages [Array<Hash>] conversation messages, oldest first
# @return [Array<Hash>] messages with up to two tail entries marked
def apply_message_caching(messages)
  return messages if messages.empty?

  cacheable_positions = messages.each_index.reject do |position|
    is_compression_instruction?(messages[position])
  end
  marked_positions = cacheable_positions.last(2)

  messages.each_with_index.map do |message, position|
    marked_positions.include?(position) ? add_cache_control_to_message(message) : message
  end
end

#bedrock? ⇒ Boolean

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

  • (Boolean)


48
49
50
# File 'lib/clacky/client.rb', line 48

# Predicate for Bedrock mode.
# Returns the @use_bedrock flag as stored; the constructor computes it once
# via MessageFormat::Bedrock.bedrock_api_key?(api_key, model).
#
# @return [Boolean] the stored flag
def bedrock?
  @use_bedrock
end

#bedrock_connection ⇒ Object



450
451
452
453
454
455
456
457
458
459
# File 'lib/clacky/client.rb', line 450

# Builds (once) and returns the Faraday connection used for Bedrock requests.
# Authenticates with a Bearer token and is memoized in @bedrock_connection.
#
# NOTE(review): ssl.verify is switched off below, so the server certificate is
# not validated while the API key travels in the Authorization header. Confirm
# this is intentional before relying on it outside trusted networks.
#
# @return [Object] the memoized connection returned by Faraday.new
def bedrock_connection
  return @bedrock_connection if @bedrock_connection

  @bedrock_connection = Faraday.new(url: @base_url) do |http|
    http.headers["Content-Type"]  = "application/json"
    http.headers["Authorization"] = "Bearer #{@api_key}"

    # Caller-supplied read timeout wins; otherwise 300 seconds.
    http.options.timeout      = @read_timeout || 300
    http.options.open_timeout = 10
    http.ssl.verify           = false
    http.adapter Faraday.default_adapter
  end
end

#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.



419
420
421
# File 'lib/clacky/client.rb', line 419

# Path of the Converse endpoint for +model+.
# The model id is inserted verbatim (no escaping is applied).
#
# @param model [#to_s] model identifier
# @return [String] "/model/<model>/converse"
def bedrock_endpoint(model)
  format("/model/%s/converse", model)
end

#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)



561
562
563
564
565
566
# File 'lib/clacky/client.rb', line 561

# Raises a retryable error when the response body is an HTML page (for
# example a gateway error page delivered with status 200) instead of JSON.
#
# Detection looks at the start of the body after leading whitespace and is
# ASCII case-insensitive, so "<!DOCTYPE", "<!doctype", "<!Doctype", "<html",
# "<HTML", "<Html" … are all recognised. String#casecmp is used rather than
# downcasing or a regexp so the comparison stays a plain ASCII fold on the
# short prefix only.
#
# @param response [#body] HTTP response
# @return [nil] when the body does not look like HTML
# @raise [RetryableError] when the body starts with an HTML doctype or <html tag
def check_html_response(response)
  body = response.body.to_s.lstrip
  looks_like_html = ["<!doctype", "<html"].any? do |prefix|
    body[0, prefix.length].casecmp(prefix)&.zero?
  end
  return unless looks_like_html

  raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..."
end

#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────



621
622
623
624
625
626
627
# File 'lib/clacky/client.rb', line 621

# Recursively copies the Hash/Array structure of +obj+.
#
# Hashes and Arrays are rebuilt at every nesting level; everything else
# (strings, numbers, nil, other objects) is returned as the same object, so
# leaf values stay shared with the original. Hash keys are reused as-is.
#
# @param obj [Object] value to copy
# @return [Object] structural copy for Hash/Array input, +obj+ itself otherwise
def deep_clone(obj)
  if obj.is_a?(Hash)
    obj.map { |key, value| [key, deep_clone(value)] }.to_h
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ Object



568
569
570
571
572
573
574
575
576
577
578
579
# File 'lib/clacky/client.rb', line 568

# Picks the most useful human-readable message out of an error response.
#
# Lookup order:
#   1. An HTML body (doctype or <html tag, ASCII case-insensitive, leading
#      whitespace ignored) yields a fixed "wrong endpoint" message.
#   2. When the parsed body is not a Hash, the raw body is returned.
#   3. "upstreamMessage" (non-empty String only), then "error" => "message",
#      then "message", then a String "error".
#   4. Otherwise the first 201 characters of the raw body, with "..." appended
#      when the body was longer than 200 characters.
#
# The result is always a String, so callers can pattern-match or interpolate
# it without guarding against nil or nested JSON values.
#
# @param error_body [Hash, Object, nil] parsed JSON body, or nil when parsing failed
# @param raw_body [String, nil] unparsed response body
# @return [String] message to present
def extract_error_message(error_body, raw_body)
  if raw_body.is_a?(String)
    head = raw_body.lstrip
    # casecmp keeps the comparison a plain ASCII fold on the short prefix.
    html = ["<!doctype", "<html"].any? { |prefix| head[0, prefix.length].casecmp(prefix)&.zero? }
    return "Invalid API endpoint or server error (received HTML instead of JSON)" if html
  end

  return raw_body.to_s unless error_body.is_a?(Hash)

  # Non-String values (e.g. a numeric code) are skipped instead of crashing on #empty?.
  upstream = error_body["upstreamMessage"]
  return upstream if upstream.is_a?(String) && !upstream.empty?

  nested = error_body["error"]
  return nested["message"].to_s if nested.is_a?(Hash) && !nested["message"].nil?

  top_level = error_body["message"]
  return top_level.to_s unless top_level.nil?

  return nested if nested.is_a?(String)

  snippet = raw_body.to_s
  "#{snippet[0..200]}#{'...' if snippet.length > 200}"
end

#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: “tool”) regardless of API type — conversion to API-native happens inside each send_*_request.



188
189
190
191
192
193
194
195
196
197
198
# File 'lib/clacky/client.rb', line 188

# Converts tool results into messages by delegating to the formatter that
# matches the active API type (Bedrock, Anthropic, or OpenAI — checked in
# that order). An empty result list short-circuits to an empty array.
#
# @param response [Object] response that requested the tool calls
# @param tool_results [Array] results to format
# @param model [Object] accepted for interface compatibility; not consulted here
# @return [Array] formatted messages, [] when there is nothing to format
def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end

  formatter.format_tool_results(response, tool_results)
end

#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────



526
527
528
529
530
531
# File 'lib/clacky/client.rb', line 526

# Turns the response of a connection probe into a result hash.
#
# Status 200 counts as success. For any other status the body is parsed as
# JSON on a best-effort basis (parse failures yield nil) and handed, together
# with the raw body, to extract_error_message.
#
# @param response [#status, #body] HTTP response of the probe request
# @return [Hash] { success: true } or { success: false, error: <message> }
def handle_test_response(response)
  return { success: true } if response.status == 200

  raw = response.body
  parsed =
    begin
      JSON.parse(raw)
    rescue StandardError
      nil
    end

  { success: false, error: extract_error_message(parsed, raw) }
end

#is_compression_instruction?(message) ⇒ Boolean

Returns:

  • (Boolean)


412
413
414
# File 'lib/clacky/client.rb', line 412

# Whether +message+ is a Hash flagged with system_injected: true.
# Only the symbol key and the literal value true count.
#
# @param message [Object] candidate message
# @return [Boolean]
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end

#openai_connection ⇒ Object



461
462
463
464
465
466
467
468
469
470
# File 'lib/clacky/client.rb', line 461

# Builds (once) and returns the Faraday connection used for OpenAI-format
# requests. Authenticates with a Bearer token and is memoized in
# @openai_connection.
#
# NOTE(review): ssl.verify is switched off below, so the server certificate is
# not validated while the API key travels in the Authorization header. Confirm
# this is intentional before relying on it outside trusted networks.
#
# @return [Object] the memoized connection returned by Faraday.new
def openai_connection
  return @openai_connection if @openai_connection

  @openai_connection = Faraday.new(url: @base_url) do |http|
    http.headers["Content-Type"]  = "application/json"
    http.headers["Authorization"] = "Bearer #{@api_key}"

    # Caller-supplied read timeout wins; otherwise 300 seconds.
    http.options.timeout      = @read_timeout || 300
    http.options.open_timeout = 10
    http.ssl.verify           = false
    http.adapter Faraday.default_adapter
  end
end

#parse_simple_anthropic_response(response) ⇒ Object



308
309
310
311
312
# File 'lib/clacky/client.rb', line 308

# Extracts the reply text from an Anthropic-format response.
#
# A non-200 status is delegated to raise_error. Otherwise the body is parsed
# and the "text" of every content block whose "type" is "text" is
# concatenated in order; a missing "content" key yields an empty string.
#
# @param response [#status, #body] HTTP response
# @return [String] concatenated text blocks
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  text_blocks = (payload["content"] || []).select { |block| block["type"] == "text" }
  text_blocks.map { |block| block["text"] }.join("")
end

#parse_simple_bedrock_response(response) ⇒ Object



264
265
266
267
268
269
270
271
# File 'lib/clacky/client.rb', line 264

# Extracts the reply text from a Bedrock Converse response.
#
# A non-200 status is delegated to raise_error. Otherwise the body is parsed
# and every truthy "text" value found under output → message → content is
# concatenated in order; a missing path yields an empty string.
#
# @param response [#status, #body] HTTP response
# @return [String] concatenated text
def parse_simple_bedrock_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  blocks = payload.dig("output", "message", "content") || []
  texts = blocks.map { |block| block["text"] }
  texts.select { |text| text }.join("")
end

#parse_simple_openai_response(response) ⇒ Object



358
359
360
361
362
# File 'lib/clacky/client.rb', line 358

# Extracts the reply text from an OpenAI-format chat completion.
#
# A non-200 status is delegated to raise_error. Otherwise the body is parsed
# and the content of the first choice's message is returned as-is.
#
# NOTE(review): a 200 body without a non-empty "choices" array is not guarded
# against here and fails on the nil lookup.
#
# @param response [#status, #body] HTTP response
# @return [Object] choices[0].message.content from the parsed body
def parse_simple_openai_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  first_choice = payload["choices"].first
  first_choice["message"]["content"]
end

#raise_error(response) ⇒ Object



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
# File 'lib/clacky/client.rb', line 533

# Maps a failed HTTP response onto the matching error class and raises it.
#
# The body is parsed as JSON on a best-effort basis and reduced to a message
# via extract_error_message. That message is coerced to a String first, so a
# nil or structured (non-String) message cannot break the pattern matching
# done for status 400.
#
# @param response [#status, #body] failed HTTP response
# @raise [RetryableError] 429, 5xx, and 400s that look like throttling,
#   unavailability, or quota problems
# @raise [BadRequestError] any other 400
# @raise [AgentError] 401, 402, 403, 404 and every other status
def raise_error(response)
  error_body =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  error_message = extract_error_message(error_body, response.body).to_s

  case response.status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues in 400.
    # However, some proxy/relay providers do — so we inspect the message first.
    # Also, Bedrock returns ThrottlingException as 400 instead of 429.
    if error_message.match?(/ThrottlingException|unavailable|quota/i)
      hint = error_message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{error_message}#{hint}"
    end

    # True bad request — our message was malformed. Roll back history so the
    # broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{error_message}"
  when 401 then raise AgentError, "[LLM] Invalid API key"
  when 402 then raise AgentError, "[LLM] Billing or payment issue (possibly out of credits): #{error_message}"
  when 403 then raise AgentError, "[LLM] Access denied: #{error_message}"
  when 404 then raise AgentError, "[LLM] API endpoint not found: #{error_message}"
  when 429 then raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599 then raise RetryableError, "[LLM] Service temporarily unavailable (#{response.status}), retrying..."
  else raise AgentError, "[LLM] Unexpected error (#{response.status}): #{error_message}"
  end
end

#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

Parse JSON with user-friendly error messages.

Parameters:

  • json_string (String)

    the JSON string to parse

  • context (String) (defaults to: "response")

    a description of what’s being parsed (e.g., “LLM response”)

Returns:

  • (Hash, Array)

    the parsed JSON

Raises:

  • (RetryableError)

    if parsing fails (indicates a malformed LLM response)



586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
# File 'lib/clacky/client.rb', line 586

# Parses JSON and converts parse failures into a user-friendly, retryable error.
#
# Failures are usually caused by:
#   1. Incomplete/truncated LLM response (network issue, timeout)
#   2. LLM service returned malformed data
#   3. Proxy/gateway corruption
#
# Besides JSON::ParserError, the TypeError that JSON.parse raises for a
# non-String input (notably a nil body) is handled too, so a missing body is
# reported as "received empty response" instead of escaping as a raw TypeError.
#
# @param json_string [String, nil] the JSON text to parse
# @param context [String] description of what is being parsed (e.g. "LLM response")
# @return [Hash, Array] the parsed JSON
# @raise [RetryableError] if the input cannot be parsed
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError, TypeError
  raw = json_string.to_s
  error_detail =
    if raw.strip.empty?
      "received empty response"
    elsif raw.bytesize > 500
      "response was truncated or malformed (#{raw.bytesize} bytes received)"
    else
      "response format is invalid"
    end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                       "This usually means the AI service returned incomplete or corrupted data. " \
                       "The request will be retried automatically."
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────



275
276
277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/clacky/client.rb', line 275

# Sends one Anthropic-format request and returns the parsed response.
#
# When caching is enabled the cache breakpoint markers are applied to the
# messages before the request body is built. With an +on_chunk+ callback the
# body is handed to the streaming path; otherwise a plain POST is issued and
# the response is validated (status, HTML sniffing), JSON-parsed, and
# converted by MessageFormat::Anthropic.parse_response.
#
# @param messages [Array<Hash>] conversation messages
# @param model [String] model name
# @param tools [Array] tool definitions
# @param max_tokens [Integer] output token limit
# @param caching_enabled [Boolean] whether to add cache_control markers
# @param reasoning_effort [Object, nil] forwarded to the body builder
# @param on_chunk [Proc, nil] streaming callback; selects the streaming path
# @return [Object] result of parse_response, or of the streaming request
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages
  payload = MessageFormat::Anthropic.build_request_body(
    prepared, model, tools, max_tokens, caching_enabled,
    reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_anthropic_stream_request(payload, on_chunk)
  else
    reply = anthropic_connection.post(anthropic_messages_path) do |request|
      request.body = payload.to_json
    end

    raise_error(reply) if reply.status != 200
    check_html_response(reply)
    MessageFormat::Anthropic.parse_response(safe_json_parse(reply.body, context: "LLM response"))
  end
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────



226
227
228
229
230
231
232
233
234
235
236
# File 'lib/clacky/client.rb', line 226

# Sends one Bedrock Converse request and returns the parsed response.
#
# The request body is built by MessageFormat::Bedrock. With an +on_chunk+
# callback the body is handed to the streaming path; otherwise a plain POST
# goes to the model's Converse endpoint and the response is validated
# (status, HTML sniffing), JSON-parsed, and converted by
# MessageFormat::Bedrock.parse_response.
#
# @param messages [Array<Hash>] conversation messages
# @param model [String] model id, also used to build the endpoint path
# @param tools [Array] tool definitions
# @param max_tokens [Integer] output token limit
# @param caching_enabled [Boolean] forwarded to the body builder
# @param reasoning_effort [Object, nil] forwarded to the body builder
# @param on_chunk [Proc, nil] streaming callback; selects the streaming path
# @return [Object] result of parse_response, or of the streaming request
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  payload = MessageFormat::Bedrock.build_request_body(
    messages, model, tools, max_tokens, caching_enabled,
    reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_bedrock_stream_request(payload, model, on_chunk)
  else
    reply = bedrock_connection.post(bedrock_endpoint(model)) do |request|
      request.body = payload.to_json
    end

    raise_error(reply) if reply.status != 200
    check_html_response(reply)
    MessageFormat::Bedrock.parse_response(safe_json_parse(reply.body, context: "LLM response"))
  end
end

#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.



88
89
90
91
# File 'lib/clacky/client.rb', line 88

# Convenience wrapper: sends +content+ as a single user message through
# send_messages and returns whatever that call returns.
#
# @param content [Object] content of the user message
# @param model [String] model name
# @param max_tokens [Integer] output token limit
# @return [Object] result of send_messages
def send_message(content, model:, max_tokens:)
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/clacky/client.rb', line 94

# Sends +messages+ without tools and returns the reply text.
#
# The API type is checked in the order Bedrock → Anthropic → OpenAI. Each
# path builds its own request body, POSTs it over the matching connection,
# and hands the response to the matching parse_simple_* helper.
#
# @param messages [Array<Hash>] conversation messages
# @param model [String] model name
# @param max_tokens [Integer] output token limit
# @return [Object] reply text as extracted by the parse_simple_* helper
def send_messages(messages, model:, max_tokens:)
  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    reply = bedrock_connection.post(bedrock_endpoint(model)) { |request| request.body = payload.to_json }
    return parse_simple_bedrock_response(reply)
  end

  if anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    reply = anthropic_connection.post(anthropic_messages_path) { |request| request.body = payload.to_json }
    return parse_simple_anthropic_response(reply)
  end

  # OpenAI-format requests use a hand-built body rather than a MessageFormat builder.
  payload = { model: model, max_tokens: max_tokens, messages: messages }
  reply = openai_connection.post("chat/completions") { |request| request.body = payload.to_json }
  parse_simple_openai_response(reply)
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ Object

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }

Latency measurement:

Because the current HTTP path is *non-streaming* (plain POST, response
body read in one shot), TTFB (time to response headers) is not exposed
by Faraday's default adapter without extra plumbing. What we CAN measure
cheaply — and what users actually feel — is total request duration,
which for a non-streaming call equals the time from "hit Enter" to
"first token visible" (since we receive everything at once).

So we record `duration_ms` as the authoritative number and alias it to
`ttft_ms` for downstream consumers (status bar uses ttft_ms as its
signal metric — see docs). When we migrate to streaming later, this
same `ttft_ms` field will start carrying the *actual* first-token
latency without any schema change.

Parameters:

  • on_chunk (Proc, nil) (defaults to: nil)

    optional streaming progress callback. Receives keyword args { input_tokens:, output_tokens: } with cumulative token counts. When nil, behaves exactly as the historical non-streaming path. When given but streaming is not yet wired for the active provider, a single synthetic invocation is fired after the response is received, so UI plumbing can be exercised end-to-end without the proxy work.



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/clacky/client.rb', line 134

# Sends messages with tool-calling support and attaches latency metrics.
#
# The messages are deep-cloned before dispatch so the request helpers never
# touch the caller's array. Dispatch order is Bedrock → Anthropic → OpenAI.
#
# Streaming: every request helper switches to its streaming variant whenever
# it receives an on_chunk callback, so streaming is in use exactly when a
# callback was supplied. The callback is wrapped only to timestamp the first
# chunk; arguments are forwarded unchanged, and the callback is invoked solely
# by the streaming path (no synthetic invocation is added here).
#
# Latency fields written to response[:latency]:
#   ttft_ms       — time to the first chunk when streaming, otherwise equal to duration_ms
#   duration_ms   — total wall time of the request (monotonic clock)
#   output_tokens — response[:usage][:completion_tokens] as an Integer (0 when absent)
#   tps           — output tokens per second, or nil below 10 tokens / zero duration
#   model, measured_at (epoch seconds), streaming
#
# @param messages [Array<Hash>] conversation messages
# @param model [String] model name
# @param tools [Array] tool definitions
# @param max_tokens [Integer] output token limit
# @param enable_caching [Boolean] request prompt caching; only honoured when
#   supports_prompt_caching?(model) is true
# @param reasoning_effort [Object, nil] forwarded to the request helper
# @param on_chunk [Proc, nil] streaming progress callback
# @return [Hash] the response hash from the request helper, with :latency added
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil)
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  cloned = deep_clone(messages)

  streaming_used = !on_chunk.nil?
  first_chunk_at = nil
  wrapped_on_chunk = on_chunk && lambda do |**kwargs|
    first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
    on_chunk.call(**kwargs)
  end

  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  response =
    if bedrock?
      send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    elsif anthropic_format?
      send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    else
      send_openai_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    end
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  duration_ms = ((t1 - t0) * 1000).round
  ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
  # Throughput is only reported for samples large enough to be meaningful.
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil

  response[:latency] = {
    ttft_ms:     ttft_ms,
    duration_ms: duration_ms,
    output_tokens: output_tokens,
    tps:         tps,
    model:       model,
    measured_at: Time.now.to_f,
    streaming:   streaming_used
  }
  response
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────



316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/clacky/client.rb', line 316

# Sends one OpenAI-format chat completion request and returns the parsed response.
#
# When caching is enabled, cache_control markers are applied to the messages
# first (OpenRouter proxies Claude with the same cache_control convention as
# Anthropic direct). With an +on_chunk+ callback the body is handed to the
# streaming path; otherwise a plain POST is issued and the response is
# validated (status, HTML sniffing), JSON-parsed, and converted by
# MessageFormat::OpenAI.parse_response.
#
# @param messages [Array<Hash>] conversation messages
# @param model [String] model name
# @param tools [Array] tool definitions
# @param max_tokens [Integer] output token limit
# @param caching_enabled [Boolean] whether to add cache_control markers
# @param reasoning_effort [Object, nil] forwarded to the body builder
# @param on_chunk [Proc, nil] streaming callback; selects the streaming path
# @return [Object] result of parse_response, or of the streaming request
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages
  payload = MessageFormat::OpenAI.build_request_body(
    prepared, model, tools, max_tokens, caching_enabled,
    vision_supported: @vision_supported,
    reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_openai_stream_request(payload, on_chunk)
  else
    reply = openai_connection.post("chat/completions") do |request|
      request.body = payload.to_json
    end

    raise_error(reply) if reply.status != 200
    check_html_response(reply)
    MessageFormat::OpenAI.parse_response(safe_json_parse(reply.body, context: "LLM response"))
  end
end

#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).

Handles both direct model names (e.g. “claude-haiku-4-5”) and Clacky AI Bedrock proxy names with “abs-” prefix (e.g. “abs-claude-haiku-4-5”).

Why only Claude models:

- MiniMax uses automatic server-side caching (no cache_control needed from client)
- Kimi uses a proprietary prompt_cache_key param, not cache_control
- MiMo has no documented caching API
- Only Claude (direct, OpenRouter, or ClackyAI Bedrock proxy) consumes our
  cache_control / cachePoint markers

Returns:

  • (Boolean)


213
214
215
216
217
218
219
220
221
# File 'lib/clacky/client.rb', line 213

# Whether +model+ names a Claude generation that takes cache_control markers.
#
# The name is lower-cased and a leading "abs-" (ClackyAI Bedrock proxy
# prefix) is dropped before matching. Names without "claude" are rejected
# outright; the rest must match Claude gen 3.5+ (3.5/3.6/3.7…) or gen 4+ in
# any of the usual spellings, e.g.
#   claude-3.5-sonnet-...  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
#
# @param model [#to_s] model name
# @return [Boolean]
def supports_prompt_caching?(model)
  normalized = model.to_s.downcase.sub(/^abs-/, "")

  normalized.include?("claude") &&
    normalized.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end

#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: “…” }.



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/clacky/client.rb', line 62

# Probes the API with a minimal "hi" request (16 max tokens) and reports the
# outcome as a hash instead of raising.
#
# The request goes over the connection matching the active API type
# (Bedrock → Anthropic → OpenAI); the response is interpreted by
# handle_test_response. Faraday errors are reported as connection errors;
# any other StandardError is logged and reported with its message.
#
# @param model [String] model name to probe
# @return [Hash] { success: true } or { success: false, error: "…" }
def test_connection(model:)
  reply =
    if bedrock?
      payload = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |request| request.body = payload }
    elsif anthropic_format?
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      anthropic_connection.post(anthropic_messages_path) { |request| request.body = payload }
    else
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      openai_connection.post("chat/completions") { |request| request.body = payload }
    end

  handle_test_response(reply)
rescue Faraday::Error => e
  { success: false, error: "Connection error: #{e.message}" }
rescue => e
  Clacky::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end