Class: Clacky::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =
10
RETRY_DELAY =
5 (seconds)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ Client

Returns a new instance of Client.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/clacky/client.rb', line 13

# Build a client bound to one API key, endpoint and default model.
#
# @param api_key the credential; also inspected (with the model) to detect Bedrock
# @param base_url endpoint root; used for provider resolution and connections
# @param model default model name
# @param anthropic_format explicit fallback used when the provider lookup
#   does not opt this model into the Anthropic wire format
# @param read_timeout optional timeout override; nil leaves the connection
#   builders' default in place
def initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil)
  @api_key, @base_url, @model = api_key, base_url, model

  # Optional timeout override (e.g. benchmark calls); nil means "use the default".
  @read_timeout = read_timeout

  # Bedrock detection: ABSK key prefix (native AWS) or abs- model prefix (Clacky AI proxy).
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # Resolved once and remembered: it drives the wire-format decision below
  # and lets the connection builders tune per-provider headers later.
  @provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # The provider+model lookup takes precedence; the explicit constructor flag
  # is the fallback for unknown providers / custom base_urls. This lets e.g.
  # OpenRouter's Claude models route to the native /v1/messages endpoint
  # without any change to user configuration.
  @use_anthropic_format =
    (@provider_id && Providers.anthropic_format_for_model?(@provider_id, @model)) || anthropic_format
end

Instance Attribute Details

#provider_id ⇒ Object (readonly)

Returns the value of attribute provider_id.



11
12
13
# File 'lib/clacky/client.rb', line 11

# Reader for @provider_id, the value the constructor stores from
# Providers.resolve_provider. It can be falsy: the constructor itself guards
# with `provider_id &&` before using it.
def provider_id
  @provider_id
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message's content with a cache_control marker.



443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
# File 'lib/clacky/client.rb', line 443

# Return a copy of +msg+ whose content carries an ephemeral cache_control marker.
#
# * String content is wrapped in a single text block that carries the marker.
# * Array content gets the marker merged into its last block only; the other
#   blocks are passed through untouched.
# * Any other content (e.g. nil) returns +msg+ itself, unchanged.
#
# The input message and its blocks are never mutated.
def add_cache_control_to_message(msg)
  marker = { cache_control: { type: "ephemeral" } }
  body = msg[:content]

  if body.is_a?(String)
    msg.merge(content: [{ type: "text", text: body }.merge(marker)])
  elsif body.is_a?(Array)
    tail = body.length - 1
    tagged = body.each_with_index.map { |blk, pos| pos == tail ? blk.merge(marker) : blk }
    msg.merge(content: tagged)
  else
    msg
  end
end

#anthropic_connection ⇒ Object



556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
# File 'lib/clacky/client.rb', line 556

# Memoized Faraday connection for the Anthropic-format endpoint.
#
# A new connection is built when none is cached, or when the proxy epoch
# recorded at build time differs from Clacky::ProxyConfig.epoch now.
#
# NOTE(review): TLS certificate verification is switched off
# (ssl.verify = false) — confirm this is intentional.
def anthropic_connection
  epoch = Clacky::ProxyConfig.epoch
  epoch_changed = !@anthropic_connection_epoch.nil? && @anthropic_connection_epoch != epoch
  return @anthropic_connection unless @anthropic_connection.nil? || epoch_changed

  headers = {
    "Content-Type" => "application/json",
    "x-api-key" => @api_key,
    "anthropic-version" => "2023-06-01",
    "anthropic-dangerous-direct-browser-access" => "true"
  }
  # OpenRouter gets the key as a Bearer token in addition to x-api-key.
  headers["Authorization"] = "Bearer #{@api_key}" if @provider_id == "openrouter"
  # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
  # prefix whitelist limited to first-party coding agents.
  headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)" if @provider_id == "kimi-coding"

  fresh = Faraday.new(url: @base_url) do |conn|
    headers.each { |name, value| conn.headers[name] = value }
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end

  @anthropic_connection = fresh
  @anthropic_connection_epoch = epoch
  fresh
end

#anthropic_format?(model = nil) ⇒ Boolean

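Returns true when requests use the Anthropic wire format: the format was selected at construction time (by the provider/model lookup or the explicit anthropic_format flag) and the client is not in Bedrock mode. The model argument is currently ignored.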

Returns:

  • (Boolean)


49
50
51
# File 'lib/clacky/client.rb', line 49

# Whether requests should use the Anthropic wire format.
#
# True only when the format was selected at construction time AND the client
# is not in Bedrock mode. When the format was not selected, the stored
# (falsy) flag itself is returned.
#
# @param model accepted for call-site compatibility; not consulted here
def anthropic_format?(model = nil)
  @use_anthropic_format ? !@use_bedrock : @use_anthropic_format
end

#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
         messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks only messages[-1]; in Turn N+1 that same message is now messages[-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.



426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'lib/clacky/client.rb', line 426

# Mark up to two messages, counted from the end of the array, with
# cache_control, skipping compression instructions (see
# is_compression_instruction?).
#
# Returns +messages+ itself when it is empty; otherwise a new array in which
# the chosen messages are replaced by add_cache_control_to_message's result
# and every other element is passed through as-is.
def apply_message_caching(messages)
  return messages if messages.empty?

  # Walk backwards lazily so the scan stops as soon as two candidates are found.
  marked = (messages.length - 1).downto(0).lazy
                                .reject { |pos| is_compression_instruction?(messages[pos]) }
                                .first(2)

  messages.each_with_index.map do |message, pos|
    marked.include?(pos) ? add_cache_control_to_message(message) : message
  end
end

#bedrock? ⇒ Boolean

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

  • (Boolean)


43
44
45
# File 'lib/clacky/client.rb', line 43

# Returns the Bedrock flag computed once in the constructor by
# MessageFormat::Bedrock.bedrock_api_key?(api_key, model).
def bedrock?
  @use_bedrock
end

#bedrock_connection ⇒ Object



522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
# File 'lib/clacky/client.rb', line 522

# Memoized Faraday connection for the Bedrock endpoint (Bearer auth).
#
# A new connection is built when none is cached, or when the proxy epoch
# recorded at build time differs from Clacky::ProxyConfig.epoch now.
#
# NOTE(review): TLS certificate verification is switched off
# (ssl.verify = false) — confirm this is intentional.
def bedrock_connection
  epoch = Clacky::ProxyConfig.epoch
  epoch_changed = !@bedrock_connection_epoch.nil? && @bedrock_connection_epoch != epoch
  return @bedrock_connection unless @bedrock_connection.nil? || epoch_changed

  fresh = Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end

  @bedrock_connection = fresh
  @bedrock_connection_epoch = epoch
  fresh
end

#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.



467
468
469
# File 'lib/clacky/client.rb', line 467

# Path of the Bedrock Converse endpoint for +model+.
# The model id is inserted verbatim (no URL-escaping is applied here).
def bedrock_endpoint(model)
  format("/model/%s/converse", model)
end

#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)



674
675
676
677
678
679
# File 'lib/clacky/client.rb', line 674

# Guard against gateways that answer with an HTML page (e.g. an error page
# served with status 200) instead of JSON.
#
# Returns nil when the body does not look like HTML.
#
# @raise [RetryableError] when the body, ignoring leading whitespace, starts
#   with a doctype or <html tag (upper- or lower-case spellings listed below)
def check_html_response(response)
  leading = response.body.to_s.lstrip
  return unless leading.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")

  raise RetryableError, "[LLM] #{I18n.t("llm.error.html_response")}"
end

#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────



766
767
768
769
770
771
772
# File 'lib/clacky/client.rb', line 766

# Recursively copy nested Hash/Array containers.
#
# Only the containers are duplicated: every non-Hash, non-Array value
# (strings included) is returned as the same object, so leaves stay shared
# with the original structure. Hash keys are reused as-is.
def deep_clone(obj)
  if obj.is_a?(Hash)
    obj.to_h { |key, value| [key, deep_clone(value)] }
  elsif obj.is_a?(Array)
    obj.map { |element| deep_clone(element) }
  else
    obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ Object



688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
# File 'lib/clacky/client.rb', line 688

# Pick the most useful human-readable message out of an error response.
#
# Lookup order:
#   1. HTML body            -> fixed "received HTML instead of JSON" message
#   2. blank body, no Hash  -> "(empty response body)"
#   3. body is not a Hash   -> the raw body, unchanged
#   4. "upstreamMessage"    -> when present and not empty
#   5. nested "error" Hash  -> whatever extract_upstream_error finds, if anything
#   6. "message"            -> when present
#   7. "error" as a String
#   8. first 200 characters of the raw body, with "..." only if text was cut
#
# @param error_body the parsed response body (Hash), or anything else when
#   parsing failed
# @param raw_body the unparsed response body
# @return the extracted message; usually a String, but a non-String
#   "upstreamMessage"/"message" value is passed through as-is
def extract_error_message(error_body, raw_body)
  # Same prefixes as check_html_response, so both helpers agree on what
  # counts as an HTML page.
  if raw_body.is_a?(String) &&
     raw_body.lstrip.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  raw_text = raw_body.to_s
  return "(empty response body)" if raw_text.strip.empty? && !error_body.is_a?(Hash)
  return raw_body unless error_body.is_a?(Hash)

  # Only call #empty? on values that have it, so an unexpected JSON type here
  # (number, boolean) cannot raise while we are reporting another error.
  upstream = error_body["upstreamMessage"]
  return upstream if upstream && !(upstream.respond_to?(:empty?) && upstream.empty?)

  if error_body["error"].is_a?(Hash)
    nested = extract_upstream_error(error_body["error"])
    return nested if nested
  end

  message = error_body["message"]
  return message unless message.nil?
  return error_body["error"] if error_body["error"].is_a?(String)

  # Keep exactly 200 characters so the ellipsis appears only on real truncation.
  snippet = raw_text[0, 200]
  raw_text.length > 200 ? "#{snippet}..." : snippet
end

#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: "tool") regardless of API type — conversion to API-native happens inside each send_*_request.



183
184
185
186
187
188
189
190
191
192
193
# File 'lib/clacky/client.rb', line 183

# Turn tool results into messages by delegating to the formatter that matches
# the active API type (Bedrock, Anthropic, otherwise OpenAI).
#
# Returns [] without consulting any formatter when +tool_results+ is empty.
#
# @param model required keyword kept for the call signature; not used here
def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  formatter =
    if bedrock?
      MessageFormat::Bedrock
    elsif anthropic_format?
      MessageFormat::Anthropic
    else
      MessageFormat::OpenAI
    end

  formatter.format_tool_results(response, tool_results)
end

#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────



601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
# File 'lib/clacky/client.rb', line 601

# Convert the HTTP response of a connection test into a result hash.
#
# @param response an object responding to #status and #body
# @return [Hash] { success: true, status: 200 } on success, otherwise
#   { success: false, status:, error:, error_code: } where +error+ is a
#   translated, user-facing message
def handle_test_response(response)
  return { success: true, status: response.status } if response.status == 200

  # Best effort: an unparseable body simply means "no structured error info".
  error_body =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  error_code = extract_error_code(error_body)

  translated =
    case response.status
    when 402 then I18n.t("llm.error.insufficient_credit")
    # NOTE(review): every 400 is reported with the rate-limit text here,
    # whereas raise_error only does so for throttling-style messages — confirm.
    when 400 then I18n.t("llm.error.rate_limit_400")
    when 401 then I18n.t("llm.error.invalid_api_key")
    when 403
      # Same fallback as raise_error: I18n.t returns the key itself when no
      # translation exists, so an unknown error code must fall back to the
      # default 403 text instead of showing the raw key.
      i18n_key = "llm.error.403.#{error_code || "default"}"
      text = I18n.t(i18n_key)
      text == i18n_key ? I18n.t("llm.error.403.default") : text
    when 404 then I18n.t("llm.error.endpoint_not_found")
    when 429 then I18n.t("llm.error.rate_limit_429")
    when 500..599 then I18n.t("llm.error.server_error", status: response.status)
    else extract_error_message(error_body, response.body)
    end

  {
    success:    false,
    status:     response.status,
    error:      translated,
    error_code: error_code
  }
end

#is_compression_instruction?(message) ⇒ Boolean

Returns:

  • (Boolean)


460
461
462
# File 'lib/clacky/client.rb', line 460

# True only for Hash messages whose :system_injected entry equals true.
# Anything that is not a Hash yields false.
def is_compression_instruction?(message)
  return false unless message.is_a?(Hash)

  message[:system_injected] == true
end

#openai_connection ⇒ Object



539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# File 'lib/clacky/client.rb', line 539

# Memoized Faraday connection for the OpenAI-compatible endpoint (Bearer auth).
#
# A new connection is built when none is cached, or when the proxy epoch
# recorded at build time differs from Clacky::ProxyConfig.epoch now.
#
# NOTE(review): TLS certificate verification is switched off
# (ssl.verify = false) — confirm this is intentional.
def openai_connection
  epoch = Clacky::ProxyConfig.epoch
  epoch_changed = !@openai_connection_epoch.nil? && @openai_connection_epoch != epoch
  return @openai_connection unless @openai_connection.nil? || epoch_changed

  fresh = Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end

  @openai_connection = fresh
  @openai_connection_epoch = epoch
  fresh
end

#parse_simple_anthropic_response(response) ⇒ Object



328
329
330
331
332
# File 'lib/clacky/client.rb', line 328

# Extract the reply text from an Anthropic-format response.
#
# Hands any non-200 response to raise_error, then concatenates the "text" of
# every content block whose "type" is "text". A missing "content" key yields "".
def parse_simple_anthropic_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  blocks = payload["content"] || []
  texts = blocks.each_with_object([]) do |blk, acc|
    acc << blk["text"] if blk["type"] == "text"
  end
  texts.join("")
end

#parse_simple_bedrock_response(response) ⇒ Object



269
270
271
272
273
274
275
276
# File 'lib/clacky/client.rb', line 269

# Extract the reply text from a Bedrock Converse response.
#
# Hands any non-200 response to raise_error, then concatenates every truthy
# "text" value found under output.message.content. A missing path yields "".
def parse_simple_bedrock_response(response)
  raise_error(response) if response.status != 200

  payload = safe_json_parse(response.body, context: "LLM response")
  blocks = payload.dig("output", "message", "content") || []
  texts = blocks.map { |blk| blk["text"] }.select(&:itself)
  texts.join("")
end

#parse_simple_openai_response(response) ⇒ Object



396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
# File 'lib/clacky/client.rb', line 396

# Extract choices[0].message.content from an OpenAI-compatible response.
#
# Hands any non-200 response to raise_error. When the content is missing, a
# warning is logged (only if Clacky::Logger is defined) and Clacky::Error is
# raised; both carry the first 1200 characters of the body. An empty-string
# content is returned as-is.
#
# @raise [Clacky::Error] when choices[0].message.content is nil
def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200

  payload = safe_json_parse(response.body, context: "LLM response")
  text = payload.dig("choices", 0, "message", "content")
  return text unless text.nil?

  snippet = response.body.to_s[0, 1200]
  if defined?(Clacky::Logger)
    Clacky::Logger.warn("[parse_simple_openai_response] no content. status=#{response.status} body=#{snippet}")
  end

  raise Clacky::Error,
    "Upstream OpenAI-compatible response missing choices[0].message.content. " \
    "Body snippet: #{snippet}"
end

#raise_error(response) ⇒ Object



626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
# File 'lib/clacky/client.rb', line 626

# Translate a non-success HTTP response into the matching exception.
# Always raises; never returns normally.
#
# Mapping:
#   insufficient_credit code or 402 -> InsufficientCreditError
#   400 with throttling-style text  -> RetryableError
#   400 otherwise                   -> BadRequestError
#   401 / 403 / 404 / other         -> AgentError
#   429 / 5xx                       -> RetryableError
#
# @param response an object responding to #status and #body
def raise_error(response)
  # Best effort: an unparseable body simply means "no structured error info".
  error_body =
    begin
      JSON.parse(response.body)
    rescue StandardError
      nil
    end
  error_message = extract_error_message(error_body, response.body)
  error_code    = extract_error_code(error_body)
  # The extracted message is whatever the upstream JSON held and is not
  # guaranteed to be a String; use a String view for slicing and matching so
  # an odd payload cannot mask the real HTTP error with a NoMethodError.
  message_text  = error_message.to_s

  Clacky::Logger.warn("client.raise_error",
    status: response.status,
    body: response.body.to_s[0, 2000],
    error_message: message_text[0, 500],
    error_code: error_code
  )

  if error_code == "insufficient_credit" || response.status == 402
    raise InsufficientCreditError.new(
      "[LLM] #{I18n.t("llm.error.insufficient_credit")}",
      error_code: "insufficient_credit",
      provider_id: @provider_id,
      raw_message: error_message
    )
  end

  case response.status
  when 400
    # Some upstreams report throttling / quota problems as 400; those are retryable.
    if message_text.match?(/ThrottlingException|unavailable|quota/i)
      raise RetryableError, "[LLM] #{I18n.t("llm.error.rate_limit_400")}"
    end

    raise BadRequestError.new(
      "[LLM] Client request error: #{error_message}",
      display_message: "[LLM] #{I18n.t("llm.error.bad_request")}",
      raw_message: error_message
    )
  when 401
    raise AgentError.new("[LLM] #{I18n.t("llm.error.invalid_api_key")}", raw_message: error_message)
  when 403
    # I18n.t hands back the key itself when no translation exists; fall back
    # to the generic 403 text in that case.
    i18n_key = "llm.error.403.#{error_code}"
    translated = I18n.t(i18n_key)
    translated = I18n.t("llm.error.403.default") if translated == i18n_key
    raise AgentError.new("[LLM] #{translated}", raw_message: error_message)
  when 404
    raise AgentError.new("[LLM] #{I18n.t("llm.error.endpoint_not_found")}", raw_message: error_message)
  when 429 then raise RetryableError, "[LLM] #{I18n.t("llm.error.rate_limit_429")}"
  when 500..599 then raise RetryableError, "[LLM] #{I18n.t("llm.error.server_error", status: response.status)}"
  else raise AgentError.new("[LLM] #{I18n.t("llm.error.unexpected", status: response.status)}", raw_message: error_message)
  end
end

#reset_connections! ⇒ Object



516
517
518
519
520
# File 'lib/clacky/client.rb', line 516

# Drop all three memoized connections so each is rebuilt on next use.
# The recorded epochs are left untouched. Returns nil.
def reset_connections!
  @bedrock_connection = @openai_connection = @anthropic_connection = nil
end

#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

Parse JSON with user-friendly error messages.

Parameters:

  • json_string (String)

    the JSON string to parse

  • context (String) (defaults to: "response")

    a description of what's being parsed (e.g., "LLM response")

Returns:

  • (Hash, Array)

    the parsed JSON

Raises:

  • (RetryableError)

    if parsing fails (indicates a malformed LLM response)



731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
# File 'lib/clacky/client.rb', line 731

# Parse JSON, converting parser failures into a user-friendly, retryable error.
#
# @param json_string [String, nil] the JSON text to parse; nil is reported the
#   same way as an empty body
# @param context [String] what is being parsed (e.g. "LLM response"); only
#   used in the error message
# @return [Hash, Array] the parsed JSON
# @raise [RetryableError] if parsing fails (indicates a malformed LLM response)
def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError, TypeError
  # TypeError covers non-String input: JSON.parse(nil) raises it rather than
  # ParserError, and without rescuing it the "empty response" branch below
  # could never be reached for a nil body.
  #
  # Parsing failures are usually caused by:
  #   1. Incomplete/truncated LLM response (network issue, timeout)
  #   2. LLM service returned malformed data
  #   3. Proxy/gateway corruption
  text = json_string.to_s
  error_detail =
    if text.strip.empty?
      "received empty response"
    elsif text.bytesize > 500
      "response was truncated or malformed (#{text.bytesize} bytes received)"
    else
      "response format is invalid"
    end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                       "This usually means the AI service returned incomplete or corrupted data. " \
                       "The request will be retried automatically."
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────



280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/clacky/client.rb', line 280

# Send one tool-capable request in Anthropic format.
#
# Cache markers are applied to the messages first when +caching_enabled+ is
# set. With an +on_chunk+ callback the request is delegated to
# send_anthropic_stream_request; otherwise a plain POST is made, non-200 and
# HTML responses are rejected, and the parsed body is run through
# MessageFormat::Anthropic.parse_response.
def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages
  body = MessageFormat::Anthropic.build_request_body(
    prepared, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_anthropic_stream_request(body, on_chunk)
  else
    response = anthropic_connection.post(anthropic_messages_path) { |req| req.body = body.to_json }

    raise_error(response) unless response.status == 200
    check_html_response(response)
    MessageFormat::Anthropic.parse_response(safe_json_parse(response.body, context: "LLM response"))
  end
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────



221
222
223
224
225
226
227
228
229
230
231
# File 'lib/clacky/client.rb', line 221

# Send one tool-capable request to the Bedrock Converse endpoint.
#
# With an +on_chunk+ callback the request is delegated to
# send_bedrock_stream_request; otherwise a plain POST is made, non-200 and
# HTML responses are rejected, and the parsed body is run through
# MessageFormat::Bedrock.parse_response. +caching_enabled+ is only forwarded
# to the request-body builder.
def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  body = MessageFormat::Bedrock.build_request_body(
    messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_bedrock_stream_request(body, model, on_chunk)
  else
    response = bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = body.to_json }

    raise_error(response) unless response.status == 200
    check_html_response(response)
    MessageFormat::Bedrock.parse_response(safe_json_parse(response.body, context: "LLM response"))
  end
end

#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.



83
84
85
86
# File 'lib/clacky/client.rb', line 83

# Convenience wrapper: send +content+ as a single user message through
# send_messages and return whatever it returns.
def send_message(content, model:, max_tokens:)
  send_messages([{ role: "user", content: content }], model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/clacky/client.rb', line 89

# Send a messages array without tools and return the reply text.
#
# The request body, connection, path and response parser are chosen by API
# type: Bedrock first, then Anthropic format, otherwise OpenAI-compatible
# "chat/completions".
def send_messages(messages, model:, max_tokens:)
  # Shared POST helper: serialises the payload to JSON as the request body.
  post_json = lambda do |connection, path, payload|
    connection.post(path) { |req| req.body = payload.to_json }
  end

  if bedrock?
    payload = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    return parse_simple_bedrock_response(post_json.call(bedrock_connection, bedrock_endpoint(model), payload))
  end

  if anthropic_format?
    payload = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    return parse_simple_anthropic_response(post_json.call(anthropic_connection, anthropic_messages_path, payload))
  end

  payload = { model: model, max_tokens: max_tokens, messages: messages }
  parse_simple_openai_response(post_json.call(openai_connection, "chat/completions", payload))
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ Object

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }

Latency measurement:

For a *non-streaming* call (no on_chunk; plain POST, response body read in
one shot), TTFB (time to response headers) is not exposed by Faraday's
default adapter without extra plumbing. What we CAN measure cheaply — and
what users actually feel — is total request duration, which for a
non-streaming call equals the time from "hit Enter" to "first token
visible" (since we receive everything at once).

So `duration_ms` is always recorded as the authoritative number, and
`ttft_ms` falls back to it when no chunk was observed (status bar uses
ttft_ms as its signal metric — see docs). When an on_chunk callback is
given, `ttft_ms` is instead the time from request start to the first
on_chunk invocation — same field, no schema change.

Parameters:

  • on_chunk (Proc, nil) (defaults to: nil)

    optional streaming progress callback. Receives keyword args { input_tokens:, output_tokens: } with cumulative token counts. When nil, behaves exactly as the historical non-streaming path. When given, it is forwarded to the active API path's request method (the Bedrock, Anthropic and OpenAI paths all accept it), so the synthetic post-response invocation originally planned for providers without streaming never fires.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/clacky/client.rb', line 129

# Send messages with tool-calling support and attach latency metrics.
#
# The messages are deep-cloned before being handed to the API-specific
# request method, so the caller's array is not modified by this method.
#
# @param messages [Array] conversation messages
# @param model the model to call
# @param tools tool definitions forwarded to the request builder
# @param max_tokens forwarded to the request builder
# @param enable_caching [Boolean] request prompt caching; only honoured when
#   supports_prompt_caching?(model) agrees
# @param reasoning_effort forwarded to the request builder
# @param on_chunk [Proc, nil] streaming progress callback; when given it is
#   forwarded (wrapped to timestamp the first chunk) to the request method
# @return [Hash] the response from the request method with an added
#   :latency hash: ttft_ms, duration_ms, output_tokens, tps, model,
#   measured_at, streaming
def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil)
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  cloned = deep_clone(messages)

  # All three API paths accept on_chunk, so "streaming" simply means "a
  # callback was supplied". The former synthetic post-response callback for
  # non-streaming providers could therefore never run and has been dropped.
  streaming_used = !on_chunk.nil?

  # Wrap the callback so the arrival time of the first chunk is captured.
  first_chunk_at = nil
  wrapped_on_chunk = on_chunk && lambda do |**kwargs|
    first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
    on_chunk.call(**kwargs)
  end

  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  response =
    if bedrock?
      send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    elsif anthropic_format?
      send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    else
      send_openai_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    end
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  duration_ms = ((t1 - t0) * 1000).round
  # Without an observed chunk, time-to-first-token falls back to total duration.
  ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
  # Tokens/second is only reported for at least 10 output tokens and a non-zero duration.
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil

  response[:latency] = {
    ttft_ms:     ttft_ms,
    duration_ms: duration_ms,
    output_tokens: output_tokens,
    tps:         tps,
    model:       model,
    measured_at: Time.now.to_f,
    streaming:   streaming_used
  }
  response
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/clacky/client.rb', line 336

# Send one tool-capable request to the OpenAI-compatible endpoint.
#
# Cache markers are applied to the messages first when +caching_enabled+ is
# set (OpenRouter proxies Claude with the same cache_control field convention
# as Anthropic direct). With an +on_chunk+ callback the request is delegated
# to send_openai_stream_request; otherwise a plain POST is made, non-200 and
# HTML responses are rejected, and the parsed body is run through
# MessageFormat::OpenAI.parse_response.
def send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  prepared = caching_enabled ? apply_message_caching(messages) : messages

  # Vision support is looked up for the model of THIS request (which may
  # differ from @model after a runtime switch or fallback override), so the
  # conversion layer can strip image_url blocks for non-vision models.
  vision_ok = Providers.supports?(@provider_id, :vision, model_name: model)
  body = MessageFormat::OpenAI.build_request_body(
    prepared, model, tools, max_tokens, caching_enabled,
    vision_supported: vision_ok,
    reasoning_effort: reasoning_effort
  )

  if on_chunk
    send_openai_stream_request(body, on_chunk)
  else
    response = openai_connection.post("chat/completions") { |req| req.body = body.to_json }

    raise_error(response) unless response.status == 200
    check_html_response(response)
    MessageFormat::OpenAI.parse_response(safe_json_parse(response.body, context: "LLM response"))
  end
end

#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).

Handles both direct model names (e.g. "claude-haiku-4-5") and Clacky AI Bedrock proxy names with "abs-" prefix (e.g. "abs-claude-haiku-4-5").

Why only Claude models:

- MiniMax uses automatic server-side caching (no cache_control needed from client)
- Kimi uses a proprietary prompt_cache_key param, not cache_control
- MiMo has no documented caching API
- Only Claude (direct, OpenRouter, or ClackyAI Bedrock proxy) consumes our
cache_control / cachePoint markers

Returns:

  • (Boolean)


208
209
210
211
212
213
214
215
216
# File 'lib/clacky/client.rb', line 208

# Whether +model+ names a Claude model that takes our cache markers
# (gen 3.5+ or gen 4+).
#
# The name is lower-cased and a leading "abs-" (ClackyAI Bedrock proxy
# prefix) is removed before matching. Names that do not contain "claude"
# are always false.
def supports_prompt_caching?(model)
  name = model.to_s.downcase.sub(/^abs-/, "")

  # Accepts gen 3.5+ (3.5/3.6/3.7…) or gen 4+ in any name format:
  #   claude-3.5-sonnet-...  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
  name.include?("claude") &&
    name.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end

#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: "..." }.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/clacky/client.rb', line 57

# Probe the API with a minimal "hi" request (max 16 tokens) on the connection
# that matches the active API type.
#
# @return [Hash] handle_test_response's result for any HTTP response, or
#   { success: false, error: "..." } when the request itself raised
def test_connection(model:)
  response =
    if bedrock?
      payload = MessageFormat::Bedrock.build_request_body(
        [{ role: :user, content: "hi" }], model, [], 16
      ).to_json
      bedrock_connection.post(bedrock_endpoint(model)) { |req| req.body = payload }
    else
      # Anthropic-format and OpenAI-compatible endpoints take the same minimal body.
      use_anthropic = anthropic_format?
      payload = { model: model, max_tokens: 16,
                  messages: [{ role: "user", content: "hi" }] }.to_json
      if use_anthropic
        anthropic_connection.post(anthropic_messages_path) { |req| req.body = payload }
      else
        openai_connection.post("chat/completions") { |req| req.body = payload }
      end
    end

  handle_test_response(response)
rescue Faraday::Error => e
  { success: false, error: "Connection error: #{e.message}" }
rescue => e
  # Anything unexpected is logged and reported instead of propagating.
  Clacky::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end