Class: Clacky::Client

Inherits:

Object

Object
Clacky::Client

show all

Defined in:: lib/clacky/client.rb

Constant Summary collapse

MAX_RETRIES =

RETRY_DELAY = seconds

Instance Attribute Summary collapse

#provider_id ⇒ Object readonly

Returns the value of attribute provider_id.

Instance Method Summary collapse

#add_cache_control_to_message(msg) ⇒ Object

Wrap or extend the message’s content with a cache_control marker.
#anthropic_connection ⇒ Object
#anthropic_format?(model = nil) ⇒ Boolean

Returns true when the client is talking directly to the Anthropic API (determined at construction time via the anthropic_format flag).
#apply_message_caching(messages) ⇒ Object

Add cache_control markers to the last 2 messages in the array.
#bedrock? ⇒ Boolean

Returns true when the client is using the AWS Bedrock Converse API.
#bedrock_connection ⇒ Object
#bedrock_endpoint(model) ⇒ Object

Bedrock Converse API endpoint path for a given model ID.
#check_html_response(response) ⇒ Object

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200).
#deep_clone(obj) ⇒ Object

── Utilities ─────────────────────────────────────────────────────────────.
#extract_error_message(error_body, raw_body) ⇒ Object
#format_tool_results(response, tool_results, model:) ⇒ Object

Format tool results into canonical messages ready to append to @messages.
#handle_test_response(response) ⇒ Object

── Error handling ────────────────────────────────────────────────────────.
#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ Client constructor

A new instance of Client.
#is_compression_instruction?(message) ⇒ Boolean
#openai_connection ⇒ Object
#parse_simple_anthropic_response(response) ⇒ Object
#parse_simple_bedrock_response(response) ⇒ Object
#parse_simple_openai_response(response) ⇒ Object
#raise_error(response) ⇒ Object
#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

Parse JSON with user-friendly error messages.
#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Anthropic request / response ──────────────────────────────────────────.
#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── Bedrock Converse request / response ───────────────────────────────────.
#send_message(content, model:, max_tokens:) ⇒ Object

Send a single string message and return the reply text.
#send_messages(messages, model:, max_tokens:) ⇒ Object

Send a messages array and return the reply text.
#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ Object

Send messages with tool-calling support.
#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

── OpenAI request / response ─────────────────────────────────────────────.
#supports_prompt_caching?(model) ⇒ Boolean

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).
#test_connection(model:) ⇒ Object

Test API connection by sending a minimal request.

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ `Client`

Returns a new instance of Client.

# File 'lib/clacky/client.rb', line 13

def initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil)
  @api_key = api_key
  @base_url = base_url
  @model = model
  # Detect Bedrock: ABSK key prefix (native AWS) or abs- model prefix (Clacky AI proxy)
  @use_bedrock = MessageFormat::Bedrock.bedrock_api_key?(api_key, model)

  # Resolve provider once — reused for capability + api-type lookups.
  provider_id = Providers.resolve_provider(base_url: @base_url, api_key: @api_key)

  # Decide anthropic_format dynamically based on provider+model, falling
  # back to the explicit constructor flag for unknown providers / custom
  # base_urls. This lets e.g. OpenRouter's Claude models auto-route to the
  # native /v1/messages endpoint (preserving cache_control byte-for-byte)
  # without requiring any change to user YAML.
  provider_prefers_anthropic = provider_id &&
                               Providers.anthropic_format_for_model?(provider_id, @model)
  @use_anthropic_format = provider_prefers_anthropic || anthropic_format

  # Remember the provider id so we can tune connection headers below
  # (OpenRouter's /v1/messages accepts either Bearer or x-api-key, but
  # some OpenRouter-compatible relays only honour Bearer — send both).
  @provider_id = provider_id

  # Determine vision support once at construction time.
  # Non-vision models (DeepSeek, Kimi, MiniMax, etc.) reject image_url
  # content blocks; the conversion layer strips them when this is false.
  @vision_supported = Providers.supports?(provider_id, :vision, model_name: @model)

  # Optional override for Faraday read_timeout (e.g. benchmark calls).
  # nil means use the default (300s for streaming).
  @read_timeout = read_timeout
end

Instance Attribute Details

#provider_id ⇒ `Object` (readonly)

Returns the value of attribute provider_id.



11
12
13

# File 'lib/clacky/client.rb', line 11

def provider_id
  @provider_id
end

Instance Method Details

#add_cache_control_to_message(msg) ⇒ `Object`

Wrap or extend the message’s content with a cache_control marker.

# File 'lib/clacky/client.rb', line 435

def add_cache_control_to_message(msg)
  content = msg[:content]

  content_array = case content
                  when String
                    [{ type: "text", text: content, cache_control: { type: "ephemeral" } }]
                  when Array
                    content.map.with_index do |block, idx|
                      idx == content.length - 1 ? block.merge(cache_control: { type: "ephemeral" }) : block
                    end
                  else
                    return msg
                  end

  msg.merge(content: content_array)
end

#anthropic_connection ⇒ `Object`

# File 'lib/clacky/client.rb', line 530

def anthropic_connection
  @anthropic_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]   = "application/json"
    conn.headers["x-api-key"]      = @api_key
    conn.headers["anthropic-version"] = "2023-06-01"
    conn.headers["anthropic-dangerous-direct-browser-access"] = "true"
    # OpenRouter's /v1/messages endpoint authenticates with a Bearer
    # token (the OpenRouter API key), not Anthropic's x-api-key. We send
    # both so the same connection code works for direct Anthropic and
    # for OpenRouter-proxied Claude — each endpoint ignores the header
    # it doesn't recognise.
    if @provider_id == "openrouter"
      conn.headers["Authorization"] = "Bearer #{@api_key}"
    end
    # Moonshot's Kimi Code (Coding Plan) endpoint enforces a User-Agent
    # prefix whitelist limited to first-party coding agents (Kimi CLI,
    # Claude Code, Roo Code, Kilo Code, ...). Requests with the default
    # Faraday UA are rejected with HTTP 403 access_terminated_error,
    # despite a valid API key. We send a Claude Code-shaped UA here
    # because openclacky talks to this endpoint over the same Anthropic
    # /v1/messages protocol that Claude Code uses, so the UA matches the
    # wire-level behaviour. Hardcoding rather than exposing as a config
    # field is intentional: the only UAs known to pass the gate are the
    # whitelisted-client formats, and the project's preset registry is
    # the single source of truth for provider-specific quirks (mirroring
    # how the openrouter Bearer-fallback above is hardcoded).
    if @provider_id == "kimi-coding"
      conn.headers["User-Agent"] = "claude-cli/1.0.51 (external, cli)"
    end
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#anthropic_format?(model = nil) ⇒ `Boolean`

Returns true when the client is talking directly to the Anthropic API (determined at construction time via the anthropic_format flag).

Returns:

(Boolean)



54
55
56

# File 'lib/clacky/client.rb', line 54

def anthropic_format?(model = nil)
  @use_anthropic_format && !@use_bedrock
end

#apply_message_caching(messages) ⇒ `Object`

Add cache_control markers to the last 2 messages in the array.

Why 2 markers:

Turn N   — marks messages[-2] and messages[-1]; server caches prefix up to [-1]
Turn N+1 — messages[-2] is Turn N's last message (still marked) → cache READ hit;
           messages[-1] is the new message (marked) → cache WRITE for Turn N+2

With only 1 marker (old behavior): Turn N marks messages; in Turn N+1 that same message is now [-2] and carries no marker → server sees a different prefix → cache MISS.

Compression instructions (system_injected: true) are skipped — we never want to cache those ephemeral injection messages.

# File 'lib/clacky/client.rb', line 418

def apply_message_caching(messages)
  return messages if messages.empty?

  # Collect up to 2 candidate indices from the tail, skipping compression instructions.
  candidate_indices = []
  (messages.length - 1).downto(0) do |i|
    break if candidate_indices.length >= 2

    candidate_indices << i unless is_compression_instruction?(messages[i])
  end

  messages.map.with_index do |msg, idx|
    candidate_indices.include?(idx) ? add_cache_control_to_message(msg) : msg
  end
end

#bedrock? ⇒ `Boolean`

Returns true when the client is using the AWS Bedrock Converse API.

Returns:

(Boolean)



48
49
50

# File 'lib/clacky/client.rb', line 48

def bedrock?
  @use_bedrock
end

#bedrock_connection ⇒ `Object`

# File 'lib/clacky/client.rb', line 508

def bedrock_connection
  @bedrock_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#bedrock_endpoint(model) ⇒ `Object`

Bedrock Converse API endpoint path for a given model ID.



459
460
461

# File 'lib/clacky/client.rb', line 459

def bedrock_endpoint(model)
  "/model/#{model}/converse"
end

#check_html_response(response) ⇒ `Object`

Raise a friendly error if the response body is HTML (e.g. gateway error page returned with 200)

# File 'lib/clacky/client.rb', line 639

def check_html_response(response)
  body = response.body.to_s.lstrip
  if body.start_with?("<!DOCTYPE", "<!doctype", "<html", "<HTML")
    raise RetryableError, "[LLM] Service temporarily unavailable (received HTML error page), retrying..."
  end
end

#deep_clone(obj) ⇒ `Object`

── Utilities ─────────────────────────────────────────────────────────────

# File 'lib/clacky/client.rb', line 731

def deep_clone(obj)
  case obj
  when Hash  then obj.each_with_object({}) { |(k, v), h| h[k] = deep_clone(v) }
  when Array then obj.map { |item| deep_clone(item) }
  else obj
  end
end

#extract_error_message(error_body, raw_body) ⇒ `Object`

# File 'lib/clacky/client.rb', line 653

def extract_error_message(error_body, raw_body)
  if raw_body.is_a?(String) && raw_body.strip.start_with?("<!DOCTYPE", "<html")
    return "Invalid API endpoint or server error (received HTML instead of JSON)"
  end

  return "(empty response body)" if raw_body.to_s.strip.empty? && !error_body.is_a?(Hash)
  return raw_body unless error_body.is_a?(Hash)

  error_body["upstreamMessage"]&.then { |m| return m unless m.empty? }

  if error_body["error"].is_a?(Hash)
    upstream_msg = extract_upstream_error(error_body["error"])
    return upstream_msg if upstream_msg
  end

  error_body["message"]&.then             { |m| return m }
  error_body["error"].is_a?(String) ? error_body["error"] : (raw_body.to_s[0..200] + (raw_body.to_s.length > 200 ? "..." : ""))
end

#format_tool_results(response, tool_results, model:) ⇒ `Object`

Format tool results into canonical messages ready to append to @messages. Always returns canonical format (role: “tool”) regardless of API type —conversion to API-native happens inside each send_*_request.

# File 'lib/clacky/client.rb', line 188

def format_tool_results(response, tool_results, model:)
  return [] if tool_results.empty?

  if bedrock?
    MessageFormat::Bedrock.format_tool_results(response, tool_results)
  elsif anthropic_format?
    MessageFormat::Anthropic.format_tool_results(response, tool_results)
  else
    MessageFormat::OpenAI.format_tool_results(response, tool_results)
  end
end

#handle_test_response(response) ⇒ `Object`

── Error handling ────────────────────────────────────────────────────────

# File 'lib/clacky/client.rb', line 584

def handle_test_response(response)
  return { success: true, status: response.status } if response.status == 200

  error_body = JSON.parse(response.body) rescue nil
  {
    success:    false,
    status:     response.status,
    error:      extract_error_message(error_body, response.body),
    error_code: extract_error_code(error_body)
  }
end

#is_compression_instruction?(message) ⇒ `Boolean`

Returns:

(Boolean)



452
453
454

# File 'lib/clacky/client.rb', line 452

def is_compression_instruction?(message)
  message.is_a?(Hash) && message[:system_injected] == true
end

#openai_connection ⇒ `Object`

# File 'lib/clacky/client.rb', line 519

def openai_connection
  @openai_connection ||= Faraday.new(url: @base_url) do |conn|
    conn.headers["Content-Type"]  = "application/json"
    conn.headers["Authorization"] = "Bearer #{@api_key}"
    conn.options.timeout      = @read_timeout || 300
    conn.options.open_timeout = 10
    conn.ssl.verify           = false
    conn.adapter Faraday.default_adapter
  end
end

#parse_simple_anthropic_response(response) ⇒ `Object`

# File 'lib/clacky/client.rb', line 333

def parse_simple_anthropic_response(response)
  raise_error(response) unless response.status == 200
  data = safe_json_parse(response.body, context: "LLM response")
  (data["content"] || []).select { |b| b["type"] == "text" }.map { |b| b["text"] }.join("")
end

#parse_simple_bedrock_response(response) ⇒ `Object`

# File 'lib/clacky/client.rb', line 274

def parse_simple_bedrock_response(response)
  raise_error(response) unless response.status == 200
  data = safe_json_parse(response.body, context: "LLM response")
  (data.dig("output", "message", "content") || [])
    .select { |b| b["text"] }
    .map { |b| b["text"] }
    .join("")
end

#parse_simple_openai_response(response) ⇒ `Object`

# File 'lib/clacky/client.rb', line 398

def parse_simple_openai_response(response)
  raise_error(response) unless response.status == 200
  parsed_body = safe_json_parse(response.body, context: "LLM response")
  parsed_body["choices"].first["message"]["content"]
end

#raise_error(response) ⇒ `Object`

# File 'lib/clacky/client.rb', line 596

def raise_error(response)
  error_body    = JSON.parse(response.body) rescue nil
  error_message = extract_error_message(error_body, response.body)
  error_code    = extract_error_code(error_body)

  Clacky::Logger.warn("client.raise_error",
    status: response.status,
    body: response.body.to_s[0, 2000],
    error_message: error_message.to_s[0, 500],
    error_code: error_code
  )

  if error_code == "insufficient_credit" || response.status == 402
    raise InsufficientCreditError.new(
      "[LLM] Insufficient credit: #{error_message}",
      error_code: "insufficient_credit",
      provider_id: @provider_id
    )
  end

  case response.status
  when 400
    # Well-behaved APIs (Anthropic, OpenAI) never put quota/availability issues in 400.
    # However, some proxy/relay providers do — so we inspect the message first.
    # Also, Bedrock returns ThrottlingException as 400 instead of 429.
    if error_message.match?(/ThrottlingException|unavailable|quota/i)
      hint = error_message.match?(/quota/i) ? " (possibly out of credits)" : ""
      raise RetryableError, "[LLM] Rate limit or service issue: #{error_message}#{hint}"
    end

    # True bad request — our message was malformed. Roll back history so the
    # broken message is not replayed on the next user turn.
    raise BadRequestError, "[LLM] Client request error: #{error_message}"
  when 401 then raise AgentError, "[LLM] Invalid API key"
  when 403 then raise AgentError, "[LLM] Access denied: #{error_message}"
  when 404 then raise AgentError, "[LLM] API endpoint not found: #{error_message}"
  when 429 then raise RetryableError, "[LLM] Rate limit exceeded, please wait a moment"
  when 500..599 then raise RetryableError, "[LLM] Service temporarily unavailable (#{response.status}), retrying..."
  else raise AgentError, "[LLM] Unexpected error (#{response.status}): #{error_message}"
  end
end

#safe_json_parse(json_string, context: "response") ⇒ `Hash`, `Array`

Parse JSON with user-friendly error messages.

Parameters:

json_string (String) —

the JSON string to parse
context (String) (defaults to: "response") —

a description of what’s being parsed (e.g., “LLM response”)

Returns:

(Hash, Array) —

the parsed JSON

Raises:

(RetryableError) —

if parsing fails (indicates a malformed LLM response)

# File 'lib/clacky/client.rb', line 696

def safe_json_parse(json_string, context: "response")
  JSON.parse(json_string)
rescue JSON::ParserError => e
  # Transform technical JSON parsing errors into user-friendly messages.
  # These are usually caused by:
  #   1. Incomplete/truncated LLM response (network issue, timeout)
  #   2. LLM service returned malformed data
  #   3. Proxy/gateway corruption
  error_detail = if json_string.to_s.strip.empty?
    "received empty response"
  elsif json_string.to_s.bytesize > 500
    "response was truncated or malformed (#{json_string.to_s.bytesize} bytes received)"
  else
    "response format is invalid"
  end

  raise RetryableError, "[LLM] Failed to parse #{context}: #{error_detail}. " \
                       "This usually means the AI service returned incomplete or corrupted data. " \
                       "The request will be retried automatically."
end

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

── Anthropic request / response ──────────────────────────────────────────

# File 'lib/clacky/client.rb', line 285

def send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # Apply cache_control to the message that marks the cache breakpoint
  messages = apply_message_caching(messages) if caching_enabled

  body = MessageFormat::Anthropic.build_request_body(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort)
  return send_anthropic_stream_request(body, on_chunk) if on_chunk

  response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)
  parsed_body = safe_json_parse(response.body, context: "LLM response")
  MessageFormat::Anthropic.parse_response(parsed_body)
end

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

── Bedrock Converse request / response ───────────────────────────────────

# File 'lib/clacky/client.rb', line 226

def send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  body = MessageFormat::Bedrock.build_request_body(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort)
  return send_bedrock_stream_request(body, model, on_chunk) if on_chunk

  response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)
  parsed_body = safe_json_parse(response.body, context: "LLM response")
  MessageFormat::Bedrock.parse_response(parsed_body)
end

#send_message(content, model:, max_tokens:) ⇒ `Object`

Send a single string message and return the reply text.

# File 'lib/clacky/client.rb', line 88

def send_message(content, model:, max_tokens:)
  messages = [{ role: "user", content: content }]
  send_messages(messages, model: model, max_tokens: max_tokens)
end

#send_messages(messages, model:, max_tokens:) ⇒ `Object`

Send a messages array and return the reply text.

# File 'lib/clacky/client.rb', line 94

def send_messages(messages, model:, max_tokens:)
  if bedrock?
    body     = MessageFormat::Bedrock.build_request_body(messages, model, [], max_tokens)
    response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body.to_json }
    parse_simple_bedrock_response(response)
  elsif anthropic_format?
    body     = MessageFormat::Anthropic.build_request_body(messages, model, [], max_tokens, false)
    response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = body.to_json }
    parse_simple_anthropic_response(response)
  else
    body     = { model: model, max_tokens: max_tokens, messages: messages }
    response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }
    parse_simple_openai_response(response)
  end
end

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

Send messages with tool-calling support. Returns canonical response hash: { content:, tool_calls:, finish_reason:, usage:, latency: }

Latency measurement:

Because the current HTTP path is *non-streaming* (plain POST, response
body read in one shot), TTFB (time to response headers) is not exposed
by Faraday's default adapter without extra plumbing. What we CAN measure
cheaply — and what users actually feel — is total request duration,
which for a non-streaming call equals the time from "hit Enter" to
"first token visible" (since we receive everything at once).

So we record `duration_ms` as the authoritative number and alias it to
`ttft_ms` for downstream consumers (status bar uses ttft_ms as its
signal metric — see docs). When we migrate to streaming later, this
same `ttft_ms` field will start carrying the *actual* first-token
latency without any schema change.

Parameters:

on_chunk (Proc, nil) (defaults to: nil) —

optional streaming progress callback. Receives keyword args { input_tokens:, output_tokens: } with cumulative token counts. When nil, behaves exactly as the historical non-streaming path. When given but streaming is not yet wired for the active provider, a single synthetic invocation is fired after the response is received, so UI plumbing can be exercised end-to-end without the proxy work.

# File 'lib/clacky/client.rb', line 134

def send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil)
  caching_enabled = enable_caching && supports_prompt_caching?(model)
  cloned = deep_clone(messages)

  streaming_used = false
  first_chunk_at = nil
  wrapped_on_chunk = on_chunk && lambda do |**kwargs|
    first_chunk_at ||= Process.clock_gettime(Process::CLOCK_MONOTONIC)
    on_chunk.call(**kwargs)
  end

  t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  response =
    if bedrock?
      streaming_used = !on_chunk.nil?
      send_bedrock_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    elsif anthropic_format?
      streaming_used = !on_chunk.nil?
      send_anthropic_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    else
      streaming_used = !on_chunk.nil?
      send_openai_request(cloned, model, tools, max_tokens, caching_enabled, reasoning_effort: reasoning_effort, on_chunk: wrapped_on_chunk)
    end
  t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  if on_chunk && !streaming_used
    usage = response[:usage] || {}
    safe_invoke_on_chunk(
      on_chunk,
      input_tokens:  usage[:prompt_tokens].to_i,
      output_tokens: usage[:completion_tokens].to_i
    )
  end

  duration_ms = ((t1 - t0) * 1000).round
  ttft_ms = first_chunk_at ? ((first_chunk_at - t0) * 1000).round : duration_ms
  output_tokens = response[:usage]&.dig(:completion_tokens).to_i
  tps = (output_tokens >= 10 && duration_ms > 0) ? (output_tokens * 1000.0 / duration_ms).round(1) : nil

  response[:latency] = {
    ttft_ms:     ttft_ms,
    duration_ms: duration_ms,
    output_tokens: output_tokens,
    tps:         tps,
    model:       model,
    measured_at: Time.now.to_f,
    streaming:   streaming_used
  }
  response
end

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

── OpenAI request / response ─────────────────────────────────────────────

# File 'lib/clacky/client.rb', line 341

def send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil)
  # Apply cache_control markers to messages when caching is enabled.
  # OpenRouter proxies Claude with the same cache_control field convention as Anthropic direct.
  messages = apply_message_caching(messages) if caching_enabled

  body = MessageFormat::OpenAI.build_request_body(
    messages, model, tools, max_tokens, caching_enabled,
    vision_supported: @vision_supported,
    reasoning_effort: reasoning_effort
  )
  return send_openai_stream_request(body, on_chunk) if on_chunk

  response = openai_connection.post("chat/completions") { |r| r.body = body.to_json }

  raise_error(response) unless response.status == 200
  check_html_response(response)

  parsed_body = safe_json_parse(response.body, context: "LLM response")
  MessageFormat::OpenAI.parse_response(parsed_body)
end

#supports_prompt_caching?(model) ⇒ `Boolean`

Returns true for Claude models that support prompt caching (gen 3.5+ or gen 4+).

Handles both direct model names (e.g. “claude-haiku-4-5”) and Clacky AI Bedrock proxy names with “abs-” prefix (e.g. “abs-claude-haiku-4-5”).

Why only Claude models:

- MiniMax uses automatic server-side caching (no cache_control needed from client)
- Kimi uses a proprietary prompt_cache_key param, not cache_control
- MiMo has no documented caching API
- Only Claude (direct, OpenRouter, or ClackyAI Bedrock proxy) consumes our
  cache_control / cachePoint markers

Returns:

(Boolean)

# File 'lib/clacky/client.rb', line 213

def supports_prompt_caching?(model)
  # Strip ClackyAI Bedrock proxy prefix before matching
  model_str = model.to_s.downcase.sub(/^abs-/, "")
  return false unless model_str.include?("claude")

  # Match Claude gen 3.5+ (3.5/3.6/3.7…) or gen 4+ in any name format:
  #   claude-3.5-sonnet-...  claude-3-7-sonnet  claude-haiku-4-5  claude-sonnet-4-6
  model_str.match?(/claude(?:-3[-.]?[5-9]|.*-[4-9][-.]|.*-[4-9]$|-[4-9][-.]|-[4-9]$|-sonnet-[34])/)
end

#test_connection(model:) ⇒ `Object`

Test API connection by sending a minimal request. Returns { success: true } or { success: false, error: “…” }.

# File 'lib/clacky/client.rb', line 62

def test_connection(model:)
  if bedrock?
    body = MessageFormat::Bedrock.build_request_body(
      [{ role: :user, content: "hi" }], model, [], 16
    ).to_json
    response = bedrock_connection.post(bedrock_endpoint(model)) { |r| r.body = body }
  elsif anthropic_format?
    minimal_body = { model: model, max_tokens: 16,
                     messages: [{ role: "user", content: "hi" }] }.to_json
    response = anthropic_connection.post(anthropic_messages_path) { |r| r.body = minimal_body }
  else
    minimal_body = { model: model, max_tokens: 16,
                     messages: [{ role: "user", content: "hi" }] }.to_json
    response = openai_connection.post("chat/completions") { |r| r.body = minimal_body }
  end
  handle_test_response(response)
rescue Faraday::Error => e
  { success: false, error: "Connection error: #{e.message}" }
rescue => e
  Clacky::Logger.error("[test_connection] #{e.class}: #{e.message}", error: e)
  { success: false, error: e.message }
end

Class: Clacky::Client

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ Client

Instance Attribute Details

#provider_id ⇒ Object (readonly)

Instance Method Details

#add_cache_control_to_message(msg) ⇒ Object

#anthropic_connection ⇒ Object

#anthropic_format?(model = nil) ⇒ Boolean

#apply_message_caching(messages) ⇒ Object

#bedrock? ⇒ Boolean

#bedrock_connection ⇒ Object

#bedrock_endpoint(model) ⇒ Object

#check_html_response(response) ⇒ Object

#deep_clone(obj) ⇒ Object

#extract_error_message(error_body, raw_body) ⇒ Object

#format_tool_results(response, tool_results, model:) ⇒ Object

#handle_test_response(response) ⇒ Object

#is_compression_instruction?(message) ⇒ Boolean

#openai_connection ⇒ Object

#parse_simple_anthropic_response(response) ⇒ Object

#parse_simple_bedrock_response(response) ⇒ Object

#parse_simple_openai_response(response) ⇒ Object

#raise_error(response) ⇒ Object

#safe_json_parse(json_string, context: "response") ⇒ Hash, Array

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

#send_message(content, model:, max_tokens:) ⇒ Object

#send_messages(messages, model:, max_tokens:) ⇒ Object

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ Object

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ Object

#supports_prompt_caching?(model) ⇒ Boolean

#test_connection(model:) ⇒ Object

#initialize(api_key, base_url:, model:, anthropic_format: false, read_timeout: nil) ⇒ `Client`

#provider_id ⇒ `Object` (readonly)

#add_cache_control_to_message(msg) ⇒ `Object`

#anthropic_connection ⇒ `Object`

#anthropic_format?(model = nil) ⇒ `Boolean`

#apply_message_caching(messages) ⇒ `Object`

#bedrock? ⇒ `Boolean`

#bedrock_connection ⇒ `Object`

#bedrock_endpoint(model) ⇒ `Object`

#check_html_response(response) ⇒ `Object`

#deep_clone(obj) ⇒ `Object`

#extract_error_message(error_body, raw_body) ⇒ `Object`

#format_tool_results(response, tool_results, model:) ⇒ `Object`

#handle_test_response(response) ⇒ `Object`

#is_compression_instruction?(message) ⇒ `Boolean`

#openai_connection ⇒ `Object`

#parse_simple_anthropic_response(response) ⇒ `Object`

#parse_simple_bedrock_response(response) ⇒ `Object`

#parse_simple_openai_response(response) ⇒ `Object`

#raise_error(response) ⇒ `Object`

#safe_json_parse(json_string, context: "response") ⇒ `Hash`, `Array`

#send_anthropic_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

#send_bedrock_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

#send_message(content, model:, max_tokens:) ⇒ `Object`

#send_messages(messages, model:, max_tokens:) ⇒ `Object`

#send_messages_with_tools(messages, model:, tools:, max_tokens:, enable_caching: false, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

#send_openai_request(messages, model, tools, max_tokens, caching_enabled, reasoning_effort: nil, on_chunk: nil) ⇒ `Object`

#supports_prompt_caching?(model) ⇒ `Boolean`

#test_connection(model:) ⇒ `Object`