Module: Clacky::ModelPricing

Defined in:
lib/clacky/utils/model_pricing.rb

Overview

Module for handling AI model pricing. Supports different pricing tiers and prompt caching.

Constant Summary collapse

PRICING_TABLE =

Pricing per 1M tokens (MTok) in USD. All pricing is based on official API documentation.

{
  # Entry layout (all rates are USD per 1M tokens):
  #   "<model key>" => {
  #     input:  { default:, over_200k: },   # prompt-token rate per tier
  #     output: { default:, over_200k: },   # completion-token rate per tier
  #     cache:  one of two shapes:
  #               flat   -> { write:, read: }
  #               tiered -> { write_default:, write_over_200k:,
  #                           read_default:,  read_over_200k: }
  #   }
  # The presence of a :write / :read key in :cache is what marks an entry as
  # flat (non-tiered) for cache pricing.

  # Claude 4.5 models - tiered pricing based on prompt length
  "claude-opus-4.5" => {
    input: {
      default: 5.00,              # $5/MTok for prompts ≤ 200K tokens
      over_200k: 5.00             # same for all tiers
    },
    output: {
      default: 25.00,             # $25/MTok for prompts ≤ 200K tokens
      over_200k: 25.00            # same for all tiers
    },
    cache: {
      write: 6.25,                # $6.25/MTok cache write
      read: 0.50                  # $0.50/MTok cache read
    }
  },
  
  "claude-sonnet-4.5" => {
    input: {
      default: 3.00,              # $3/MTok for prompts ≤ 200K tokens
      over_200k: 6.00             # $6/MTok for prompts > 200K tokens
    },
    output: {
      default: 15.00,             # $15/MTok for prompts ≤ 200K tokens
      over_200k: 22.50            # $22.50/MTok for prompts > 200K tokens
    },
    cache: {
      write_default: 3.75,        # $3.75/MTok cache write (≤ 200K)
      write_over_200k: 7.50,      # $7.50/MTok cache write (> 200K)
      read_default: 0.30,         # $0.30/MTok cache read (≤ 200K)
      read_over_200k: 0.60        # $0.60/MTok cache read (> 200K)
    }
  },
  
  "claude-haiku-4.5" => {
    input: {
      default: 1.00,              # $1/MTok
      over_200k: 1.00             # same for all tiers
    },
    output: {
      default: 5.00,              # $5/MTok
      over_200k: 5.00             # same for all tiers
    },
    cache: {
      write: 1.25,                # $1.25/MTok cache write
      read: 0.10                  # $0.10/MTok cache read
    }
  },

  # Claude 3.5 models (for backwards compatibility)
  # The two Sonnet entries below carry the same rates as "claude-sonnet-4.5".
  "claude-3-5-sonnet-20241022" => {
    input: {
      default: 3.00,
      over_200k: 6.00
    },
    output: {
      default: 15.00,
      over_200k: 22.50
    },
    cache: {
      write_default: 3.75,
      write_over_200k: 7.50,
      read_default: 0.30,
      read_over_200k: 0.60
    }
  },

  "claude-3-5-sonnet-20240620" => {
    input: {
      default: 3.00,
      over_200k: 6.00
    },
    output: {
      default: 15.00,
      over_200k: 22.50
    },
    cache: {
      write_default: 3.75,
      write_over_200k: 7.50,
      read_default: 0.30,
      read_over_200k: 0.60
    }
  },

  # Same rates as "claude-haiku-4.5" (flat cache pricing).
  "claude-3-5-haiku-20241022" => {
    input: {
      default: 1.00,
      over_200k: 1.00
    },
    output: {
      default: 5.00,
      over_200k: 5.00
    },
    cache: {
      write: 1.25,
      read: 0.10
    }
  },

  # DeepSeek V4 models
  # Source: https://api-docs.deepseek.com/quick_start/pricing (USD / 1M tokens)
  # DeepSeek billing model:
  #   - "cache miss input" = regular prompt_tokens rate
  #   - "cache hit input"  = cache_read rate (DeepSeek has no separate cache-write charge)
  #   - No tiered pricing (single rate regardless of context length)
  "deepseek-v4-flash" => {
    input: {
      default: 0.14,                  # $0.14/MTok cache miss
      over_200k: 0.14                 # no tiered pricing
    },
    output: {
      default: 0.28,                  # $0.28/MTok
      over_200k: 0.28
    },
    cache: {
      write: 0.14,                    # DeepSeek doesn't charge extra for writes; bill at miss rate
      read: 0.028                     # $0.028/MTok cache hit
    }
  },

  "deepseek-v4-pro" => {
    input: {
      default: 1.74,                  # $1.74/MTok cache miss
      over_200k: 1.74
    },
    output: {
      default: 3.48,                  # $3.48/MTok
      over_200k: 3.48
    },
    cache: {
      write: 1.74,                    # no separate write charge; bill at miss rate
      read: 0.145                     # $0.145/MTok cache hit
    }
  },

  # Default fallback pricing (conservative estimates)
  # Used for any model name that does not resolve to one of the keys above.
  "default" => {
    input: {
      default: 0.50,
      over_200k: 0.50
    },
    output: {
      default: 1.50,
      over_200k: 1.50
    },
    cache: {
      write: 0.625,
      read: 0.05
    }
  }
}.freeze
TIERED_PRICING_THRESHOLD =

Threshold for tiered pricing (200K tokens)

200_000

Class Method Summary collapse

Class Method Details

.calculate_cache_cost(pricing:, cache_write_tokens:, cache_read_tokens:, over_threshold:) ⇒ Object

Calculate cache-related costs



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/clacky/utils/model_pricing.rb', line 292

# Compute the USD cost of prompt-cache traffic (writes plus reads).
#
# @param pricing [Hash] pricing entry whose :cache hash is either flat
#   (:write / :read) or tiered (:write_default, :write_over_200k,
#   :read_default, :read_over_200k); rates are per 1M tokens
# @param cache_write_tokens [Integer] tokens written to the cache
# @param cache_read_tokens [Integer] tokens served from the cache
# @param over_threshold [Boolean] whether the request falls in the over-200K tier
# @return [Float] combined cache write + read cost
def calculate_cache_cost(pricing:, cache_write_tokens:, cache_read_tokens:, over_threshold:)
  tier_suffix = over_threshold ? :over_200k : :default

  # Resolve the per-MTok rate for one cache operation (:write or :read).
  # A flat key wins when present; otherwise the tier-specific key is used.
  # The lookup is lazy so that pricing is only touched when tokens are billed.
  rate_for = lambda do |operation|
    cache_rates = pricing[:cache]
    next cache_rates[operation] if cache_rates.key?(operation)

    cache_rates[:"#{operation}_#{tier_suffix}"]
  end

  total = 0.0

  if cache_write_tokens > 0
    write_rate = rate_for.call(:write)
    total += (cache_write_tokens / 1_000_000.0) * write_rate
  end

  if cache_read_tokens > 0
    read_rate = rate_for.call(:read)
    total += (cache_read_tokens / 1_000_000.0) * read_rate
  end

  total
end

.calculate_cost(model:, usage:) ⇒ Hash

Calculate cost for the given model and usage

Parameters:

  • model (String)

    Model identifier

  • usage (Hash)

    Usage statistics containing:

    • prompt_tokens: number of input tokens

    • completion_tokens: number of output tokens

    • cache_creation_input_tokens: tokens written to cache (optional)

    • cache_read_input_tokens: tokens read from cache (optional)

Returns:

  • (Hash)

    Hash containing:

    • cost: Cost in USD (Float)

    • source: Cost source (:price or :default) (Symbol)



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/clacky/utils/model_pricing.rb', line 176

# Calculate the USD cost of one request for the given model and usage.
#
# @param model [String] model identifier (resolved via get_pricing_with_source)
# @param usage [Hash] usage statistics with symbol keys:
#   - :prompt_tokens               input tokens (expected to include cache reads)
#   - :completion_tokens           output tokens
#   - :cache_creation_input_tokens tokens written to cache (optional)
#   - :cache_read_input_tokens     tokens read from cache (optional)
#   Missing or nil counters are treated as 0.
# @return [Hash] { cost: Float (USD), source: Symbol (:price or :default) }
def calculate_cost(model:, usage:)
  pricing_result = get_pricing_with_source(model)
  pricing = pricing_result[:pricing]

  prompt_tokens = usage[:prompt_tokens] || 0
  completion_tokens = usage[:completion_tokens] || 0
  cache_write_tokens = usage[:cache_creation_input_tokens] || 0
  cache_read_tokens = usage[:cache_read_input_tokens] || 0

  # Tier selection: prompt_tokens includes cache_read_tokens but NOT
  # cache_write_tokens, so cache writes are added to get the full prompt size.
  over_threshold = (prompt_tokens + cache_write_tokens) > TIERED_PRICING_THRESHOLD
  tier = over_threshold ? :over_200k : :default

  # Regular (non-cached) input: cache reads are billed separately at the cache
  # rate, so they are removed from prompt_tokens. Clamp at zero so that a
  # provider reporting prompt_tokens WITHOUT cache reads cannot produce a
  # negative input cost that silently shrinks the total.
  regular_input_tokens = [prompt_tokens - cache_read_tokens, 0].max
  input_cost = (regular_input_tokens / 1_000_000.0) * pricing[:input][tier]

  output_cost = (completion_tokens / 1_000_000.0) * pricing[:output][tier]

  cache_cost = calculate_cache_cost(
    pricing: pricing,
    cache_write_tokens: cache_write_tokens,
    cache_read_tokens: cache_read_tokens,
    over_threshold: over_threshold
  )

  {
    cost: input_cost + output_cost + cache_cost,
    source: pricing_result[:source]
  }
end

.get_pricing(model) ⇒ Hash

Get pricing for a specific model. Falls back to default pricing if the model is not found.

Parameters:

  • model (String)

    Model identifier

Returns:

  • (Hash)

    Pricing structure for the model



222
223
224
# File 'lib/clacky/utils/model_pricing.rb', line 222

# Look up the pricing entry for a model, discarding the source marker.
#
# @param model [String] model identifier
# @return [Hash] the :pricing value returned by get_pricing_with_source
def get_pricing(model)
  lookup = get_pricing_with_source(model)
  lookup[:pricing]
end

.get_pricing_with_source(model) ⇒ Hash

Get pricing with source information

Parameters:

  • model (String)

    Model identifier

Returns:

  • (Hash)

    Hash containing:

    • pricing: Pricing structure for the model

    • source: :price (matched model) or :default (fallback)



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/clacky/utils/model_pricing.rb', line 232

# Resolve a model's pricing entry together with a marker saying whether a
# specific entry matched or the fallback was used.
#
# @param model [String] model identifier
# @return [Hash] { pricing: Hash, source: Symbol } where source is :default
#   when the normalized name is "default" and :price otherwise
def get_pricing_with_source(model)
  table_key = normalize_model_name(model)

  # The normalized key doubles as the fallback signal: "default" means no
  # specific entry was matched.
  origin = table_key == "default" ? :default : :price

  { pricing: PRICING_TABLE[table_key], source: origin }
end

.normalize_model_name(model) ⇒ Object

Normalize model name to match pricing table keys



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/clacky/utils/model_pricing.rb', line 253

# Normalize a model identifier to one of the PRICING_TABLE keys.
#
# The name is lower-cased and stripped, then matched exactly against the
# table; if that fails, a set of patterns maps known naming variations onto
# a table key. Anything unrecognized resolves to "default".
#
# @param model [String, Symbol, nil] model identifier as supplied by the caller
# @return [String] a PRICING_TABLE key, or "default" when nothing matches
def normalize_model_name(model)
  # to_s lets nil and Symbol identifiers through without raising.
  name = model.to_s.downcase.strip
  return "default" if name.empty?

  # Direct match
  return name if PRICING_TABLE.key?(name)

  # Claude patterns accept both dot and dash separators ("4.5", "4-5", "4-6")
  # and tolerate prefixes such as Bedrock cross-region ids
  # (e.g. "jp.anthropic.claude-sonnet-4-6"). Minor versions 5-9 all map to
  # the 4.5 pricing entry.
  case name
  when /claude.*opus.*4[.-]?[5-9]/i
    "claude-opus-4.5"
  when /claude.*sonnet.*4[.-]?[5-9]/i
    "claude-sonnet-4.5"
  when /claude.*haiku.*4[.-]?[5-9]/i
    "claude-haiku-4.5"
  when /claude-3-5-sonnet-20241022/i
    "claude-3-5-sonnet-20241022"
  when /claude-3-5-sonnet-20240620/i
    "claude-3-5-sonnet-20240620"
  when /claude-3-5-haiku-20241022/i
    "claude-3-5-haiku-20241022"
  when /deepseek.*v4.*pro/i
    "deepseek-v4-pro"
  when /deepseek.*v4.*flash/i
    "deepseek-v4-flash"
  # Legacy aliases: deepseek-chat and deepseek-reasoner are being
  # deprecated on 2026-07-24 and map to deepseek-v4-flash's
  # non-thinking / thinking modes respectively. Bill at flash rates.
  # \A / \z anchor the whole string; ^ / $ would also match a single line
  # inside a multi-line value.
  when /\Adeepseek-(?:chat|reasoner)\z/i
    "deepseek-v4-flash"
  else
    "default"
  end
end