Module: Clacky::ModelPricing

Defined in:
lib/clacky/utils/model_pricing.rb

Overview

Module for handling AI model pricing. Supports tiered pricing and prompt caching.

Constant Summary collapse

PRICING_TABLE =

Pricing per 1M tokens (MTok) in USD. All pricing is based on official API documentation.

{
  # NOTE: two cache-key schemas appear below, and calculate_cache_cost
  # branches on which one a model uses:
  #   flat   — { write:, read: }: single rate regardless of prompt length
  #   tiered — { write_default:, write_over_200k:, read_default:, read_over_200k: }
  # Claude 4.5 models - tiered pricing based on prompt length
  "claude-opus-4.5" => {
    input: {
      default: 5.00,              # $5/MTok for prompts ≤ 200K tokens
      over_200k: 5.00             # same for all tiers
    },
    output: {
      default: 25.00,             # $25/MTok for prompts ≤ 200K tokens
      over_200k: 25.00            # same for all tiers
    },
    cache: {
      write: 6.25,                # $6.25/MTok cache write
      read: 0.50                  # $0.50/MTok cache read
    }
  },

  "claude-sonnet-4.5" => {
    input: {
      default: 3.00,              # $3/MTok for prompts ≤ 200K tokens
      over_200k: 6.00             # $6/MTok for prompts > 200K tokens
    },
    output: {
      default: 15.00,             # $15/MTok for prompts ≤ 200K tokens
      over_200k: 22.50            # $22.50/MTok for prompts > 200K tokens
    },
    cache: {
      write_default: 3.75,        # $3.75/MTok cache write (≤ 200K)
      write_over_200k: 7.50,      # $7.50/MTok cache write (> 200K)
      read_default: 0.30,         # $0.30/MTok cache read (≤ 200K)
      read_over_200k: 0.60        # $0.60/MTok cache read (> 200K)
    }
  },

  "claude-haiku-4.5" => {
    input: {
      default: 1.00,              # $1/MTok
      over_200k: 1.00             # same for all tiers
    },
    output: {
      default: 5.00,              # $5/MTok
      over_200k: 5.00             # same for all tiers
    },
    cache: {
      write: 1.25,                # $1.25/MTok cache write
      read: 0.10                  # $0.10/MTok cache read
    }
  },

  # Claude 3.5 models (for backwards compatibility)
  "claude-3-5-sonnet-20241022" => {
    input: {
      default: 3.00,
      over_200k: 6.00
    },
    output: {
      default: 15.00,
      over_200k: 22.50
    },
    cache: {
      write_default: 3.75,
      write_over_200k: 7.50,
      read_default: 0.30,
      read_over_200k: 0.60
    }
  },

  "claude-3-5-sonnet-20240620" => {
    input: {
      default: 3.00,
      over_200k: 6.00
    },
    output: {
      default: 15.00,
      over_200k: 22.50
    },
    cache: {
      write_default: 3.75,
      write_over_200k: 7.50,
      read_default: 0.30,
      read_over_200k: 0.60
    }
  },

  "claude-3-5-haiku-20241022" => {
    input: {
      default: 1.00,
      over_200k: 1.00
    },
    output: {
      default: 5.00,
      over_200k: 5.00
    },
    cache: {
      write: 1.25,
      read: 0.10
    }
  },

  # DeepSeek V4 models
  # Source: https://api-docs.deepseek.com/quick_start/pricing (USD / 1M tokens)
  # DeepSeek billing model:
  #   - "cache miss input" = regular prompt_tokens rate
  #   - "cache hit input"  = cache_read rate (DeepSeek has no separate cache-write charge)
  #   - No tiered pricing (single rate regardless of context length)
  "deepseek-v4-flash" => {
    input: {
      default: 0.14,                  # $0.14/MTok cache miss
      over_200k: 0.14                 # no tiered pricing
    },
    output: {
      default: 0.28,                  # $0.28/MTok
      over_200k: 0.28
    },
    cache: {
      write: 0.14,                    # DeepSeek doesn't charge extra for writes; bill at miss rate
      read: 0.028                     # $0.028/MTok cache hit
    }
  },

  "deepseek-v4-pro" => {
    input: {
      default: 1.74,                  # $1.74/MTok cache miss
      over_200k: 1.74
    },
    output: {
      default: 3.48,                  # $3.48/MTok
      over_200k: 3.48
    },
    cache: {
      write: 1.74,                    # no separate write charge; bill at miss rate
      read: 0.145                     # $0.145/MTok cache hit
    }
  },

  # Kimi K2.5 / K2.6 multimodal models
  # Source: https://platform.moonshot.cn (USD / 1M tokens)
  # Kimi billing model (same shape as DeepSeek):
  #   - "cache miss input" = regular prompt_tokens rate
  #   - "cache hit input"  = cache_read rate (no separate cache-write charge)
  #   - No tiered pricing (single rate regardless of context length)
  "kimi-k2.5" => {
    input: {
      default: 0.60,                  # $0.60/MTok cache miss
      over_200k: 0.60                 # no tiered pricing
    },
    output: {
      default: 3.00,                  # $3.00/MTok
      over_200k: 3.00
    },
    cache: {
      write: 0.60,                    # Kimi doesn't charge extra for writes; bill at miss rate
      read: 0.10                      # $0.10/MTok cache hit
    }
  },

  "kimi-k2.6" => {
    input: {
      default: 0.95,                  # $0.95/MTok cache miss
      over_200k: 0.95
    },
    output: {
      default: 4.00,                  # $4.00/MTok
      over_200k: 4.00
    },
    cache: {
      write: 0.95,                    # no separate write charge; bill at miss rate
      read: 0.16                      # $0.16/MTok cache hit
    }
  },

  # OpenAI GPT-5.5 / GPT-5.4 — breakpoint at 272K input tokens
  # Source: https://openai.com/api/pricing/ (USD / 1M tokens)
  # Note: OpenAI's actual tiered-pricing threshold is 272K, not the
  # global 200K below.  Prompts between 200K–272K will slightly
  # over-estimate costs until a per-model threshold is implemented.
  "gpt-5.5" => {
    input: {
      default: 5.00,              # $5/MTok for prompts ≤ 272K tokens
      over_200k: 10.00            # $10/MTok for prompts > 272K tokens
    },
    output: {
      default: 30.00,             # $30/MTok for prompts ≤ 272K tokens
      over_200k: 45.00            # $45/MTok for prompts > 272K tokens
    },
    cache: {
      write_default: 5.00,        # $5/MTok cache write (≤ 272K)
      write_over_200k: 10.00,     # $10/MTok cache write (> 272K)
      read_default: 0.50,         # $0.50/MTok cache read (≤ 272K)
      read_over_200k: 1.00        # $1.00/MTok cache read (> 272K)
    }
  },

  "gpt-5.4" => {
    input: {
      default: 2.50,              # $2.50/MTok for prompts ≤ 272K tokens
      over_200k: 5.00             # $5/MTok for prompts > 272K tokens
    },
    output: {
      default: 15.00,             # $15/MTok for prompts ≤ 272K tokens
      over_200k: 22.50            # $22.50/MTok for prompts > 272K tokens
    },
    cache: {
      write_default: 2.50,        # $2.50/MTok cache write (≤ 272K)
      write_over_200k: 5.00,      # $5/MTok cache write (> 272K)
      read_default: 0.25,         # $0.25/MTok cache read (≤ 272K)
      read_over_200k: 0.50        # $0.50/MTok cache read (> 272K)
    }
  },

  # GPT-5.4 flat-rate models (no breakpoint, single rate regardless of context)
  "gpt-5.4-mini" => {
    input: {
      default: 0.75,              # $0.75/MTok
      over_200k: 0.75
    },
    output: {
      default: 4.50,              # $4.50/MTok
      over_200k: 4.50
    },
    cache: {
      write: 0.75,                # $0.75/MTok cache write
      read: 0.075                 # $0.075/MTok cache read (10% of input)
    }
  },

  "gpt-5.4-nano" => {
    input: {
      default: 0.20,              # $0.20/MTok
      over_200k: 0.20
    },
    output: {
      default: 1.25,              # $1.25/MTok
      over_200k: 1.25
    },
    cache: {
      write: 0.20,                # $0.20/MTok cache write
      read: 0.02                  # $0.02/MTok cache read (10% of input)
    }
  },

  # O-series reasoning models — flat-rate (200K context window)
  # Source: https://openai.com/api/pricing/
  "o3" => {
    input: {
      default: 2.00,              # $2/MTok
      over_200k: 2.00             # flat rate
    },
    output: {
      default: 8.00,              # $8/MTok
      over_200k: 8.00
    },
    cache: {
      write: 2.00,                # $2/MTok cache write (same as input)
      read: 0.50                  # $0.50/MTok cache read (25% of input)
    }
  },

  "o4-mini" => {
    input: {
      default: 1.10,              # $1.10/MTok
      over_200k: 1.10             # flat rate
    },
    output: {
      default: 4.40,              # $4.40/MTok
      over_200k: 4.40
    },
    cache: {
      write: 1.10,                # $1.10/MTok cache write (same as input)
      read: 0.275                 # $0.275/MTok cache read (25% of input)
    }
  },

}.freeze
TIERED_PRICING_THRESHOLD =

Threshold for tiered pricing (200K tokens). NOTE: OpenAI GPT-5.5/GPT-5.4 use a 272K breakpoint, not 200K; costs for prompts between 200K and 272K will be slightly over-estimated.

200_000

Class Method Summary collapse

Class Method Details

.calculate_cache_cost(pricing:, cache_write_tokens:, cache_read_tokens:, over_threshold:) ⇒ Object

Calculate cache-related costs



443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
# File 'lib/clacky/utils/model_pricing.rb', line 443

# Calculate cache-related costs (writes + reads) for one usage record.
#
# Pricing tables use one of two cache-key schemas:
#   flat   — { write:, read: }: single rate regardless of prompt length
#   tiered — { write_default:, write_over_200k:, read_default:, read_over_200k: }
#
# @param pricing [Hash] Model pricing entry (must contain a :cache hash)
# @param cache_write_tokens [Integer] Tokens written to the prompt cache
# @param cache_read_tokens [Integer] Tokens served from the prompt cache
# @param over_threshold [Boolean] Whether the request is in the >200K tier
# @return [Float] Cache cost in USD
def calculate_cache_cost(pricing:, cache_write_tokens:, cache_read_tokens:, over_threshold:)
  cache_rates = pricing[:cache]

  # Select the applicable write rate: flat schema wins; otherwise pick
  # the tier that matches the prompt length.
  write_rate =
    if cache_rates.key?(:write)
      cache_rates[:write]
    else
      over_threshold ? cache_rates[:write_over_200k] : cache_rates[:write_default]
    end

  # Same selection logic for the read rate.
  read_rate =
    if cache_rates.key?(:read)
      cache_rates[:read]
    else
      over_threshold ? cache_rates[:read_over_200k] : cache_rates[:read_default]
    end

  total = 0.0
  total += (cache_write_tokens / 1_000_000.0) * write_rate if cache_write_tokens > 0
  total += (cache_read_tokens / 1_000_000.0) * read_rate if cache_read_tokens > 0
  total
end

.calculate_cost(model:, usage:) ⇒ Hash

Calculate cost for the given model and usage

Parameters:

  • model (String)

    Model identifier

  • usage (Hash)

    Usage statistics containing:

    • prompt_tokens: number of input tokens

    • completion_tokens: number of output tokens

    • cache_creation_input_tokens: tokens written to cache (optional)

    • cache_read_input_tokens: tokens read from cache (optional)

Returns:

  • (Hash)

    Hash containing:

    • cost: Cost in USD (Float) or nil if model pricing is unknown

    • source: Cost source (:price) or nil if unknown (Symbol or nil)



301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/clacky/utils/model_pricing.rb', line 301

# Calculate cost in USD for the given model and usage statistics.
#
# @param model [String] Model identifier
# @param usage [Hash] Usage statistics containing:
#   - prompt_tokens: number of input tokens (includes cache reads, NOT cache writes)
#   - completion_tokens: number of output tokens
#   - cache_creation_input_tokens: tokens written to cache (optional)
#   - cache_read_input_tokens: tokens read from cache (optional)
# @return [Hash] Hash containing:
#   - cost: Cost in USD (Float) or nil if model pricing is unknown
#   - source: Cost source (:price) or nil if unknown (Symbol or nil)
def calculate_cost(model:, usage:)
  pricing_result = get_pricing_with_source(model)
  pricing = pricing_result[:pricing]
  source = pricing_result[:source]

  # If no pricing table matches this model, return nil cost.
  # Unknown models should display as N/A, never fall back to guesses.
  return { cost: nil, source: nil } unless pricing

  prompt_tokens = usage[:prompt_tokens] || 0
  completion_tokens = usage[:completion_tokens] || 0
  cache_write_tokens = usage[:cache_creation_input_tokens] || 0
  cache_read_tokens = usage[:cache_read_input_tokens] || 0

  # Determine if we're in the over_200k tier.
  # Note: prompt_tokens includes cache_read_tokens but NOT cache_write_tokens;
  # cache_write_tokens are additional tokens that were written to cache.
  total_input_tokens = prompt_tokens + cache_write_tokens
  over_threshold = total_input_tokens > TIERED_PRICING_THRESHOLD

  # Regular (non-cached) input tokens: prompt_tokens already includes
  # cache_read_tokens, so subtract them. Clamp at zero so a provider that
  # reports cache reads separately from prompt_tokens can never yield a
  # negative token count (and therefore a negative cost).
  regular_input_tokens = [prompt_tokens - cache_read_tokens, 0].max
  input_rate = over_threshold ? pricing[:input][:over_200k] : pricing[:input][:default]
  input_cost = (regular_input_tokens / 1_000_000.0) * input_rate

  # Output cost uses the same tier as input.
  output_rate = over_threshold ? pricing[:output][:over_200k] : pricing[:output][:default]
  output_cost = (completion_tokens / 1_000_000.0) * output_rate

  # Cache writes/reads are billed separately from regular input.
  cache_cost = calculate_cache_cost(
    pricing: pricing,
    cache_write_tokens: cache_write_tokens,
    cache_read_tokens: cache_read_tokens,
    over_threshold: over_threshold
  )

  {
    cost: input_cost + output_cost + cache_cost,
    source: source
  }
end

.get_pricing(model) ⇒ Hash

Get pricing for a specific model. Returns nil when the model is unknown — there is no default-pricing fallback.

Parameters:

  • model (String)

    Model identifier

Returns:

  • (Hash)

    Pricing structure for the model



351
352
353
# File 'lib/clacky/utils/model_pricing.rb', line 351

# Look up the pricing structure for a model, discarding the source metadata.
#
# @param model [String] Model identifier
# @return [Hash, nil] Pricing table entry, or nil when the model is unknown
def get_pricing(model)
  result = get_pricing_with_source(model)
  result[:pricing]
end

.get_pricing_with_source(model) ⇒ Hash

Get pricing with source information

Parameters:

  • model (String)

    Model identifier

Returns:

  • (Hash)

    Hash containing:

    • pricing: Pricing structure or nil if model is unknown

    • source: :price (matched) or nil (unknown)



361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/clacky/utils/model_pricing.rb', line 361

# Resolve a model identifier to its pricing entry plus source metadata.
#
# @param model [String] Model identifier
# @return [Hash] Hash containing:
#   - pricing: Pricing structure or nil if model is unknown
#   - source: :price (matched) or nil (unknown)
def get_pricing_with_source(model)
  # Map raw identifiers (version suffixes, provider prefixes, etc.)
  # onto canonical pricing-table keys.
  key = normalize_model_name(model)

  # No matching pricing table entry — cost is unknown.
  return { pricing: nil, source: nil } unless key

  { pricing: PRICING_TABLE[key], source: :price }
end

.normalize_model_name(model) ⇒ Object

Normalize model name to match pricing table keys. Returns the canonical key on match, or nil when no pricing is available.



380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
# File 'lib/clacky/utils/model_pricing.rb', line 380

# Normalize a model name to its canonical pricing-table key.
# Returns the canonical key on match, or nil when no pricing is available
# (callers then display cost as N/A).
#
# Handles case/whitespace variation, dot vs dash separators
# ("gpt-5.4", "gpt-5-4", "gpt54"), and Bedrock cross-region prefixes
# (e.g. "jp.anthropic.claude-sonnet-4-6").
#
# @param model [String, nil] Raw model identifier
# @return [String, nil] PRICING_TABLE key, or nil if unknown
def normalize_model_name(model)
  return nil if model.nil? || model.empty?

  # All patterns below assume lowercase input, so the /i flags the
  # original carried are unnecessary.
  model = model.downcase.strip

  # Direct match against a pricing-table key — no normalization needed.
  return model if PRICING_TABLE.key?(model)

  case model
  # Claude 4.5+ families: accept dot or dash separators ("4.5", "4-5")
  # and future point releases through 4.9; substring matching also
  # covers Bedrock cross-region prefixes.
  when /claude.*opus.*4[.-]?[5-9]/
    "claude-opus-4.5"
  when /claude.*sonnet.*4[.-]?[5-9]/
    "claude-sonnet-4.5"
  when /claude.*haiku.*4[.-]?[5-9]/
    "claude-haiku-4.5"
  when /claude-3-5-sonnet-20241022/
    "claude-3-5-sonnet-20241022"
  when /claude-3-5-sonnet-20240620/
    "claude-3-5-sonnet-20240620"
  when /claude-3-5-haiku-20241022/
    "claude-3-5-haiku-20241022"
  # DeepSeek: the loose pattern subsumes the exact hyphenated form.
  when /deepseek.*v4.*pro/
    "deepseek-v4-pro"
  when /deepseek.*v4.*flash/
    "deepseek-v4-flash"
  # Legacy aliases: deepseek-chat and deepseek-reasoner are being
  # deprecated on 2026-07-24 and map to deepseek-v4-flash's
  # non-thinking / thinking modes respectively. Bill at flash rates.
  when /^deepseek-chat$/, /^deepseek-reasoner$/
    "deepseek-v4-flash"
  # Kimi K2.5 / K2.6 — strict match only. K2 text-only models
  # (kimi-k2-0905-preview, kimi-k2-thinking, etc.) are not yet
  # registered in providers.rb and will be added in a follow-up
  # issue together with their model_capabilities overrides.
  when /^kimi-k2\.?5$/
    "kimi-k2.5"
  when /^kimi-k2\.?6$/
    "kimi-k2.6"
  # OpenAI GPT-5.x models — match dashed/dotted/compact forms
  # (e.g. "gpt-5.5", "gpt-5-5", "gpt5.5", "gpt55"). The mini/nano
  # patterns use [.-]? between the digits (not \.? as before) so
  # dash-separated bases like "gpt-5-4-mini" normalize consistently
  # with "gpt-5-4".
  when /^gpt-?5[.-]?5$/
    "gpt-5.5"
  when /^gpt-?5[.-]?4.*mini$/
    "gpt-5.4-mini"
  when /^gpt-?5[.-]?4.*nano$/
    "gpt-5.4-nano"
  when /^gpt-?5[.-]?4$/
    "gpt-5.4"
  # O-series reasoning models
  when /^o4[.-]?mini$/
    "o4-mini"
  when /^o3$/
    "o3"
  else
    nil # No pricing available for this model — cost will show as N/A
  end
end
end