Fix AI cost calculation — derive thinking tokens and correct pricing
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Thinking tokens were not counted (the SDK field doesn't exist); they are
  now derived as
  total_token_count - prompt_token_count - candidates_token_count
- Remove separate thinking price rate — Google bills thinking at output rate
- Update GEMINI_PRICING to match Google pricing page (verified 2026-03-25)
- Net effect: ~2% cost increase per query (previously undercharging)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-25 14:05:43 +01:00
parent 2776a371b3
commit a816d8adf7

View File

@ -73,12 +73,14 @@ THINKING_LEVELS = {
# Pricing per 1M tokens (USD) - updated 2026-01-29
# Note: Flash on Free Tier = $0.00, Pro on Paid Tier = paid pricing
GEMINI_PRICING = {
'gemini-2.5-flash': {'input': 0.30, 'output': 2.50, 'thinking': 0},
'gemini-2.5-flash-lite': {'input': 0.10, 'output': 0.40, 'thinking': 0},
'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0},
'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00}, # Paid tier
'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier
'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0}, # Paid tier
# Prices per 1M tokens (USD). Verified 2026-03-25 against ai.google.dev/gemini-api/docs/pricing
# Google bills thinking tokens at the OUTPUT rate (included in output pricing).
'gemini-2.5-flash': {'input': 0.30, 'output': 2.50},
'gemini-2.5-flash-lite': {'input': 0.10, 'output': 0.40},
'gemini-2.5-pro': {'input': 1.25, 'output': 10.00},
'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00}, # Paid tier
'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00}, # Paid tier
'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50}, # Paid tier
}
@ -536,8 +538,15 @@ class GeminiService:
elif token_type == 'output':
return getattr(usage, 'candidates_token_count', 0) or 0
elif token_type == 'thinking':
# Gemini 3 reports thinking tokens separately
return getattr(usage, 'thinking_token_count', 0) or 0
# SDK may not expose thinking_token_count directly.
# Derive from: total - prompt - candidates = thinking tokens.
thinking = getattr(usage, 'thinking_token_count', 0) or 0
if not thinking:
total = getattr(usage, 'total_token_count', 0) or 0
prompt = getattr(usage, 'prompt_token_count', 0) or 0
candidates = getattr(usage, 'candidates_token_count', 0) or 0
thinking = max(0, total - prompt - candidates)
return thinking
except Exception:
return 0
return 0
@ -585,11 +594,11 @@ class GeminiService:
try:
# Calculate costs using actual model pricing
pricing = GEMINI_PRICING.get(actual_model, {'input': 0.50, 'output': 3.00, 'thinking': 1.00})
# Google bills thinking tokens at the output rate
pricing = GEMINI_PRICING.get(actual_model, {'input': 0.50, 'output': 3.00})
input_cost = (input_tokens / 1_000_000) * pricing['input']
output_cost = (output_tokens / 1_000_000) * pricing['output']
thinking_cost = (thinking_tokens / 1_000_000) * pricing.get('thinking', 0)
total_cost = input_cost + output_cost + thinking_cost
output_cost = ((output_tokens + thinking_tokens) / 1_000_000) * pricing['output']
total_cost = input_cost + output_cost
# Cost in cents for AIUsageLog
cost_cents = total_cost * 100
@ -611,7 +620,7 @@ class GeminiService:
output_tokens=output_tokens + thinking_tokens, # Combined for legacy
total_tokens=input_tokens + output_tokens + thinking_tokens,
input_cost=input_cost,
output_cost=output_cost + thinking_cost, # Combined for legacy
output_cost=output_cost, # Includes thinking (billed at output rate)
total_cost=total_cost,
success=success,
error_message=error_message,