feat: update Gemini models — migrate 3-pro to 3.1-pro, add 3.1-flash-lite, remove old SDK
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Replace gemini-3-pro-preview with gemini-3.1-pro-preview (the old model is deprecated on March 9)
- Add gemini-3.1-flash-lite-preview as the quality fallback in the model fallback chain
- Remove the last google.generativeai import from zopk_knowledge_service.py (replaced by GeminiService)
- Update the pricing table and the thinking-model and preview-model sets
- Keep the '3-pro' alias (now resolving to gemini-3.1-pro-preview) for backward compatibility across the codebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-07 19:45:33 +01:00
parent 917d686a10
commit 3bc69f9455
4 changed files with 22 additions and 19 deletions

View File

@ -967,7 +967,7 @@ def _log_json_parse_failure(company_id: int, audit_type: str, details: str):
try:
log = AIUsageLog(
request_type='audit_json_parse_failure',
model='gemini-3-pro-preview',
model='gemini-3.1-pro-preview',
tokens_input=0,
tokens_output=0,
cost_cents=0,

View File

@ -195,7 +195,7 @@ def chat_send_message(conversation_id):
# Map model choice to actual model name and thinking level
model_map = {
'flash': '3-flash', # Gemini 3 Flash - 10K RPD, thinking mode
'pro': '3-pro' # Gemini 3 Pro - 250 RPD, premium
'pro': '3-pro' # Gemini 3.1 Pro - premium reasoning
}
thinking_map = {
'flash': 'high',

View File

@ -38,27 +38,29 @@ except ImportError:
# Available Gemini models (2026 - Gemini 3 generation available)
GEMINI_MODELS = {
'flash': 'gemini-2.5-flash', # Best for general use - balanced cost/quality
'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
'pro': 'gemini-2.5-pro', # High quality - best reasoning/coding
'flash-2.0': 'gemini-2.0-flash', # Second generation - 1M context window (wycofywany 31.03.2026)
'3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - 7x lepszy reasoning, thinking mode
'3-pro': 'gemini-3-pro-preview', # Gemini 3 Pro - najlepszy reasoning, 2M context
'flash': 'gemini-2.5-flash', # Balanced cost/quality
'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
'pro': 'gemini-2.5-pro', # High quality 2.5 gen
'flash-2.0': 'gemini-2.0-flash', # Second generation (wycofywany 31.03.2026)
'3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - thinking mode
'3-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro (alias zachowany dla kompatybilności)
'3.1-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro - najlepszy reasoning
'3.1-flash-lite': 'gemini-3.1-flash-lite-preview', # Gemini 3.1 Flash Lite - szybki, tani
}
# Models that support thinking mode
THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
# Preview models — monitor for GA release to switch for better stability
# Track at: https://ai.google.dev/gemini-api/docs/models
PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
# Fallback chain for rate limit (429) resilience — Paid Tier 1
# Order: primary → fast fallback → backup
# Order: primary → quality fallback → cheapest fallback
MODEL_FALLBACK_CHAIN = [
'gemini-3-flash-preview', # 10K RPD paid tier - thinking mode
'gemini-2.5-flash-lite', # Unlimited RPD paid tier - fast fallback
'gemini-2.5-flash', # 10K RPD paid tier - backup
'gemini-3-flash-preview', # 10K RPD - thinking mode, primary
'gemini-3.1-flash-lite-preview', # Quality fallback - gen 3.1
'gemini-2.5-flash-lite', # Unlimited RPD - cheapest, last resort
]
# Available thinking levels for Gemini 3 Flash
@ -77,7 +79,8 @@ GEMINI_PRICING = {
'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0},
'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0},
'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00}, # Paid tier
'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier
'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier
'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0}, # Paid tier
}

View File

@ -2653,7 +2653,7 @@ def categorize_milestones_with_ai(
Adds AI-improved titles, categories, and extracts dates more accurately.
"""
import google.generativeai as genai
from gemini_service import GeminiService
import json
if not suggestions:
@ -2684,11 +2684,11 @@ Odpowiedz TYLKO jako JSON array:
[{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]"""
try:
model = genai.GenerativeModel(model_name)
response = model.generate_content(prompt)
service = GeminiService(model=model_name)
response_text = service.generate_text(prompt)
# Parse response
response_text = response.text.strip()
response_text = response_text.strip()
if response_text.startswith('```'):
response_text = response_text.split('```')[1]
if response_text.startswith('json'):