feat: update Gemini models — migrate 3-pro to 3.1-pro, add 3.1-flash-lite, remove old SDK

- Replace gemini-3-pro-preview with gemini-3.1-pro-preview (the old model is deprecated March 9)
- Add gemini-3.1-flash-lite-preview as quality fallback in the fallback chain
- Remove the last google.generativeai import from zopk_knowledge_service.py
- Update pricing, thinking models, and preview models sets
- Keep the '3-pro' alias for backward compatibility across the codebase

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 19:45:33 +01:00 · 2026-03-07 19:45:33 +01:00 · 3bc69f9455
commit 3bc69f9455
parent 917d686a10
4 changed files with 22 additions and 19 deletions
--- a/audit_ai_service.py
+++ b/audit_ai_service.py
@@ -967,7 +967,7 @@ def _log_json_parse_failure(company_id: int, audit_type: str, details: str):
        try:
            log = AIUsageLog(
                request_type='audit_json_parse_failure',
-                model='gemini-3-pro-preview',
+                model='gemini-3.1-pro-preview',
                tokens_input=0,
                tokens_output=0,
                cost_cents=0,
--- a/blueprints/chat/routes.py
+++ b/blueprints/chat/routes.py
@@ -195,7 +195,7 @@ def chat_send_message(conversation_id):
        # Map model choice to actual model name and thinking level
        model_map = {
            'flash': '3-flash',           # Gemini 3 Flash - 10K RPD, thinking mode
-            'pro': '3-pro'                # Gemini 3 Pro - 250 RPD, premium
+            'pro': '3-pro'                # Gemini 3.1 Pro - premium reasoning
        }
        thinking_map = {
            'flash': 'high',
--- a/gemini_service.py
+++ b/gemini_service.py
@@ -38,27 +38,29 @@ except ImportError:

 # Available Gemini models (2026 - Gemini 3 generation available)
 GEMINI_MODELS = {
-    'flash': 'gemini-2.5-flash',           # Best for general use - balanced cost/quality
-    'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
-    'pro': 'gemini-2.5-pro',               # High quality - best reasoning/coding
-    'flash-2.0': 'gemini-2.0-flash',       # Second generation - 1M context window (wycofywany 31.03.2026)
-    '3-flash': 'gemini-3-flash-preview',   # Gemini 3 Flash - 7x lepszy reasoning, thinking mode
-    '3-pro': 'gemini-3-pro-preview',       # Gemini 3 Pro - najlepszy reasoning, 2M context
+    'flash': 'gemini-2.5-flash',                   # Balanced cost/quality
+    'flash-lite': 'gemini-2.5-flash-lite',         # Ultra cheap - $0.10/$0.40 per 1M tokens
+    'pro': 'gemini-2.5-pro',                       # High quality 2.5 gen
+    'flash-2.0': 'gemini-2.0-flash',               # Second generation (wycofywany 31.03.2026)
+    '3-flash': 'gemini-3-flash-preview',           # Gemini 3 Flash - thinking mode
+    '3-pro': 'gemini-3.1-pro-preview',              # Gemini 3.1 Pro (alias zachowany dla kompatybilności)
+    '3.1-pro': 'gemini-3.1-pro-preview',           # Gemini 3.1 Pro - najlepszy reasoning
+    '3.1-flash-lite': 'gemini-3.1-flash-lite-preview',  # Gemini 3.1 Flash Lite - szybki, tani
 }

 # Models that support thinking mode
-THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
+THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}

 # Preview models — monitor for GA release to switch for better stability
 # Track at: https://ai.google.dev/gemini-api/docs/models
-PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
+PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}

 # Fallback chain for rate limit (429) resilience — Paid Tier 1
-# Order: primary → fast fallback → backup
+# Order: primary → quality fallback → cheapest fallback
 MODEL_FALLBACK_CHAIN = [
-    'gemini-3-flash-preview',   # 10K RPD paid tier - thinking mode
-    'gemini-2.5-flash-lite',    # Unlimited RPD paid tier - fast fallback
-    'gemini-2.5-flash',         # 10K RPD paid tier - backup
+    'gemini-3-flash-preview',           # 10K RPD - thinking mode, primary
+    'gemini-3.1-flash-lite-preview',    # Quality fallback - gen 3.1
+    'gemini-2.5-flash-lite',            # Unlimited RPD - cheapest, last resort
 ]

 # Available thinking levels for Gemini 3 Flash
@@ -77,7 +79,8 @@ GEMINI_PRICING = {
    'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0},
    'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0},
    'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00},  # Paid tier
-    'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00},   # Paid tier
+    'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00},   # Paid tier
+    'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0},  # Paid tier
 }


--- a/zopk_knowledge_service.py
+++ b/zopk_knowledge_service.py
@@ -2653,7 +2653,7 @@ def categorize_milestones_with_ai(

    Adds AI-improved titles, categories, and extracts dates more accurately.
    """
-    import google.generativeai as genai
+    from gemini_service import GeminiService
    import json

    if not suggestions:
@@ -2684,11 +2684,11 @@ Odpowiedz TYLKO jako JSON array:
 [{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]"""

    try:
-        model = genai.GenerativeModel(model_name)
-        response = model.generate_content(prompt)
+        service = GeminiService(model=model_name)
+        response_text = service.generate_text(prompt)

        # Parse response
-        response_text = response.text.strip()
+        response_text = response_text.strip()
        if response_text.startswith('```'):
            response_text = response_text.split('```')[1]
            if response_text.startswith('json'):