diff --git a/audit_ai_service.py b/audit_ai_service.py
index dd12ce0..7a66a2d 100644
--- a/audit_ai_service.py
+++ b/audit_ai_service.py
@@ -967,7 +967,7 @@ def _log_json_parse_failure(company_id: int, audit_type: str, details: str):
         try:
             log = AIUsageLog(
                 request_type='audit_json_parse_failure',
-                model='gemini-3-pro-preview',
+                model='gemini-3.1-pro-preview',
                 tokens_input=0,
                 tokens_output=0,
                 cost_cents=0,
diff --git a/blueprints/chat/routes.py b/blueprints/chat/routes.py
index 6fb4910..0049cdf 100644
--- a/blueprints/chat/routes.py
+++ b/blueprints/chat/routes.py
@@ -195,7 +195,7 @@ def chat_send_message(conversation_id):
         # Map model choice to actual model name and thinking level
         model_map = {
             'flash': '3-flash',           # Gemini 3 Flash - 10K RPD, thinking mode
-            'pro': '3-pro'                # Gemini 3 Pro - 250 RPD, premium
+            'pro': '3-pro'                # Gemini 3.1 Pro ('3-pro' alias kept for compatibility) - premium reasoning
         }
         thinking_map = {
             'flash': 'high',
diff --git a/gemini_service.py b/gemini_service.py
index a1bb668..7278ff2 100644
--- a/gemini_service.py
+++ b/gemini_service.py
@@ -38,27 +38,29 @@ except ImportError:
 
 # Available Gemini models (2026 - Gemini 3 generation available)
 GEMINI_MODELS = {
-    'flash': 'gemini-2.5-flash',           # Best for general use - balanced cost/quality
-    'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens
-    'pro': 'gemini-2.5-pro',               # High quality - best reasoning/coding
-    'flash-2.0': 'gemini-2.0-flash',       # Second generation - 1M context window (wycofywany 31.03.2026)
-    '3-flash': 'gemini-3-flash-preview',   # Gemini 3 Flash - 7x lepszy reasoning, thinking mode
-    '3-pro': 'gemini-3-pro-preview',       # Gemini 3 Pro - najlepszy reasoning, 2M context
+    'flash': 'gemini-2.5-flash',                   # Balanced cost/quality
+    'flash-lite': 'gemini-2.5-flash-lite',         # Ultra cheap - $0.10/$0.40 per 1M tokens
+    'pro': 'gemini-2.5-pro',                       # High quality 2.5 gen
+    'flash-2.0': 'gemini-2.0-flash',               # Second generation (being retired 2026-03-31)
+    '3-flash': 'gemini-3-flash-preview',           # Gemini 3 Flash - thinking mode
+    '3-pro': 'gemini-3.1-pro-preview',             # Gemini 3.1 Pro (alias kept for backward compatibility)
+    '3.1-pro': 'gemini-3.1-pro-preview',           # Gemini 3.1 Pro - best reasoning
+    '3.1-flash-lite': 'gemini-3.1-flash-lite-preview',  # Gemini 3.1 Flash Lite - fast, cheap
 }
 
 # Models that support thinking mode
-THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
+THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
 
 # Preview models — monitor for GA release to switch for better stability
 # Track at: https://ai.google.dev/gemini-api/docs/models
-PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'}
+PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'}
 
 # Fallback chain for rate limit (429) resilience — Paid Tier 1
-# Order: primary → fast fallback → backup
+# Order: primary → quality fallback → cheapest fallback
 MODEL_FALLBACK_CHAIN = [
-    'gemini-3-flash-preview',   # 10K RPD paid tier - thinking mode
-    'gemini-2.5-flash-lite',    # Unlimited RPD paid tier - fast fallback
-    'gemini-2.5-flash',         # 10K RPD paid tier - backup
+    'gemini-3-flash-preview',           # 10K RPD - thinking mode, primary
+    'gemini-3.1-flash-lite-preview',    # Quality fallback - gen 3.1
+    'gemini-2.5-flash-lite',            # Unlimited RPD - cheapest, last resort
 ]
 
 # Available thinking levels for Gemini 3 Flash
@@ -77,7 +79,8 @@ GEMINI_PRICING = {
     'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0},
     'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0},
     'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00},  # Paid tier
-    'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00},   # Paid tier
+    'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00},   # Paid tier
+    'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0},  # Paid tier; NOTE(review): model is in THINKING_MODELS but 'thinking' rate is 0 - confirm
 }
 
 
diff --git a/zopk_knowledge_service.py b/zopk_knowledge_service.py
index 41cc3f4..f683dbe 100644
--- a/zopk_knowledge_service.py
+++ b/zopk_knowledge_service.py
@@ -2653,7 +2653,7 @@ def categorize_milestones_with_ai(
 
     Adds AI-improved titles, categories, and extracts dates more accurately.
     """
-    import google.generativeai as genai
+    from gemini_service import GeminiService
     import json
 
     if not suggestions:
@@ -2684,11 +2684,11 @@ Odpowiedz TYLKO jako JSON array:
 [{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]"""
 
     try:
-        model = genai.GenerativeModel(model_name)
-        response = model.generate_content(prompt)
+        service = GeminiService(model=model_name)
+        response_text = service.generate_text(prompt)
 
         # Parse response
-        response_text = response.text.strip()
+        response_text = response_text.strip()
         if response_text.startswith('```'):
             response_text = response_text.split('```')[1]
             if response_text.startswith('json'):