diff --git a/audit_ai_service.py b/audit_ai_service.py index dd12ce0..7a66a2d 100644 --- a/audit_ai_service.py +++ b/audit_ai_service.py @@ -967,7 +967,7 @@ def _log_json_parse_failure(company_id: int, audit_type: str, details: str): try: log = AIUsageLog( request_type='audit_json_parse_failure', - model='gemini-3-pro-preview', + model='gemini-3.1-pro-preview', tokens_input=0, tokens_output=0, cost_cents=0, diff --git a/blueprints/chat/routes.py b/blueprints/chat/routes.py index 6fb4910..0049cdf 100644 --- a/blueprints/chat/routes.py +++ b/blueprints/chat/routes.py @@ -195,7 +195,7 @@ def chat_send_message(conversation_id): # Map model choice to actual model name and thinking level model_map = { 'flash': '3-flash', # Gemini 3 Flash - 10K RPD, thinking mode - 'pro': '3-pro' # Gemini 3 Pro - 250 RPD, premium + 'pro': '3-pro' # Gemini 3.1 Pro - premium reasoning } thinking_map = { 'flash': 'high', diff --git a/gemini_service.py b/gemini_service.py index a1bb668..7278ff2 100644 --- a/gemini_service.py +++ b/gemini_service.py @@ -38,27 +38,29 @@ except ImportError: # Available Gemini models (2026 - Gemini 3 generation available) GEMINI_MODELS = { - 'flash': 'gemini-2.5-flash', # Best for general use - balanced cost/quality - 'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens - 'pro': 'gemini-2.5-pro', # High quality - best reasoning/coding - 'flash-2.0': 'gemini-2.0-flash', # Second generation - 1M context window (wycofywany 31.03.2026) - '3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - 7x lepszy reasoning, thinking mode - '3-pro': 'gemini-3-pro-preview', # Gemini 3 Pro - najlepszy reasoning, 2M context + 'flash': 'gemini-2.5-flash', # Balanced cost/quality + 'flash-lite': 'gemini-2.5-flash-lite', # Ultra cheap - $0.10/$0.40 per 1M tokens + 'pro': 'gemini-2.5-pro', # High quality 2.5 gen + 'flash-2.0': 'gemini-2.0-flash', # Second generation (wycofywany 31.03.2026) + '3-flash': 'gemini-3-flash-preview', # Gemini 3 Flash - thinking 
mode + '3-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro (alias kept for backward compatibility) + '3.1-pro': 'gemini-3.1-pro-preview', # Gemini 3.1 Pro - best reasoning + '3.1-flash-lite': 'gemini-3.1-flash-lite-preview', # Gemini 3.1 Flash Lite - fast, cheap } # Models that support thinking mode -THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'} +THINKING_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'} # Preview models — monitor for GA release to switch for better stability # Track at: https://ai.google.dev/gemini-api/docs/models -PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3-pro-preview'} +PREVIEW_MODELS = {'gemini-3-flash-preview', 'gemini-3.1-pro-preview', 'gemini-3.1-flash-lite-preview'} # Fallback chain for rate limit (429) resilience — Paid Tier 1 -# Order: primary → fast fallback → backup +# Order: primary → quality fallback → cheapest fallback MODEL_FALLBACK_CHAIN = [ - 'gemini-3-flash-preview', # 10K RPD paid tier - thinking mode - 'gemini-2.5-flash-lite', # Unlimited RPD paid tier - fast fallback - 'gemini-2.5-flash', # 10K RPD paid tier - backup + 'gemini-3-flash-preview', # 10K RPD - thinking mode, primary + 'gemini-3.1-flash-lite-preview', # Quality fallback - gen 3.1 + 'gemini-2.5-flash-lite', # Unlimited RPD - cheapest, last resort ] # Available thinking levels for Gemini 3 Flash @@ -77,7 +79,8 @@ GEMINI_PRICING = { 'gemini-2.5-pro': {'input': 1.25, 'output': 10.00, 'thinking': 0}, 'gemini-2.0-flash': {'input': 0.10, 'output': 0.40, 'thinking': 0}, 'gemini-3-flash-preview': {'input': 0.50, 'output': 3.00, 'thinking': 1.00}, # Paid tier - 'gemini-3-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier + 'gemini-3.1-pro-preview': {'input': 2.00, 'output': 12.00, 'thinking': 4.00}, # Paid tier + 'gemini-3.1-flash-lite-preview': {'input': 0.25, 'output': 1.50, 'thinking': 0}, # Paid tier } diff --git a/zopk_knowledge_service.py 
b/zopk_knowledge_service.py index 41cc3f4..f683dbe 100644 --- a/zopk_knowledge_service.py +++ b/zopk_knowledge_service.py @@ -2653,7 +2653,7 @@ def categorize_milestones_with_ai( Adds AI-improved titles, categories, and extracts dates more accurately. """ - import google.generativeai as genai + from gemini_service import GeminiService import json if not suggestions: @@ -2684,11 +2684,11 @@ Odpowiedz TYLKO jako JSON array: [{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]""" try: - model = genai.GenerativeModel(model_name) - response = model.generate_content(prompt) + service = GeminiService(model=model_name) + response_text = service.generate_text(prompt) # Parse response - response_text = response.text.strip() + response_text = response_text.strip() if response_text.startswith('```'): response_text = response_text.split('```')[1] if response_text.startswith('json'):