feat(ai): Upgrade to Gemini 3 Flash + add 503 fallback resilience
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Switch primary model from flash-lite (2.5) to 3-flash (Gemini 3 Flash Preview)
  for better reasoning and thinking mode across all AI features
- Add _is_retryable() method to handle 503 UNAVAILABLE (server overload)
  in addition to existing 429 rate limit fallback
- Fallback chain: 3-flash → 2.5-flash-lite → 2.5-flash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-07 17:36:54 +01:00
parent aa49c18f7a
commit 3d26ea6119
2 changed files with 11 additions and 4 deletions

2
app.py
View File

@ -308,7 +308,7 @@ login_manager.login_message = 'Zaloguj się, aby uzyskać dostęp do tej strony.
# Initialize Gemini service
try:
gemini_service.init_gemini_service(model='flash-lite') # Paid tier: Unlimited RPD, fallback: 3-flash (10K RPD) → 2.5-flash (10K RPD)
gemini_service.init_gemini_service(model='3-flash') # Paid tier: 10K RPD, thinking mode, fallback: 2.5-flash-lite (Unlimited) → 2.5-flash (10K)
logger.info("Gemini service initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize Gemini service: {e}")

View File

@ -193,6 +193,13 @@ class GeminiService:
error_str = str(error)
return '429' in error_str or 'RESOURCE_EXHAUSTED' in error_str
@staticmethod
def _is_retryable(error: Exception) -> bool:
"""Check if error is retryable (rate limit or server overload)."""
error_str = str(error)
return ('429' in error_str or 'RESOURCE_EXHAUSTED' in error_str or
'503' in error_str or 'UNAVAILABLE' in error_str)
def _build_generation_config(self, model: str, temperature: float,
max_tokens: Optional[int],
thinking_level: Optional[str]) -> types.GenerateContentConfig:
@ -324,12 +331,12 @@ class GeminiService:
return response_text
except Exception as e:
if self._is_rate_limited(e) and model != models_to_try[-1]:
logger.warning(f"Rate limited on {model}, trying next fallback...")
if self._is_retryable(e) and model != models_to_try[-1]:
logger.warning(f"Retryable error on {model} ({type(e).__name__}), trying next fallback...")
last_error = e
continue
# Non-429 error or last model in chain — fail
# Non-retryable error or last model in chain — fail
latency_ms = int((time.time() - start_time) * 1000)
self._log_api_cost(