From 3d26ea61196eb279398cf3d1fe1dfa67c47afde1 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Sat, 7 Feb 2026 17:36:54 +0100 Subject: [PATCH] feat(ai): Upgrade to Gemini 3 Flash + add 503 fallback resilience MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch primary model from flash-lite (2.5) to 3-flash (Gemini 3 Flash Preview) for better reasoning and thinking mode across all AI features - Add _is_retryable() method to handle 503 UNAVAILABLE (server overload) in addition to existing 429 rate limit fallback - Fallback chain: 3-flash → 2.5-flash-lite → 2.5-flash Co-Authored-By: Claude Opus 4.6 --- app.py | 2 +- gemini_service.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/app.py b/app.py index c2f9f18..967e836 100644 --- a/app.py +++ b/app.py @@ -308,7 +308,7 @@ login_manager.login_message = 'Zaloguj się, aby uzyskać dostęp do tej strony. # Initialize Gemini service try: - gemini_service.init_gemini_service(model='flash-lite') # Paid tier: Unlimited RPD, fallback: 3-flash (10K RPD) → 2.5-flash (10K RPD) + gemini_service.init_gemini_service(model='3-flash') # Paid tier: 10K RPD, thinking mode, fallback: 2.5-flash-lite (Unlimited) → 2.5-flash (10K) logger.info("Gemini service initialized successfully") except Exception as e: logger.error(f"Failed to initialize Gemini service: {e}") diff --git a/gemini_service.py b/gemini_service.py index 611ddf1..fa11bc8 100644 --- a/gemini_service.py +++ b/gemini_service.py @@ -193,6 +193,13 @@ class GeminiService: error_str = str(error) return '429' in error_str or 'RESOURCE_EXHAUSTED' in error_str + @staticmethod + def _is_retryable(error: Exception) -> bool: + """Check if error is retryable (rate limit or server overload).""" + error_str = str(error) + return ('429' in error_str or 'RESOURCE_EXHAUSTED' in error_str or + '503' in error_str or 'UNAVAILABLE' in error_str) + def _build_generation_config(self, model: str, temperature: float, max_tokens: Optional[int], thinking_level: Optional[str]) -> types.GenerateContentConfig: @@ -324,12 +331,12 @@ class GeminiService: return response_text except Exception as e: - if self._is_rate_limited(e) and model != models_to_try[-1]: - logger.warning(f"Rate limited on {model}, trying next fallback...") + if self._is_retryable(e) and model != models_to_try[-1]: + logger.warning(f"Retryable error on {model} ({type(e).__name__}), trying next fallback...") last_error = e continue - # Non-429 error or last model in chain — fail + # Non-retryable error or last model in chain — fail latency_ms = int((time.time() - start_time) * 1000) self._log_api_cost(