Switch the default Gemini model to '3-flash' and fall back on 503/UNAVAILABLE.

* app.py: init_gemini_service() is called with model='3-flash' instead of
  model='flash-lite'.
* gemini_service.py: add the static method GeminiService._is_retryable(), which
  returns True when str(error) contains '429', 'RESOURCE_EXHAUSTED', '503' or
  'UNAVAILABLE'. The except-handler in the third hunk now calls it instead of
  _is_rate_limited(), so these errors also move on to the next model unless the
  current model is the last entry of models_to_try. The warning it logs now
  includes the exception class name.

NOTE(review): _is_retryable() is a plain substring test on the whole error
text, so any message that merely contains the digits "429" or "503" is treated
as retryable - confirm that this is acceptable.

NOTE(review): this patch was restored from a copy whose newlines and
indentation had been collapsed. Line breaks, blank context lines, indentation
depth and the wrapping of the _build_generation_config signature are
reconstructed - verify against the repository before applying. The last hunk
originally declared 12 lines per side but only 11 were present (the final
context line after "self._log_api_cost(" is missing), so its header was
adjusted to 11.

diff --git a/app.py b/app.py
index c2f9f18..967e836 100644
--- a/app.py
+++ b/app.py
@@ -308,7 +308,7 @@ login_manager.login_message = 'Zaloguj się, aby uzyskać dostęp do tej strony.
 
 # Initialize Gemini service
 try:
-    gemini_service.init_gemini_service(model='flash-lite')  # Paid tier: Unlimited RPD, fallback: 3-flash (10K RPD) → 2.5-flash (10K RPD)
+    gemini_service.init_gemini_service(model='3-flash')  # Paid tier: 10K RPD, thinking mode, fallback: 2.5-flash-lite (Unlimited) → 2.5-flash (10K)
     logger.info("Gemini service initialized successfully")
 except Exception as e:
     logger.error(f"Failed to initialize Gemini service: {e}")
diff --git a/gemini_service.py b/gemini_service.py
index 611ddf1..fa11bc8 100644
--- a/gemini_service.py
+++ b/gemini_service.py
@@ -193,6 +193,13 @@ class GeminiService:
         error_str = str(error)
         return '429' in error_str or 'RESOURCE_EXHAUSTED' in error_str
 
+    @staticmethod
+    def _is_retryable(error: Exception) -> bool:
+        """Check if error is retryable (rate limit or server overload)."""
+        error_str = str(error)
+        return ('429' in error_str or 'RESOURCE_EXHAUSTED' in error_str or
+                '503' in error_str or 'UNAVAILABLE' in error_str)
+
     def _build_generation_config(self, model: str, temperature: float,
                                  max_tokens: Optional[int],
                                  thinking_level: Optional[str]) -> types.GenerateContentConfig:
@@ -324,11 +331,11 @@ class GeminiService:
                 return response_text
 
             except Exception as e:
-                if self._is_rate_limited(e) and model != models_to_try[-1]:
-                    logger.warning(f"Rate limited on {model}, trying next fallback...")
+                if self._is_retryable(e) and model != models_to_try[-1]:
+                    logger.warning(f"Retryable error on {model} ({type(e).__name__}), trying next fallback...")
                     last_error = e
                     continue
 
-                # Non-429 error or last model in chain — fail
+                # Non-retryable error or last model in chain — fail
                 latency_ms = int((time.time() - start_time) * 1000)
                 self._log_api_cost(