diff --git a/blueprints/admin/routes_social.py b/blueprints/admin/routes_social.py index c662519..38c48c6 100644 --- a/blueprints/admin/routes_social.py +++ b/blueprints/admin/routes_social.py @@ -992,7 +992,7 @@ def _run_enrichment_background(company_ids): elif platform_name == 'instagram': profile_result['reason'] = 'Instagram wymaga logowania. Podłącz Meta API (OAuth), aby pobierać dane.' elif platform_name == 'linkedin': - profile_result['reason'] = 'LinkedIn blokuje dostęp publiczny dla botów.' + profile_result['reason'] = 'LinkedIn blokuje boty (3 próby z opóźnieniem). Wyniki mogą się różnić między skanami.' else: profile_result['reason'] = f'{profile.platform} — brak danych publicznych do pobrania.' diff --git a/scripts/social_media_audit.py b/scripts/social_media_audit.py index 0b4fc3e..7feaa3f 100644 --- a/scripts/social_media_audit.py +++ b/scripts/social_media_audit.py @@ -1201,28 +1201,51 @@ class SocialProfileEnricher: return result def _enrich_linkedin(self, url: str) -> Dict[str, Any]: - """Enrich LinkedIn company page data.""" + """Enrich LinkedIn company page data. + + LinkedIn aggressively blocks bots — retries with random delays + to improve success rate. Returns empty dict if all attempts fail. + """ + import random + result = {} - try: - resp = self.session.get(url, timeout=REQUEST_TIMEOUT) - if resp.status_code == 200: - html = resp.text - og_desc = re.search(r' 0: + delay = random.uniform(2, 5) + time.sleep(delay) + resp = self.session.get(url, timeout=REQUEST_TIMEOUT) + if resp.status_code == 200: + html = resp.text + # Check if LinkedIn returned a login wall instead of data + if 'authwall' in html[:2000].lower() or 'sign in' in html[:2000].lower(): + logger.debug(f"LinkedIn authwall on attempt {attempt+1} for {url}") + continue + og_desc = re.search(r' Dict[str, Any]: