feat: Twitter/X data fetching via guest token GraphQL API
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Add twitter_service.py using Twitter's internal GraphQL API with guest token authentication (free, no API key needed). Fetches followers, tweets, bio, location, media count, and more. Integrated into social audit enrichment with _twitter_extra data stored in content_types JSONB, displayed on audit detail cards. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ef8257486a
commit
660ed68a0d
@ -1309,8 +1309,53 @@ class SocialProfileEnricher:
|
||||
return result
|
||||
|
||||
def _enrich_twitter(self, url: str) -> Dict[str, Any]:
|
||||
"""Enrich Twitter/X profile data using og tags from public page."""
|
||||
"""Enrich Twitter/X profile data via Twitter GraphQL API (guest token).
|
||||
|
||||
Uses Twitter's internal API with guest authentication — no paid API key needed.
|
||||
Rate limit: ~50 requests per 15 minutes per IP.
|
||||
Falls back to og tag scraping if API is unavailable.
|
||||
"""
|
||||
result = {}
|
||||
|
||||
# Try Twitter GraphQL API first
|
||||
try:
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
from twitter_service import TwitterService
|
||||
tw = TwitterService()
|
||||
username = tw.extract_username_from_url(url)
|
||||
if username:
|
||||
profile = tw.get_profile(username)
|
||||
if profile:
|
||||
result['followers_count'] = profile['followers_count']
|
||||
result['page_name'] = profile['name']
|
||||
if profile.get('description'):
|
||||
result['profile_description'] = profile['description'][:500]
|
||||
result['has_bio'] = True
|
||||
result['has_profile_photo'] = bool(profile.get('profile_image_url'))
|
||||
result['has_cover_photo'] = bool(profile.get('profile_banner_url'))
|
||||
result['posts_count_365d'] = profile['tweet_count']
|
||||
# Store extra data in content_types JSONB
|
||||
result['_twitter_extra'] = {
|
||||
'following_count': profile.get('following_count', 0),
|
||||
'listed_count': profile.get('listed_count', 0),
|
||||
'media_count': profile.get('media_count', 0),
|
||||
'favourites_count': profile.get('favourites_count', 0),
|
||||
'location': profile.get('location', ''),
|
||||
'created_at': profile.get('created_at', ''),
|
||||
'profile_image_url': profile.get('profile_image_url', ''),
|
||||
'profile_banner_url': profile.get('profile_banner_url', ''),
|
||||
'verified': profile.get('verified', False),
|
||||
'url': profile.get('url', ''),
|
||||
}
|
||||
return result
|
||||
except (ImportError, ValueError) as e:
|
||||
logger.debug(f"Twitter API not available ({e}), falling back to scraping")
|
||||
except Exception as e:
|
||||
logger.debug(f"Twitter API enrichment failed: {e}")
|
||||
|
||||
# Fallback: og tag scraping (usually returns nothing for x.com)
|
||||
try:
|
||||
resp = self.session.get(url, timeout=REQUEST_TIMEOUT)
|
||||
if resp.status_code == 200:
|
||||
@ -1325,7 +1370,7 @@ class SocialProfileEnricher:
|
||||
if name_match:
|
||||
result['page_name'] = name_match.group(1)
|
||||
except Exception as e:
|
||||
logger.debug(f"Twitter enrichment failed: {e}")
|
||||
logger.debug(f"Twitter scraping failed: {e}")
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
|
||||
@ -833,6 +833,29 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Twitter/X extra data -->
{# Collect every available Twitter/X detail first and render the wrapper only
   when at least one chip exists, so profiles that have e.g. only likes, lists
   or a creation date are not silently skipped. #}
{% if p.platform == 'twitter' %}
    {% set tw_info = [] %}
    {# The `if ...append(...)` wrapper is only a way to call append() without printing its None result. #}
    {% if ct.get('following_count') %}{% if tw_info.append(('👤', '{:,}'.format(ct.following_count).replace(',', ' ') ~ ' obserwowanych')) %}{% endif %}{% endif %}
    {% if ct.get('media_count') %}{% if tw_info.append(('🖼️', '{:,}'.format(ct.media_count).replace(',', ' ') ~ ' mediów')) %}{% endif %}{% endif %}
    {% if ct.get('favourites_count') %}{% if tw_info.append(('❤️', '{:,}'.format(ct.favourites_count).replace(',', ' ') ~ ' polubień')) %}{% endif %}{% endif %}
    {% if ct.get('listed_count') %}{% if tw_info.append(('📋', '{:,}'.format(ct.listed_count).replace(',', ' ') ~ ' list')) %}{% endif %}{% endif %}
    {% if ct.get('location') %}{% if tw_info.append(('📍', ct.location)) %}{% endif %}{% endif %}
    {% if ct.get('created_at') %}
        {# Twitter's created_at looks like "Wed Oct 10 20:19:24 +0000 2018" (30 chars).
           Show "Oct 10 2018": the previous [:16] slice kept the weekday and time
           but cut off the year. Any other format is shown unmodified. #}
        {% if ct.created_at|length == 30 %}
            {% if tw_info.append(('📅', 'Konto od: ' ~ ct.created_at[4:10] ~ ' ' ~ ct.created_at[-4:])) %}{% endif %}
        {% else %}
            {% if tw_info.append(('📅', 'Konto od: ' ~ ct.created_at)) %}{% endif %}
        {% endif %}
    {% endif %}
    {% if ct.get('verified') %}{% if tw_info.append(('✓', 'Zweryfikowany')) %}{% endif %}{% endif %}
    {% if tw_info %}
    <div style="margin-top: var(--spacing-sm);">
        <div style="display: flex; gap: var(--spacing-xs); flex-wrap: wrap; margin-bottom: var(--spacing-sm);">
            {% for icon, val in tw_info %}
            <span style="background: var(--background); padding: 3px 10px; border-radius: var(--radius); font-size: 12px; color: var(--text-secondary); border: 1px solid var(--border-color, #e5e7eb);">
                {{ icon }} {{ val }}
            </span>
            {% endfor %}
        </div>
    </div>
    {% endif %}
{% endif %}
|
||||
|
||||
<!-- YouTube extra data -->
|
||||
{% if ct.get('view_count') or ct.get('recent_videos') %}
|
||||
<div style="margin-top: var(--spacing-sm);">
|
||||
|
||||
221
twitter_service.py
Normal file
221
twitter_service.py
Normal file
@ -0,0 +1,221 @@
|
||||
"""
|
||||
Twitter/X Profile Data Service for NordaBiz
|
||||
============================================
|
||||
|
||||
Fetches Twitter profile data using Twitter's internal (undocumented) GraphQL API
|
||||
with guest token authentication. No paid API key required.
|
||||
|
||||
Note: This uses Twitter's internal API with guest tokens.
|
||||
Rate limits apply (~50 requests per 15 minutes per IP).
|
||||
|
||||
Author: NordaBiz Development Team
|
||||
Created: 2026-03-12
|
||||
"""
|
||||
|
||||
import re
|
||||
import logging
|
||||
from typing import Optional, Dict
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Twitter's public bearer token (embedded in twitter.com JavaScript bundle).
# TwitterService sends it verbatim in the "Authorization: Bearer ..." header of
# every request; the "%3D" inside it is kept exactly as written here.
_BEARER_TOKEN = (
    "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs"
    "%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)

# GraphQL endpoint queried by TwitterService.get_profile().
# NOTE(review): the "xc8f1g7BYqr6VTzTbvNlGw" path segment is presumably the
# query id of the UserByScreenName operation and may change when Twitter
# redeploys its web client — confirm if requests start failing.
_GRAPHQL_USER_BY_SCREEN_NAME = (
    "https://twitter.com/i/api/graphql/xc8f1g7BYqr6VTzTbvNlGw/UserByScreenName"
)

# Feature flags that get_profile() JSON-encodes and sends as the "features"
# query parameter alongside the "variables" parameter.
_GRAPHQL_FEATURES = {
    "hidden_profile_subscriptions_enabled": True,
    "responsive_web_graphql_exclude_directive_enabled": True,
    "verified_phone_label_enabled": False,
    "responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
    "responsive_web_graphql_timeline_navigation_enabled": True,
}
|
||||
|
||||
|
||||
class TwitterService:
    """Fetches Twitter/X profile data via guest token + GraphQL API.

    A guest token is requested lazily on the first profile lookup and cached
    on the instance; it is discarded and re-acquired once when the API answers
    with HTTP 403.
    """

    # First path segments on twitter.com / x.com that are site routes rather
    # than profile handles (share/login links are common on company websites).
    _RESERVED_PATHS = frozenset({
        'i', 'home', 'explore', 'search', 'settings',
        'notifications', 'messages', 'hashtag',
        'intent', 'share', 'login', 'signup', 'compose',
    })

    # The negative lookbehind requires the host to be exactly twitter.com or
    # x.com (optionally behind a subdomain), so look-alike hosts such as
    # "dropbox.com" or "netflix.com" no longer match on their trailing "x.com".
    _PROFILE_URL_RE = re.compile(
        r'(?<![\w-])(?:twitter\.com|x\.com)/([A-Za-z0-9_]+)(?=[/?#]|$)',
        re.IGNORECASE,
    )

    def __init__(self):
        """Create an HTTP session carrying the browser UA and bearer token."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            "Authorization": f"Bearer {_BEARER_TOKEN}",
        })
        self._guest_token = None

    def _ensure_guest_token(self) -> bool:
        """Obtain a guest token from Twitter. Returns True on success.

        A cached token is reused. On success the token is also installed as
        the ``x-guest-token`` session header. Network errors, HTTP errors and
        malformed (non-JSON / non-object) responses are logged and reported
        as False instead of raising.
        """
        if self._guest_token:
            return True
        try:
            resp = self.session.post(
                "https://api.twitter.com/1.1/guest/activate.json",
                timeout=10,
            )
            resp.raise_for_status()
            payload = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError: older requests versions raise a plain JSON decode
            # error that is not a RequestException subclass.
            logger.warning(f"Failed to get Twitter guest token: {e}")
            return False

        token = payload.get("guest_token") if isinstance(payload, dict) else None
        if not token:
            logger.warning("Twitter guest token response missing token")
            return False

        self._guest_token = token
        self.session.headers["x-guest-token"] = token
        return True

    def _invalidate_guest_token(self):
        """Force re-acquisition of guest token on next request."""
        self._guest_token = None
        self.session.headers.pop("x-guest-token", None)

    @staticmethod
    def extract_username_from_url(url: str) -> Optional[str]:
        """Extract Twitter username from URL.

        Supported:
        - x.com/username
        - twitter.com/username
        - x.com/username/status/...
        - subdomains (www., mobile.), query strings and #fragments

        Returns None for empty input, for URLs on other hosts (including
        hosts that merely end in "x.com"), and for reserved site routes such
        as /home, /intent or /share.
        """
        if not url:
            return None
        match = TwitterService._PROFILE_URL_RE.search(url.strip())
        if not match:
            return None
        username = match.group(1)
        # Exclude non-profile paths
        if username.lower() in TwitterService._RESERVED_PATHS:
            return None
        return username

    @staticmethod
    def _profile_from_legacy(legacy: Dict, username: str) -> Dict:
        """Map the GraphQL ``legacy`` user object onto the service's profile dict.

        Missing or null fields are normalised to 0 / "" / False so the result
        always has the documented value types.
        """
        def _int(key):
            return legacy.get(key) or 0

        def _text(key):
            return legacy.get(key) or ""

        # High-res profile image (remove _normal suffix)
        profile_img_hq = re.sub(
            r"_normal(\.\w+)$", r"\1", _text("profile_image_url_https")
        )

        return {
            "username": legacy.get("screen_name") or username,
            "name": _text("name"),
            "description": _text("description"),
            "followers_count": _int("followers_count"),
            "following_count": _int("friends_count"),
            "tweet_count": _int("statuses_count"),
            "listed_count": _int("listed_count"),
            "media_count": _int("media_count"),
            "favourites_count": _int("favourites_count"),
            "location": _text("location"),
            "created_at": _text("created_at"),
            "profile_image_url": profile_img_hq,
            "profile_banner_url": _text("profile_banner_url"),
            "verified": bool(legacy.get("verified")),
            "protected": bool(legacy.get("protected")),
            "url": _text("url"),
        }

    def get_profile(self, username: str) -> Optional[Dict]:
        """Fetch Twitter profile data via GraphQL API.

        Args:
            username: Twitter screen name (a leading "@" and surrounding
                whitespace are tolerated)

        Returns:
            Dict with profile data or None on error:
            {
                'username': str,
                'name': str,
                'description': str,
                'followers_count': int,
                'following_count': int,
                'tweet_count': int,
                'listed_count': int,
                'media_count': int,
                'favourites_count': int,
                'location': str,
                'created_at': str,
                'profile_image_url': str,
                'profile_banner_url': str,
                'verified': bool,
                'protected': bool,
                'url': str,
            }

        None is returned (never an exception) when no guest token can be
        obtained, on HTTP 429, on request failures, when the user is
        unavailable, or when the response has an unexpected shape.
        """
        if not username:
            return None
        username = username.strip().lstrip("@")
        if not username:
            return None

        import json

        # Request parameters do not change between retries; build them once.
        params = {
            "variables": json.dumps({
                "screen_name": username,
                "withSafetyModeUserFields": True,
            }),
            "features": json.dumps(_GRAPHQL_FEATURES),
        }

        for _attempt in range(2):
            if not self._ensure_guest_token():
                return None

            try:
                resp = self.session.get(
                    _GRAPHQL_USER_BY_SCREEN_NAME,
                    params=params,
                    timeout=15,
                )

                if resp.status_code == 403:
                    # Guest token expired, retry with new one
                    self._invalidate_guest_token()
                    continue

                if resp.status_code == 429:
                    logger.warning("Twitter API rate limited")
                    return None

                resp.raise_for_status()
                data = resp.json()

                # Each level may be absent *or* explicitly null in the
                # response; `or {}` covers both, unlike .get(key, {}).
                user = (data.get("data") or {}).get("user") or {}
                result_data = user.get("result") or {}

                if result_data.get("__typename") == "UserUnavailable":
                    logger.info(f"Twitter user @{username} unavailable/suspended")
                    return None

                legacy = result_data.get("legacy") or {}
                if not legacy:
                    logger.warning(f"No legacy data for @{username}")
                    return None

                profile = self._profile_from_legacy(legacy, username)

                logger.info(
                    f"Fetched Twitter profile @{username}: "
                    f"{profile['followers_count']} followers, "
                    f"{profile['tweet_count']} tweets"
                )
                return profile

            except requests.RequestException as e:
                logger.warning(f"Twitter API request error for @{username}: {e}")
                return None
            except (AttributeError, KeyError, ValueError, TypeError) as e:
                # AttributeError covers payloads whose nesting is not made of
                # JSON objects (e.g. a list where a dict was expected).
                logger.warning(f"Twitter API parse error for @{username}: {e}")
                return None

        logger.warning(f"Twitter API failed for @{username} after retries")
        return None
|
||||
Loading…
Reference in New Issue
Block a user