Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
222 lines
7.7 KiB
Python
222 lines
7.7 KiB
Python
"""
|
|
Twitter/X Profile Data Service for NordaBiz
============================================

Fetches Twitter/X profile data through the GraphQL API used by Twitter's own
web client, authenticating with a guest token. No paid API key is required.

Note: This is Twitter's internal (not officially documented) API accessed
with guest tokens. Rate limits apply (~50 requests per 15 minutes per IP).

Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-03-12
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
from typing import Optional, Dict
|
|
|
|
import requests
|
|
|
|
logger = logging.getLogger(__name__)

# Bearer token that twitter.com ships inside its public JavaScript bundle.
# It is sent as the Authorization header on every request made by the service.
_BEARER_TOKEN = "".join([
    "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs",
    "%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
])

# GraphQL endpoint for the UserByScreenName operation.
# NOTE(review): the opaque path segment looks like a query ID that Twitter may
# rotate on redeploy -- confirm before relying on it long-term.
_GRAPHQL_USER_BY_SCREEN_NAME = (
    "https://twitter.com/i/api/graphql/xc8f1g7BYqr6VTzTbvNlGw/UserByScreenName"
)

# Feature flags serialized to JSON and sent as the "features" query parameter.
_GRAPHQL_FEATURES = dict(
    hidden_profile_subscriptions_enabled=True,
    responsive_web_graphql_exclude_directive_enabled=True,
    verified_phone_label_enabled=False,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled=False,
    responsive_web_graphql_timeline_navigation_enabled=True,
)
|
|
|
|
|
|
class TwitterService:
    """Fetches Twitter/X profile data via guest token + GraphQL API.

    One ``requests.Session`` carries the bearer ``Authorization`` header and,
    once acquired, the ``x-guest-token`` header. The guest token is requested
    lazily on first use and cached on the instance until it is invalidated.
    """

    # The lookbehind rejects hosts that merely END in "x.com"/"twitter.com"
    # (e.g. "dropbox.com/foo", "nottwitter.com/foo") while still allowing
    # subdomains such as "www." or "mobile." and scheme-less URLs.
    # The username may be terminated by "/", "?", "#" or end of string.
    _PROFILE_URL_RE = re.compile(
        r'(?<![A-Za-z0-9-])(?:twitter\.com|x\.com)/([A-Za-z0-9_]+)(?:[/?#]|$)',
        re.IGNORECASE,  # host names are case-insensitive
    )

    # First path segments that are site features rather than user profiles.
    # "intent" and "share" are the web-intent / share-button endpoints.
    _NON_PROFILE_PATHS = frozenset({
        'i', 'home', 'explore', 'search', 'settings',
        'notifications', 'messages', 'hashtag',
        'intent', 'share',
    })

    def __init__(self):
        """Create the HTTP session with browser-like and bearer headers."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            "Authorization": f"Bearer {_BEARER_TOKEN}",
        })
        # Filled in lazily by _ensure_guest_token().
        self._guest_token = None

    def _ensure_guest_token(self) -> bool:
        """Obtain a guest token from Twitter if none is cached.

        Returns:
            True when a token is available (cached or freshly acquired) and
            set as the ``x-guest-token`` session header; False when the
            request fails, the body is not valid JSON, or no token is present.
        """
        if self._guest_token:
            return True

        try:
            resp = self.session.post(
                "https://api.twitter.com/1.1/guest/activate.json",
                timeout=10,
            )
            resp.raise_for_status()
            payload = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions
            # whose decode error is not a RequestException subclass.
            logger.warning("Failed to get Twitter guest token: %s", e)
            return False

        token = payload.get("guest_token") if isinstance(payload, dict) else None
        if not token:
            logger.warning("Twitter guest token response missing token")
            return False

        # Header values must be strings.
        self._guest_token = str(token)
        self.session.headers["x-guest-token"] = self._guest_token
        return True

    def _invalidate_guest_token(self):
        """Force re-acquisition of guest token on next request."""
        self._guest_token = None
        self.session.headers.pop("x-guest-token", None)

    @staticmethod
    def extract_username_from_url(url: str) -> Optional[str]:
        """Extract Twitter username from URL.

        Supported:
        - x.com/username
        - twitter.com/username
        - x.com/username/status/...
        - subdomains (www., mobile.), query strings and #fragments

        Args:
            url: Candidate profile URL; may be empty or None.

        Returns:
            The username as written in the URL, or None when the URL is
            empty, is not on twitter.com / x.com, or points at a
            non-profile path (home, search, intent, ...).
        """
        if not url:
            return None

        match = TwitterService._PROFILE_URL_RE.search(url)
        if match is None:
            return None

        username = match.group(1)
        if username.lower() in TwitterService._NON_PROFILE_PATHS:
            return None
        return username

    @staticmethod
    def _parse_profile(data, username: str) -> Optional[Dict]:
        """Convert a decoded UserByScreenName response into a profile dict.

        Returns None when the user is unavailable or the response carries no
        ``legacy`` object. Explicit JSON nulls at any level of
        data -> user -> result are treated the same as missing keys.
        """
        node = data
        for key in ("data", "user", "result"):
            node = node.get(key) if isinstance(node, dict) else None
        result_data = node if isinstance(node, dict) else {}

        if result_data.get("__typename") == "UserUnavailable":
            logger.info("Twitter user @%s unavailable/suspended", username)
            return None

        legacy = result_data.get("legacy")
        if not legacy or not isinstance(legacy, dict):
            logger.warning("No legacy data for @%s", username)
            return None

        # High-res profile image (remove _normal suffix). A null URL becomes
        # an empty string instead of aborting the whole profile.
        profile_img = legacy.get("profile_image_url_https") or ""
        profile_img_hq = re.sub(r"_normal(\.\w+)$", r"\1", profile_img)

        profile = {
            "username": legacy.get("screen_name", username),
            "name": legacy.get("name", ""),
            "description": legacy.get("description", ""),
            "followers_count": legacy.get("followers_count", 0),
            "following_count": legacy.get("friends_count", 0),
            "tweet_count": legacy.get("statuses_count", 0),
            "listed_count": legacy.get("listed_count", 0),
            "media_count": legacy.get("media_count", 0),
            "favourites_count": legacy.get("favourites_count", 0),
            "location": legacy.get("location", ""),
            "created_at": legacy.get("created_at", ""),
            "profile_image_url": profile_img_hq,
            "profile_banner_url": legacy.get("profile_banner_url", ""),
            "verified": legacy.get("verified", False),
            "protected": legacy.get("protected", False),
            "url": legacy.get("url", ""),
        }

        logger.info(
            "Fetched Twitter profile @%s: %s followers, %s tweets",
            username, profile["followers_count"], profile["tweet_count"],
        )
        return profile

    def get_profile(self, username: str) -> Optional[Dict]:
        """Fetch Twitter profile data via GraphQL API.

        Makes at most two attempts: an HTTP 403 is treated as an expired
        guest token, which is discarded and re-acquired once. An HTTP 429
        (rate limit) returns None immediately.

        Args:
            username: Twitter screen name (without @)

        Returns:
            Dict with profile data or None on error:
            {
                'username': str,
                'name': str,
                'description': str,
                'followers_count': int,
                'following_count': int,
                'tweet_count': int,
                'listed_count': int,
                'media_count': int,
                'favourites_count': int,
                'location': str,
                'created_at': str,
                'profile_image_url': str,
                'profile_banner_url': str,
                'verified': bool,
                'protected': bool,
                'url': str,
            }
        """
        if not username:
            return None

        import json

        for _attempt in range(2):
            if not self._ensure_guest_token():
                return None

            try:
                variables = json.dumps({
                    "screen_name": username,
                    "withSafetyModeUserFields": True,
                })
                features = json.dumps(_GRAPHQL_FEATURES)

                resp = self.session.get(
                    _GRAPHQL_USER_BY_SCREEN_NAME,
                    params={"variables": variables, "features": features},
                    timeout=15,
                )

                if resp.status_code == 403:
                    # Guest token expired, retry with new one
                    self._invalidate_guest_token()
                    continue

                if resp.status_code == 429:
                    logger.warning("Twitter API rate limited")
                    return None

                resp.raise_for_status()
                return self._parse_profile(resp.json(), username)

            except requests.RequestException as e:
                logger.warning(
                    "Twitter API request error for @%s: %s", username, e
                )
                return None
            except (KeyError, ValueError, TypeError) as e:
                logger.warning(
                    "Twitter API parse error for @%s: %s", username, e
                )
                return None

        logger.warning("Twitter API failed for @%s after retries", username)
        return None
|