nordabiz/twitter_service.py
Maciej Pienczyn 5030b71beb
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
chore: update Author to Maciej Pienczyn, InPi sp. z o.o. across all files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 08:20:47 +02:00

222 lines
7.7 KiB
Python

"""
Twitter/X Profile Data Service for NordaBiz
============================================
Fetches Twitter profile data using the GraphQL API that backs Twitter's
web client, with guest token authentication. No paid API key required.

Note: This is Twitter's internal API (not the official developer API),
accessed with guest tokens.
Rate limits apply (~50 requests per 15 minutes per IP).
Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-03-12
"""
import re
import logging
from typing import Optional, Dict
import requests
logger = logging.getLogger(__name__)
# Bearer token that twitter.com ships in its own JavaScript bundle; it is
# sent as the Authorization header on every request made by the service.
# The literal is split in two purely to keep the line length reasonable.
_BEARER_TOKEN = (
    "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs"
    + "%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)

# GraphQL endpoint used to look a user up by screen name.
# NOTE(review): the path segment before "UserByScreenName" looks like a
# query id that Twitter may rotate -- confirm when requests start failing.
_GRAPHQL_USER_BY_SCREEN_NAME = (
    "https://twitter.com/i/api/graphql/"
    "xc8f1g7BYqr6VTzTbvNlGw/UserByScreenName"
)

# Feature switches serialized to JSON and sent as the "features" query
# parameter alongside the GraphQL variables.
_GRAPHQL_FEATURES = dict(
    hidden_profile_subscriptions_enabled=True,
    responsive_web_graphql_exclude_directive_enabled=True,
    verified_phone_label_enabled=False,
    responsive_web_graphql_skip_user_profile_image_extensions_enabled=False,
    responsive_web_graphql_timeline_navigation_enabled=True,
)
class TwitterService:
    """Fetches Twitter/X profile data via guest token + GraphQL API.

    A single ``requests.Session`` carries the browser-like User-Agent, the
    bearer token and (once acquired) the ``x-guest-token`` header. The guest
    token is fetched lazily on first use and re-fetched once when the API
    answers 403.
    """

    # First path segments that are site pages rather than user profiles.
    _NON_PROFILE_PATHS = frozenset({
        'i', 'home', 'explore', 'search', 'settings',
        'notifications', 'messages', 'hashtag',
        'intent', 'share',
    })

    # The negative lookbehind stops hosts that merely END in "x.com" or
    # "twitter.com" (netflix.com, dropbox.com, nottwitter.com) from matching,
    # while still allowing a scheme ("//") or a subdomain dot before the host.
    # The username must be followed by "/", "?", "#" or the end of the string.
    _PROFILE_URL_RE = re.compile(
        r'(?<![A-Za-z0-9-])(?:twitter\.com|x\.com)/([A-Za-z0-9_]+)(?:[/?#]|$)',
        re.IGNORECASE,
    )

    def __init__(self):
        """Create the HTTP session with the headers every request needs."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            "Authorization": f"Bearer {_BEARER_TOKEN}",
        })
        # Cached guest token; None means "not acquired yet / invalidated".
        self._guest_token = None

    def _ensure_guest_token(self) -> bool:
        """Obtain a guest token from Twitter unless one is already cached.

        On success the token is stored on the instance and installed as the
        session's ``x-guest-token`` header.

        Returns:
            True when a token is available, False when activation failed
            (network/HTTP error, non-JSON body, or a body without a token).
        """
        if self._guest_token:
            return True

        try:
            resp = self.session.post(
                "https://api.twitter.com/1.1/guest/activate.json",
                timeout=10,
            )
            resp.raise_for_status()
            payload = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            logger.warning(f"Failed to get Twitter guest token: {e}")
            return False

        token = payload.get("guest_token") if isinstance(payload, dict) else None
        if not token:
            logger.warning("Twitter guest token response missing token")
            return False

        self._guest_token = token
        self.session.headers["x-guest-token"] = token
        return True

    def _invalidate_guest_token(self):
        """Force re-acquisition of guest token on next request."""
        self._guest_token = None
        self.session.headers.pop("x-guest-token", None)

    @staticmethod
    def extract_username_from_url(url: str) -> Optional[str]:
        """Extract Twitter username from URL.

        Supported:
            - x.com/username
            - twitter.com/username
            - x.com/username/status/...
            - any of the above with a scheme, a subdomain, a query string
              or a fragment

        Args:
            url: Profile (or status) URL; may be empty or None.

        Returns:
            The username exactly as written in the URL, or None when the URL
            is empty, is not on twitter.com/x.com, or points at a reserved
            non-profile path such as ``/home`` or ``/intent``.
        """
        if not url:
            return None

        match = TwitterService._PROFILE_URL_RE.search(url)
        if match is None:
            return None

        username = match.group(1)
        # Exclude non-profile paths
        if username.lower() in TwitterService._NON_PROFILE_PATHS:
            return None
        return username

    @staticmethod
    def _extract_user_result(data) -> Dict:
        """Return the ``data.user.result`` object of a GraphQL payload, or {}.

        Tolerates missing keys, explicit JSON nulls and non-object values at
        any level, so callers never hit an AttributeError on ``None``.
        """
        node = data
        for key in ("data", "user", "result"):
            if not isinstance(node, dict):
                return {}
            node = node.get(key)
        return node if isinstance(node, dict) else {}

    @staticmethod
    def _build_profile(legacy: Dict, username: str) -> Dict:
        """Map the API's ``legacy`` user object onto the service's profile dict.

        ``username`` is only used as a fallback when ``screen_name`` is absent.
        """
        # High-res profile image (remove _normal suffix)
        profile_img = legacy.get("profile_image_url_https") or ""
        profile_img_hq = re.sub(r"_normal(\.\w+)$", r"\1", profile_img)

        return {
            "username": legacy.get("screen_name", username),
            "name": legacy.get("name", ""),
            "description": legacy.get("description", ""),
            "followers_count": legacy.get("followers_count", 0),
            "following_count": legacy.get("friends_count", 0),
            "tweet_count": legacy.get("statuses_count", 0),
            "listed_count": legacy.get("listed_count", 0),
            "media_count": legacy.get("media_count", 0),
            "favourites_count": legacy.get("favourites_count", 0),
            "location": legacy.get("location", ""),
            "created_at": legacy.get("created_at", ""),
            "profile_image_url": profile_img_hq,
            "profile_banner_url": legacy.get("profile_banner_url", ""),
            "verified": legacy.get("verified", False),
            "protected": legacy.get("protected", False),
            "url": legacy.get("url", ""),
        }

    def get_profile(self, username: str) -> Optional[Dict]:
        """Fetch Twitter profile data via GraphQL API.

        The request is attempted at most twice: a 403 response is treated as
        an expired guest token, which is discarded and re-acquired before the
        second attempt. A 429 response is not retried.

        Args:
            username: Twitter screen name (without @)

        Returns:
            Dict with profile data or None on error:
            {
                'username': str,
                'name': str,
                'description': str,
                'followers_count': int,
                'following_count': int,
                'tweet_count': int,
                'listed_count': int,
                'media_count': int,
                'favourites_count': int,
                'location': str,
                'created_at': str,
                'profile_image_url': str,
                'profile_banner_url': str,
                'verified': bool,
                'protected': bool,
                'url': str,
            }
        """
        if not username:
            return None

        import json

        for _ in range(2):
            if not self._ensure_guest_token():
                return None

            try:
                variables = json.dumps({
                    "screen_name": username,
                    "withSafetyModeUserFields": True,
                })
                features = json.dumps(_GRAPHQL_FEATURES)

                resp = self.session.get(
                    _GRAPHQL_USER_BY_SCREEN_NAME,
                    params={"variables": variables, "features": features},
                    timeout=15,
                )

                if resp.status_code == 403:
                    # Guest token expired, retry with new one
                    self._invalidate_guest_token()
                    continue

                if resp.status_code == 429:
                    logger.warning("Twitter API rate limited")
                    return None

                resp.raise_for_status()

                result_data = self._extract_user_result(resp.json())

                if result_data.get("__typename") == "UserUnavailable":
                    logger.info(f"Twitter user @{username} unavailable/suspended")
                    return None

                legacy = result_data.get("legacy")
                if not legacy or not isinstance(legacy, dict):
                    logger.warning(f"No legacy data for @{username}")
                    return None

                profile = self._build_profile(legacy, username)

                logger.info(
                    f"Fetched Twitter profile @{username}: "
                    f"{profile['followers_count']} followers, "
                    f"{profile['tweet_count']} tweets"
                )
                return profile

            except requests.RequestException as e:
                logger.warning(f"Twitter API request error for @{username}: {e}")
                return None
            except (KeyError, ValueError, TypeError) as e:
                logger.warning(f"Twitter API parse error for @{username}: {e}")
                return None

        # Both attempts were answered with 403.
        logger.warning(f"Twitter API failed for @{username} after retries")
        return None