feat: Twitter/X data fetching via guest token GraphQL API
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

Add twitter_service.py using Twitter's internal GraphQL API with
guest token authentication (free, no API key needed). Fetches
follower count, tweet count, bio, location, media count, and more.

Integrated into social audit enrichment with _twitter_extra data
stored in content_types JSONB, displayed on audit detail cards.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-12 14:25:41 +01:00
parent ef8257486a
commit 660ed68a0d
3 changed files with 291 additions and 2 deletions

View File

@ -1309,8 +1309,53 @@ class SocialProfileEnricher:
return result
def _enrich_twitter(self, url: str) -> Dict[str, Any]:
"""Enrich Twitter/X profile data using og tags from public page."""
"""Enrich Twitter/X profile data via Twitter GraphQL API (guest token).
Uses Twitter's internal API with guest authentication — no paid API key needed.
Rate limit: ~50 requests per 15 minutes per IP.
Falls back to og tag scraping if API is unavailable.
"""
result = {}
# Try Twitter GraphQL API first
try:
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from twitter_service import TwitterService
tw = TwitterService()
username = tw.extract_username_from_url(url)
if username:
profile = tw.get_profile(username)
if profile:
result['followers_count'] = profile['followers_count']
result['page_name'] = profile['name']
if profile.get('description'):
result['profile_description'] = profile['description'][:500]
result['has_bio'] = True
result['has_profile_photo'] = bool(profile.get('profile_image_url'))
result['has_cover_photo'] = bool(profile.get('profile_banner_url'))
result['posts_count_365d'] = profile['tweet_count']
# Store extra data in content_types JSONB
result['_twitter_extra'] = {
'following_count': profile.get('following_count', 0),
'listed_count': profile.get('listed_count', 0),
'media_count': profile.get('media_count', 0),
'favourites_count': profile.get('favourites_count', 0),
'location': profile.get('location', ''),
'created_at': profile.get('created_at', ''),
'profile_image_url': profile.get('profile_image_url', ''),
'profile_banner_url': profile.get('profile_banner_url', ''),
'verified': profile.get('verified', False),
'url': profile.get('url', ''),
}
return result
except (ImportError, ValueError) as e:
logger.debug(f"Twitter API not available ({e}), falling back to scraping")
except Exception as e:
logger.debug(f"Twitter API enrichment failed: {e}")
# Fallback: og tag scraping (usually returns nothing for x.com)
try:
resp = self.session.get(url, timeout=REQUEST_TIMEOUT)
if resp.status_code == 200:
@ -1325,7 +1370,7 @@ class SocialProfileEnricher:
if name_match:
result['page_name'] = name_match.group(1)
except Exception as e:
logger.debug(f"Twitter enrichment failed: {e}")
logger.debug(f"Twitter scraping failed: {e}")
return result
@staticmethod

View File

@ -833,6 +833,29 @@
</div>
{% endif %}
<!-- Twitter/X extra data -->
{# Renders a row of small "chips" with extra Twitter/X profile facts. #}
{# `p` and `ct` are defined outside this fragment; `ct` is presumably the profile's content_types JSON - TODO confirm. #}
{# NOTE(review): the section is only entered when following_count, location or media_count is truthy, #}
{# so a profile that has only favourites/listed/created_at/verified renders nothing - confirm this is intended. #}
{% if p.platform == 'twitter' and (ct.get('following_count') or ct.get('location') or ct.get('media_count')) %}
<div style="margin-top: var(--spacing-sm);">
{# tw_info collects (icon, label) pairs. list.append() returns None, so wrapping each call in an #}
{# empty if-block performs the append without printing anything into the page. #}
{% set tw_info = [] %}
{# Counts are formatted with ',' thousands separators which are then replaced by spaces. #}
{% if ct.get('following_count') %}{% if tw_info.append(('👤', '{:,}'.format(ct.following_count).replace(',', ' ') ~ ' obserwowanych')) %}{% endif %}{% endif %}
{% if ct.get('media_count') %}{% if tw_info.append(('🖼️', '{:,}'.format(ct.media_count).replace(',', ' ') ~ ' mediów')) %}{% endif %}{% endif %}
{% if ct.get('favourites_count') %}{% if tw_info.append(('❤️', '{:,}'.format(ct.favourites_count).replace(',', ' ') ~ ' polubień')) %}{% endif %}{% endif %}
{% if ct.get('listed_count') %}{% if tw_info.append(('📋', '{:,}'.format(ct.listed_count).replace(',', ' ') ~ ' list')) %}{% endif %}{% endif %}
{% if ct.get('location') %}{% if tw_info.append(('📍', ct.location)) %}{% endif %}{% endif %}
{# NOTE(review): [:16] keeps only the first 16 characters of created_at. If the stored value uses #}
{# Twitter's "Wed Oct 10 20:19:24 +0000 2018" style, this shows weekday/day/time and drops the year - confirm the stored format. #}
{% if ct.get('created_at') %}{% if tw_info.append(('📅', 'Konto od: ' ~ ct.created_at[:16])) %}{% endif %}{% endif %}
{% if ct.get('verified') %}{% if tw_info.append(('✓', 'Zweryfikowany')) %}{% endif %}{% endif %}
{# Emit one chip per collected pair; nothing is rendered when the list stayed empty. #}
{% if tw_info %}
<div style="display: flex; gap: var(--spacing-xs); flex-wrap: wrap; margin-bottom: var(--spacing-sm);">
{% for icon, val in tw_info %}
<span style="background: var(--background); padding: 3px 10px; border-radius: var(--radius); font-size: 12px; color: var(--text-secondary); border: 1px solid var(--border-color, #e5e7eb);">
{{ icon }} {{ val }}
</span>
{% endfor %}
</div>
{% endif %}
</div>
{% endif %}
<!-- YouTube extra data -->
{% if ct.get('view_count') or ct.get('recent_videos') %}
<div style="margin-top: var(--spacing-sm);">

221
twitter_service.py Normal file
View File

@ -0,0 +1,221 @@
"""
Twitter/X Profile Data Service for NordaBiz
============================================
Fetches Twitter profile data using Twitter's internal GraphQL API
with guest token authentication. No paid API key required.
Note: This uses Twitter's internal API with guest tokens.
Rate limits apply (~50 requests per 15 minutes per IP).
Author: NordaBiz Development Team
Created: 2026-03-12
"""
import re
import logging
from typing import Optional, Dict
import requests
logger = logging.getLogger(__name__)
# Twitter's public bearer token (embedded in twitter.com JavaScript bundle).
# TwitterService puts it into the session's "Authorization: Bearer ..." header,
# so it accompanies every request, including the guest-token activation call.
# The "%3D" inside the literal is part of the token text and is sent verbatim.
_BEARER_TOKEN = (
"AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs"
"%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
)
# GraphQL endpoint queried by TwitterService.get_profile().
# NOTE(review): the path segment before "UserByScreenName" looks like a query id
# that may change on Twitter's side - confirm if lookups start failing.
_GRAPHQL_USER_BY_SCREEN_NAME = (
"https://twitter.com/i/api/graphql/xc8f1g7BYqr6VTzTbvNlGw/UserByScreenName"
)
# Feature flags sent JSON-encoded in the "features" query parameter of every
# UserByScreenName request.
# NOTE(review): presumably this exact set is what the endpoint expects - verify
# against the current web client before adding or removing flags.
_GRAPHQL_FEATURES = {
"hidden_profile_subscriptions_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
"responsive_web_graphql_timeline_navigation_enabled": True,
}
class TwitterService:
    """Fetch Twitter/X profile data via a guest token and the GraphQL API.

    One instance owns one ``requests.Session`` and caches the guest token it
    obtained, so reusing an instance for several lookups avoids activating a
    new guest token for every profile.
    """

    # Anchored on the host so that only twitter.com / x.com (optionally with
    # subdomains such as www. or mobile.) are accepted. An unanchored search
    # for "x.com/" would also match hosts like "wix.com" or "max.com".
    # The scheme is optional because bare "x.com/username" is supported.
    _PROFILE_URL_RE = re.compile(
        r'^(?:(?:https?:)?//)?'
        r'(?:[a-z0-9-]+\.)*(?:twitter|x)\.com(?::\d+)?/'
        r'([A-Za-z0-9_]+)(?:[/?#]|$)',
        re.IGNORECASE,
    )

    # First path segments that are site routes rather than profiles.
    # "intent" and "share" are the share-button endpoints that are commonly
    # found on company websites next to (or instead of) a real profile link.
    _RESERVED_PATHS = frozenset({
        'i', 'home', 'explore', 'search', 'settings',
        'notifications', 'messages', 'hashtag',
        'intent', 'share', 'login', 'signup',
    })

    # One initial request plus one retry after a rejected guest token.
    _MAX_ATTEMPTS = 2

    def __init__(self):
        """Create the HTTP session with browser-like and bearer headers."""
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
            "Authorization": f"Bearer {_BEARER_TOKEN}",
        })
        # Filled lazily by _ensure_guest_token().
        self._guest_token = None

    def _ensure_guest_token(self) -> bool:
        """Obtain a guest token from Twitter unless one is already cached.

        On success the token is stored on the instance and added to the
        session as the ``x-guest-token`` header.

        Returns:
            True if a token is available, False if activation failed
            (network/HTTP error, non-JSON body, or a body without a token).
        """
        if self._guest_token:
            return True
        try:
            resp = self.session.post(
                "https://api.twitter.com/1.1/guest/activate.json",
                timeout=10,
            )
            resp.raise_for_status()
            payload = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions
            # whose JSON error is not a RequestException subclass.
            logger.warning(f"Failed to get Twitter guest token: {e}")
            return False

        token = payload.get("guest_token") if isinstance(payload, dict) else None
        if not token:
            logger.warning("Twitter guest token response missing token")
            return False

        self._guest_token = token
        self.session.headers["x-guest-token"] = token
        return True

    def _invalidate_guest_token(self):
        """Force re-acquisition of guest token on next request."""
        self._guest_token = None
        self.session.headers.pop("x-guest-token", None)

    @staticmethod
    def extract_username_from_url(url: str) -> Optional[str]:
        """Extract the Twitter username from a profile URL.

        Supported:
            - x.com/username
            - twitter.com/username
            - x.com/username/status/...
            - any of the above with http(s)://, a subdomain (www., mobile.),
              a query string or a #fragment

        Args:
            url: Profile URL; surrounding whitespace is ignored.

        Returns:
            The username, or None when the URL is empty, its host is not
            twitter.com / x.com, or the first path segment is a site route
            (home, search, intent, share, ...) rather than a profile.
        """
        if not url:
            return None
        match = TwitterService._PROFILE_URL_RE.match(url.strip())
        if match is None:
            return None
        username = match.group(1)
        if username.lower() in TwitterService._RESERVED_PATHS:
            return None
        return username

    @staticmethod
    def _parse_profile(payload, username: str) -> Optional[Dict]:
        """Map a UserByScreenName response payload to the public profile dict.

        Returns None (after logging) when the user is unavailable or the
        payload carries no ``legacy`` section. JSON ``null`` values at any
        level are treated like missing keys.
        """
        # "or {}" instead of .get(key, {}): a key that is present but null
        # would otherwise yield None and break the next lookup.
        user = (payload.get("data") or {}).get("user") or {}
        result_data = user.get("result") or {}

        if result_data.get("__typename") == "UserUnavailable":
            logger.info(f"Twitter user @{username} unavailable/suspended")
            return None

        legacy = result_data.get("legacy") or {}
        if not legacy:
            logger.warning(f"No legacy data for @{username}")
            return None

        # High-res profile image (remove _normal suffix)
        profile_img = legacy.get("profile_image_url_https") or ""
        profile_img_hq = re.sub(r"_normal(\.\w+)$", r"\1", profile_img)

        return {
            "username": legacy.get("screen_name") or username,
            "name": legacy.get("name") or "",
            "description": legacy.get("description") or "",
            "followers_count": legacy.get("followers_count") or 0,
            "following_count": legacy.get("friends_count") or 0,
            "tweet_count": legacy.get("statuses_count") or 0,
            "listed_count": legacy.get("listed_count") or 0,
            "media_count": legacy.get("media_count") or 0,
            "favourites_count": legacy.get("favourites_count") or 0,
            "location": legacy.get("location") or "",
            "created_at": legacy.get("created_at") or "",
            "profile_image_url": profile_img_hq,
            "profile_banner_url": legacy.get("profile_banner_url") or "",
            "verified": bool(legacy.get("verified")),
            "protected": bool(legacy.get("protected")),
            "url": legacy.get("url") or "",
        }

    def get_profile(self, username: str) -> Optional[Dict]:
        """Fetch Twitter profile data via GraphQL API.

        A 403 response is treated as an expired guest token: the token is
        dropped and the request is retried once with a fresh one. A 429
        response (rate limit) is not retried.

        Args:
            username: Twitter screen name (without @)

        Returns:
            Dict with profile data or None on error:
            {
                'username': str,
                'name': str,
                'description': str,
                'followers_count': int,
                'following_count': int,
                'tweet_count': int,
                'listed_count': int,
                'media_count': int,
                'favourites_count': int,
                'location': str,
                'created_at': str,
                'profile_image_url': str,
                'profile_banner_url': str,
                'verified': bool,
                'protected': bool,
                'url': str,
            }
        """
        if not username:
            return None

        import json

        for _ in range(self._MAX_ATTEMPTS):
            if not self._ensure_guest_token():
                return None
            try:
                resp = self.session.get(
                    _GRAPHQL_USER_BY_SCREEN_NAME,
                    params={
                        "variables": json.dumps({
                            "screen_name": username,
                            "withSafetyModeUserFields": True,
                        }),
                        "features": json.dumps(_GRAPHQL_FEATURES),
                    },
                    timeout=15,
                )
                if resp.status_code == 403:
                    # Guest token expired, retry with new one
                    self._invalidate_guest_token()
                    continue
                if resp.status_code == 429:
                    logger.warning("Twitter API rate limited")
                    return None
                resp.raise_for_status()
                profile = self._parse_profile(resp.json(), username)
            except requests.RequestException as e:
                logger.warning(f"Twitter API request error for @{username}: {e}")
                return None
            except (AttributeError, KeyError, ValueError, TypeError) as e:
                # AttributeError: payload (or a nested level) was not a dict.
                logger.warning(f"Twitter API parse error for @{username}: {e}")
                return None

            if profile is not None:
                logger.info(
                    f"Fetched Twitter profile @{username}: "
                    f"{profile['followers_count']} followers, "
                    f"{profile['tweet_count']} tweets"
                )
            return profile

        logger.warning(f"Twitter API failed for @{username} after retries")
        return None