Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
210 lines
7.7 KiB
Python
210 lines
7.7 KiB
Python
"""
YouTube Data API v3 Service for NordaBiz
=========================================

Simple YouTube API client for fetching channel statistics.
Uses the YouTube Data API v3 with the same Google API key as Places API.

API Reference: https://developers.google.com/youtube/v3/docs/channels

Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-02-08
"""
|
|
|
|
import logging
import os
import re
from typing import Any, Dict, List, Optional

import requests
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# API Configuration
# Base URL that the endpoint paths used in this module are appended to.
YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"
|
|
|
|
|
|
class YouTubeService:
    """Fetches YouTube channel statistics via YouTube Data API v3.

    All public methods are best-effort: network, HTTP and parse failures are
    logged and reported as ``None`` / ``[]`` rather than raised. Only the
    constructor raises (when no API key can be found).
    """

    # Resource parts requested on every channels.list lookup.
    _CHANNEL_PARTS = 'statistics,snippet,brandingSettings'

    # Timeout, in seconds, passed to every HTTP request.
    _REQUEST_TIMEOUT = 15

    # Upper bound applied to max_results before calling search.list.
    _MAX_SEARCH_RESULTS = 50

    # URL patterns tried in order: /channel/<id>, /@<handle> (dots allowed
    # in handles), then the legacy /c/<name> and /user/<name> forms.
    _URL_PATTERNS = (
        re.compile(r'youtube\.com/channel/([A-Za-z0-9_-]+)'),
        re.compile(r'youtube\.com/@([A-Za-z0-9._-]+)'),
        re.compile(r'youtube\.com/(?:c|user)/([A-Za-z0-9._-]+)'),
    )

    # A well-formed channel ID: "UC" followed by 22 URL-safe characters.
    _CHANNEL_ID_RE = re.compile(r'UC[A-Za-z0-9_-]{22}')

    # Matches the value of a "key" query parameter so it can be masked.
    _KEY_PARAM_RE = re.compile(r'([?&]key=)[^&\s\'")]+')

    def __init__(self, api_key: Optional[str] = None):
        """Create the client and its HTTP session.

        Args:
            api_key: Google API key. When omitted, the environment variables
                YOUTUBE_API_KEY, GOOGLE_PAGESPEED_API_KEY and
                GOOGLE_PLACES_API_KEY are consulted in that order.

        Raises:
            ValueError: If no API key is given and none of the environment
                variables is set to a non-empty value.
        """
        self.api_key = (
            api_key
            or os.getenv('YOUTUBE_API_KEY')
            or os.getenv('GOOGLE_PAGESPEED_API_KEY')
            or os.getenv('GOOGLE_PLACES_API_KEY')
        )
        if not self.api_key:
            raise ValueError("No Google API key found (YOUTUBE_API_KEY, GOOGLE_PAGESPEED_API_KEY, or GOOGLE_PLACES_API_KEY)")
        self.session = requests.Session()

    def extract_channel_id_from_url(self, url: str) -> Optional[str]:
        """
        Extract channel ID or handle from YouTube URL.

        Supported formats:
        - youtube.com/channel/UC1234567890abcdef
        - youtube.com/@handle
        - youtube.com/c/channelname
        - youtube.com/user/username

        Args:
            url: YouTube channel URL

        Returns:
            Channel ID (starts with UC) or handle (without @) or None when
            the URL is empty or matches none of the supported formats.
        """
        if not url:
            return None

        for pattern in YouTubeService._URL_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

        logger.warning(f"Unable to extract channel ID from URL: {url}")
        return None

    def _redact(self, text: str) -> str:
        """Return *text* with the API key masked so it is safe to log.

        Exception messages from the HTTP layer can embed the full request
        URL, and the key travels in its query string.
        """
        redacted = YouTubeService._KEY_PARAM_RE.sub(r'\1***', text)
        key = getattr(self, 'api_key', None)
        if key:
            redacted = redacted.replace(key, '***')
        return redacted

    @classmethod
    def _lookup_filters(cls, identifier: str) -> List[str]:
        """Return the channels.list filter names to try, in order.

        A well-formed channel ID is looked up by ``id`` only. Anything else
        is tried as a handle, then as a legacy username. A value that merely
        starts with "UC" (e.g. the handle "UCLA") is still tried by ``id``
        first, then falls through to the handle/username lookups.
        """
        if cls._CHANNEL_ID_RE.fullmatch(identifier):
            return ['id']
        filters = ['forHandle', 'forUsername']
        if identifier.startswith('UC'):
            filters.insert(0, 'id')
        return filters

    def _fetch_channel(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Return the first channel resource matching *identifier*, or None.

        HTTP and network errors propagate to the caller.
        """
        url = f"{YOUTUBE_API_BASE}/channels"
        for filter_name in self._lookup_filters(identifier):
            response = self.session.get(
                url,
                params={
                    'part': self._CHANNEL_PARTS,
                    filter_name: identifier,
                    'key': self.api_key,
                },
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            items = response.json().get('items', [])
            if items:
                return items[0]
        return None

    @staticmethod
    def _parse_channel(channel: Dict[str, Any], fallback_id: str) -> Dict[str, Any]:
        """Flatten a channels.list resource into the public stats dict."""
        stats = channel.get('statistics', {})
        snippet = channel.get('snippet', {})
        branding_settings = channel.get('brandingSettings', {})
        thumbnails = snippet.get('thumbnails', {})
        # Prefer the "high" thumbnail, fall back to "default".
        thumbnail = thumbnails.get('high', thumbnails.get('default', {}))

        return {
            'channel_id': channel.get('id', fallback_id),
            'subscriber_count': int(stats.get('subscriberCount', 0)),
            'hidden_subscriber_count': stats.get('hiddenSubscriberCount', False),
            'view_count': int(stats.get('viewCount', 0)),
            'video_count': int(stats.get('videoCount', 0)),
            'channel_title': snippet.get('title', ''),
            'channel_description': snippet.get('description', ''),
            'custom_url': snippet.get('customUrl', ''),
            'published_at': snippet.get('publishedAt', ''),
            'country': snippet.get('country', ''),
            'thumbnail_url': thumbnail.get('url', ''),
            'banner_url': branding_settings.get('image', {}).get('bannerExternalUrl', ''),
            'keywords': branding_settings.get('channel', {}).get('keywords', ''),
        }

    def get_channel_stats(self, channel_id_or_username: str) -> Optional[Dict]:
        """
        Fetch channel statistics from YouTube Data API v3.

        Args:
            channel_id_or_username: YouTube channel ID (UC...) or username/handle

        Returns:
            Dict with channel stats, or None when the channel is not found
            or on any HTTP / network / parse error:
            {
                'channel_id': str,
                'subscriber_count': int,
                'hidden_subscriber_count': bool,
                'view_count': int,
                'video_count': int,
                'channel_title': str,
                'channel_description': str,
                'custom_url': str,
                'published_at': str,
                'country': str,
                'thumbnail_url': str,
                'banner_url': str,
                'keywords': str
            }
        """
        if not channel_id_or_username:
            return None

        try:
            channel = self._fetch_channel(channel_id_or_username)
            if channel is None:
                logger.warning(f"No YouTube channel found for: {channel_id_or_username}")
                return None

            result = self._parse_channel(channel, channel_id_or_username)

            logger.info(f"Fetched YouTube stats for {result['channel_title']}: "
                        f"{result['subscriber_count']} subscribers, "
                        f"{result['video_count']} videos")

            return result

        except requests.exceptions.HTTPError as e:
            failed = e.response
            status = failed.status_code if failed is not None else 'n/a'
            body = failed.text if failed is not None else str(e)
            logger.warning(f"YouTube API HTTP error for {channel_id_or_username}: "
                           f"{status} - {self._redact(body)}")
            return None
        except requests.exceptions.RequestException as e:
            # str(e) may contain the request URL, which carries the API key.
            logger.warning(f"YouTube API request error for {channel_id_or_username}: "
                           f"{self._redact(str(e))}")
            return None
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            logger.warning(f"YouTube API response parse error: {e}")
            return None

    def get_recent_videos(self, channel_id: str, max_results: int = 5) -> list:
        """Fetch recent videos from a channel (costs 100 quota units).

        Args:
            channel_id: YouTube channel ID (UC...)
            max_results: Number of videos to fetch (max 50). Larger values
                are capped at 50; values <= 0 return an empty list without
                calling the API.

        Returns:
            List of dicts with video info ('title', 'date' as YYYY-MM-DD,
            'video_id'), or empty list on error.
        """
        if not channel_id or not channel_id.startswith('UC'):
            return []

        try:
            limit = min(int(max_results), self._MAX_SEARCH_RESULTS)
        except (TypeError, ValueError):
            return []
        if limit <= 0:
            # Nothing requested: do not spend quota on an empty answer.
            return []

        try:
            resp = self.session.get(
                f"{YOUTUBE_API_BASE}/search",
                params={
                    'part': 'snippet',
                    'channelId': channel_id,
                    'order': 'date',
                    'type': 'video',
                    'maxResults': limit,
                    'key': self.api_key,
                },
                timeout=self._REQUEST_TIMEOUT,
            )
            resp.raise_for_status()
            items = resp.json().get('items', [])

            videos = []
            for item in items:
                snippet = item.get('snippet', {})
                videos.append({
                    'title': snippet.get('title', ''),
                    # Keep only the date part of the ISO timestamp.
                    'date': (snippet.get('publishedAt') or '')[:10],
                    'video_id': (item.get('id') or {}).get('videoId', ''),
                })
            return videos
        except Exception as e:
            # Deliberately best-effort: any failure yields an empty list.
            # The message is redacted because it may embed the request URL.
            logger.debug(f"YouTube recent videos fetch failed: {self._redact(str(e))}")
            return []
|