"""
YouTube Data API v3 Service for NordaBiz
=========================================

Simple YouTube API client for fetching channel statistics.
Uses the YouTube Data API v3 with the same Google API key as Places API.

API Reference: https://developers.google.com/youtube/v3/docs/channels

Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-02-08
"""

import logging
import os
import re
from typing import Dict, List, Optional

import requests

logger = logging.getLogger(__name__)

# API Configuration
YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"

# Timeout (seconds) applied to every HTTP request made by the service.
_REQUEST_TIMEOUT = 15

# Resource parts requested from the channels endpoint.
_CHANNEL_PARTS = 'statistics,snippet,brandingSettings'

# Upper bound for the ``maxResults`` parameter of the search endpoint.
_MAX_SEARCH_RESULTS = 50

# URL patterns tried in order: /channel/<id>, /@<handle>, then legacy
# /c/<name> and /user/<name>. Only the host part is matched
# case-insensitively, because host names are case-insensitive while the
# path prefixes are not. Dots are allowed in handles and legacy names.
_CHANNEL_URL_PATTERNS = (
    re.compile(r'(?i:youtube\.com)/channel/([A-Za-z0-9_-]+)'),
    re.compile(r'(?i:youtube\.com)/@([A-Za-z0-9._-]+)'),
    re.compile(r'(?i:youtube\.com)/(?:c|user)/([A-Za-z0-9._-]+)'),
)


class YouTubeService:
    """Fetches YouTube channel statistics via YouTube Data API v3."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Create the service and its HTTP session.

        Args:
            api_key: Google API key. When omitted, the first non-empty value
                of the YOUTUBE_API_KEY, GOOGLE_PAGESPEED_API_KEY and
                GOOGLE_PLACES_API_KEY environment variables is used.

        Raises:
            ValueError: If no API key is supplied or found in the environment.
        """
        self.api_key = (
            api_key
            or os.getenv('YOUTUBE_API_KEY')
            or os.getenv('GOOGLE_PAGESPEED_API_KEY')
            or os.getenv('GOOGLE_PLACES_API_KEY')
        )
        if not self.api_key:
            raise ValueError("No Google API key found (YOUTUBE_API_KEY, GOOGLE_PAGESPEED_API_KEY, or GOOGLE_PLACES_API_KEY)")

        self.session = requests.Session()

    def close(self) -> None:
        """Close the underlying HTTP session and release its connections."""
        self.session.close()

    def __enter__(self) -> 'YouTubeService':
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def _redact(self, text) -> str:
        """Return ``str(text)`` with the API key masked.

        Exception messages produced by ``requests`` embed the full request
        URL, which carries the key as a query parameter; they must not be
        written to logs verbatim.
        """
        text = str(text)
        key = getattr(self, 'api_key', None)
        return text.replace(key, '[REDACTED]') if key else text

    def _get_json(self, endpoint: str, params: Dict) -> Dict:
        """GET ``YOUTUBE_API_BASE/<endpoint>`` and return the decoded JSON body.

        The API key is appended to ``params`` as the last query parameter.

        Raises:
            requests.exceptions.RequestException: On transport errors or a
                non-2xx status (via ``raise_for_status``).
            ValueError: If the response body is not valid JSON.
        """
        query = dict(params, key=self.api_key)
        response = self.session.get(
            f"{YOUTUBE_API_BASE}/{endpoint}",
            params=query,
            timeout=_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _parse_channel(channel: Dict, fallback_id: str) -> Dict:
        """Flatten one channel resource into the stats dict returned to callers."""
        stats = channel.get('statistics', {})
        snippet = channel.get('snippet', {})
        branding_settings = channel.get('brandingSettings', {})
        thumbnails = snippet.get('thumbnails', {})
        # Prefer the high-resolution thumbnail, fall back to the default one.
        thumbnail = thumbnails.get('high', thumbnails.get('default', {}))

        return {
            'channel_id': channel.get('id', fallback_id),
            'subscriber_count': int(stats.get('subscriberCount', 0)),
            'hidden_subscriber_count': stats.get('hiddenSubscriberCount', False),
            'view_count': int(stats.get('viewCount', 0)),
            'video_count': int(stats.get('videoCount', 0)),
            'channel_title': snippet.get('title', ''),
            'channel_description': snippet.get('description', ''),
            'custom_url': snippet.get('customUrl', ''),
            'published_at': snippet.get('publishedAt', ''),
            'country': snippet.get('country', ''),
            'thumbnail_url': thumbnail.get('url', ''),
            'banner_url': branding_settings.get('image', {}).get('bannerExternalUrl', ''),
            'keywords': branding_settings.get('channel', {}).get('keywords', ''),
        }

    def extract_channel_id_from_url(self, url: str) -> Optional[str]:
        """
        Extract channel ID or handle from YouTube URL.

        Supported formats:
        - youtube.com/channel/UC1234567890abcdef
        - youtube.com/@handle
        - youtube.com/c/channelname
        - youtube.com/user/username

        Args:
            url: YouTube channel URL

        Returns:
            Channel ID (starts with UC) or handle (without @) or None
        """
        if not url:
            return None

        for pattern in _CHANNEL_URL_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

        logger.warning("Unable to extract channel ID from URL: %s", url)
        return None

    def get_channel_stats(self, channel_id_or_username: str) -> Optional[Dict]:
        """
        Fetch channel statistics from YouTube Data API v3.

        Identifiers starting with ``UC`` are looked up by channel ID first.
        If that finds nothing, or the identifier does not start with ``UC``,
        it is tried as a handle (``forHandle``) and then as a legacy username
        (``forUsername``). The fallback matters because handles and usernames
        may themselves start with "UC".

        Args:
            channel_id_or_username: YouTube channel ID (UC...) or username/handle

        Returns:
            Dict with channel stats or None on error / when no channel is found:
            {
                'channel_id': str,
                'subscriber_count': int,
                'hidden_subscriber_count': bool,
                'view_count': int,
                'video_count': int,
                'channel_title': str,
                'channel_description': str,
                'custom_url': str,
                'published_at': str,
                'country': str,
                'thumbnail_url': str,
                'banner_url': str,
                'keywords': str
            }
        """
        if not channel_id_or_username:
            return None

        if channel_id_or_username.startswith('UC'):
            lookups = ('id', 'forHandle', 'forUsername')
        else:
            lookups = ('forHandle', 'forUsername')

        try:
            items = []
            for lookup in lookups:
                data = self._get_json('channels', {
                    'part': _CHANNEL_PARTS,
                    lookup: channel_id_or_username,
                })
                items = data.get('items') or []
                if items:
                    break

            if not items:
                logger.warning("No YouTube channel found for: %s", channel_id_or_username)
                return None

            result = self._parse_channel(items[0], channel_id_or_username)

            logger.info(
                "Fetched YouTube stats for %s: %s subscribers, %s videos",
                result['channel_title'],
                result['subscriber_count'],
                result['video_count'],
            )
            return result

        except requests.exceptions.HTTPError as e:
            # getattr rather than a truthiness test: a Response with an error
            # status evaluates to False, and e.response may also be None.
            status = getattr(e.response, 'status_code', 'unknown')
            body = getattr(e.response, 'text', '')
            logger.warning(
                "YouTube API HTTP error for %s: %s - %s",
                channel_id_or_username, status, self._redact(body),
            )
            return None
        except requests.exceptions.RequestException as e:
            logger.warning(
                "YouTube API request error for %s: %s",
                channel_id_or_username, self._redact(e),
            )
            return None
        except (AttributeError, KeyError, ValueError, TypeError) as e:
            logger.warning("YouTube API response parse error: %s", self._redact(e))
            return None

    def get_recent_videos(self, channel_id: str, max_results: int = 5) -> List[Dict]:
        """Fetch recent videos from a channel (costs 100 quota units).

        Args:
            channel_id: YouTube channel ID (UC...)
            max_results: Number of videos to fetch (max 50). Larger values
                are clamped to 50; zero or negative values return an empty
                list without calling the API.

        Returns:
            List of dicts with keys 'title', 'date' (first 10 characters of
            the publish timestamp) and 'video_id', or empty list on error.
        """
        if not channel_id or not channel_id.startswith('UC'):
            return []

        # None is passed through untouched so the API default still applies.
        if max_results is not None:
            if max_results <= 0:
                return []
            max_results = min(max_results, _MAX_SEARCH_RESULTS)

        try:
            data = self._get_json('search', {
                'part': 'snippet',
                'channelId': channel_id,
                'order': 'date',
                'type': 'video',
                'maxResults': max_results,
            })

            videos = []
            for item in data.get('items') or []:
                snippet = item.get('snippet', {})
                videos.append({
                    'title': snippet.get('title', ''),
                    'date': (snippet.get('publishedAt') or '')[:10],
                    'video_id': item.get('id', {}).get('videoId', ''),
                })
            return videos
        except requests.exceptions.RequestException as e:
            logger.warning("YouTube recent videos fetch failed: %s", self._redact(e))
            return []
        except (AttributeError, KeyError, ValueError, TypeError) as e:
            logger.warning("YouTube recent videos fetch failed: %s", self._redact(e))
            return []