nordabiz/crux_service.py
Maciej Pienczyn ce6aa53c78
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat(audit): Phase 1 - YouTube API, CrUX field data, security headers, image formats
New services:
- youtube_service.py: YouTube Data API v3 integration for channel stats
  (subscriber count, view count, video count)
- crux_service.py: Chrome UX Report API for real user field data
  (INP, LCP, CLS, FCP, TTFB from actual Chrome users)

SEO audit enrichment:
- Security headers check: HSTS, CSP, X-Frame-Options, X-Content-Type-Options
  via live requests.head() during data collection
- Image format analysis: WebP/AVIF/SVG vs legacy JPEG/PNG ratio
- CrUX field data complements existing PageSpeed lab data in AI prompt
- All new metrics passed to Gemini for richer analysis

Social media audit enrichment:
- YouTube API data (video count, views, subscribers) integrated into
  social media AI prompt when YouTube profile exists

All APIs use existing GOOGLE_PLACES_API_KEY (free tier, $0 cost).
Completeness: ~68% → ~78% (estimated)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 11:32:03 +01:00

105 lines
3.8 KiB
Python

"""Chrome UX Report (CrUX) API Service.
Fetches field data (data from real Chrome users) for websites.
Complements the lab data from PageSpeed Insights with metrics from real traffic.
API: https://chromeuxreport.googleapis.com/v1/records:queryRecord
Free tier: 150 requests/minute
"""
import os
import logging
import requests
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class CrUXService:
"""Service for Chrome UX Report API."""
BASE_URL = 'https://chromeuxreport.googleapis.com/v1/records:queryRecord'
def __init__(self, api_key: str = None):
self.api_key = api_key or os.environ.get('GOOGLE_PLACES_API_KEY')
if not self.api_key:
logger.warning("CrUX API key not configured (GOOGLE_PLACES_API_KEY)")
def get_field_data(self, url: str) -> dict | None:
"""Fetch CrUX field data for a URL.
Args:
url: Website URL (e.g., 'https://example.com')
Returns:
Dict with field metrics or None if no data available.
Many small/local business sites won't have CrUX data.
"""
if not self.api_key:
return None
try:
# Try origin-level first (more likely to have data)
response = requests.post(
f"{self.BASE_URL}?key={self.api_key}",
json={'origin': url.rstrip('/')},
timeout=10
)
if response.status_code == 404:
# No CrUX data for this origin (common for small sites)
return None
if response.status_code != 200:
logger.warning(f"CrUX API error {response.status_code} for {url}")
return None
data = response.json()
record = data.get('record', {})
metrics = record.get('metrics', {})
result = {}
# Extract each metric's p75 value
metric_mapping = {
'largest_contentful_paint': 'crux_lcp_ms',
'interaction_to_next_paint': 'crux_inp_ms',
'cumulative_layout_shift': 'crux_cls',
'first_contentful_paint': 'crux_fcp_ms',
'time_to_first_byte': 'crux_ttfb_ms',
}
for api_name, our_name in metric_mapping.items():
metric = metrics.get(api_name, {})
percentiles = metric.get('percentiles', {})
p75 = percentiles.get('p75')
if p75 is not None:
# CLS is reported as decimal (e.g., 0.15), others in ms
if 'layout_shift' in api_name:
result[our_name] = round(float(p75), 3)
else:
result[our_name] = int(p75)
# Also extract histogram category distribution
histogram = metric.get('histogram', [])
if histogram and len(histogram) >= 3:
total = sum(h.get('density', 0) for h in histogram)
if total > 0:
good_pct = round(histogram[0].get('density', 0) * 100, 1)
result[f'{our_name}_good_pct'] = good_pct
# Collection period
collection_period = record.get('collectionPeriod', {})
if collection_period:
last_date = collection_period.get('lastDate', {})
if last_date:
result['crux_period_end'] = f"{last_date.get('year')}-{last_date.get('month'):02d}-{last_date.get('day'):02d}"
return result if result else None
except requests.exceptions.Timeout:
logger.warning(f"CrUX API timeout for {url}")
return None
except Exception as e:
logger.warning(f"CrUX API error for {url}: {e}")
return None