nordabiz/crux_service.py

"""Chrome UX Report (CrUX) API Service.

Fetches field data (metrics collected from real Chrome users) for websites.
Complements the lab data from PageSpeed Insights with real-traffic metrics.

API: https://chromeuxreport.googleapis.com/v1/records:queryRecord
Free tier: 150 requests/minute
"""

import logging
import os
from urllib.parse import urlsplit

import requests

logger = logging.getLogger(__name__)


class CrUXService:
    """Service for Chrome UX Report API."""

    BASE_URL = 'https://chromeuxreport.googleapis.com/v1/records:queryRecord'

    def __init__(self, api_key: str = None):
        self.api_key = api_key or os.environ.get('GOOGLE_PLACES_API_KEY')
        if not self.api_key:
            logger.warning("CrUX API key not configured (GOOGLE_PLACES_API_KEY)")

    def get_field_data(self, url: str) -> dict | None:
        """Fetch CrUX field data for a URL.

        Args:
            url: Website URL (e.g., 'https://example.com')

        Returns:
            Dict with field metrics or None if no data available.
            Many small/local business sites won't have CrUX data.
        """
        if not self.api_key:
            return None

        try:
            # Try origin-level first (more likely to have data)
            response = requests.post(
                f"{self.BASE_URL}?key={self.api_key}",
                json={'origin': url.rstrip('/')},
                timeout=10
            )

            if response.status_code == 404:
                # No CrUX data for this origin (common for small sites)
                return None

            if response.status_code != 200:
                logger.warning(f"CrUX API error {response.status_code} for {url}")
                return None

            data = response.json()
            record = data.get('record', {})
            metrics = record.get('metrics', {})

            result = {}

            # Extract each metric's p75 value
            metric_mapping = {
                'largest_contentful_paint': 'crux_lcp_ms',
                'interaction_to_next_paint': 'crux_inp_ms',
                'cumulative_layout_shift': 'crux_cls',
                'first_contentful_paint': 'crux_fcp_ms',
                'time_to_first_byte': 'crux_ttfb_ms',
            }

            for api_name, our_name in metric_mapping.items():
                metric = metrics.get(api_name, {})
                percentiles = metric.get('percentiles', {})
                p75 = percentiles.get('p75')
                if p75 is not None:
                    # CLS is reported as decimal (e.g., 0.15), others in ms
                    if 'layout_shift' in api_name:
                        result[our_name] = round(float(p75), 3)
                    else:
                        result[our_name] = int(p75)

                # Also extract histogram category distribution
                histogram = metric.get('histogram', [])
                if histogram and len(histogram) >= 3:
                    total = sum(h.get('density', 0) for h in histogram)
                    if total > 0:
                        good_pct = round(histogram[0].get('density', 0) * 100, 1)
                        result[f'{our_name}_good_pct'] = good_pct

            # Collection period
            collection_period = record.get('collectionPeriod', {})
            if collection_period:
                last_date = collection_period.get('lastDate', {})
                if last_date:
                    result['crux_period_end'] = f"{last_date.get('year')}-{last_date.get('month'):02d}-{last_date.get('day'):02d}"

            return result if result else None

        except requests.exceptions.Timeout:
            logger.warning(f"CrUX API timeout for {url}")
            return None
        except Exception as e:
            logger.warning(f"CrUX API error for {url}: {e}")
            return None