Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
The site was added as a domain property in GSC, not URL prefix. _normalize_site_url() now tries sc-domain:example.com variant. Also added ctr/position to top_queries for template display. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
166 lines
5.8 KiB
Python
"""
|
|
Google Search Console API Client
|
|
=================================
|
|
|
|
Uses OAuth 2.0 to fetch search analytics data (clicks, impressions, CTR, positions).
|
|
|
|
API docs: https://developers.google.com/webmaster-tools/v3/searchanalytics
|
|
"""
|
|
|
|
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from urllib.parse import urlparse

import requests

logger = logging.getLogger(__name__)

class SearchConsoleService:
    """Google Search Console API client.

    Authenticates with an OAuth 2.0 bearer token and fetches search
    analytics data (clicks, impressions, CTR, average position) from the
    Webmasters v3 API.
    """

    BASE_URL = "https://www.googleapis.com/webmasters/v3"

    def __init__(self, access_token: str):
        """Initialize an authenticated HTTP session.

        Args:
            access_token: OAuth 2.0 access token with the webmasters scope.
        """
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json',
        })
        # BUG FIX: requests.Session has no `timeout` attribute — assigning
        # `session.timeout` is silently ignored, so every request ran with
        # no timeout and could hang forever. Store the value and pass it
        # explicitly on each call instead.
        self.timeout = 15

    def list_sites(self) -> List[Dict]:
        """List verified sites in Search Console.

        Returns:
            List of site-entry dicts (each containing 'siteUrl'), or []
            on any API/network failure (best-effort by design).
        """
        try:
            resp = self.session.get(f"{self.BASE_URL}/sites", timeout=self.timeout)
            resp.raise_for_status()
            return resp.json().get('siteEntry', [])
        except Exception as e:
            logger.error("Search Console list_sites failed: %s", e)
            return []

    def _normalize_site_url(self, url: str) -> Optional[str]:
        """Find the Search Console property matching *url*.

        Search Console stores properties in exact URL form — with/without a
        trailing slash, http/https, www/no-www — or as a domain property
        (``sc-domain:example.com``). Try common variants in order.

        Args:
            url: Site URL as configured locally (e.g. 'https://example.com').

        Returns:
            The matching property identifier, or None if no variant is
            registered in Search Console.
        """
        site_urls = [s.get('siteUrl', '') for s in self.list_sites()]

        # Direct match.
        if url in site_urls:
            return url

        # Build variants: trailing slash, scheme swap, www toggle.
        variants = [url]
        if not url.endswith('/'):
            variants.append(url + '/')
        if url.startswith('https://'):
            variants.append(url.replace('https://', 'http://'))
        if url.startswith('http://'):
            variants.append(url.replace('http://', 'https://'))
        # Toggle the www. prefix on every variant collected so far
        # (iterate a snapshot since we append while looping).
        for v in list(variants):
            if '://www.' in v:
                variants.append(v.replace('://www.', '://'))
            else:
                variants.append(v.replace('://', '://www.'))

        # Domain property variant (sc-domain:example.com).
        domain = urlparse(url).hostname or ''
        if domain.startswith('www.'):
            domain = domain[4:]
        if domain:
            variants.append(f'sc-domain:{domain}')

        for v in variants:
            if v in site_urls:
                return v

        logger.debug("No match for %s. Available sites: %s", url, site_urls)
        return None

    def _query(self, site: str, payload: Dict) -> List[Dict]:
        """POST a searchAnalytics query for *site* and return the result rows.

        Raises on HTTP/network errors so the caller decides how to handle
        them. Centralizes the endpoint URL that was previously duplicated
        three times.
        """
        resp = self.session.post(
            f"{self.BASE_URL}/sites/{requests.utils.quote(site, safe='')}/searchAnalytics/query",
            json=payload,
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return resp.json().get('rows', [])

    def get_search_analytics(self, site_url: str, days: int = 28) -> Dict:
        """Get search analytics for a site.

        Args:
            site_url: Site URL; matched against Search Console properties
                via :meth:`_normalize_site_url`.
            days: Length of the reporting window in days.

        Returns:
            Dict with keys: clicks, impressions, ctr (percent), position,
            period_days, and — when available — top_queries / top_pages.
            Empty dict if the site is unknown or the totals query fails.
        """
        normalized = self._normalize_site_url(site_url)
        if not normalized:
            logger.warning("Site %s not found in Search Console", site_url)
            return {}

        end_date = datetime.now() - timedelta(days=3)  # SC data has ~3 day delay
        start_date = end_date - timedelta(days=days)
        base_payload = {
            'startDate': start_date.strftime('%Y-%m-%d'),
            'endDate': end_date.strftime('%Y-%m-%d'),
        }

        try:
            # Totals: an empty dimensions list yields one aggregate row.
            rows = self._query(normalized, {**base_payload, 'dimensions': []})
            totals = rows[0] if rows else {}
            result = {
                'clicks': totals.get('clicks', 0),
                'impressions': totals.get('impressions', 0),
                'ctr': round(totals.get('ctr', 0) * 100, 2),  # fraction -> percent
                'position': round(totals.get('position', 0), 1),
                'period_days': days,
            }
        except Exception as e:
            logger.error("Search Console analytics failed for %s: %s", site_url, e)
            return {}

        # Top queries / top pages are best-effort: a failure here should not
        # discard the totals already collected (previously any exception in
        # the sub-queries threw away the whole result).
        try:
            result['top_queries'] = [
                {
                    'query': r['keys'][0],
                    'clicks': r.get('clicks', 0),
                    'impressions': r.get('impressions', 0),
                    'ctr': round(r.get('ctr', 0) * 100, 2),
                    'position': round(r.get('position', 0), 1),
                }
                for r in self._query(
                    normalized,
                    {**base_payload, 'dimensions': ['query'], 'rowLimit': 10},
                )
            ]
        except Exception as e:
            logger.debug("top_queries fetch failed for %s: %s", site_url, e)

        try:
            result['top_pages'] = [
                {
                    'page': r['keys'][0],
                    'clicks': r.get('clicks', 0),
                    'impressions': r.get('impressions', 0),
                }
                for r in self._query(
                    normalized,
                    {**base_payload, 'dimensions': ['page'], 'rowLimit': 10},
                )
            ]
        except Exception as e:
            logger.debug("top_pages fetch failed for %s: %s", site_url, e)

        return result
|