#!/usr/bin/env python3
"""
UptimeRobot Sync
================

Cron job (0 * * * *) - synchronizes data from the UptimeRobot API every hour.
Fetches response times and up/down logs, and correlates incidents with
internal_health_logs.

Usage:
    0 * * * * cd /var/www/nordabiznes && DATABASE_URL=$(grep DATABASE_URL .env | cut -d'=' -f2) UPTIMEROBOT_API_KEY=$(grep UPTIMEROBOT_API_KEY .env | cut -d'=' -f2) /var/www/nordabiznes/venv/bin/python3 scripts/uptimerobot_sync.py

Required env:
    UPTIMEROBOT_API_KEY - API key from UptimeRobot (Main API Key)
"""

import os
import sys
import json
import urllib.request
import urllib.error
from datetime import datetime, timedelta

# Make the project root importable when the script is run from scripts/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from database import (
    SessionLocal,
    UptimeMonitor,
    UptimeCheck,
    UptimeIncident,
    InternalHealthLog
)

API_KEY = os.environ.get('UPTIMEROBOT_API_KEY', '')
API_BASE = 'https://api.uptimerobot.com/v2'
RETENTION_DAYS = 90

# UptimeRobot status codes
UR_STATUS = {
    0: 'paused',
    1: 'not_checked',
    2: 'up',
    8: 'seems_down',
    9: 'down',
}


def api_request(endpoint, extra_params=None):
    """Send a POST request to the UptimeRobot API v2.

    Args:
        endpoint: API method name appended to API_BASE (e.g. 'getMonitors').
        extra_params: optional dict merged into the base parameters
            (api_key, format).

    Returns:
        The decoded JSON response, or None when the request or the decoding
        fails (the error is printed to stderr).
    """
    params = {
        'api_key': API_KEY,
        'format': 'json',
    }
    if extra_params:
        params.update(extra_params)

    data = json.dumps(params).encode('utf-8')
    req = urllib.request.Request(
        f'{API_BASE}/{endpoint}',
        data=data,
        headers={'Content-Type': 'application/json'},
        method='POST'
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode('utf-8'))
    except Exception as e:
        # Best effort: a failed call is reported and signalled with None.
        print(f"API error ({endpoint}): {e}", file=sys.stderr)
        return None


def sync_monitors(db):
    """Synchronize the monitor list from UptimeRobot into the database.

    Creates missing UptimeMonitor rows, refreshes name/url of existing ones,
    then syncs each monitor's response times and up/down logs. Commits the
    session at the end.

    Args:
        db: database session.

    Returns:
        List of synced UptimeMonitor objects; an empty list when the API
        call failed or did not return stat == 'ok'.
    """
    result = api_request('getMonitors', {
        'response_times': 1,
        'response_times_limit': 1,
        'logs': 1,
        'logs_limit': 50,
        'all_time_uptime_ratio': 1,
        'custom_uptime_ratios': '1-7-30-90',
    })

    if not result or result.get('stat') != 'ok':
        print(f"Błąd API getMonitors: {result}", file=sys.stderr)
        return []

    monitors = result.get('monitors', [])
    synced = []

    for m in monitors:
        ur_id = m['id']

        # Upsert monitor
        monitor = db.query(UptimeMonitor).filter_by(uptimerobot_id=ur_id).first()
        if not monitor:
            monitor = UptimeMonitor(
                uptimerobot_id=ur_id,
                name=m.get('friendly_name', ''),
                url=m.get('url', ''),
                check_interval_sec=m.get('interval', 300),
                created_at=datetime.now()
            )
            db.add(monitor)
            # Flush so monitor.id is available to the child rows below.
            db.flush()
            print(f"Nowy monitor: {monitor.name} ({monitor.url})")
        else:
            monitor.name = m.get('friendly_name', monitor.name)
            monitor.url = m.get('url', monitor.url)

        # Sync response times
        sync_response_times(db, monitor, m.get('response_times', []))

        # Sync logs (up/down events) -> incidents
        sync_logs(db, monitor, m.get('logs', []))

        synced.append(monitor)

    db.commit()
    return synced


def sync_response_times(db, monitor, response_times):
    """Store response-time samples as UptimeCheck rows.

    A sample is skipped when a check for the same monitor and timestamp
    already exists, so repeated runs do not create duplicates.

    Args:
        db: database session.
        monitor: UptimeMonitor the samples belong to.
        response_times: list of dicts with 'datetime' (Unix timestamp) and
            optionally 'value' (stored as response_time_ms).
    """
    if not response_times:
        return

    for rt in response_times:
        ts = datetime.fromtimestamp(rt['datetime'])

        # Check whether it already exists (avoid duplicates)
        exists = db.query(UptimeCheck).filter_by(
            monitor_id=monitor.id,
            checked_at=ts
        ).first()

        if not exists:
            check = UptimeCheck(
                monitor_id=monitor.id,
                checked_at=ts,
                status='up',  # response time = was up
                response_time_ms=rt.get('value', 0),
            )
            db.add(check)


def _add_status_check(db, monitor, ts, status):
    """Add a status-only UptimeCheck unless an identical one is already stored.

    The same log entries are returned by the API on every run, so without
    this guard each run would insert the same check again.
    """
    already_stored = db.query(UptimeCheck).filter_by(
        monitor_id=monitor.id,
        checked_at=ts,
        status=status,
    ).first()
    if already_stored:
        return

    db.add(UptimeCheck(
        monitor_id=monitor.id,
        checked_at=ts,
        status=status,
        response_time_ms=None,
    ))


def sync_logs(db, monitor, logs):
    """Turn UptimeRobot up/down log entries into incidents and checks.

    Log type 1 (DOWN) creates an UptimeIncident plus a 'down' check, or
    closes an already stored open incident once a duration is reported.
    Log type 2 (UP, recovery) creates an 'up' check. Other types are ignored.
    Checks are only inserted when not already stored, so hourly re-reads of
    the same log entries stay idempotent.

    Args:
        db: database session.
        monitor: UptimeMonitor the logs belong to.
        logs: list of dicts with 'type', 'datetime' (Unix timestamp) and
            'duration' (seconds).
    """
    if not logs:
        return

    for log in logs:
        log_type = log.get('type', 0)
        ts = datetime.fromtimestamp(log['datetime'])
        # Treat a missing or null duration as 0 (incident not finished).
        duration = log.get('duration') or 0

        if log_type == 1:  # DOWN
            # Check whether the incident already exists
            existing = db.query(UptimeIncident).filter(
                UptimeIncident.monitor_id == monitor.id,
                UptimeIncident.started_at == ts
            ).first()

            if existing:
                # Update it if it has ended in the meantime
                if duration > 0 and not existing.ended_at:
                    existing.ended_at = ts + timedelta(seconds=duration)
                    existing.duration_seconds = duration
                    existing.auto_resolved = True
                    # Correlate the cause
                    existing.cause = correlate_cause(db, ts, duration)
                continue

            ended_at = ts + timedelta(seconds=duration) if duration > 0 else None

            incident = UptimeIncident(
                monitor_id=monitor.id,
                started_at=ts,
                ended_at=ended_at,
                duration_seconds=duration if duration > 0 else None,
                cause=correlate_cause(db, ts, duration) if duration > 0 else 'unknown',
                auto_resolved=duration > 0
            )
            db.add(incident)

            # Add the DOWN check
            _add_status_check(db, monitor, ts, 'down')

        elif log_type == 2:  # UP (recovery)
            # Add the UP check
            _add_status_check(db, monitor, ts, 'up')


def correlate_cause(db, incident_start, duration_seconds):
    """Correlate an incident with the internal health logs.

    Checks whether the server was working during the incident window
    (extended by a 5-minute margin on both sides).

    Logic:
    - all internal_health_logs in the window have app_ok=True  -> 'isp'
    - any of them has app_ok falsy (none or only some ok)      -> 'server'
    - no logs in the window                                    -> 'infra'

    Args:
        db: database session.
        incident_start: datetime at which the incident started.
        duration_seconds: incident length in seconds.

    Returns:
        'unknown' when the duration is missing or not positive, otherwise
        one of 'isp', 'server', 'infra'.
    """
    if not duration_seconds or duration_seconds <= 0:
        return 'unknown'

    incident_end = incident_start + timedelta(seconds=duration_seconds)

    # Look for health logs in the incident window (with a 5-minute margin)
    margin = timedelta(minutes=5)
    health_logs = db.query(InternalHealthLog).filter(
        InternalHealthLog.checked_at >= incident_start - margin,
        InternalHealthLog.checked_at <= incident_end + margin
    ).all()

    if not health_logs:
        # No logs = the whole infrastructure went down (e.g. power, FortiGate)
        return 'infra'

    if all(h.app_ok for h in health_logs):
        # Server was working normally -> internet problem (ISP)
        return 'isp'

    # App was down for the whole window or part of it -> server problem
    return 'server'


def cleanup_old_checks(db):
    """Delete uptime checks older than RETENTION_DAYS days.

    Does not commit; the caller is responsible for committing the session.

    Args:
        db: database session.
    """
    cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
    deleted = db.query(UptimeCheck).filter(
        UptimeCheck.checked_at < cutoff
    ).delete()
    if deleted:
        print(f"Usunięto {deleted} starych uptime checks (>{RETENTION_DAYS} dni)")


def main():
    """Run one synchronization pass; exit with status 1 on failure."""
    if not API_KEY:
        print("BŁĄD: Brak UPTIMEROBOT_API_KEY w zmiennych środowiskowych", file=sys.stderr)
        print("Ustaw klucz API w .env: UPTIMEROBOT_API_KEY=ur...", file=sys.stderr)
        sys.exit(1)

    db = SessionLocal()
    try:
        print(f"[{datetime.now()}] Synchronizacja UptimeRobot...")

        monitors = sync_monitors(db)
        print(f"Zsynchronizowano {len(monitors)} monitorów")

        # Cleanup once a day (at 4:00)
        now = datetime.now()
        if now.hour == 4 and now.minute < 5:
            cleanup_old_checks(db)
            db.commit()

        print(f"[{datetime.now()}] Synchronizacja zakończona")

    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        db.rollback()
        sys.exit(1)
    finally:
        db.close()


if __name__ == '__main__':
    main()