Some checks are pending
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
External monitoring via UptimeRobot (free tier) with internal health logger to differentiate ISP outages from server issues. Includes: - 4 new DB models (UptimeMonitor, UptimeCheck, UptimeIncident, InternalHealthLog) - Migration 082 with tables, indexes, and permissions - Internal health logger script (cron */5 min) - UptimeRobot sync script (cron hourly) with automatic cause correlation - Admin dashboard /admin/uptime with uptime %, response time charts, incident log with editable notes/causes, pattern analysis, monthly report - SLA comparison table (99.9%/99.5%/99%) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
164 lines
4.7 KiB
Python
164 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Internal Health Logger
======================
Cron job (*/5 * * * *) - records the server state every 5 minutes.
Makes it possible to tell an ISP outage apart from a server failure.

Usage:
    */5 * * * * cd /var/www/nordabiznes && DATABASE_URL=$(grep DATABASE_URL .env | cut -d'=' -f2) /var/www/nordabiznes/venv/bin/python3 scripts/internal_health_logger.py

Note: `cut -d'=' -f2` keeps only the text up to the next '='; use `-f2-`
if the DATABASE_URL value itself contains '=' (e.g. query parameters).
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import urllib.request
|
|
import urllib.error
|
|
from datetime import datetime, timedelta
|
|
|
|
# Setup path
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
from database import SessionLocal, InternalHealthLog
|
|
|
|
# Local endpoint probed to decide whether the Flask app is responding.
HEALTH_URL = "http://localhost:5000/health"

# Health-log rows older than this many days are removed by the cleanup step.
RETENTION_DAYS = 90
|
|
|
|
|
|
def check_app_health():
    """Report whether the Flask application answers on its /health endpoint.

    Sends a GET request to HEALTH_URL with a 5 second timeout.

    Returns:
        True when the response status is exactly 200; False for any other
        status or when any exception occurs (nothing is raised to the caller).
    """
    try:
        request = urllib.request.Request(HEALTH_URL, method='GET')
        response = urllib.request.urlopen(request, timeout=5)
        with response:
            is_ok = response.status == 200
        return is_ok
    except Exception:
        # Best-effort probe: every failure mode counts as "app not healthy".
        return False
|
|
|
|
|
|
def check_db_health():
    """Report whether the database (PostgreSQL) accepts a trivial query.

    Opens a fresh session from SessionLocal, runs ``SELECT 1`` and closes the
    session again.

    Returns:
        True when the query and the session close both succeed; False when
        anything fails (import, session creation, query or close). Nothing is
        raised to the caller.
    """
    try:
        from sqlalchemy import text

        db = SessionLocal()
        try:
            db.execute(text('SELECT 1'))
        finally:
            # Always release the session, including when the query fails;
            # previously a failing execute() skipped close() and leaked it.
            db.close()
        return True
    except Exception:
        # Best-effort probe: every failure mode counts as "database not healthy".
        return False
|
|
|
|
|
|
def get_cpu_percent(top_output=None):
    """Return CPU utilisation in percent, derived from the idle value of ``top``.

    The value is computed as ``100 - idle`` from the first summary line that
    contains ``Cpu``.

    Args:
        top_output: Optional text already captured from ``top -bn1``. When
            omitted (the default), ``top -bn1`` is executed with a 10 second
            timeout and ``LC_ALL=C`` so numbers use a '.' decimal separator.

    Returns:
        The utilisation rounded to two decimals, or None when the idle value
        cannot be found or any error occurs (nothing is raised).
    """
    # Local import: the module's import block does not provide `re`.
    import re

    try:
        if top_output is None:
            result = subprocess.run(
                ['top', '-bn1'],
                capture_output=True, text=True, timeout=10,
                env={**os.environ, 'LC_ALL': 'C'},
            )
            top_output = result.stdout

        for line in top_output.splitlines():
            if 'Cpu' not in line:
                continue
            # Matches the number directly in front of "id" in all of:
            #   "... 96.2 id, ..."   (current layout)
            #   "... ni,100.0 id"    (fully idle: no space after the comma,
            #                         which broke whitespace-based splitting)
            #   "... 96.2%id, ..."   (older layout)
            # A ',' decimal separator is accepted for pre-captured output.
            match = re.search(r'(\d+(?:[.,]\d+)?)\s*%?\s*id\b', line)
            if match:
                idle = float(match.group(1).replace(',', '.'))
                return round(100.0 - idle, 2)
        return None
    except Exception:
        # Best-effort metric: a missing binary, timeout or parse error -> None.
        return None
|
|
|
|
|
|
def get_ram_percent():
    """Return RAM utilisation in percent based on the output of ``free -m``.

    Uses the first output line starting with ``Mem:`` and computes
    ``(1 - available / total) * 100`` from its 2nd and 7th fields.

    Returns:
        The percentage rounded to two decimals, or None when no ``Mem:`` line
        is present or any error occurs (nothing is raised).
    """
    try:
        completed = subprocess.run(
            ['free', '-m'],
            capture_output=True, text=True, timeout=5
        )
        mem_row = next(
            (row for row in completed.stdout.split('\n') if row.startswith('Mem:')),
            None,
        )
        if mem_row is None:
            return None

        columns = mem_row.split()
        total_mb = float(columns[1])
        available_mb = float(columns[6])  # "available" column
        return round((1 - available_mb / total_mb) * 100, 2)
    except Exception:
        # Best-effort metric: missing binary, short row or zero total -> None.
        return None
|
|
|
|
|
|
def get_disk_percent():
    """Return the usage percentage of the root filesystem from ``df -h /``.

    Reads the second line of the output and converts its first field ending
    in ``%`` to a float.

    Returns:
        The percentage as a float, or None when the output has fewer than two
        lines, no field ends in ``%``, or any error occurs (nothing is raised).
    """
    try:
        completed = subprocess.run(
            ['df', '-h', '/'],
            capture_output=True, text=True, timeout=5
        )
        rows = completed.stdout.strip().split('\n')
        if len(rows) < 2:
            return None

        # Expected layout: Filesystem Size Used Avail Use% Mounted
        usage_field = next(
            (field for field in rows[1].split() if field.endswith('%')),
            None,
        )
        if usage_field is None:
            return None
        return float(usage_field.rstrip('%'))
    except Exception:
        # Best-effort metric: missing binary, timeout or parse error -> None.
        return None
|
|
|
|
|
|
def get_gunicorn_workers():
    """Count running processes matched by ``pgrep -c gunicorn``.

    Returns:
        The count printed by pgrep when it exits with status 0; otherwise 0.
        Any error (missing binary, timeout, unparsable output) also yields 0.
    """
    try:
        completed = subprocess.run(
            ['pgrep', '-c', 'gunicorn'],
            capture_output=True, text=True, timeout=5
        )
        if completed.returncode != 0:
            # A non-zero exit status is reported as zero processes.
            return 0
        return int(completed.stdout.strip())
    except Exception:
        return 0
|
|
|
|
|
|
def cleanup_old_logs(db):
    """Delete InternalHealthLog rows older than RETENTION_DAYS.

    Args:
        db: Session used to run the delete; it is committed here only when at
            least one row was deleted.

    Prints a summary line when rows were removed; otherwise does nothing more.
    """
    oldest_kept = datetime.now() - timedelta(days=RETENTION_DAYS)
    stale_logs = db.query(InternalHealthLog).filter(
        InternalHealthLog.checked_at < oldest_kept
    )
    removed = stale_logs.delete()
    if not removed:
        return

    db.commit()
    print(f"Usunięto {removed} starych logów health (>{RETENTION_DAYS} dni)")
|
|
|
|
|
|
def main():
    """Record one health snapshot and occasionally prune old log rows.

    Runs every probe, stores the results as a single InternalHealthLog row and
    prints a one-line summary. When the current time is between 03:00 and
    03:04, old rows are removed via cleanup_old_logs(). Any exception is
    printed to stderr and the session is rolled back; the session is always
    closed at the end.
    """
    session = SessionLocal()
    try:
        # Built in probe order: timestamp first, then each check in turn.
        readings = {
            'checked_at': datetime.now(),
            'app_ok': check_app_health(),
            'db_ok': check_db_health(),
            'cpu_percent': get_cpu_percent(),
            'ram_percent': get_ram_percent(),
            'disk_percent': get_disk_percent(),
            'gunicorn_workers': get_gunicorn_workers(),
        }
        entry = InternalHealthLog(**readings)
        session.add(entry)
        session.commit()

        # Periodic cleanup: only in the first five minutes after 03:00.
        current = datetime.now()
        in_cleanup_window = current.hour == 3 and current.minute < 5
        if in_cleanup_window:
            cleanup_old_logs(session)

        summary = (
            f"[{entry.checked_at}] app={entry.app_ok} db={entry.db_ok} "
            f"cpu={entry.cpu_percent}% ram={entry.ram_percent}% disk={entry.disk_percent}% "
            f"workers={entry.gunicorn_workers}"
        )
        print(summary)

    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        session.rollback()
    finally:
        session.close()
|
|
|
|
|
|
# Run the logger only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|