feat: add uptime monitoring dashboard with UptimeRobot integration
Some checks are pending
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

External monitoring via UptimeRobot (free tier) with internal health
logger to differentiate ISP outages from server issues. Includes:
- 4 new DB models (UptimeMonitor, UptimeCheck, UptimeIncident, InternalHealthLog)
- Migration 082 with tables, indexes, and permissions
- Internal health logger script (cron */5 min)
- UptimeRobot sync script (cron hourly) with automatic cause correlation
- Admin dashboard /admin/uptime with uptime %, response time charts,
  incident log with editable notes/causes, pattern analysis, monthly report
- SLA comparison table (99.9%/99.5%/99%)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-15 07:53:05 +01:00
parent 63968c3c35
commit 9540f7f2e0
8 changed files with 1755 additions and 1 deletions

View File

@ -18,7 +18,8 @@ from sqlalchemy import func, text
from . import bp from . import bp
from database import ( from database import (
SessionLocal, Company, User, AuditLog, SecurityAlert, SessionLocal, Company, User, AuditLog, SecurityAlert,
CompanySocialMedia, CompanyWebsiteAnalysis, SystemRole CompanySocialMedia, CompanyWebsiteAnalysis, SystemRole,
UptimeMonitor, UptimeCheck, UptimeIncident, InternalHealthLog
) )
from utils.decorators import role_required from utils.decorators import role_required
@ -786,3 +787,227 @@ def api_admin_health():
'health_percent': round(100 * ok_count / len(results), 1) 'health_percent': round(100 * ok_count / len(results), 1)
} }
}) })
# ============================================================
# UPTIME MONITORING
# ============================================================
def _get_uptime_data(db, days=30):
    """Assemble everything the uptime dashboard needs into one dict.

    Args:
        db: SQLAlchemy session.
        days: window in days for the response-time chart series
            (the fixed 24h/7d/30d/90d uptime summaries ignore it).

    Returns:
        dict with keys: monitor, has_data, current_status, last_checked,
        last_response_time, uptime, response_times, avg_response_time,
        incidents, patterns, monthly_report, sla_context.
        With no active monitor: {'monitors': [], 'has_data': False}.
    """
    now = datetime.now()
    data = {}
    # Active monitors — the dashboard shows only the first (primary) one.
    monitors = db.query(UptimeMonitor).filter_by(is_active=True).all()
    if not monitors:
        return {'monitors': [], 'has_data': False}
    monitor = monitors[0]  # primary monitor
    data['monitor'] = {
        'name': monitor.name,
        'url': monitor.url,
        'id': monitor.id
    }
    data['has_data'] = True
    # Latest check drives the current UP/DOWN status badge.
    last_check = db.query(UptimeCheck).filter_by(
        monitor_id=monitor.id
    ).order_by(UptimeCheck.checked_at.desc()).first()
    if last_check:
        data['current_status'] = last_check.status
        data['last_checked'] = last_check.checked_at.strftime('%Y-%m-%d %H:%M')
        data['last_response_time'] = last_check.response_time_ms
    else:
        data['current_status'] = 'unknown'
        data['last_checked'] = None
        data['last_response_time'] = None
    # Uptime % for the fixed reporting periods shown as summary cards.
    data['uptime'] = {}
    for period_name, period_days in [('24h', 1), ('7d', 7), ('30d', 30), ('90d', 90)]:
        cutoff = now - timedelta(days=period_days)
        total = db.query(UptimeCheck).filter(
            UptimeCheck.monitor_id == monitor.id,
            UptimeCheck.checked_at >= cutoff
        ).count()
        up = db.query(UptimeCheck).filter(
            UptimeCheck.monitor_id == monitor.id,
            UptimeCheck.checked_at >= cutoff,
            UptimeCheck.status == 'up'
        ).count()
        # None (not 0) when there are no checks, so the UI can render "n/a".
        pct = round(100 * up / total, 3) if total > 0 else None
        data['uptime'][period_name] = {
            'percent': pct,
            'total_checks': total,
            'up_checks': up,
            'down_checks': total - up if total else 0
        }
    # Response-time series for the chart (last `days` days).
    cutoff = now - timedelta(days=days)
    response_times = db.query(
        UptimeCheck.checked_at,
        UptimeCheck.response_time_ms
    ).filter(
        UptimeCheck.monitor_id == monitor.id,
        UptimeCheck.checked_at >= cutoff,
        UptimeCheck.response_time_ms.isnot(None)
    ).order_by(UptimeCheck.checked_at).all()
    data['response_times'] = [
        {'time': rt.checked_at.strftime('%Y-%m-%d %H:%M'), 'ms': rt.response_time_ms}
        for rt in response_times
    ]
    # Mean response time over the same window.
    if response_times:
        avg_rt = sum(rt.response_time_ms for rt in response_times) / len(response_times)
        data['avg_response_time'] = round(avg_rt)
    else:
        data['avg_response_time'] = None
    # Most recent incidents for the editable incident table.
    incidents = db.query(UptimeIncident).filter(
        UptimeIncident.monitor_id == monitor.id
    ).order_by(UptimeIncident.started_at.desc()).limit(50).all()
    data['incidents'] = [{
        'id': inc.id,
        'started_at': inc.started_at.strftime('%Y-%m-%d %H:%M'),
        'ended_at': inc.ended_at.strftime('%Y-%m-%d %H:%M') if inc.ended_at else None,
        'duration_seconds': inc.duration_seconds,
        # 'trwa...' is Polish for "ongoing" (incident has no duration yet).
        'duration_human': _format_duration(inc.duration_seconds) if inc.duration_seconds else 'trwa...',
        'cause': inc.cause,
        'cause_label': {'isp': 'ISP (Chopin)', 'server': 'Serwer', 'infra': 'Infrastruktura', 'unknown': 'Nieznana'}.get(inc.cause, inc.cause),
        'notes': inc.notes or ''
    } for inc in incidents]
    # Pattern analysis — outages grouped by hour of day and day of week.
    all_incidents = db.query(UptimeIncident).filter(
        UptimeIncident.monitor_id == monitor.id
    ).all()
    hour_counts = [0] * 24
    dow_counts = [0] * 7  # 0=Mon .. 6=Sun (datetime.weekday())
    cause_counts = {'isp': 0, 'server': 0, 'infra': 0, 'unknown': 0}
    for inc in all_incidents:
        hour_counts[inc.started_at.hour] += 1
        dow_counts[inc.started_at.weekday()] += 1
        # .get() tolerates cause values outside the four pre-seeded labels.
        cause_counts[inc.cause] = cause_counts.get(inc.cause, 0) + 1
    data['patterns'] = {
        'by_hour': hour_counts,
        'by_dow': dow_counts,
        'by_cause': cause_counts
    }
    # Monthly report (current calendar month so far).
    month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    month_incidents = [i for i in all_incidents if i.started_at >= month_start]
    month_downtime = sum(i.duration_seconds or 0 for i in month_incidents)
    # Uptime % is measured against the elapsed part of the month, not the
    # full month; "or 1" avoids division by zero on the 1st.
    days_in_month = (now - month_start).days or 1
    month_total_seconds = days_in_month * 86400
    month_uptime_pct = round(100 * (1 - month_downtime / month_total_seconds), 3) if month_total_seconds > 0 else 100
    # Previous calendar month, for the trend comparison.
    prev_month_end = month_start - timedelta(seconds=1)
    prev_month_start = prev_month_end.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    prev_month_incidents = [i for i in all_incidents if prev_month_start <= i.started_at < month_start]
    prev_month_downtime = sum(i.duration_seconds or 0 for i in prev_month_incidents)
    data['monthly_report'] = {
        'month': now.strftime('%B %Y'),
        'uptime_pct': month_uptime_pct,
        'total_downtime_seconds': month_downtime,
        'total_downtime_human': _format_duration(month_downtime),
        'incidents_count': len(month_incidents),
        'longest_incident': _format_duration(max((i.duration_seconds or 0 for i in month_incidents), default=0)),
        'prev_month': prev_month_end.strftime('%B %Y'),
        'prev_downtime_seconds': prev_month_downtime,
        'prev_downtime_human': _format_duration(prev_month_downtime),
        'prev_incidents_count': len(prev_month_incidents),
        'trend': 'better' if month_downtime < prev_month_downtime else ('worse' if month_downtime > prev_month_downtime else 'same')
    }
    # Static SLA context: max allowed downtime per tier, for comparison.
    data['sla_context'] = {
        '99.9': {'max_downtime_month': '43 min', 'max_downtime_year': '8h 46min'},
        '99.5': {'max_downtime_month': '3h 36min', 'max_downtime_year': '1d 19h'},
        '99.0': {'max_downtime_month': '7h 18min', 'max_downtime_year': '3d 15h'},
    }
    return data
def _format_duration(seconds):
"""Formatuj sekundy na czytelny tekst"""
if not seconds or seconds <= 0:
return '0s'
if seconds < 60:
return f'{seconds}s'
if seconds < 3600:
m = seconds // 60
s = seconds % 60
return f'{m}min {s}s' if s else f'{m}min'
h = seconds // 3600
m = (seconds % 3600) // 60
return f'{h}h {m}min' if m else f'{h}h'
@bp.route('/uptime')
@login_required
@role_required(SystemRole.OFFICE_MANAGER)
def admin_uptime():
    """Render the uptime monitoring dashboard page."""
    session = SessionLocal()
    try:
        return render_template(
            'admin/uptime_dashboard.html',
            data=_get_uptime_data(session, days=30)
        )
    finally:
        session.close()
@bp.route('/api/uptime')
@login_required
@role_required(SystemRole.OFFICE_MANAGER)
def api_admin_uptime():
    """JSON endpoint backing the dashboard's auto-refresh."""
    session = SessionLocal()
    try:
        requested_days = request.args.get('days', 30, type=int)
        # Cap the chart window at 90 days regardless of what was requested.
        payload = _get_uptime_data(session, days=min(requested_days, 90))
        payload['timestamp'] = datetime.now().isoformat()
        payload['success'] = True
        return jsonify(payload)
    finally:
        session.close()
@bp.route('/api/uptime/incident/<int:incident_id>/notes', methods=['POST'])
@login_required
@role_required(SystemRole.OFFICE_MANAGER)
def api_update_incident_notes(incident_id):
    """Update the admin notes and/or cause of one uptime incident."""
    session = SessionLocal()
    try:
        incident = session.query(UptimeIncident).get(incident_id)
        if incident is None:
            return jsonify({'success': False, 'error': 'Incident not found'}), 404
        payload = request.get_json() or {}
        if 'notes' in payload:
            incident.notes = payload['notes']
        # Only the four known cause values are accepted; anything else is ignored.
        if payload.get('cause') in ('isp', 'server', 'infra', 'unknown'):
            incident.cause = payload['cause']
        session.commit()
        return jsonify({'success': True})
    except Exception as e:
        session.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        session.close()

View File

@ -5589,6 +5589,78 @@ class PortalSEOAudit(Base):
return f'<PortalSEOAudit {self.id} {self.audited_at} perf={self.pagespeed_performance}>' return f'<PortalSEOAudit {self.id} {self.audited_at} perf={self.pagespeed_performance}>'
# ============================================================
# UPTIME MONITORING
# ============================================================
class UptimeMonitor(Base):
    """A monitored endpoint mirrored from UptimeRobot.

    Rows are upserted by scripts/uptimerobot_sync.py from the
    UptimeRobot API; uptimerobot_id is the remote monitor's id.
    """
    __tablename__ = 'uptime_monitors'
    id = Column(Integer, primary_key=True)
    # Monitor id as assigned by UptimeRobot (unique remote key for upserts).
    uptimerobot_id = Column(Integer, unique=True, nullable=False)
    name = Column(String(200), nullable=False)
    url = Column(String(500), nullable=False)
    # Check interval in seconds (300 = every 5 minutes).
    check_interval_sec = Column(Integer, default=300)
    is_active = Column(Boolean, default=True)
    # NOTE(review): datetime.now gives naive local time — consistent with
    # the rest of this module.
    created_at = Column(DateTime, default=datetime.now)
    checks = relationship('UptimeCheck', backref='monitor', lazy='dynamic')
    incidents = relationship('UptimeIncident', backref='monitor', lazy='dynamic')

    def __repr__(self):
        return f'<UptimeMonitor {self.name} ({self.url})>'
class UptimeCheck(Base):
    """A single availability sample, synced hourly from UptimeRobot."""
    __tablename__ = 'uptime_checks'
    id = Column(Integer, primary_key=True)
    monitor_id = Column(Integer, ForeignKey('uptime_monitors.id'), nullable=False, index=True)
    checked_at = Column(DateTime, nullable=False, index=True)
    # 'up', 'down' or 'paused'.
    status = Column(String(20), nullable=False)
    # Response time in milliseconds; NULL on rows created from up/down events.
    response_time_ms = Column(Integer)
    # HTTP status code, when available.
    status_code = Column(Integer)

    def __repr__(self):
        return f'<UptimeCheck {self.checked_at} {self.status}>'
class UptimeIncident(Base):
    """A downtime period, with an automatically correlated root cause."""
    __tablename__ = 'uptime_incidents'
    id = Column(Integer, primary_key=True)
    monitor_id = Column(Integer, ForeignKey('uptime_monitors.id'), nullable=False, index=True)
    started_at = Column(DateTime, nullable=False, index=True)
    # NULL while the incident is still ongoing.
    ended_at = Column(DateTime)
    duration_seconds = Column(Integer)
    # 'isp', 'server', 'infra' or 'unknown' — set by the sync script's
    # correlation against internal_health_logs; editable from the dashboard.
    cause = Column(String(20), default='unknown')
    # Free-form admin notes, editable from the dashboard.
    notes = Column(Text)
    # True when the sync script closed the incident from UptimeRobot data.
    auto_resolved = Column(Boolean, default=False)

    def __repr__(self):
        return f'<UptimeIncident {self.started_at} cause={self.cause} duration={self.duration_seconds}s>'
class InternalHealthLog(Base):
    """Server-side health snapshot written by cron every 5 minutes.

    Used to tell ISP outages (server fine but unreachable from outside)
    apart from genuine server failures.
    """
    __tablename__ = 'internal_health_logs'
    id = Column(Integer, primary_key=True)
    checked_at = Column(DateTime, nullable=False, default=datetime.now, index=True)
    # Flask /health endpoint responded with HTTP 200.
    app_ok = Column(Boolean, nullable=False)
    # PostgreSQL answered a trivial query.
    db_ok = Column(Boolean, nullable=False)
    cpu_percent = Column(Numeric(5, 2))
    ram_percent = Column(Numeric(5, 2))
    disk_percent = Column(Numeric(5, 2))
    # Number of gunicorn processes found by pgrep.
    gunicorn_workers = Column(Integer)

    def __repr__(self):
        return f'<InternalHealthLog {self.checked_at} app={self.app_ok} db={self.db_ok}>'
# ============================================================ # ============================================================
# DATABASE INITIALIZATION # DATABASE INITIALIZATION
# ============================================================ # ============================================================

View File

@ -0,0 +1,67 @@
-- Migration 082: Uptime Monitoring
-- Tables for external availability monitoring of the portal (UptimeRobot)
-- and for the internal server health logger.

-- UptimeRobot monitor configuration (mirrored by scripts/uptimerobot_sync.py)
CREATE TABLE IF NOT EXISTS uptime_monitors (
    id SERIAL PRIMARY KEY,
    uptimerobot_id INTEGER UNIQUE NOT NULL,   -- monitor id on uptimerobot.com
    name VARCHAR(200) NOT NULL,
    url VARCHAR(500) NOT NULL,
    check_interval_sec INTEGER DEFAULT 300,   -- 300 s = checked every 5 minutes
    is_active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Availability samples pulled from the UptimeRobot API
CREATE TABLE IF NOT EXISTS uptime_checks (
    id SERIAL PRIMARY KEY,
    monitor_id INTEGER NOT NULL REFERENCES uptime_monitors(id) ON DELETE CASCADE,
    checked_at TIMESTAMP NOT NULL,
    status VARCHAR(20) NOT NULL,              -- 'up' / 'down' / 'paused'
    response_time_ms INTEGER,
    status_code INTEGER
);
CREATE INDEX IF NOT EXISTS idx_uptime_checks_monitor_id ON uptime_checks(monitor_id);
CREATE INDEX IF NOT EXISTS idx_uptime_checks_checked_at ON uptime_checks(checked_at);

-- Incidents (downtime periods, with auto-correlated cause)
CREATE TABLE IF NOT EXISTS uptime_incidents (
    id SERIAL PRIMARY KEY,
    monitor_id INTEGER NOT NULL REFERENCES uptime_monitors(id) ON DELETE CASCADE,
    started_at TIMESTAMP NOT NULL,
    ended_at TIMESTAMP,                       -- NULL while the outage is ongoing
    duration_seconds INTEGER,
    cause VARCHAR(20) DEFAULT 'unknown',      -- 'isp' / 'server' / 'infra' / 'unknown'
    notes TEXT,
    auto_resolved BOOLEAN DEFAULT FALSE
);
CREATE INDEX IF NOT EXISTS idx_uptime_incidents_monitor_id ON uptime_incidents(monitor_id);
CREATE INDEX IF NOT EXISTS idx_uptime_incidents_started_at ON uptime_incidents(started_at);

-- Internal health log (server state recorded every 5 minutes by cron)
CREATE TABLE IF NOT EXISTS internal_health_logs (
    id SERIAL PRIMARY KEY,
    checked_at TIMESTAMP NOT NULL DEFAULT NOW(),
    app_ok BOOLEAN NOT NULL,
    db_ok BOOLEAN NOT NULL,
    cpu_percent NUMERIC(5,2),
    ram_percent NUMERIC(5,2),
    disk_percent NUMERIC(5,2),
    gunicorn_workers INTEGER
);
CREATE INDEX IF NOT EXISTS idx_internal_health_logs_checked_at ON internal_health_logs(checked_at);

-- Grants for the application role
GRANT ALL ON TABLE uptime_monitors TO nordabiz_app;
GRANT ALL ON TABLE uptime_checks TO nordabiz_app;
GRANT ALL ON TABLE uptime_incidents TO nordabiz_app;
GRANT ALL ON TABLE internal_health_logs TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE uptime_monitors_id_seq TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE uptime_checks_id_seq TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE uptime_incidents_id_seq TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE internal_health_logs_id_seq TO nordabiz_app;

View File

@ -0,0 +1,149 @@
# Uptime Monitoring - Design Spec
**Data:** 2026-03-15
**Status:** Zatwierdzony
## Problem
Portal nordabiznes.pl jest hostowany on-premise w INPI, za ISP Telewizja Kablowa Chopin. W ciągu ostatnich 2 miesięcy wystąpiły minimum 3 awarie internetu (10 marca, 14 marca + wcześniejszy incydent), powodujące niedostępność portalu z zewnątrz. Brak monitoringu uniemożliwia:
- Udokumentowanie skali problemu
- Odróżnienie awarii ISP od awarii serwera
- Podjęcie decyzji o ewentualnej migracji hostingu
## Rozwiązanie
Podejście B: UptimeRobot (zewnętrzny monitoring) + wewnętrzny health logger z korelacją awarii.
## Architektura
```
UptimeRobot.com (free) NORDABIZ-01 (10.22.68.249)
│ sprawdza co 5 min │ wewnętrzny logger co 5 min
│ HTTPS → nordabiznes.pl │ app/db/cpu/ram/disk → PostgreSQL
│ │
└── REST API ──────────────────→ │ sync co godzinę
│ korelacja: ISP vs serwer vs infra
/admin/uptime (dashboard)
```
### Korelacja awarii
| UptimeRobot | Wewnętrzny log | Diagnoza |
|---|---|---|
| DOWN | serwer OK | Awaria ISP (Chopin) |
| DOWN | serwer DOWN | Awaria serwera/VM |
| DOWN | brak logów | Awaria infrastruktury INPI |
| UP | serwer OK | Wszystko działa |
## Schemat bazy danych
### uptime_monitors
Konfiguracja monitorów UptimeRobot.
| Kolumna | Typ | Opis |
|---------|-----|------|
| id | SERIAL PK | |
| uptimerobot_id | INTEGER UNIQUE | ID monitora w UptimeRobot |
| name | VARCHAR(200) | Nazwa monitora |
| url | VARCHAR(500) | Monitorowany URL |
| check_interval_sec | INTEGER | Interwał sprawdzania (300 = 5 min) |
| is_active | BOOLEAN DEFAULT TRUE | |
| created_at | TIMESTAMP | |
### uptime_checks
Wyniki sprawdzeń z UptimeRobot (synchronizowane co godzinę).
| Kolumna | Typ | Opis |
|---------|-----|------|
| id | SERIAL PK | |
| monitor_id | INTEGER FK | → uptime_monitors.id |
| checked_at | TIMESTAMP | Czas sprawdzenia |
| status | VARCHAR(20) | 'up' / 'down' / 'paused' |
| response_time_ms | INTEGER | Czas odpowiedzi w ms |
| status_code | INTEGER | HTTP status code |
### uptime_incidents
Okresy niedostępności z automatyczną diagnozą przyczyny.
| Kolumna | Typ | Opis |
|---------|-----|------|
| id | SERIAL PK | |
| monitor_id | INTEGER FK | → uptime_monitors.id |
| started_at | TIMESTAMP | Początek awarii |
| ended_at | TIMESTAMP NULL | Koniec (NULL = trwa) |
| duration_seconds | INTEGER | Czas trwania |
| cause | VARCHAR(20) | 'isp' / 'server' / 'infra' / 'unknown' |
| notes | TEXT | Notatki admina |
| auto_resolved | BOOLEAN DEFAULT FALSE | Czy zakończony automatycznie |
### internal_health_logs
Wewnętrzny stan serwera (cron co 5 min, lokalnie).
| Kolumna | Typ | Opis |
|---------|-----|------|
| id | SERIAL PK | |
| checked_at | TIMESTAMP | |
| app_ok | BOOLEAN | /health odpowiada OK |
| db_ok | BOOLEAN | PostgreSQL dostępny |
| cpu_percent | NUMERIC(5,2) | Użycie CPU % |
| ram_percent | NUMERIC(5,2) | Użycie RAM % |
| disk_percent | NUMERIC(5,2) | Użycie dysku % |
| gunicorn_workers | INTEGER | Liczba aktywnych workerów |
## Skrypty
### scripts/internal_health_logger.py
- Cron: `*/5 * * * *`
- Sprawdza: localhost:5000/health, połączenie DB, psutil (CPU/RAM/disk), pgrep gunicorn
- Zapisuje do `internal_health_logs`
- Retencja: automatyczne czyszczenie logów starszych niż 90 dni
### scripts/uptimerobot_sync.py
- Cron: `0 * * * *` (co godzinę)
- Pobiera z UptimeRobot API: response times, logi (up/down events)
- Zapisuje do `uptime_checks`
- Tworzy/aktualizuje `uptime_incidents` na podstawie logów down/up
- Koreluje z `internal_health_logs` — ustawia `cause` automatycznie
- Env: `UPTIMEROBOT_API_KEY` w .env
## Dashboard /admin/uptime
### Sekcje
1. **Aktualny status** — badge UP/DOWN, czas ostatniego sprawdzenia, response time
2. **Uptime podsumowanie** — karty 24h/7d/30d/90d z procentem i oceną SLA
- ≥99.9% zielony, 99.5-99.9% żółty, <99.5% czerwony
- Kontekst: "99.5% = max 3.6h przestoju/miesiąc"
3. **Wykres response time** — Chart.js, przełącznik 24h/7d/30d
4. **Lista incydentów** — tabela z: data, czas trwania, przyczyna (ISP/Serwer/Infra), notatki (edytowalne)
5. **Analiza wzorców** — wykres słupkowy: awarie wg godziny/dnia tygodnia
6. **Raport miesięczny** — SLA %, łączny downtime, liczba incydentów, najdłuższa awaria, trend
### Dostęp
- Route: `/admin/uptime`
- Wymagana rola: `SystemRole.OFFICE_MANAGER`
- Auto-refresh: co 5 min (JSON API endpoint `/admin/api/uptime`)
- Link w nawigacji: sekcja System → "Monitoring uptime"
## UptimeRobot Setup (manual)
1. Konto na uptimerobot.com (free tier)
2. Monitor: HTTP(s), URL `https://nordabiznes.pl/health`, interwał 5 min
3. Alert contact: email
4. API key (Main API Key, read-only) → `.env` jako `UPTIMEROBOT_API_KEY`
## Retencja danych
| Tabela | Retencja |
|--------|----------|
| uptime_checks | 90 dni (sync script czyści starsze) |
| uptime_incidents | Bez limitu (kluczowe dla raportów) |
| internal_health_logs | 90 dni (health logger czyści starsze) |
## Technologie
- Backend: Flask route w `routes_status.py`
- Frontend: Jinja2 template, Chart.js (już używany w projekcie)
- Scheduled: systemowy cron (jak istniejące skrypty)
- External: UptimeRobot free API

View File

@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Internal Health Logger
======================
Cron job (*/5 * * * *) - zapisuje stan serwera co 5 minut.
Pozwala odróżnić awarię ISP od awarii serwera.
Użycie:
*/5 * * * * cd /var/www/nordabiznes && DATABASE_URL=$(grep DATABASE_URL .env | cut -d'=' -f2) /var/www/nordabiznes/venv/bin/python3 scripts/internal_health_logger.py
"""
import os
import sys
import subprocess
import urllib.request
import urllib.error
from datetime import datetime, timedelta
# Setup path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from database import SessionLocal, InternalHealthLog
# Local /health endpoint of the Flask app, probed from the box itself.
HEALTH_URL = 'http://localhost:5000/health'
# Health-log rows older than this many days are deleted by cleanup_old_logs().
RETENTION_DAYS = 90
def check_app_health():
    """Return True when the local Flask app answers /health with HTTP 200."""
    try:
        probe = urllib.request.Request(HEALTH_URL, method='GET')
        with urllib.request.urlopen(probe, timeout=5) as response:
            return response.status == 200
    except Exception:
        # Connection refused, timeout, non-2xx, DNS failure — all count as down.
        return False
def check_db_health():
    """Return True when PostgreSQL answers a trivial query.

    Fix over the original: the session is now always closed — previously,
    if `db.execute()` raised, the function returned False but leaked the
    session (no close on the error path).
    """
    try:
        db = SessionLocal()
    except Exception:
        return False
    try:
        from sqlalchemy import text
        db.execute(text('SELECT 1'))
        return True
    except Exception:
        return False
    finally:
        db.close()
def get_cpu_percent():
    """Parse CPU usage (%) out of a single batch run of `top`.

    Returns None when `top` is unavailable or the output can't be parsed.
    """
    try:
        output = subprocess.run(
            ['top', '-bn1'],
            capture_output=True, text=True, timeout=10
        ).stdout
        for line in output.split('\n'):
            # Match lines like: %Cpu(s): 2.3 us, 0.5 sy, ... 96.2 id,
            if 'Cpu' not in line:
                continue
            tokens = line.split()
            for idx, token in enumerate(tokens):
                if token in ('id,', 'id'):
                    # Usage = 100 minus the idle figure preceding 'id'.
                    idle_pct = float(tokens[idx - 1])
                    return round(100.0 - idle_pct, 2)
        return None
    except Exception:
        return None
def get_ram_percent():
    """Parse RAM usage (%) from `free -m`, based on the 'available' column."""
    try:
        output = subprocess.run(
            ['free', '-m'],
            capture_output=True, text=True, timeout=5
        ).stdout
        for line in output.split('\n'):
            if not line.startswith('Mem:'):
                continue
            fields = line.split()
            total_mb = float(fields[1])
            available_mb = float(fields[6])  # 'available' column
            return round((1 - available_mb / total_mb) * 100, 2)
        return None
    except Exception:
        return None
def get_disk_percent():
    """Parse root-filesystem usage (%) from `df -h /`."""
    try:
        output = subprocess.run(
            ['df', '-h', '/'],
            capture_output=True, text=True, timeout=5
        ).stdout
        rows = output.strip().split('\n')
        # Row 0 is the header; row 1 looks like:
        # Filesystem Size Used Avail Use% Mounted
        if len(rows) >= 2:
            for field in rows[1].split():
                if field.endswith('%'):
                    return float(field.rstrip('%'))
        return None
    except Exception:
        return None
def get_gunicorn_workers():
    """Count running gunicorn processes via pgrep; 0 on failure or none."""
    try:
        proc = subprocess.run(
            ['pgrep', '-c', 'gunicorn'],
            capture_output=True, text=True, timeout=5
        )
        if proc.returncode != 0:
            # pgrep exits non-zero when nothing matches.
            return 0
        return int(proc.stdout.strip())
    except Exception:
        return 0
def cleanup_old_logs(db):
    """Delete internal health-log rows older than RETENTION_DAYS days."""
    threshold = datetime.now() - timedelta(days=RETENTION_DAYS)
    stale = db.query(InternalHealthLog).filter(InternalHealthLog.checked_at < threshold)
    deleted = stale.delete()
    if not deleted:
        return
    db.commit()
    print(f"Usunięto {deleted} starych logów health (>{RETENTION_DAYS} dni)")
def main():
    """Record one health snapshot in internal_health_logs.

    Runs from cron every 5 minutes. The 03:00 run additionally prunes
    rows older than RETENTION_DAYS. Errors are printed to stderr and
    rolled back; the process still exits 0 so cron does not retry.
    """
    db = SessionLocal()
    try:
        log = InternalHealthLog(
            checked_at=datetime.now(),
            app_ok=check_app_health(),
            db_ok=check_db_health(),
            cpu_percent=get_cpu_percent(),
            ram_percent=get_ram_percent(),
            disk_percent=get_disk_percent(),
            gunicorn_workers=get_gunicorn_workers()
        )
        db.add(log)
        db.commit()
        # Prune once a day: only the cron run in the 03:00–03:04 window
        # matches (cron fires at :00, :05, ... so exactly one run a day).
        now = datetime.now()
        if now.hour == 3 and now.minute < 5:
            cleanup_old_logs(db)
        print(f"[{log.checked_at}] app={log.app_ok} db={log.db_ok} "
              f"cpu={log.cpu_percent}% ram={log.ram_percent}% disk={log.disk_percent}% "
              f"workers={log.gunicorn_workers}")
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        db.rollback()
    finally:
        db.close()
if __name__ == '__main__':
main()

281
scripts/uptimerobot_sync.py Normal file
View File

@ -0,0 +1,281 @@
#!/usr/bin/env python3
"""
UptimeRobot Sync
================
Cron job (0 * * * *) - synchronizuje dane z UptimeRobot API co godzinę.
Pobiera response times, logi up/down, koreluje z internal_health_logs.
Użycie:
0 * * * * cd /var/www/nordabiznes && DATABASE_URL=$(grep DATABASE_URL .env | cut -d'=' -f2) UPTIMEROBOT_API_KEY=$(grep UPTIMEROBOT_API_KEY .env | cut -d'=' -f2) /var/www/nordabiznes/venv/bin/python3 scripts/uptimerobot_sync.py
Wymagane env:
UPTIMEROBOT_API_KEY - API key z UptimeRobot (Main API Key)
"""
import os
import sys
import json
import urllib.request
import urllib.error
from datetime import datetime, timedelta
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from database import (
SessionLocal, UptimeMonitor, UptimeCheck, UptimeIncident, InternalHealthLog
)
# API key, injected via the environment (populated from .env by the cron line).
API_KEY = os.environ.get('UPTIMEROBOT_API_KEY', '')
API_BASE = 'https://api.uptimerobot.com/v2'
# uptime_checks rows older than this many days are purged by cleanup_old_checks().
RETENTION_DAYS = 90
# UptimeRobot monitor status codes → readable labels.
UR_STATUS = {
    0: 'paused',
    1: 'not_checked',
    2: 'up',
    8: 'seems_down',
    9: 'down',
}
def api_request(endpoint, extra_params=None):
    """POST a request to the UptimeRobot API v2 and return the parsed JSON.

    Args:
        endpoint: API method name, e.g. 'getMonitors'.
        extra_params: optional dict merged into the base parameters.

    Returns:
        Parsed JSON response dict, or None on any network/HTTP/parse error
        (the error is printed to stderr).

    NOTE(review): the body is sent as JSON with Content-Type
    application/json; the UptimeRobot v2 docs show
    application/x-www-form-urlencoded — confirm the JSON form is
    actually accepted before relying on this in new code.
    """
    params = {
        'api_key': API_KEY,
        'format': 'json',  # response format requested from the API
    }
    if extra_params:
        params.update(extra_params)
    data = json.dumps(params).encode('utf-8')
    req = urllib.request.Request(
        f'{API_BASE}/{endpoint}',
        data=data,
        headers={'Content-Type': 'application/json'},
        method='POST'
    )
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return json.loads(resp.read().decode('utf-8'))
    except Exception as e:
        print(f"API error ({endpoint}): {e}", file=sys.stderr)
        return None
def sync_monitors(db):
    """Upsert monitors from UptimeRobot and sync their recent data.

    A single getMonitors call returns, per monitor: metadata, response-time
    samples and the up/down event log; those are handed on to
    sync_response_times() / sync_logs(). Commits once at the end.

    Returns:
        list of synced UptimeMonitor rows (empty list on API error).
    """
    result = api_request('getMonitors', {
        'response_times': 1,
        # NOTE(review): limit=1 fetches only the newest sample per hourly
        # sync (~1 chart point/hour, not every 5-min check) — raise it for
        # a denser chart; confirm semantics against the API docs.
        'response_times_limit': 1,
        'logs': 1,
        'logs_limit': 50,
        'all_time_uptime_ratio': 1,
        'custom_uptime_ratios': '1-7-30-90',
    })
    if not result or result.get('stat') != 'ok':
        print(f"Błąd API getMonitors: {result}", file=sys.stderr)
        return []
    monitors = result.get('monitors', [])
    synced = []
    for m in monitors:
        ur_id = m['id']
        # Upsert keyed on the remote UptimeRobot monitor id.
        monitor = db.query(UptimeMonitor).filter_by(uptimerobot_id=ur_id).first()
        if not monitor:
            monitor = UptimeMonitor(
                uptimerobot_id=ur_id,
                name=m.get('friendly_name', ''),
                url=m.get('url', ''),
                check_interval_sec=m.get('interval', 300),
                created_at=datetime.now()
            )
            db.add(monitor)
            db.flush()  # assign monitor.id before inserting child rows
            print(f"Nowy monitor: {monitor.name} ({monitor.url})")
        else:
            monitor.name = m.get('friendly_name', monitor.name)
            monitor.url = m.get('url', monitor.url)
        # Response-time samples → uptime_checks rows.
        sync_response_times(db, monitor, m.get('response_times', []))
        # Up/down event log → incidents (plus down/up check rows).
        sync_logs(db, monitor, m.get('logs', []))
        synced.append(monitor)
    db.commit()
    return synced
def sync_response_times(db, monitor, response_times):
    """Store UptimeRobot response-time samples as 'up' uptime_checks rows.

    Duplicate timestamps are skipped so the hourly re-sync of an
    overlapping window does not insert the same sample twice.

    Improvement over the original: one batched query fetches all
    already-stored timestamps, instead of issuing one existence query
    per sample (N+1 pattern).

    Args:
        db: SQLAlchemy session.
        monitor: UptimeMonitor row the samples belong to.
        response_times: list of {'datetime': unix_ts, 'value': ms} dicts
            from the UptimeRobot API.
    """
    if not response_times:
        return
    timestamps = [datetime.fromtimestamp(rt['datetime']) for rt in response_times]
    # All timestamps already stored for this monitor, in a single query.
    existing = {
        row.checked_at
        for row in db.query(UptimeCheck.checked_at).filter(
            UptimeCheck.monitor_id == monitor.id,
            UptimeCheck.checked_at.in_(timestamps)
        )
    }
    for rt, ts in zip(response_times, timestamps):
        if ts in existing:
            continue
        existing.add(ts)  # also dedupe within this batch
        db.add(UptimeCheck(
            monitor_id=monitor.id,
            checked_at=ts,
            status='up',  # a response-time sample implies the site was up
            response_time_ms=rt.get('value', 0),
        ))
def sync_logs(db, monitor, logs):
    """Turn UptimeRobot up/down log events into incidents and check rows.

    Incidents are keyed on (monitor, started_at): new DOWN events create
    one; already-known DOWN events only get closed once UptimeRobot
    reports a duration, at which point the cause is auto-correlated.

    Fix over the original: check rows derived from log events are now
    deduplicated. Previously every hourly sync re-processed the same
    ~50 log events and unconditionally re-inserted the 'up' recovery
    checks, creating duplicate uptime_checks rows that inflated the
    uptime statistics.

    Args:
        db: SQLAlchemy session.
        monitor: UptimeMonitor the events belong to.
        logs: list of UptimeRobot log dicts
            ({'type': 1|2, 'datetime': unix_ts, 'duration': seconds}).
    """
    if not logs:
        return
    for log in logs:
        log_type = log.get('type', 0)
        ts = datetime.fromtimestamp(log['datetime'])
        duration = log.get('duration', 0)
        if log_type == 1:  # DOWN event
            existing = db.query(UptimeIncident).filter(
                UptimeIncident.monitor_id == monitor.id,
                UptimeIncident.started_at == ts
            ).first()
            if existing:
                # Close a still-open incident once a duration is known.
                if duration > 0 and not existing.ended_at:
                    existing.ended_at = ts + timedelta(seconds=duration)
                    existing.duration_seconds = duration
                    existing.auto_resolved = True
                    existing.cause = correlate_cause(db, ts, duration)
            else:
                ended_at = ts + timedelta(seconds=duration) if duration > 0 else None
                db.add(UptimeIncident(
                    monitor_id=monitor.id,
                    started_at=ts,
                    ended_at=ended_at,
                    duration_seconds=duration if duration > 0 else None,
                    cause=correlate_cause(db, ts, duration) if duration > 0 else 'unknown',
                    auto_resolved=duration > 0
                ))
            _add_event_check(db, monitor.id, ts, 'down')
        elif log_type == 2:  # UP (recovery) event
            _add_event_check(db, monitor.id, ts, 'up')


def _add_event_check(db, monitor_id, ts, status):
    """Insert an UptimeCheck for a log event unless one already exists
    at that timestamp (prevents duplicates on repeated hourly syncs)."""
    exists = db.query(UptimeCheck).filter_by(
        monitor_id=monitor_id,
        checked_at=ts
    ).first()
    if not exists:
        db.add(UptimeCheck(
            monitor_id=monitor_id,
            checked_at=ts,
            status=status,
            response_time_ms=None,
        ))
def correlate_cause(db, incident_start, duration_seconds):
    """Classify an incident by cross-checking the internal health log.

    Decision table:
      - internal logs in the window all show app_ok  → 'isp'
        (server was fine, so the outage was external)
      - internal logs show app down (fully or partly) → 'server'
      - no internal logs at all in the window         → 'infra'
        (the whole box was out: power, network gear, hypervisor)
      - no usable duration                            → 'unknown'
    """
    if not duration_seconds or duration_seconds <= 0:
        return 'unknown'
    # Health samples land every 5 minutes, so pad the window by that much.
    margin = timedelta(minutes=5)
    window_start = incident_start - margin
    window_end = incident_start + timedelta(seconds=duration_seconds) + margin
    samples = db.query(InternalHealthLog).filter(
        InternalHealthLog.checked_at >= window_start,
        InternalHealthLog.checked_at <= window_end
    ).all()
    if not samples:
        return 'infra'
    healthy = sum(1 for sample in samples if sample.app_ok)
    if healthy == len(samples):
        return 'isp'
    # App was down for some or all of the window — a mixed picture is
    # still treated as a server-side problem.
    return 'server'
def cleanup_old_checks(db):
    """Delete uptime_checks rows older than RETENTION_DAYS (caller commits)."""
    threshold = datetime.now() - timedelta(days=RETENTION_DAYS)
    deleted = db.query(UptimeCheck).filter(UptimeCheck.checked_at < threshold).delete()
    if not deleted:
        return
    print(f"Usunięto {deleted} starych uptime checks (>{RETENTION_DAYS} dni)")
def main():
    """Entry point: sync UptimeRobot data, pruning old checks once a day.

    Exits with status 1 when the API key is missing or the sync fails,
    so cron/monitoring can surface the failure.
    """
    if not API_KEY:
        print("BŁĄD: Brak UPTIMEROBOT_API_KEY w zmiennych środowiskowych", file=sys.stderr)
        print("Ustaw klucz API w .env: UPTIMEROBOT_API_KEY=ur...", file=sys.stderr)
        sys.exit(1)
    db = SessionLocal()
    try:
        print(f"[{datetime.now()}] Synchronizacja UptimeRobot...")
        monitors = sync_monitors(db)
        print(f"Zsynchronizowano {len(monitors)} monitorów")
        # Prune once a day: only the hourly run at 04:00 does the cleanup
        # (cleanup_old_checks itself does not commit).
        now = datetime.now()
        if now.hour == 4 and now.minute < 5:
            cleanup_old_checks(db)
            db.commit()
        print(f"[{datetime.now()}] Synchronizacja zakończona")
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        db.rollback()
        sys.exit(1)
    finally:
        db.close()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,791 @@
{% extends "base.html" %}
{% block title %}Monitoring uptime - Admin{% endblock %}
{# Styles for the admin uptime dashboard: large status badge, uptime-percentage
   cards, response-time chart, incidents table (with inline cause/notes editors),
   failure-pattern charts, monthly report and SLA comparison table.
   Color scheme is keyed to uptime thresholds: green >= 99.9%, yellow >= 99.5%,
   red below. Uses the site-wide CSS custom properties (--surface, --border,
   --spacing-*, --font-size-*, --radius*) defined elsewhere. #}
{% block extra_css %}
<style>
.uptime-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: var(--spacing-xl);
}
.uptime-header h1 {
font-size: var(--font-size-2xl);
color: var(--text-primary);
margin-bottom: var(--spacing-xs);
}
.uptime-header p {
color: var(--text-secondary);
}
.refresh-info {
text-align: right;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-lg);
padding: var(--spacing-md) var(--spacing-lg);
}
.refresh-info .timestamp {
font-size: var(--font-size-lg);
font-weight: 600;
color: var(--text-primary);
font-family: monospace;
}
.refresh-info .label {
font-size: var(--font-size-xs);
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
}
/* Status badge — large up/down/unknown indicator; "down" pulses red */
.status-badge-large {
display: inline-flex;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-lg) var(--spacing-xl);
border-radius: var(--radius-xl);
font-size: var(--font-size-xl);
font-weight: 700;
}
.status-badge-large.up {
background: linear-gradient(135deg, #dcfce7, #bbf7d0);
color: #166534;
border: 2px solid #86efac;
}
.status-badge-large.down {
background: linear-gradient(135deg, #fee2e2, #fecaca);
color: #991b1b;
border: 2px solid #fca5a5;
animation: pulse-red 2s infinite;
}
.status-badge-large.unknown {
background: linear-gradient(135deg, #f3f4f6, #e5e7eb);
color: #6b7280;
border: 2px solid #d1d5db;
}
@keyframes pulse-red {
0%, 100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); }
50% { box-shadow: 0 0 0 8px rgba(239, 68, 68, 0); }
}
.status-dot {
width: 16px;
height: 16px;
border-radius: 50%;
}
.status-dot.up { background: #22c55e; }
.status-dot.down { background: #ef4444; animation: pulse 2s infinite; }
.status-dot.unknown { background: #9ca3af; }
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.status-meta {
font-size: var(--font-size-sm);
color: var(--text-secondary);
font-weight: 400;
margin-top: var(--spacing-xs);
}
/* Uptime cards — 4-up grid (24h/7d/30d/90d), 2-up on mobile */
.uptime-cards {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: var(--spacing-lg);
margin-bottom: var(--spacing-xl);
}
@media (max-width: 768px) {
.uptime-cards { grid-template-columns: repeat(2, 1fr); }
}
.uptime-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-xl);
text-align: center;
}
.uptime-card .period {
font-size: var(--font-size-sm);
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: var(--spacing-sm);
}
.uptime-card .value {
font-size: var(--font-size-3xl);
font-weight: 800;
font-family: monospace;
}
.uptime-card .value.green { color: #16a34a; }
.uptime-card .value.yellow { color: #ca8a04; }
.uptime-card .value.red { color: #dc2626; }
.uptime-card .detail {
font-size: var(--font-size-xs);
color: var(--text-secondary);
margin-top: var(--spacing-xs);
}
/* Sections — shared card container for each dashboard panel */
.section {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-xl);
margin-bottom: var(--spacing-xl);
}
.section-title {
font-size: var(--font-size-lg);
font-weight: 600;
color: var(--text-primary);
margin-bottom: var(--spacing-lg);
display: flex;
align-items: center;
gap: var(--spacing-sm);
}
.section-title svg {
width: 20px;
height: 20px;
color: var(--primary);
}
/* Chart — response-time panel with 24h/7d/30d period toggles */
.chart-controls {
display: flex;
gap: var(--spacing-sm);
margin-bottom: var(--spacing-md);
}
.chart-btn {
padding: var(--spacing-xs) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
background: var(--surface);
color: var(--text-secondary);
cursor: pointer;
font-size: var(--font-size-sm);
}
.chart-btn.active {
background: var(--primary);
color: white;
border-color: var(--primary);
}
.chart-container {
position: relative;
height: 300px;
}
/* Incidents table — rows carry editable cause <select> and notes <input> */
.incidents-table {
width: 100%;
border-collapse: collapse;
}
.incidents-table th {
text-align: left;
padding: var(--spacing-sm) var(--spacing-md);
border-bottom: 2px solid var(--border);
font-size: var(--font-size-sm);
color: var(--text-secondary);
font-weight: 600;
}
.incidents-table td {
padding: var(--spacing-sm) var(--spacing-md);
border-bottom: 1px solid var(--border);
font-size: var(--font-size-sm);
}
.cause-badge {
display: inline-block;
padding: 2px 8px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
font-weight: 600;
}
.cause-badge.isp { background: #fef3c7; color: #92400e; }
.cause-badge.server { background: #fee2e2; color: #991b1b; }
.cause-badge.infra { background: #ede9fe; color: #5b21b6; }
.cause-badge.unknown { background: #f3f4f6; color: #6b7280; }
.notes-input {
width: 100%;
padding: 4px 8px;
border: 1px solid var(--border);
border-radius: var(--radius);
font-size: var(--font-size-xs);
background: var(--surface);
color: var(--text-primary);
}
.notes-input:focus {
outline: none;
border-color: var(--primary);
}
.cause-select {
padding: 2px 6px;
border: 1px solid var(--border);
border-radius: var(--radius);
font-size: var(--font-size-xs);
background: var(--surface);
color: var(--text-primary);
}
/* Patterns grid — two bar charts (by hour / by weekday), stacked on mobile */
.patterns-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: var(--spacing-xl);
}
@media (max-width: 768px) {
.patterns-grid { grid-template-columns: 1fr; }
}
.pattern-chart {
height: 200px;
}
/* Monthly report — stat tiles plus a trend badge vs previous month */
.report-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: var(--spacing-lg);
}
.report-stat {
text-align: center;
padding: var(--spacing-lg);
background: var(--background);
border-radius: var(--radius-lg);
}
.report-stat .stat-value {
font-size: var(--font-size-2xl);
font-weight: 700;
color: var(--text-primary);
}
.report-stat .stat-label {
font-size: var(--font-size-xs);
color: var(--text-secondary);
margin-top: var(--spacing-xs);
}
.trend-badge {
display: inline-block;
padding: 2px 8px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
font-weight: 600;
}
.trend-badge.better { background: #dcfce7; color: #166534; }
.trend-badge.worse { background: #fee2e2; color: #991b1b; }
.trend-badge.same { background: #f3f4f6; color: #6b7280; }
/* SLA table — .sla-current highlights the tier the portal currently meets */
.sla-table {
width: 100%;
border-collapse: collapse;
margin-top: var(--spacing-md);
}
.sla-table th, .sla-table td {
padding: var(--spacing-sm) var(--spacing-md);
border-bottom: 1px solid var(--border);
font-size: var(--font-size-sm);
text-align: center;
}
.sla-table th {
color: var(--text-secondary);
font-weight: 600;
}
.sla-current {
background: var(--primary-light, #eff6ff);
font-weight: 600;
}
/* No data state — onboarding instructions shown before first sync */
.no-data {
text-align: center;
padding: var(--spacing-3xl);
color: var(--text-secondary);
}
.no-data svg {
width: 64px;
height: 64px;
margin-bottom: var(--spacing-lg);
opacity: 0.3;
}
.no-data h3 {
font-size: var(--font-size-xl);
color: var(--text-primary);
margin-bottom: var(--spacing-sm);
}
.setup-steps {
text-align: left;
max-width: 500px;
margin: var(--spacing-lg) auto;
}
.setup-steps li {
margin-bottom: var(--spacing-sm);
font-size: var(--font-size-sm);
}
.setup-steps code {
background: var(--background);
padding: 2px 6px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
}
</style>
{% endblock %}
{# Main dashboard body. Expects a `data` dict from the admin_uptime view with:
   has_data (bool), current_status ('up'|'down'|'unknown'), last_checked,
   last_response_time, monitor (name, url), uptime[period] (percent,
   down_checks, total_checks), avg_response_time, incidents (id, started_at,
   ended_at, duration_human, cause, notes), monthly_report, sla_context
   (mapping of SLA level string -> max_downtime_month / max_downtime_year).
   NOTE(review): assumes sla_context keys iterate strictest-to-loosest
   (e.g. 99.9, 99.5, 99) — confirm insertion order in the view. #}
{% block content %}
<div class="uptime-header">
<div>
<h1>Monitoring uptime</h1>
<p>Dostepnosc portalu nordabiznes.pl z perspektywy uzytkownikow zewnetrznych</p>
</div>
<div class="refresh-info">
<div class="label">Ostatnia aktualizacja</div>
<div class="timestamp" id="refresh-time">{{ now.strftime('%H:%M:%S') if now is defined else '--:--:--' }}</div>
</div>
</div>
{% if not data.has_data %}
<!-- No data yet — show setup instructions -->
<div class="section">
<div class="no-data">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 8v4l3 3m6-3a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
<h3>Monitoring nie jest jeszcze skonfigurowany</h3>
<p>Aby uruchomic monitoring, wykonaj ponizsza konfiguracje:</p>
<ol class="setup-steps">
<li>Zaloz konto na <strong>uptimerobot.com</strong> (darmowy plan)</li>
<li>Dodaj monitor: HTTPS, URL <code>https://nordabiznes.pl/health</code>, interwal 5 min</li>
<li>Skopiuj <strong>Main API Key</strong> z ustawien konta</li>
<li>Dodaj do <code>.env</code>: <code>UPTIMEROBOT_API_KEY=twoj_klucz</code></li>
<li>Dodaj cron jobs na serwerze:
<br><code>*/5 * * * * cd /var/www/nordabiznes && ...</code> (health logger)
<br><code>0 * * * * cd /var/www/nordabiznes && ...</code> (UptimeRobot sync)
</li>
</ol>
</div>
</div>
{% else %}
<!-- 1. Current status badge -->
<div style="margin-bottom: var(--spacing-xl);">
<div class="status-badge-large {{ data.current_status }}">
<div class="status-dot {{ data.current_status }}"></div>
{% if data.current_status == 'up' %}
Portal dziala poprawnie
{% elif data.current_status == 'down' %}
Portal niedostepny!
{% else %}
Status nieznany
{% endif %}
</div>
<div class="status-meta">
{% if data.last_checked %}
Ostatnie sprawdzenie: {{ data.last_checked }}
{% if data.last_response_time %} | Czas odpowiedzi: {{ data.last_response_time }}ms{% endif %}
{% endif %}
| Monitor: {{ data.monitor.name }} ({{ data.monitor.url }})
</div>
</div>
<!-- 2. Uptime percentage cards (color thresholds: green >=99.9, yellow >=99.5) -->
<div class="uptime-cards">
{% for period, label in [('24h', 'Ostatnie 24h'), ('7d', 'Ostatnie 7 dni'), ('30d', 'Ostatnie 30 dni'), ('90d', 'Ostatnie 90 dni')] %}
<div class="uptime-card">
<div class="period">{{ label }}</div>
{% if data.uptime[period].percent is not none %}
{% set pct = data.uptime[period].percent %}
<div class="value {% if pct >= 99.9 %}green{% elif pct >= 99.5 %}yellow{% else %}red{% endif %}">
{{ '%.2f' % pct }}%
</div>
<div class="detail">
{{ data.uptime[period].down_checks }} awarii / {{ data.uptime[period].total_checks }} sprawdzen
</div>
{% else %}
<div class="value" style="color: var(--text-secondary);">--</div>
<div class="detail">Brak danych</div>
{% endif %}
</div>
{% endfor %}
</div>
<!-- 3. Response time chart (rendered by extra_js; canvas only here) -->
<div class="section">
<div class="section-title">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 7h8m0 0v8m0-8l-8 8-4-4-6 6"/>
</svg>
Czas odpowiedzi
{% if data.avg_response_time %}
<span style="font-weight: 400; font-size: var(--font-size-sm); color: var(--text-secondary);">
(sredni: {{ data.avg_response_time }}ms)
</span>
{% endif %}
</div>
<div class="chart-controls">
<button class="chart-btn active" data-days="1" onclick="changeChartPeriod(1)">24h</button>
<button class="chart-btn" data-days="7" onclick="changeChartPeriod(7)">7 dni</button>
<button class="chart-btn" data-days="30" onclick="changeChartPeriod(30)">30 dni</button>
</div>
<div class="chart-container">
<canvas id="responseTimeChart"></canvas>
</div>
</div>
<!-- 4. Incident list with inline editors (handlers defined in extra_js) -->
<div class="section">
<div class="section-title">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-2.5L13.732 4c-.77-.833-1.964-.833-2.732 0L4.082 16.5c-.77.833.192 2.5 1.732 2.5z"/>
</svg>
Incydenty ({{ data.incidents|length }})
</div>
{% if data.incidents %}
<div style="overflow-x: auto;">
<table class="incidents-table">
<thead>
<tr>
<th>Data</th>
<th>Czas trwania</th>
<th>Przyczyna</th>
<th>Notatki</th>
</tr>
</thead>
<tbody>
{% for inc in data.incidents %}
<tr>
<td>
{{ inc.started_at }}
{% if inc.ended_at %}<br><span style="color: var(--text-secondary); font-size: var(--font-size-xs);">do {{ inc.ended_at }}</span>{% endif %}
</td>
<td><strong>{{ inc.duration_human }}</strong></td>
<td>
<select class="cause-select" data-incident-id="{{ inc.id }}" onchange="updateIncident(this)">
<option value="isp" {% if inc.cause == 'isp' %}selected{% endif %}>ISP (Chopin)</option>
<option value="server" {% if inc.cause == 'server' %}selected{% endif %}>Serwer</option>
<option value="infra" {% if inc.cause == 'infra' %}selected{% endif %}>Infrastruktura</option>
<option value="unknown" {% if inc.cause == 'unknown' %}selected{% endif %}>Nieznana</option>
</select>
</td>
<td>
<input type="text" class="notes-input" data-incident-id="{{ inc.id }}"
value="{{ inc.notes }}" placeholder="Dodaj notatke..."
onblur="updateIncidentNotes(this)">
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% else %}
<p style="color: var(--text-secondary); text-align: center; padding: var(--spacing-xl);">
Brak zarejestrowanych incydentow
</p>
{% endif %}
</div>
<!-- 5. Failure pattern analysis (by hour / by weekday) -->
<div class="section">
<div class="section-title">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>
</svg>
Wzorce awarii
</div>
{% if data.incidents %}
<div class="patterns-grid">
<div>
<h4 style="font-size: var(--font-size-sm); color: var(--text-secondary); margin-bottom: var(--spacing-sm);">Awarie wg godziny</h4>
<div class="pattern-chart">
<canvas id="hourChart"></canvas>
</div>
</div>
<div>
<h4 style="font-size: var(--font-size-sm); color: var(--text-secondary); margin-bottom: var(--spacing-sm);">Awarie wg dnia tygodnia</h4>
<div class="pattern-chart">
<canvas id="dowChart"></canvas>
</div>
</div>
</div>
{% else %}
<p style="color: var(--text-secondary); text-align: center;">Brak danych do analizy wzorcow</p>
{% endif %}
</div>
<!-- 6. Monthly report -->
<div class="section">
<div class="section-title">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 17v-2m3 2v-4m3 4v-6m2 10H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"/>
</svg>
Raport miesieczny: {{ data.monthly_report.month }}
</div>
<div class="report-grid">
<div class="report-stat">
{# Inline style is the only effective coloring here — the former
   green/yellow/red classes had no .stat-value.* CSS rules and were dead. #}
<div class="stat-value" style="color: {% if data.monthly_report.uptime_pct >= 99.9 %}#16a34a{% elif data.monthly_report.uptime_pct >= 99.5 %}#ca8a04{% else %}#dc2626{% endif %}">
{{ '%.3f' % data.monthly_report.uptime_pct }}%
</div>
<div class="stat-label">Uptime SLA</div>
</div>
<div class="report-stat">
<div class="stat-value">{{ data.monthly_report.total_downtime_human }}</div>
<div class="stat-label">Laczny przestoj</div>
</div>
<div class="report-stat">
<div class="stat-value">{{ data.monthly_report.incidents_count }}</div>
<div class="stat-label">Liczba incydentow</div>
</div>
<div class="report-stat">
<div class="stat-value">{{ data.monthly_report.longest_incident }}</div>
<div class="stat-label">Najdluzszy incydent</div>
</div>
</div>
<!-- Comparison with the previous month -->
<div style="margin-top: var(--spacing-lg); padding: var(--spacing-md); background: var(--background); border-radius: var(--radius-lg);">
<strong>Trend vs {{ data.monthly_report.prev_month }}:</strong>
<span class="trend-badge {{ data.monthly_report.trend }}">
{% if data.monthly_report.trend == 'better' %}Lepiej
{% elif data.monthly_report.trend == 'worse' %}Gorzej
{% else %}Bez zmian{% endif %}
</span>
(poprzednio: {{ data.monthly_report.prev_downtime_human }} przestoju, {{ data.monthly_report.prev_incidents_count }} incydentow)
</div>
<!-- SLA comparison table -->
<table class="sla-table">
<thead>
<tr>
<th>Poziom SLA</th>
<th>Max przestoj / miesiac</th>
<th>Max przestoj / rok</th>
<th>Twoj status</th>
</tr>
</thead>
<tbody>
{# Highlight exactly ONE row: the first (strictest, given descending key
   order) SLA tier that current uptime satisfies. The previous inline
   condition also matched the last row whenever uptime met the lowest
   tier, highlighting two rows at once. #}
{% set sla = namespace(marked=false) %}
{% for level, limits in data.sla_context.items() %}
<tr {% if not sla.marked and data.monthly_report.uptime_pct >= level|float %}{% set sla.marked = true %}class="sla-current"{% endif %}>
<td><strong>{{ level }}%</strong></td>
<td>{{ limits.max_downtime_month }}</td>
<td>{{ limits.max_downtime_year }}</td>
<td>
{% if data.monthly_report.uptime_pct >= level|float %}
<span style="color: #16a34a;">Spelnia</span>
{% else %}
<span style="color: #dc2626;">Nie spelnia</span>
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
{% endblock %}
{# Chart.js is vendored locally (no CDN); required by the charts in extra_js. #}
{% block head_extra %}
<script src="{{ url_for('static', filename='js/vendor/chart.min.js') }}"></script>
{% endblock %}
{# Dashboard JavaScript: response-time line chart with period toggles,
   failure-pattern bar charts, inline incident editors (fire-and-forget
   POSTs), and a 5-minute auto-refresh of the "last updated" timestamp.
   NOTE(review): this block emits bare JS with no <script> tag here —
   presumably base.html wraps block extra_js in one; confirm. #}
{% block extra_js %}
{% if data.has_data %}
// Data injected by the backend view, JSON-serialized via |tojson
var responseTimesData = {{ data.response_times | tojson }};
var hourData = {{ data.patterns.by_hour | tojson }};
var dowData = {{ data.patterns.by_dow | tojson }};
// Response time chart: destroyed and rebuilt on every period change
var rtCtx = document.getElementById('responseTimeChart');
var rtChart = null;
function renderResponseTimeChart(days) {
if (rtChart) rtChart.destroy();
// Client-side filter: the backend ships the full series, we slice by date
var cutoff = new Date();
cutoff.setDate(cutoff.getDate() - days);
var filtered = responseTimesData.filter(function(d) {
return new Date(d.time) >= cutoff;
});
var labels = filtered.map(function(d) { return d.time; });
var values = filtered.map(function(d) { return d.ms; });
rtChart = new Chart(rtCtx, {
type: 'line',
data: {
labels: labels,
datasets: [{
label: 'Response time (ms)',
data: values,
borderColor: '#3b82f6',
backgroundColor: 'rgba(59, 130, 246, 0.1)',
fill: true,
tension: 0.3,
// Hide points on dense (30-day) series to keep the line readable
pointRadius: days <= 1 ? 3 : (days <= 7 ? 2 : 0),
borderWidth: 2
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: {
legend: { display: false }
},
scales: {
x: {
display: true,
ticks: {
maxTicksLimit: 12,
font: { size: 10 }
}
},
y: {
beginAtZero: true,
title: {
display: true,
text: 'ms'
}
}
}
}
});
}
// Period toggle handler wired to the .chart-btn buttons via onclick
function changeChartPeriod(days) {
document.querySelectorAll('.chart-btn').forEach(function(b) {
b.classList.toggle('active', parseInt(b.dataset.days) === days);
});
renderResponseTimeChart(days);
}
// Pattern charts (only rendered when the incidents section emitted canvases)
if (document.getElementById('hourChart')) {
new Chart(document.getElementById('hourChart'), {
type: 'bar',
data: {
labels: Array.from({length: 24}, function(_, i) { return i + ':00'; }),
datasets: [{
data: hourData,
backgroundColor: 'rgba(239, 68, 68, 0.6)',
borderColor: '#ef4444',
borderWidth: 1
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: { legend: { display: false } },
scales: {
y: { beginAtZero: true, ticks: { stepSize: 1 } },
x: { ticks: { font: { size: 9 } } }
}
}
});
}
if (document.getElementById('dowChart')) {
new Chart(document.getElementById('dowChart'), {
type: 'bar',
data: {
labels: ['Pon', 'Wt', 'Sr', 'Czw', 'Pt', 'Sob', 'Nie'],
datasets: [{
data: dowData,
backgroundColor: 'rgba(245, 158, 11, 0.6)',
borderColor: '#f59e0b',
borderWidth: 1
}]
},
options: {
responsive: true,
maintainAspectRatio: false,
plugins: { legend: { display: false } },
scales: {
y: { beginAtZero: true, ticks: { stepSize: 1 } }
}
}
});
}
// Initial render: last 24 hours
renderResponseTimeChart(1);
// CSRF token rendered into the page for the POST handlers below
var csrfToken = '{{ csrf_token() }}';
// Incident editors — fire-and-forget POSTs; both fields share one endpoint
function updateIncident(selectEl) {
var id = selectEl.dataset.incidentId;
var cause = selectEl.value;
fetch('/admin/api/uptime/incident/' + id + '/notes', {
method: 'POST',
headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken},
body: JSON.stringify({cause: cause})
});
}
function updateIncidentNotes(inputEl) {
var id = inputEl.dataset.incidentId;
var notes = inputEl.value;
fetch('/admin/api/uptime/incident/' + id + '/notes', {
method: 'POST',
headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrfToken},
body: JSON.stringify({notes: notes})
});
}
// Auto-refresh the "last updated" timestamp every 5 minutes (300000 ms);
// errors are deliberately swallowed (best-effort background poll)
setInterval(function() {
fetch('/admin/api/uptime')
.then(function(r) { return r.json(); })
.then(function(data) {
if (data.success) {
document.getElementById('refresh-time').textContent =
new Date(data.timestamp).toLocaleTimeString('pl-PL');
}
})
.catch(function() {});
}, 300000);
{% endif %}
{% endblock %}

View File

@ -1789,6 +1789,12 @@
</svg> </svg>
Monitoring AI Monitoring AI
</a> </a>
<a href="{{ url_for('admin.admin_uptime') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 8v4l3 3m6-3a9 9 0 11-18 0 9 9 0 0118 0z"/>
</svg>
Monitoring uptime
</a>
<a href="{{ url_for('admin.admin_status') }}"> <a href="{{ url_for('admin.admin_status') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24"> <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/> <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>