fix: improve analytics data quality and human-friendliness
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Add KPI stat cards to Overview tab (active users, sessions, pageviews, bounce rate)
- Filter technical paths from Pages and Paths tabs (/sw.js, /robots.txt, /.git/, /.env, etc.)
- Cap time_on_page at 30min to exclude outlier tabs left open
- Format time as human-readable (Xm Ys) instead of raw seconds
- Mask security tokens in ignored-pages list (/reset-password/*** etc.)
- Fix Polish labels (period display: "7 dni" instead of "week")
- Add percentages to logged/anonymous donut chart legend

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-10 19:07:50 +01:00
parent 41ad6a1b18
commit 4fb45df2aa
2 changed files with 143 additions and 22 deletions

View File

@ -682,6 +682,47 @@ def _tab_engagement(db, start_date, days):
# TAB 3: PAGE MAP
# ============================================================
EXCLUDED_PATHS = (
'/sw.js', '/robots.txt', '/sitemap.xml', '/favicon.ico',
'/apple-touch-icon', '/.well-known/', '/.git/', '/.env',
'/check-verification-status', '/manifest.json',
'/sito/', '/wordpress/', '/wp-', '/xmlrpc',
)
def _is_technical_path(path):
"""Check if path is a technical/system endpoint (not a user page)."""
if not path:
return True
for excl in EXCLUDED_PATHS:
if path.startswith(excl) or path == excl:
return True
return False
def _mask_token_path(path):
"""Mask security tokens in paths like /reset-password/<token>."""
import re
path = re.sub(r'(/reset-password/)[A-Za-z0-9_-]{10,}', r'\1***', path)
path = re.sub(r'(/verify-email/)[A-Za-z0-9_-]{10,}', r'\1***', path)
return path
def _format_time(seconds):
"""Format seconds to human-readable string."""
if seconds <= 0:
return '0s'
if seconds < 60:
return f'{seconds}s'
minutes = seconds // 60
secs = seconds % 60
if minutes < 60:
return f'{minutes}m {secs}s' if secs else f'{minutes}m'
hours = minutes // 60
mins = minutes % 60
return f'{hours}h {mins}m'
def _tab_pages(db, start_date, days):
"""Page popularity map."""
start_dt = datetime.combine(start_date, datetime.min.time())
@ -713,21 +754,25 @@ def _tab_pages(db, start_date, days):
if prefixes == ['/']:
conditions = [PageView.path == '/']
# Cap time_on_page at 1800s (30 min) to exclude outliers (tabs left open)
capped_time = func.least(PageView.time_on_page_seconds, 1800)
q = db.query(
func.count(PageView.id).label('views'),
func.count(func.distinct(PageView.user_id)).label('unique_users'),
func.avg(PageView.time_on_page_seconds).label('avg_time')
func.avg(capped_time).label('avg_time')
).join(UserSession, PageView.session_id == UserSession.id).filter(
or_(*conditions),
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).first()
avg_time_val = int(q.avg_time or 0)
sections.append({
'name': name,
'views': q.views or 0,
'unique_users': q.unique_users or 0,
'avg_time': int(q.avg_time or 0),
'avg_time': avg_time_val,
'avg_time_fmt': _format_time(avg_time_val),
})
max_views = max((s['views'] for s in sections), default=1) or 1
@ -735,36 +780,41 @@ def _tab_pages(db, start_date, days):
for s in sections:
s['intensity'] = min(100, int(s['views'] / max_views * 100))
# Top 50 pages (exclude bots)
top_pages = db.query(
# Top 50 pages (exclude bots + technical paths)
capped_time = func.least(PageView.time_on_page_seconds, 1800)
top_pages_raw = db.query(
PageView.path,
func.count(PageView.id).label('views'),
func.count(func.distinct(PageView.user_id)).label('unique_users'),
func.avg(PageView.time_on_page_seconds).label('avg_time'),
func.avg(capped_time).label('avg_time'),
func.avg(PageView.scroll_depth_percent).label('avg_scroll'),
func.avg(PageView.load_time_ms).label('avg_load'),
).join(UserSession, PageView.session_id == UserSession.id).filter(
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).group_by(PageView.path).order_by(desc('views')).limit(50).all()
).group_by(PageView.path).order_by(desc('views')).limit(80).all()
# Filter out technical paths after query (more flexible than SQL LIKE)
top_pages = [p for p in top_pages_raw if not _is_technical_path(p.path)][:50]
max_page_views = top_pages[0].views if top_pages else 1
pages_list = []
for p in top_pages:
avg_time_val = int(p.avg_time or 0)
pages_list.append({
'path': p.path,
'views': p.views,
'unique_users': p.unique_users,
'avg_time': int(p.avg_time or 0),
'avg_time': avg_time_val,
'avg_time_fmt': _format_time(avg_time_val),
'avg_scroll': int(p.avg_scroll or 0),
'avg_load': int(p.avg_load or 0),
'bar_pct': int(p.views / max_page_views * 100),
})
# Ignored pages (< 5 views in 30d, exclude bots)
# Ignored pages (< 5 views in 30d, exclude bots + technical paths)
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
ignored = db.query(
ignored_raw = db.query(
PageView.path,
func.count(PageView.id).label('views'),
).join(UserSession, PageView.session_id == UserSession.id).filter(
@ -772,7 +822,8 @@ def _tab_pages(db, start_date, days):
UserSession.is_bot == False
).group_by(PageView.path).having(
func.count(PageView.id) < 5
).order_by('views').limit(30).all()
).order_by('views').limit(60).all()
ignored = [p for p in ignored_raw if not _is_technical_path(p.path)][:30]
# Top searches
search_query = db.query(
@ -795,7 +846,7 @@ def _tab_pages(db, start_date, days):
return {
'sections': sections,
'top_pages': pages_list,
'ignored_pages': [{'path': p.path, 'views': p.views} for p in ignored],
'ignored_pages': [{'path': _mask_token_path(p.path), 'views': p.views} for p in ignored],
'top_searches': search_query,
'searches_no_results': no_results_query,
}
@ -820,7 +871,7 @@ def _tab_paths(db, start_date, days):
)
SELECT path, COUNT(*) as cnt
FROM first_pages
GROUP BY path ORDER BY cnt DESC LIMIT 10
GROUP BY path ORDER BY cnt DESC LIMIT 25
""")
entry_pages = db.execute(entry_sql, {'start_dt': start_dt}).fetchall()
@ -835,7 +886,7 @@ def _tab_paths(db, start_date, days):
)
SELECT path, COUNT(*) as cnt
FROM last_pages
GROUP BY path ORDER BY cnt DESC LIMIT 10
GROUP BY path ORDER BY cnt DESC LIMIT 25
""")
exit_pages = db.execute(exit_sql, {'start_dt': start_dt}).fetchall()
@ -854,7 +905,7 @@ def _tab_paths(db, start_date, days):
SELECT path, next_path, COUNT(*) as cnt
FROM ordered
WHERE next_path IS NOT NULL AND path != next_path
GROUP BY path, next_path ORDER BY cnt DESC LIMIT 30
GROUP BY path, next_path ORDER BY cnt DESC LIMIT 60
""")
transitions = db.execute(transitions_sql, {'start_dt': start_dt}).fetchall()
@ -911,11 +962,20 @@ def _tab_paths(db, start_date, days):
session_lengths = db.execute(session_length_sql, {'start_dt': start_dt}).fetchall()
max_sl = max((r.cnt for r in session_lengths), default=1) or 1
# Filter out technical paths from results
entry_filtered = [r for r in entry_pages if not _is_technical_path(r.path)][:10]
exit_filtered = [r for r in exit_pages if not _is_technical_path(r.path)][:10]
transitions_filtered = [r for r in transitions if not _is_technical_path(r.path) and not _is_technical_path(r.next_path)][:30]
dropoff_filtered = [r for r in dropoff if not _is_technical_path(r.path)][:20]
max_entry_f = entry_filtered[0].cnt if entry_filtered else 1
max_exit_f = exit_filtered[0].cnt if exit_filtered else 1
return {
'entry_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_entry * 100)} for r in entry_pages],
'exit_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_exit * 100)} for r in exit_pages],
'transitions': [{'from': r.path, 'to': r.next_path, 'count': r.cnt} for r in transitions],
'dropoff': [{'path': r.path, 'views': r.views, 'exits': r.exits, 'exit_rate': float(r.exit_rate)} for r in dropoff],
'entry_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_entry_f * 100)} for r in entry_filtered],
'exit_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_exit_f * 100)} for r in exit_filtered],
'transitions': [{'from': r.path, 'to': r.next_path, 'count': r.cnt} for r in transitions_filtered],
'dropoff': [{'path': r.path, 'views': r.views, 'exits': r.exits, 'exit_rate': float(r.exit_rate)} for r in dropoff_filtered],
'session_lengths': [{'bucket': r.bucket, 'count': r.cnt, 'bar_pct': int(r.cnt / max_sl * 100)} for r in session_lengths],
}
@ -930,6 +990,38 @@ def _tab_overview(db, start_date, days):
start_dt = datetime.combine(start_date, datetime.min.time())
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
# KPI stat cards (period-based, bot-filtered)
active_users = db.query(func.count(func.distinct(UserSession.user_id))).filter(
UserSession.started_at >= start_dt,
UserSession.user_id.isnot(None),
UserSession.is_bot == False
).scalar() or 0
total_sessions = db.query(func.count(UserSession.id)).filter(
UserSession.started_at >= start_dt,
UserSession.is_bot == False
).scalar() or 0
total_pageviews = db.query(func.count(PageView.id)).join(
UserSession, PageView.session_id == UserSession.id
).filter(
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).scalar() or 0
# Bounce rate: sessions with only 1 page view / total sessions
single_pv_sessions = db.query(func.count()).select_from(
db.query(PageView.session_id).join(
UserSession, PageView.session_id == UserSession.id
).filter(
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).group_by(PageView.session_id).having(
func.count(PageView.id) == 1
).subquery()
).scalar() or 0
bounce_rate = round(single_pv_sessions / total_sessions * 100) if total_sessions > 0 else 0
# Daily sessions from analytics_daily (already bot-filtered after migration)
daily_data = db.query(AnalyticsDaily).filter(
AnalyticsDaily.date >= date.today() - timedelta(days=30)
@ -1033,6 +1125,12 @@ def _tab_overview(db, start_date, days):
return {
'filter_type': filter_type,
'kpi': {
'active_users': active_users,
'sessions': total_sessions,
'pageviews': total_pageviews,
'bounce_rate': bounce_rate,
},
'chart_data': {
'labels': chart_labels,
'sessions': chart_sessions,

View File

@ -436,7 +436,7 @@
<div class="stats-grid">
<div class="stat-card success">
<div class="stat-value">{{ data.active_7d }}</div>
<div class="stat-label">Aktywni ({{ period }})</div>
<div class="stat-label">Aktywni ({% if period == 'day' %}dziś{% elif period == 'week' %}7 dni{% else %}30 dni{% endif %})</div>
</div>
<div class="stat-card warning">
<div class="stat-value">{{ data.at_risk }}</div>
@ -534,7 +534,7 @@
<div class="section-tile" style="background: rgba(34, 197, 94, {{ s.intensity / 100 * 0.3 + 0.05 }});">
<h3>{{ s.name }}</h3>
<div class="stat-value" style="font-size: var(--font-size-lg);">{{ s.views }}</div>
<div class="metric">{{ s.unique_users }} unikalnych &middot; {{ s.avg_time }}s śr.</div>
<div class="metric">{{ s.unique_users }} unikalnych &middot; {{ s.avg_time_fmt }} śr.</div>
</div>
{% endfor %}
</div>
@ -568,7 +568,7 @@
</div>
</td>
<td>{{ p.unique_users }}</td>
<td>{{ p.avg_time }}s</td>
<td>{{ p.avg_time_fmt }}</td>
<td>{{ p.avg_scroll }}%</td>
<td>
<span {% if p.avg_load > 3000 %}style="color: var(--error); font-weight: 600;"{% elif p.avg_load > 1500 %}style="color: #d97706;"{% endif %}>
@ -770,6 +770,26 @@
<!-- ============================================================ -->
{% elif tab == 'overview' %}
<!-- KPI Stat Cards -->
<div class="stats-grid">
<div class="stat-card success">
<div class="stat-value">{{ data.kpi.active_users }}</div>
<div class="stat-label">Aktywni użytkownicy</div>
</div>
<div class="stat-card info">
<div class="stat-value">{{ data.kpi.sessions }}</div>
<div class="stat-label">Sesje</div>
</div>
<div class="stat-card info">
<div class="stat-value">{{ data.kpi.pageviews }}</div>
<div class="stat-label">Odsłony</div>
</div>
<div class="stat-card {% if data.kpi.bounce_rate > 70 %}error{% elif data.kpi.bounce_rate > 50 %}warning{% else %}success{% endif %}">
<div class="stat-value">{{ data.kpi.bounce_rate }}%</div>
<div class="stat-label">Współczynnik odrzuceń</div>
</div>
</div>
<!-- Filter -->
<div style="margin-bottom: var(--spacing-lg);">
<div class="filter-group">
@ -1026,10 +1046,13 @@
// Auth doughnut
const authData = {{ data.logged_vs_anon|tojson|safe }};
const authCtx = document.getElementById('authChart').getContext('2d');
const authTotal = authData.logged + authData.anonymous;
const authPctLogged = authTotal > 0 ? Math.round(authData.logged / authTotal * 100) : 0;
const authPctAnon = authTotal > 0 ? Math.round(authData.anonymous / authTotal * 100) : 0;
new Chart(authCtx, {
type: 'doughnut',
data: {
labels: ['Zalogowani', 'Anonimowi'],
labels: ['Zalogowani (' + authPctLogged + '%)', 'Anonimowi (' + authPctAnon + '%)'],
datasets: [{
data: [authData.logged, authData.anonymous],
backgroundColor: ['#6366f1', '#d1d5db'],