fix: improve analytics data quality and human-friendliness
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Add KPI stat cards to Overview tab (active users, sessions, pageviews, bounce rate)
- Filter technical paths from Pages and Paths tabs (/sw.js, /robots.txt, /.git/, /.env, etc.)
- Cap time_on_page at 30 min to exclude outliers (tabs left open)
- Format time as human-readable (Xm Ys) instead of raw seconds
- Mask security tokens in unused pages list (/reset-password/*** etc.)
- Fix Polish labels (period display: "7 dni" instead of "week")
- Add percentages to logged/anonymous donut chart legend

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
41ad6a1b18
commit
4fb45df2aa
@ -682,6 +682,47 @@ def _tab_engagement(db, start_date, days):
|
||||
# TAB 3: PAGE MAP
|
||||
# ============================================================
|
||||
|
||||
EXCLUDED_PATHS = (
|
||||
'/sw.js', '/robots.txt', '/sitemap.xml', '/favicon.ico',
|
||||
'/apple-touch-icon', '/.well-known/', '/.git/', '/.env',
|
||||
'/check-verification-status', '/manifest.json',
|
||||
'/sito/', '/wordpress/', '/wp-', '/xmlrpc',
|
||||
)
|
||||
|
||||
|
||||
def _is_technical_path(path):
|
||||
"""Check if path is a technical/system endpoint (not a user page)."""
|
||||
if not path:
|
||||
return True
|
||||
for excl in EXCLUDED_PATHS:
|
||||
if path.startswith(excl) or path == excl:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _mask_token_path(path):
|
||||
"""Mask security tokens in paths like /reset-password/<token>."""
|
||||
import re
|
||||
path = re.sub(r'(/reset-password/)[A-Za-z0-9_-]{10,}', r'\1***', path)
|
||||
path = re.sub(r'(/verify-email/)[A-Za-z0-9_-]{10,}', r'\1***', path)
|
||||
return path
|
||||
|
||||
|
||||
def _format_time(seconds):
|
||||
"""Format seconds to human-readable string."""
|
||||
if seconds <= 0:
|
||||
return '0s'
|
||||
if seconds < 60:
|
||||
return f'{seconds}s'
|
||||
minutes = seconds // 60
|
||||
secs = seconds % 60
|
||||
if minutes < 60:
|
||||
return f'{minutes}m {secs}s' if secs else f'{minutes}m'
|
||||
hours = minutes // 60
|
||||
mins = minutes % 60
|
||||
return f'{hours}h {mins}m'
|
||||
|
||||
|
||||
def _tab_pages(db, start_date, days):
|
||||
"""Page popularity map."""
|
||||
start_dt = datetime.combine(start_date, datetime.min.time())
|
||||
@ -713,21 +754,25 @@ def _tab_pages(db, start_date, days):
|
||||
if prefixes == ['/']:
|
||||
conditions = [PageView.path == '/']
|
||||
|
||||
# Cap time_on_page at 1800s (30 min) to exclude outliers (tabs left open)
|
||||
capped_time = func.least(PageView.time_on_page_seconds, 1800)
|
||||
q = db.query(
|
||||
func.count(PageView.id).label('views'),
|
||||
func.count(func.distinct(PageView.user_id)).label('unique_users'),
|
||||
func.avg(PageView.time_on_page_seconds).label('avg_time')
|
||||
func.avg(capped_time).label('avg_time')
|
||||
).join(UserSession, PageView.session_id == UserSession.id).filter(
|
||||
or_(*conditions),
|
||||
PageView.viewed_at >= start_dt,
|
||||
UserSession.is_bot == False
|
||||
).first()
|
||||
|
||||
avg_time_val = int(q.avg_time or 0)
|
||||
sections.append({
|
||||
'name': name,
|
||||
'views': q.views or 0,
|
||||
'unique_users': q.unique_users or 0,
|
||||
'avg_time': int(q.avg_time or 0),
|
||||
'avg_time': avg_time_val,
|
||||
'avg_time_fmt': _format_time(avg_time_val),
|
||||
})
|
||||
|
||||
max_views = max((s['views'] for s in sections), default=1) or 1
|
||||
@ -735,36 +780,41 @@ def _tab_pages(db, start_date, days):
|
||||
for s in sections:
|
||||
s['intensity'] = min(100, int(s['views'] / max_views * 100))
|
||||
|
||||
# Top 50 pages (exclude bots)
|
||||
top_pages = db.query(
|
||||
# Top 50 pages (exclude bots + technical paths)
|
||||
capped_time = func.least(PageView.time_on_page_seconds, 1800)
|
||||
top_pages_raw = db.query(
|
||||
PageView.path,
|
||||
func.count(PageView.id).label('views'),
|
||||
func.count(func.distinct(PageView.user_id)).label('unique_users'),
|
||||
func.avg(PageView.time_on_page_seconds).label('avg_time'),
|
||||
func.avg(capped_time).label('avg_time'),
|
||||
func.avg(PageView.scroll_depth_percent).label('avg_scroll'),
|
||||
func.avg(PageView.load_time_ms).label('avg_load'),
|
||||
).join(UserSession, PageView.session_id == UserSession.id).filter(
|
||||
PageView.viewed_at >= start_dt,
|
||||
UserSession.is_bot == False
|
||||
).group_by(PageView.path).order_by(desc('views')).limit(50).all()
|
||||
).group_by(PageView.path).order_by(desc('views')).limit(80).all()
|
||||
|
||||
# Filter out technical paths after query (more flexible than SQL LIKE)
|
||||
top_pages = [p for p in top_pages_raw if not _is_technical_path(p.path)][:50]
|
||||
max_page_views = top_pages[0].views if top_pages else 1
|
||||
|
||||
pages_list = []
|
||||
for p in top_pages:
|
||||
avg_time_val = int(p.avg_time or 0)
|
||||
pages_list.append({
|
||||
'path': p.path,
|
||||
'views': p.views,
|
||||
'unique_users': p.unique_users,
|
||||
'avg_time': int(p.avg_time or 0),
|
||||
'avg_time': avg_time_val,
|
||||
'avg_time_fmt': _format_time(avg_time_val),
|
||||
'avg_scroll': int(p.avg_scroll or 0),
|
||||
'avg_load': int(p.avg_load or 0),
|
||||
'bar_pct': int(p.views / max_page_views * 100),
|
||||
})
|
||||
|
||||
# Ignored pages (< 5 views in 30d, exclude bots)
|
||||
# Ignored pages (< 5 views in 30d, exclude bots + technical paths)
|
||||
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
|
||||
ignored = db.query(
|
||||
ignored_raw = db.query(
|
||||
PageView.path,
|
||||
func.count(PageView.id).label('views'),
|
||||
).join(UserSession, PageView.session_id == UserSession.id).filter(
|
||||
@ -772,7 +822,8 @@ def _tab_pages(db, start_date, days):
|
||||
UserSession.is_bot == False
|
||||
).group_by(PageView.path).having(
|
||||
func.count(PageView.id) < 5
|
||||
).order_by('views').limit(30).all()
|
||||
).order_by('views').limit(60).all()
|
||||
ignored = [p for p in ignored_raw if not _is_technical_path(p.path)][:30]
|
||||
|
||||
# Top searches
|
||||
search_query = db.query(
|
||||
@ -795,7 +846,7 @@ def _tab_pages(db, start_date, days):
|
||||
return {
|
||||
'sections': sections,
|
||||
'top_pages': pages_list,
|
||||
'ignored_pages': [{'path': p.path, 'views': p.views} for p in ignored],
|
||||
'ignored_pages': [{'path': _mask_token_path(p.path), 'views': p.views} for p in ignored],
|
||||
'top_searches': search_query,
|
||||
'searches_no_results': no_results_query,
|
||||
}
|
||||
@ -820,7 +871,7 @@ def _tab_paths(db, start_date, days):
|
||||
)
|
||||
SELECT path, COUNT(*) as cnt
|
||||
FROM first_pages
|
||||
GROUP BY path ORDER BY cnt DESC LIMIT 10
|
||||
GROUP BY path ORDER BY cnt DESC LIMIT 25
|
||||
""")
|
||||
entry_pages = db.execute(entry_sql, {'start_dt': start_dt}).fetchall()
|
||||
|
||||
@ -835,7 +886,7 @@ def _tab_paths(db, start_date, days):
|
||||
)
|
||||
SELECT path, COUNT(*) as cnt
|
||||
FROM last_pages
|
||||
GROUP BY path ORDER BY cnt DESC LIMIT 10
|
||||
GROUP BY path ORDER BY cnt DESC LIMIT 25
|
||||
""")
|
||||
exit_pages = db.execute(exit_sql, {'start_dt': start_dt}).fetchall()
|
||||
|
||||
@ -854,7 +905,7 @@ def _tab_paths(db, start_date, days):
|
||||
SELECT path, next_path, COUNT(*) as cnt
|
||||
FROM ordered
|
||||
WHERE next_path IS NOT NULL AND path != next_path
|
||||
GROUP BY path, next_path ORDER BY cnt DESC LIMIT 30
|
||||
GROUP BY path, next_path ORDER BY cnt DESC LIMIT 60
|
||||
""")
|
||||
transitions = db.execute(transitions_sql, {'start_dt': start_dt}).fetchall()
|
||||
|
||||
@ -911,11 +962,20 @@ def _tab_paths(db, start_date, days):
|
||||
session_lengths = db.execute(session_length_sql, {'start_dt': start_dt}).fetchall()
|
||||
max_sl = max((r.cnt for r in session_lengths), default=1) or 1
|
||||
|
||||
# Filter out technical paths from results
|
||||
entry_filtered = [r for r in entry_pages if not _is_technical_path(r.path)][:10]
|
||||
exit_filtered = [r for r in exit_pages if not _is_technical_path(r.path)][:10]
|
||||
transitions_filtered = [r for r in transitions if not _is_technical_path(r.path) and not _is_technical_path(r.next_path)][:30]
|
||||
dropoff_filtered = [r for r in dropoff if not _is_technical_path(r.path)][:20]
|
||||
|
||||
max_entry_f = entry_filtered[0].cnt if entry_filtered else 1
|
||||
max_exit_f = exit_filtered[0].cnt if exit_filtered else 1
|
||||
|
||||
return {
|
||||
'entry_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_entry * 100)} for r in entry_pages],
|
||||
'exit_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_exit * 100)} for r in exit_pages],
|
||||
'transitions': [{'from': r.path, 'to': r.next_path, 'count': r.cnt} for r in transitions],
|
||||
'dropoff': [{'path': r.path, 'views': r.views, 'exits': r.exits, 'exit_rate': float(r.exit_rate)} for r in dropoff],
|
||||
'entry_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_entry_f * 100)} for r in entry_filtered],
|
||||
'exit_pages': [{'path': r.path, 'count': r.cnt, 'bar_pct': int(r.cnt / max_exit_f * 100)} for r in exit_filtered],
|
||||
'transitions': [{'from': r.path, 'to': r.next_path, 'count': r.cnt} for r in transitions_filtered],
|
||||
'dropoff': [{'path': r.path, 'views': r.views, 'exits': r.exits, 'exit_rate': float(r.exit_rate)} for r in dropoff_filtered],
|
||||
'session_lengths': [{'bucket': r.bucket, 'count': r.cnt, 'bar_pct': int(r.cnt / max_sl * 100)} for r in session_lengths],
|
||||
}
|
||||
|
||||
@ -930,6 +990,38 @@ def _tab_overview(db, start_date, days):
|
||||
start_dt = datetime.combine(start_date, datetime.min.time())
|
||||
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
|
||||
|
||||
# KPI stat cards (period-based, bot-filtered)
|
||||
active_users = db.query(func.count(func.distinct(UserSession.user_id))).filter(
|
||||
UserSession.started_at >= start_dt,
|
||||
UserSession.user_id.isnot(None),
|
||||
UserSession.is_bot == False
|
||||
).scalar() or 0
|
||||
|
||||
total_sessions = db.query(func.count(UserSession.id)).filter(
|
||||
UserSession.started_at >= start_dt,
|
||||
UserSession.is_bot == False
|
||||
).scalar() or 0
|
||||
|
||||
total_pageviews = db.query(func.count(PageView.id)).join(
|
||||
UserSession, PageView.session_id == UserSession.id
|
||||
).filter(
|
||||
PageView.viewed_at >= start_dt,
|
||||
UserSession.is_bot == False
|
||||
).scalar() or 0
|
||||
|
||||
# Bounce rate: sessions with only 1 page view / total sessions
|
||||
single_pv_sessions = db.query(func.count()).select_from(
|
||||
db.query(PageView.session_id).join(
|
||||
UserSession, PageView.session_id == UserSession.id
|
||||
).filter(
|
||||
PageView.viewed_at >= start_dt,
|
||||
UserSession.is_bot == False
|
||||
).group_by(PageView.session_id).having(
|
||||
func.count(PageView.id) == 1
|
||||
).subquery()
|
||||
).scalar() or 0
|
||||
bounce_rate = round(single_pv_sessions / total_sessions * 100) if total_sessions > 0 else 0
|
||||
|
||||
# Daily sessions from analytics_daily (already bot-filtered after migration)
|
||||
daily_data = db.query(AnalyticsDaily).filter(
|
||||
AnalyticsDaily.date >= date.today() - timedelta(days=30)
|
||||
@ -1033,6 +1125,12 @@ def _tab_overview(db, start_date, days):
|
||||
|
||||
return {
|
||||
'filter_type': filter_type,
|
||||
'kpi': {
|
||||
'active_users': active_users,
|
||||
'sessions': total_sessions,
|
||||
'pageviews': total_pageviews,
|
||||
'bounce_rate': bounce_rate,
|
||||
},
|
||||
'chart_data': {
|
||||
'labels': chart_labels,
|
||||
'sessions': chart_sessions,
|
||||
|
||||
@ -436,7 +436,7 @@
|
||||
<div class="stats-grid">
|
||||
<div class="stat-card success">
|
||||
<div class="stat-value">{{ data.active_7d }}</div>
|
||||
<div class="stat-label">Aktywni ({{ period }})</div>
|
||||
<div class="stat-label">Aktywni ({% if period == 'day' %}dziś{% elif period == 'week' %}7 dni{% else %}30 dni{% endif %})</div>
|
||||
</div>
|
||||
<div class="stat-card warning">
|
||||
<div class="stat-value">{{ data.at_risk }}</div>
|
||||
@ -534,7 +534,7 @@
|
||||
<div class="section-tile" style="background: rgba(34, 197, 94, {{ s.intensity / 100 * 0.3 + 0.05 }});">
|
||||
<h3>{{ s.name }}</h3>
|
||||
<div class="stat-value" style="font-size: var(--font-size-lg);">{{ s.views }}</div>
|
||||
<div class="metric">{{ s.unique_users }} unikalnych · {{ s.avg_time }}s śr.</div>
|
||||
<div class="metric">{{ s.unique_users }} unikalnych · {{ s.avg_time_fmt }} śr.</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
@ -568,7 +568,7 @@
|
||||
</div>
|
||||
</td>
|
||||
<td>{{ p.unique_users }}</td>
|
||||
<td>{{ p.avg_time }}s</td>
|
||||
<td>{{ p.avg_time_fmt }}</td>
|
||||
<td>{{ p.avg_scroll }}%</td>
|
||||
<td>
|
||||
<span {% if p.avg_load > 3000 %}style="color: var(--error); font-weight: 600;"{% elif p.avg_load > 1500 %}style="color: #d97706;"{% endif %}>
|
||||
@ -770,6 +770,26 @@
|
||||
<!-- ============================================================ -->
|
||||
{% elif tab == 'overview' %}
|
||||
|
||||
<!-- KPI Stat Cards -->
|
||||
<div class="stats-grid">
|
||||
<div class="stat-card success">
|
||||
<div class="stat-value">{{ data.kpi.active_users }}</div>
|
||||
<div class="stat-label">Aktywni użytkownicy</div>
|
||||
</div>
|
||||
<div class="stat-card info">
|
||||
<div class="stat-value">{{ data.kpi.sessions }}</div>
|
||||
<div class="stat-label">Sesje</div>
|
||||
</div>
|
||||
<div class="stat-card info">
|
||||
<div class="stat-value">{{ data.kpi.pageviews }}</div>
|
||||
<div class="stat-label">Odsłony</div>
|
||||
</div>
|
||||
<div class="stat-card {% if data.kpi.bounce_rate > 70 %}error{% elif data.kpi.bounce_rate > 50 %}warning{% else %}success{% endif %}">
|
||||
<div class="stat-value">{{ data.kpi.bounce_rate }}%</div>
|
||||
<div class="stat-label">Współczynnik odrzuceń</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Filter -->
|
||||
<div style="margin-bottom: var(--spacing-lg);">
|
||||
<div class="filter-group">
|
||||
@ -1026,10 +1046,13 @@
|
||||
// Auth doughnut
|
||||
const authData = {{ data.logged_vs_anon|tojson|safe }};
|
||||
const authCtx = document.getElementById('authChart').getContext('2d');
|
||||
const authTotal = authData.logged + authData.anonymous;
|
||||
const authPctLogged = authTotal > 0 ? Math.round(authData.logged / authTotal * 100) : 0;
|
||||
const authPctAnon = authTotal > 0 ? Math.round(authData.anonymous / authTotal * 100) : 0;
|
||||
new Chart(authCtx, {
|
||||
type: 'doughnut',
|
||||
data: {
|
||||
labels: ['Zalogowani', 'Anonimowi'],
|
||||
labels: ['Zalogowani (' + authPctLogged + '%)', 'Anonimowi (' + authPctAnon + '%)'],
|
||||
datasets: [{
|
||||
data: [authData.logged, authData.anonymous],
|
||||
backgroundColor: ['#6366f1', '#d1d5db'],
|
||||
|
||||
Loading…
Reference in New Issue
Block a user