fix: correct nested data extraction from SEOAuditor results
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

OnPageSEOResult uses nested objects (meta_tags.title, images.total_images,
structured_data.has_structured_data). TechnicalSEOResult uses robots_txt.exists,
sitemap.exists, canonical.has_canonical. Fixed all field access paths.
Extracted DB save logic to _save_audit_to_db() for clarity.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-21 15:39:24 +01:00
parent 4d6150fbde
commit 5484138bfb

View File

@ -14,7 +14,6 @@ import time as time_module
from datetime import datetime, date
from decimal import Decimal
import requests
from flask import (
abort, render_template, request, redirect, url_for,
flash, Response, stream_with_context
@ -61,6 +60,93 @@ def _get_auditor():
return SEOAuditor()
def _save_audit_to_db(result, notes, user_email):
    """Flatten an SEO audit result dict into a PortalSEOAudit row and save it.

    Keys read from ``result`` (the dict form produced after ``to_dict()``):
    - result['scores']['pagespeed_performance'] / ['pagespeed_seo'] /
      ['pagespeed_accessibility'] / ['pagespeed_best_practices']
    - result['pagespeed']['core_web_vitals']['lcp_ms'] etc.
    - result['onpage']['meta_tags']['title'] / ['description']
    - result['onpage']['images']['total_images'] / ['images_without_alt']
    - result['onpage']['structured_data']['has_structured_data']
    - result['onpage']['open_graph']['og_title'] (truthy = has OG tags)
    - result['technical']['robots_txt']['exists']
    - result['technical']['sitemap']['exists']
    - result['technical']['canonical']['has_canonical']
    - result['technical']['indexability']['is_indexable']
    - result['security_headers'][...]
    - result['final_url']

    A section that is missing *or* explicitly ``None`` is treated as empty,
    so a partially failed audit can still be stored.

    Args:
        result: Audit result dict assembled by the audit pipeline.
        notes: Free-text notes stored with the audit row.
        user_email: Stored in the row's ``created_by`` column.

    Returns:
        The id of the newly created PortalSEOAudit row.

    Raises:
        Exception: Any error raised while building or committing the row is
            re-raised after the session has been rolled back.
    """
    def _section(parent, key):
        # dict.get(key, {}) only falls back when the key is absent; a key
        # that is present with a None value would otherwise break the
        # chained .get() calls below.
        return parent.get(key) or {}

    onpage = _section(result, 'onpage')
    tech = _section(result, 'technical')
    ps = _section(result, 'pagespeed')
    scores = _section(result, 'scores')

    # On-page nested
    meta_tags = _section(onpage, 'meta_tags')
    images = _section(onpage, 'images')
    structured = _section(onpage, 'structured_data')
    og = _section(onpage, 'open_graph')

    # Technical nested
    robots = _section(tech, 'robots_txt')
    sitemap = _section(tech, 'sitemap')
    canonical = _section(tech, 'canonical')
    indexability = _section(tech, 'indexability')

    # PageSpeed nested
    cwv = _section(ps, 'core_web_vitals')

    # Security headers are read from the top level of the result, not from
    # the 'technical' section.
    # NOTE(review): confirm the caller actually populates
    # result['security_headers']; otherwise these columns are always None.
    sec = _section(result, 'security_headers')

    # 'final_url' may be absent or None when the page fetch failed.
    final_url = result.get('final_url') or ''

    db = SessionLocal()
    try:
        audit = PortalSEOAudit(
            audited_at=datetime.now(),
            url=PORTAL_URL,
            # PageSpeed scores
            pagespeed_performance=scores.get('pagespeed_performance'),
            pagespeed_seo=scores.get('pagespeed_seo'),
            pagespeed_accessibility=scores.get('pagespeed_accessibility'),
            pagespeed_best_practices=scores.get('pagespeed_best_practices'),
            # Core Web Vitals
            lcp_ms=cwv.get('lcp_ms'),
            fcp_ms=cwv.get('fcp_ms'),
            cls=cwv.get('cls'),
            tbt_ms=cwv.get('tbt_ms'),
            speed_index_ms=cwv.get('speed_index_ms'),
            # On-page checks
            has_meta_title=bool(meta_tags.get('title')),
            has_meta_description=bool(meta_tags.get('description')),
            has_canonical=canonical.get('has_canonical', False),
            has_robots_txt=robots.get('exists', False),
            has_sitemap=sitemap.get('exists', False),
            has_structured_data=structured.get('has_structured_data', False),
            has_og_tags=bool(og.get('og_title')),
            has_ssl=final_url.startswith('https'),
            # NOTE(review): this column is filled from the indexability
            # check, which by name is a different signal than mobile
            # friendliness - confirm this mapping is intended.
            is_mobile_friendly=indexability.get('is_indexable'),
            # Security headers
            has_hsts=sec.get('has_hsts'),
            has_csp=sec.get('has_csp'),
            has_x_frame=sec.get('has_x_frame_options'),
            has_x_content_type=sec.get('has_x_content_type'),
            # Content metrics
            image_count=images.get('total_images'),
            images_without_alt=images.get('images_without_alt'),
            # Full data
            full_results=_make_json_safe(result),
            notes=notes,
            created_by=user_email,
        )
        db.add(audit)
        db.commit()
        return audit.id
    except Exception:
        # Leave the session clean before handing the error to the caller.
        db.rollback()
        raise
    finally:
        db.close()
@bp.route('/portal-seo')
@login_required
def admin_portal_seo():
@ -85,32 +171,25 @@ def admin_portal_seo():
@bp.route('/portal-seo/run/stream')
@login_required
def admin_portal_seo_run_stream():
"""SSE endpoint for streaming portal SEO audit progress step by step."""
"""SSE endpoint for streaming portal SEO audit progress step by step.
Runs the same audit pipeline as SEOAuditor.audit_company() but yields
progress events between each step.
"""
if not is_audit_owner():
abort(404)
notes = request.args.get('notes', '')
user_email = current_user.email
STEPS = [
(1, 'Inicjalizacja audytora SEO...'),
(2, 'Pobieranie strony nordabiznes.pl...'),
(3, 'Analiza on-page SEO...'),
(4, 'Sprawdzanie techniczne (robots.txt, sitemap, SSL)...'),
(5, 'PageSpeed Insights API...'),
(6, 'Analiza Local SEO...'),
(7, 'Sprawdzanie cytowań (citations)...'),
(8, 'Sprawdzanie aktualności treści...'),
(9, 'Zapisywanie wyników...'),
]
TOTAL = len(STEPS)
TOTAL = 9
def generate():
try:
# Step 1: Init auditor
yield _sse_event({
'step': 1, 'total': TOTAL,
'message': STEPS[0][1], 'status': 'running'
'message': 'Inicjalizacja audytora SEO...', 'status': 'running'
})
auditor = _get_auditor()
@ -133,6 +212,7 @@ def admin_portal_seo_run_stream():
'audit_date': datetime.now(),
'website_url': PORTAL_URL,
'errors': [],
'scores': {},
}
html_content = None
final_url = PORTAL_URL
@ -140,7 +220,7 @@ def admin_portal_seo_run_stream():
# Step 2: Fetch page
yield _sse_event({
'step': 2, 'total': TOTAL,
'message': STEPS[1][1], 'status': 'running'
'message': 'Pobieranie strony nordabiznes.pl...', 'status': 'running'
})
try:
@ -150,49 +230,65 @@ def admin_portal_seo_run_stream():
)
load_ms = int((time_module.time() - start) * 1000)
final_url = resp.url
result['http_status'] = resp.status_code
result['load_time_ms'] = load_ms
result['final_url'] = final_url
if resp.status_code == 200:
if resp.encoding and resp.encoding.lower() == 'iso-8859-1':
resp.encoding = resp.apparent_encoding
html_content = resp.text
result['http_status'] = 200
result['load_time_ms'] = load_ms
result['final_url'] = final_url
yield _sse_event({
'step': 2, 'total': TOTAL,
'message': f'Strona pobrana ({load_ms}ms)',
'message': f'Strona pobrana ({load_ms}ms, {len(html_content)//1024}KB)',
'status': 'done'
})
else:
result['errors'].append(f'HTTP {resp.status_code}')
yield _sse_event({
'step': 2, 'total': TOTAL,
'message': f'HTTP {resp.status_code}',
'status': 'warning'
'message': f'HTTP {resp.status_code}', 'status': 'warning'
})
except Exception as e:
result['errors'].append(str(e)[:100])
yield _sse_event({
'step': 2, 'total': TOTAL,
'message': f'Błąd pobierania: {str(e)[:80]}',
'status': 'error'
'message': f'Błąd: {str(e)[:80]}', 'status': 'error'
})
# Step 3: On-page analysis
yield _sse_event({
'step': 3, 'total': TOTAL,
'message': STEPS[2][1], 'status': 'running'
'message': 'Analiza on-page SEO (meta tagi, nagłówki, obrazy)...',
'status': 'running'
})
if html_content:
try:
onpage = auditor.onpage_analyzer.analyze_html(
onpage_obj = auditor.onpage_analyzer.analyze_html(
html_content, base_url=final_url
)
result['onpage'] = onpage.to_dict()
onpage_dict = onpage_obj.to_dict()
result['onpage'] = onpage_dict
# Build summary
mt = onpage_dict.get('meta_tags', {})
imgs = onpage_dict.get('images', {})
sd = onpage_dict.get('structured_data', {})
title = mt.get('title', '')
title_short = (title[:35] + '...') if title and len(title) > 35 else title
parts = []
if title:
parts.append(f'title="{title_short}"')
parts.append(f'{imgs.get("total_images", 0)} obrazów')
if imgs.get('images_without_alt'):
parts.append(f'{imgs["images_without_alt"]} bez alt')
if sd.get('has_structured_data'):
parts.append('Schema.org')
yield _sse_event({
'step': 3, 'total': TOTAL,
'message': f'On-page: title="{onpage.meta_title[:40]}..."' if onpage.meta_title else 'On-page: brak meta title',
'message': f'On-page: {", ".join(parts)}',
'status': 'done'
})
except Exception as e:
@ -205,30 +301,45 @@ def admin_portal_seo_run_stream():
else:
yield _sse_event({
'step': 3, 'total': TOTAL,
'message': 'Pominięto (brak HTML)',
'status': 'skipped'
'message': 'Pominięto (brak HTML)', 'status': 'skipped'
})
# Step 4: Technical SEO
yield _sse_event({
'step': 4, 'total': TOTAL,
'message': STEPS[3][1], 'status': 'running'
'message': 'Sprawdzanie techniczne (robots.txt, sitemap, SSL, canonical)...',
'status': 'running'
})
try:
tech_result = auditor.technical_checker.check_url(final_url)
result['technical'] = tech_result.to_dict()
checks = []
td = tech_result.to_dict()
if td.get('has_robots_txt'):
checks.append('robots.txt')
if td.get('has_sitemap'):
checks.append('sitemap')
if td.get('has_ssl'):
checks.append('SSL')
tech_obj = auditor.technical_checker.check_url(final_url)
tech_dict = tech_obj.to_dict()
result['technical'] = tech_dict
checks_ok = []
checks_fail = []
if tech_dict.get('robots_txt', {}).get('exists'):
checks_ok.append('robots.txt')
else:
checks_fail.append('robots.txt')
if tech_dict.get('sitemap', {}).get('exists'):
checks_ok.append('sitemap')
else:
checks_fail.append('sitemap')
if tech_dict.get('canonical', {}).get('has_canonical'):
checks_ok.append('canonical')
else:
checks_fail.append('canonical')
msg_parts = []
if checks_ok:
msg_parts.append(f'OK: {", ".join(checks_ok)}')
if checks_fail:
msg_parts.append(f'Brak: {", ".join(checks_fail)}')
yield _sse_event({
'step': 4, 'total': TOTAL,
'message': f'Technical: {", ".join(checks) if checks else "brak kluczowych elementów"}',
'message': f'Technical: {" | ".join(msg_parts)}',
'status': 'done'
})
except Exception as e:
@ -242,7 +353,8 @@ def admin_portal_seo_run_stream():
# Step 5: PageSpeed Insights
yield _sse_event({
'step': 5, 'total': TOTAL,
'message': STEPS[4][1], 'status': 'running'
'message': 'PageSpeed Insights API (może potrwać do 30s)...',
'status': 'running'
})
try:
@ -261,14 +373,19 @@ def admin_portal_seo_run_stream():
}
yield _sse_event({
'step': 5, 'total': TOTAL,
'message': f'PageSpeed: Perf={ps_result.scores.performance}, SEO={ps_result.scores.seo}',
'message': (
f'Perf={ps_result.scores.performance}, '
f'SEO={ps_result.scores.seo}, '
f'A11y={ps_result.scores.accessibility}, '
f'BP={ps_result.scores.best_practices}'
),
'status': 'done'
})
else:
result['errors'].append('PageSpeed API quota exceeded')
yield _sse_event({
'step': 5, 'total': TOTAL,
'message': 'Limit API wyczerpany — pominięto',
'message': 'Limit API wyczerpany',
'status': 'warning'
})
except Exception as e:
@ -282,7 +399,8 @@ def admin_portal_seo_run_stream():
# Step 6: Local SEO
yield _sse_event({
'step': 6, 'total': TOTAL,
'message': STEPS[5][1], 'status': 'running'
'message': 'Analiza Local SEO (NAP, Google Maps, lokalne słowa kluczowe)...',
'status': 'running'
})
if html_content:
@ -294,7 +412,7 @@ def admin_portal_seo_run_stream():
score = local_seo.get('local_seo_score', 0)
yield _sse_event({
'step': 6, 'total': TOTAL,
'message': f'Local SEO score: {score}',
'message': f'Local SEO score: {score}/100',
'status': 'done'
})
except Exception as e:
@ -307,14 +425,14 @@ def admin_portal_seo_run_stream():
else:
yield _sse_event({
'step': 6, 'total': TOTAL,
'message': 'Pominięto (brak HTML)',
'status': 'skipped'
'message': 'Pominięto (brak HTML)', 'status': 'skipped'
})
# Step 7: Citations
yield _sse_event({
'step': 7, 'total': TOTAL,
'message': STEPS[6][1], 'status': 'running'
'message': 'Sprawdzanie cytowań w katalogach (Google, Yelp, Facebook)...',
'status': 'running'
})
try:
@ -339,7 +457,8 @@ def admin_portal_seo_run_stream():
# Step 8: Content freshness
yield _sse_event({
'step': 8, 'total': TOTAL,
'message': STEPS[7][1], 'status': 'running'
'message': 'Sprawdzanie aktualności treści...',
'status': 'running'
})
try:
@ -350,7 +469,7 @@ def admin_portal_seo_run_stream():
fscore = freshness.get('content_freshness_score', 0)
yield _sse_event({
'step': 8, 'total': TOTAL,
'message': f'Aktualność treści: {fscore}',
'message': f'Aktualność treści: {fscore}/100',
'status': 'done'
})
except Exception as e:
@ -364,54 +483,12 @@ def admin_portal_seo_run_stream():
# Step 9: Save to DB
yield _sse_event({
'step': 9, 'total': TOTAL,
'message': STEPS[8][1], 'status': 'running'
'message': 'Zapisywanie wyników do bazy danych...',
'status': 'running'
})
# Extract data for DB columns
ps = result.get('pagespeed', {})
ps_scores = result.get('scores', {})
cwv = ps.get('core_web_vitals', {})
tech = result.get('technical', {})
onpage = result.get('onpage', {})
sec = tech.get('security_headers', {})
db = SessionLocal()
try:
audit = PortalSEOAudit(
audited_at=datetime.now(),
url=PORTAL_URL,
pagespeed_performance=ps_scores.get('pagespeed_performance'),
pagespeed_seo=ps_scores.get('pagespeed_seo'),
pagespeed_accessibility=ps_scores.get('pagespeed_accessibility'),
pagespeed_best_practices=ps_scores.get('pagespeed_best_practices'),
lcp_ms=cwv.get('lcp_ms'),
fcp_ms=cwv.get('fcp_ms'),
cls=cwv.get('cls'),
tbt_ms=cwv.get('tbt_ms'),
speed_index_ms=cwv.get('speed_index_ms'),
has_meta_title=bool(onpage.get('meta_title')),
has_meta_description=bool(onpage.get('meta_description')),
has_canonical=tech.get('has_canonical'),
has_robots_txt=tech.get('has_robots_txt'),
has_sitemap=tech.get('has_sitemap'),
has_structured_data=onpage.get('has_structured_data'),
has_og_tags=onpage.get('has_og_tags'),
has_ssl=tech.get('has_ssl'),
is_mobile_friendly=tech.get('is_mobile_friendly'),
has_hsts=sec.get('has_hsts'),
has_csp=sec.get('has_csp'),
has_x_frame=sec.get('has_x_frame_options'),
has_x_content_type=sec.get('has_x_content_type'),
page_size_bytes=onpage.get('page_size_bytes'),
image_count=onpage.get('total_images'),
images_without_alt=onpage.get('images_without_alt'),
full_results=_make_json_safe(result),
notes=notes,
created_by=user_email
)
db.add(audit)
db.commit()
audit_id = audit.id
audit_id = _save_audit_to_db(result, notes, user_email)
yield _sse_event({
'step': 9, 'total': TOTAL,
@ -419,17 +496,15 @@ def admin_portal_seo_run_stream():
'status': 'done'
})
# Send complete event
yield _sse_event({
'status': 'complete',
'audit_id': audit_id,
'performance': ps_scores.get('pagespeed_performance'),
'seo': ps_scores.get('pagespeed_seo'),
'performance': result.get('scores', {}).get('pagespeed_performance'),
'seo': result.get('scores', {}).get('pagespeed_seo'),
'errors': result.get('errors', []),
})
except Exception as e:
db.rollback()
logger.error(f'Portal SEO save failed: {e}', exc_info=True)
yield _sse_event({
'step': 9, 'total': TOTAL,
@ -437,8 +512,6 @@ def admin_portal_seo_run_stream():
'status': 'error'
})
yield _sse_event({'status': 'error', 'message': str(e)[:200]})
finally:
db.close()
except Exception as e:
logger.error(f'Portal SEO audit stream failed: {e}', exc_info=True)