fix: correct nested data extraction from SEOAuditor results
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
OnPageSEOResult uses nested objects (meta_tags.title, images.total_images, structured_data.has_structured_data). TechnicalSEOResult uses robots_txt.exists, sitemap.exists, canonical.has_canonical. Fixed all field access paths. Extracted DB save logic to _save_audit_to_db() for clarity. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4d6150fbde
commit
5484138bfb
@ -14,7 +14,6 @@ import time as time_module
|
||||
from datetime import datetime, date
|
||||
from decimal import Decimal
|
||||
|
||||
import requests
|
||||
from flask import (
|
||||
abort, render_template, request, redirect, url_for,
|
||||
flash, Response, stream_with_context
|
||||
@ -61,6 +60,93 @@ def _get_auditor():
|
||||
return SEOAuditor()
|
||||
|
||||
|
||||
def _extract_audit_columns(result):
    """Flatten the nested audit result dict into PortalSEOAudit column values.

    Expected result structure (after to_dict()):
      - result['onpage']['meta_tags']['title'] / ['description']
      - result['onpage']['images']['total_images'] / ['images_without_alt']
      - result['onpage']['structured_data']['has_structured_data']
      - result['onpage']['open_graph']['og_title'] (non-empty = has OG tags)
      - result['technical']['robots_txt']['exists']
      - result['technical']['sitemap']['exists']
      - result['technical']['canonical']['has_canonical']
      - result['technical']['indexability']['is_indexable']
      - result['pagespeed']['core_web_vitals']['lcp_ms'] etc.
      - result['scores']['pagespeed_performance'] etc.
      - result['security_headers']['has_hsts'] etc.
      - result['final_url']

    Any missing section simply yields None / False for its columns.

    Returns:
        dict mapping PortalSEOAudit column names to extracted values.
    """

    def section(mapping, key):
        # A plain .get(key, {}) still returns None when the key is present
        # with a None value (presumably a sub-check that produced nothing),
        # and the chained .get() would then raise AttributeError.
        value = mapping.get(key)
        return value if isinstance(value, dict) else {}

    onpage = section(result, 'onpage')
    technical = section(result, 'technical')
    scores = section(result, 'scores')
    cwv = section(section(result, 'pagespeed'), 'core_web_vitals')

    # NOTE(review): nothing visible in this diff populates
    # result['security_headers']; confirm the caller sets it, otherwise the
    # four security columns are always stored as None.
    security = section(result, 'security_headers')

    meta_tags = section(onpage, 'meta_tags')
    images = section(onpage, 'images')
    structured = section(onpage, 'structured_data')
    open_graph = section(onpage, 'open_graph')

    robots = section(technical, 'robots_txt')
    sitemap = section(technical, 'sitemap')
    canonical = section(technical, 'canonical')
    indexability = section(technical, 'indexability')

    # Tolerate a missing or None final_url and match the full scheme prefix.
    final_url = str(result.get('final_url') or '')

    # NOTE(review): the page_size_bytes column was filled by the previous
    # inline save code but is not set here - confirm that is intentional.
    return {
        # PageSpeed scores
        'pagespeed_performance': scores.get('pagespeed_performance'),
        'pagespeed_seo': scores.get('pagespeed_seo'),
        'pagespeed_accessibility': scores.get('pagespeed_accessibility'),
        'pagespeed_best_practices': scores.get('pagespeed_best_practices'),
        # Core Web Vitals
        'lcp_ms': cwv.get('lcp_ms'),
        'fcp_ms': cwv.get('fcp_ms'),
        'cls': cwv.get('cls'),
        'tbt_ms': cwv.get('tbt_ms'),
        'speed_index_ms': cwv.get('speed_index_ms'),
        # On-page / technical checks
        'has_meta_title': bool(meta_tags.get('title')),
        'has_meta_description': bool(meta_tags.get('description')),
        'has_canonical': canonical.get('has_canonical', False),
        'has_robots_txt': robots.get('exists', False),
        'has_sitemap': sitemap.get('exists', False),
        'has_structured_data': structured.get('has_structured_data', False),
        'has_og_tags': bool(open_graph.get('og_title')),
        'has_ssl': final_url.lower().startswith('https://'),
        # NOTE(review): indexability is used as a stand-in for mobile
        # friendliness here; the two are different signals - confirm.
        'is_mobile_friendly': indexability.get('is_indexable'),
        # Security headers
        'has_hsts': security.get('has_hsts'),
        'has_csp': security.get('has_csp'),
        'has_x_frame': security.get('has_x_frame_options'),
        'has_x_content_type': security.get('has_x_content_type'),
        # Content metrics
        'image_count': images.get('total_images'),
        'images_without_alt': images.get('images_without_alt'),
    }


def _save_audit_to_db(result, notes, user_email):
    """Persist one portal SEO audit and return the new row's id.

    Extracts the flat column values from the nested SEOAuditor ``result``
    dict, stores them together with a JSON-safe copy of the full result, and
    commits a new PortalSEOAudit row.

    Args:
        result: Audit result dict (nested sections as produced by to_dict()).
        notes: Free-text notes stored with the audit.
        user_email: Stored in the ``created_by`` column.

    Returns:
        The ``id`` of the committed PortalSEOAudit row.

    Raises:
        Exception: Any error raised while building or committing the row is
            re-raised after the session has been rolled back.
    """
    columns = _extract_audit_columns(result)

    db = SessionLocal()
    try:
        audit = PortalSEOAudit(
            audited_at=datetime.now(),
            url=PORTAL_URL,
            full_results=_make_json_safe(result),
            notes=notes,
            created_by=user_email,
            **columns,
        )
        db.add(audit)
        db.commit()
        return audit.id
    except Exception:
        # Leave the session clean before propagating to the caller, which
        # reports the failure to the client.
        db.rollback()
        raise
    finally:
        db.close()
|
||||
|
||||
|
||||
@bp.route('/portal-seo')
|
||||
@login_required
|
||||
def admin_portal_seo():
|
||||
@ -85,32 +171,25 @@ def admin_portal_seo():
|
||||
@bp.route('/portal-seo/run/stream')
|
||||
@login_required
|
||||
def admin_portal_seo_run_stream():
|
||||
"""SSE endpoint for streaming portal SEO audit progress step by step."""
|
||||
"""SSE endpoint for streaming portal SEO audit progress step by step.
|
||||
|
||||
Runs the same audit pipeline as SEOAuditor.audit_company() but yields
|
||||
progress events between each step.
|
||||
"""
|
||||
if not is_audit_owner():
|
||||
abort(404)
|
||||
|
||||
notes = request.args.get('notes', '')
|
||||
user_email = current_user.email
|
||||
|
||||
STEPS = [
|
||||
(1, 'Inicjalizacja audytora SEO...'),
|
||||
(2, 'Pobieranie strony nordabiznes.pl...'),
|
||||
(3, 'Analiza on-page SEO...'),
|
||||
(4, 'Sprawdzanie techniczne (robots.txt, sitemap, SSL)...'),
|
||||
(5, 'PageSpeed Insights API...'),
|
||||
(6, 'Analiza Local SEO...'),
|
||||
(7, 'Sprawdzanie cytowań (citations)...'),
|
||||
(8, 'Sprawdzanie aktualności treści...'),
|
||||
(9, 'Zapisywanie wyników...'),
|
||||
]
|
||||
TOTAL = len(STEPS)
|
||||
TOTAL = 9
|
||||
|
||||
def generate():
|
||||
try:
|
||||
# Step 1: Init auditor
|
||||
yield _sse_event({
|
||||
'step': 1, 'total': TOTAL,
|
||||
'message': STEPS[0][1], 'status': 'running'
|
||||
'message': 'Inicjalizacja audytora SEO...', 'status': 'running'
|
||||
})
|
||||
|
||||
auditor = _get_auditor()
|
||||
@ -133,6 +212,7 @@ def admin_portal_seo_run_stream():
|
||||
'audit_date': datetime.now(),
|
||||
'website_url': PORTAL_URL,
|
||||
'errors': [],
|
||||
'scores': {},
|
||||
}
|
||||
html_content = None
|
||||
final_url = PORTAL_URL
|
||||
@ -140,7 +220,7 @@ def admin_portal_seo_run_stream():
|
||||
# Step 2: Fetch page
|
||||
yield _sse_event({
|
||||
'step': 2, 'total': TOTAL,
|
||||
'message': STEPS[1][1], 'status': 'running'
|
||||
'message': 'Pobieranie strony nordabiznes.pl...', 'status': 'running'
|
||||
})
|
||||
|
||||
try:
|
||||
@ -150,49 +230,65 @@ def admin_portal_seo_run_stream():
|
||||
)
|
||||
load_ms = int((time_module.time() - start) * 1000)
|
||||
final_url = resp.url
|
||||
result['http_status'] = resp.status_code
|
||||
result['load_time_ms'] = load_ms
|
||||
result['final_url'] = final_url
|
||||
|
||||
if resp.status_code == 200:
|
||||
if resp.encoding and resp.encoding.lower() == 'iso-8859-1':
|
||||
resp.encoding = resp.apparent_encoding
|
||||
html_content = resp.text
|
||||
result['http_status'] = 200
|
||||
result['load_time_ms'] = load_ms
|
||||
result['final_url'] = final_url
|
||||
yield _sse_event({
|
||||
'step': 2, 'total': TOTAL,
|
||||
'message': f'Strona pobrana ({load_ms}ms)',
|
||||
'message': f'Strona pobrana ({load_ms}ms, {len(html_content)//1024}KB)',
|
||||
'status': 'done'
|
||||
})
|
||||
else:
|
||||
result['errors'].append(f'HTTP {resp.status_code}')
|
||||
yield _sse_event({
|
||||
'step': 2, 'total': TOTAL,
|
||||
'message': f'HTTP {resp.status_code}',
|
||||
'status': 'warning'
|
||||
'message': f'HTTP {resp.status_code}', 'status': 'warning'
|
||||
})
|
||||
except Exception as e:
|
||||
result['errors'].append(str(e)[:100])
|
||||
yield _sse_event({
|
||||
'step': 2, 'total': TOTAL,
|
||||
'message': f'Błąd pobierania: {str(e)[:80]}',
|
||||
'status': 'error'
|
||||
'message': f'Błąd: {str(e)[:80]}', 'status': 'error'
|
||||
})
|
||||
|
||||
# Step 3: On-page analysis
|
||||
yield _sse_event({
|
||||
'step': 3, 'total': TOTAL,
|
||||
'message': STEPS[2][1], 'status': 'running'
|
||||
'message': 'Analiza on-page SEO (meta tagi, nagłówki, obrazy)...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
if html_content:
|
||||
try:
|
||||
onpage = auditor.onpage_analyzer.analyze_html(
|
||||
onpage_obj = auditor.onpage_analyzer.analyze_html(
|
||||
html_content, base_url=final_url
|
||||
)
|
||||
result['onpage'] = onpage.to_dict()
|
||||
onpage_dict = onpage_obj.to_dict()
|
||||
result['onpage'] = onpage_dict
|
||||
|
||||
# Build summary
|
||||
mt = onpage_dict.get('meta_tags', {})
|
||||
imgs = onpage_dict.get('images', {})
|
||||
sd = onpage_dict.get('structured_data', {})
|
||||
title = mt.get('title', '')
|
||||
title_short = (title[:35] + '...') if title and len(title) > 35 else title
|
||||
parts = []
|
||||
if title:
|
||||
parts.append(f'title="{title_short}"')
|
||||
parts.append(f'{imgs.get("total_images", 0)} obrazów')
|
||||
if imgs.get('images_without_alt'):
|
||||
parts.append(f'{imgs["images_without_alt"]} bez alt')
|
||||
if sd.get('has_structured_data'):
|
||||
parts.append('Schema.org')
|
||||
|
||||
yield _sse_event({
|
||||
'step': 3, 'total': TOTAL,
|
||||
'message': f'On-page: title="{onpage.meta_title[:40]}..."' if onpage.meta_title else 'On-page: brak meta title',
|
||||
'message': f'On-page: {", ".join(parts)}',
|
||||
'status': 'done'
|
||||
})
|
||||
except Exception as e:
|
||||
@ -205,30 +301,45 @@ def admin_portal_seo_run_stream():
|
||||
else:
|
||||
yield _sse_event({
|
||||
'step': 3, 'total': TOTAL,
|
||||
'message': 'Pominięto (brak HTML)',
|
||||
'status': 'skipped'
|
||||
'message': 'Pominięto (brak HTML)', 'status': 'skipped'
|
||||
})
|
||||
|
||||
# Step 4: Technical SEO
|
||||
yield _sse_event({
|
||||
'step': 4, 'total': TOTAL,
|
||||
'message': STEPS[3][1], 'status': 'running'
|
||||
'message': 'Sprawdzanie techniczne (robots.txt, sitemap, SSL, canonical)...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
try:
|
||||
tech_result = auditor.technical_checker.check_url(final_url)
|
||||
result['technical'] = tech_result.to_dict()
|
||||
checks = []
|
||||
td = tech_result.to_dict()
|
||||
if td.get('has_robots_txt'):
|
||||
checks.append('robots.txt')
|
||||
if td.get('has_sitemap'):
|
||||
checks.append('sitemap')
|
||||
if td.get('has_ssl'):
|
||||
checks.append('SSL')
|
||||
tech_obj = auditor.technical_checker.check_url(final_url)
|
||||
tech_dict = tech_obj.to_dict()
|
||||
result['technical'] = tech_dict
|
||||
|
||||
checks_ok = []
|
||||
checks_fail = []
|
||||
if tech_dict.get('robots_txt', {}).get('exists'):
|
||||
checks_ok.append('robots.txt')
|
||||
else:
|
||||
checks_fail.append('robots.txt')
|
||||
if tech_dict.get('sitemap', {}).get('exists'):
|
||||
checks_ok.append('sitemap')
|
||||
else:
|
||||
checks_fail.append('sitemap')
|
||||
if tech_dict.get('canonical', {}).get('has_canonical'):
|
||||
checks_ok.append('canonical')
|
||||
else:
|
||||
checks_fail.append('canonical')
|
||||
|
||||
msg_parts = []
|
||||
if checks_ok:
|
||||
msg_parts.append(f'OK: {", ".join(checks_ok)}')
|
||||
if checks_fail:
|
||||
msg_parts.append(f'Brak: {", ".join(checks_fail)}')
|
||||
|
||||
yield _sse_event({
|
||||
'step': 4, 'total': TOTAL,
|
||||
'message': f'Technical: {", ".join(checks) if checks else "brak kluczowych elementów"}',
|
||||
'message': f'Technical: {" | ".join(msg_parts)}',
|
||||
'status': 'done'
|
||||
})
|
||||
except Exception as e:
|
||||
@ -242,7 +353,8 @@ def admin_portal_seo_run_stream():
|
||||
# Step 5: PageSpeed Insights
|
||||
yield _sse_event({
|
||||
'step': 5, 'total': TOTAL,
|
||||
'message': STEPS[4][1], 'status': 'running'
|
||||
'message': 'PageSpeed Insights API (może potrwać do 30s)...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
try:
|
||||
@ -261,14 +373,19 @@ def admin_portal_seo_run_stream():
|
||||
}
|
||||
yield _sse_event({
|
||||
'step': 5, 'total': TOTAL,
|
||||
'message': f'PageSpeed: Perf={ps_result.scores.performance}, SEO={ps_result.scores.seo}',
|
||||
'message': (
|
||||
f'Perf={ps_result.scores.performance}, '
|
||||
f'SEO={ps_result.scores.seo}, '
|
||||
f'A11y={ps_result.scores.accessibility}, '
|
||||
f'BP={ps_result.scores.best_practices}'
|
||||
),
|
||||
'status': 'done'
|
||||
})
|
||||
else:
|
||||
result['errors'].append('PageSpeed API quota exceeded')
|
||||
yield _sse_event({
|
||||
'step': 5, 'total': TOTAL,
|
||||
'message': 'Limit API wyczerpany — pominięto',
|
||||
'message': 'Limit API wyczerpany',
|
||||
'status': 'warning'
|
||||
})
|
||||
except Exception as e:
|
||||
@ -282,7 +399,8 @@ def admin_portal_seo_run_stream():
|
||||
# Step 6: Local SEO
|
||||
yield _sse_event({
|
||||
'step': 6, 'total': TOTAL,
|
||||
'message': STEPS[5][1], 'status': 'running'
|
||||
'message': 'Analiza Local SEO (NAP, Google Maps, lokalne słowa kluczowe)...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
if html_content:
|
||||
@ -294,7 +412,7 @@ def admin_portal_seo_run_stream():
|
||||
score = local_seo.get('local_seo_score', 0)
|
||||
yield _sse_event({
|
||||
'step': 6, 'total': TOTAL,
|
||||
'message': f'Local SEO score: {score}',
|
||||
'message': f'Local SEO score: {score}/100',
|
||||
'status': 'done'
|
||||
})
|
||||
except Exception as e:
|
||||
@ -307,14 +425,14 @@ def admin_portal_seo_run_stream():
|
||||
else:
|
||||
yield _sse_event({
|
||||
'step': 6, 'total': TOTAL,
|
||||
'message': 'Pominięto (brak HTML)',
|
||||
'status': 'skipped'
|
||||
'message': 'Pominięto (brak HTML)', 'status': 'skipped'
|
||||
})
|
||||
|
||||
# Step 7: Citations
|
||||
yield _sse_event({
|
||||
'step': 7, 'total': TOTAL,
|
||||
'message': STEPS[6][1], 'status': 'running'
|
||||
'message': 'Sprawdzanie cytowań w katalogach (Google, Yelp, Facebook)...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
try:
|
||||
@ -339,7 +457,8 @@ def admin_portal_seo_run_stream():
|
||||
# Step 8: Content freshness
|
||||
yield _sse_event({
|
||||
'step': 8, 'total': TOTAL,
|
||||
'message': STEPS[7][1], 'status': 'running'
|
||||
'message': 'Sprawdzanie aktualności treści...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
try:
|
||||
@ -350,7 +469,7 @@ def admin_portal_seo_run_stream():
|
||||
fscore = freshness.get('content_freshness_score', 0)
|
||||
yield _sse_event({
|
||||
'step': 8, 'total': TOTAL,
|
||||
'message': f'Aktualność treści: {fscore}',
|
||||
'message': f'Aktualność treści: {fscore}/100',
|
||||
'status': 'done'
|
||||
})
|
||||
except Exception as e:
|
||||
@ -364,54 +483,12 @@ def admin_portal_seo_run_stream():
|
||||
# Step 9: Save to DB
|
||||
yield _sse_event({
|
||||
'step': 9, 'total': TOTAL,
|
||||
'message': STEPS[8][1], 'status': 'running'
|
||||
'message': 'Zapisywanie wyników do bazy danych...',
|
||||
'status': 'running'
|
||||
})
|
||||
|
||||
# Extract data for DB columns
|
||||
ps = result.get('pagespeed', {})
|
||||
ps_scores = result.get('scores', {})
|
||||
cwv = ps.get('core_web_vitals', {})
|
||||
tech = result.get('technical', {})
|
||||
onpage = result.get('onpage', {})
|
||||
sec = tech.get('security_headers', {})
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
audit = PortalSEOAudit(
|
||||
audited_at=datetime.now(),
|
||||
url=PORTAL_URL,
|
||||
pagespeed_performance=ps_scores.get('pagespeed_performance'),
|
||||
pagespeed_seo=ps_scores.get('pagespeed_seo'),
|
||||
pagespeed_accessibility=ps_scores.get('pagespeed_accessibility'),
|
||||
pagespeed_best_practices=ps_scores.get('pagespeed_best_practices'),
|
||||
lcp_ms=cwv.get('lcp_ms'),
|
||||
fcp_ms=cwv.get('fcp_ms'),
|
||||
cls=cwv.get('cls'),
|
||||
tbt_ms=cwv.get('tbt_ms'),
|
||||
speed_index_ms=cwv.get('speed_index_ms'),
|
||||
has_meta_title=bool(onpage.get('meta_title')),
|
||||
has_meta_description=bool(onpage.get('meta_description')),
|
||||
has_canonical=tech.get('has_canonical'),
|
||||
has_robots_txt=tech.get('has_robots_txt'),
|
||||
has_sitemap=tech.get('has_sitemap'),
|
||||
has_structured_data=onpage.get('has_structured_data'),
|
||||
has_og_tags=onpage.get('has_og_tags'),
|
||||
has_ssl=tech.get('has_ssl'),
|
||||
is_mobile_friendly=tech.get('is_mobile_friendly'),
|
||||
has_hsts=sec.get('has_hsts'),
|
||||
has_csp=sec.get('has_csp'),
|
||||
has_x_frame=sec.get('has_x_frame_options'),
|
||||
has_x_content_type=sec.get('has_x_content_type'),
|
||||
page_size_bytes=onpage.get('page_size_bytes'),
|
||||
image_count=onpage.get('total_images'),
|
||||
images_without_alt=onpage.get('images_without_alt'),
|
||||
full_results=_make_json_safe(result),
|
||||
notes=notes,
|
||||
created_by=user_email
|
||||
)
|
||||
db.add(audit)
|
||||
db.commit()
|
||||
audit_id = audit.id
|
||||
audit_id = _save_audit_to_db(result, notes, user_email)
|
||||
|
||||
yield _sse_event({
|
||||
'step': 9, 'total': TOTAL,
|
||||
@ -419,17 +496,15 @@ def admin_portal_seo_run_stream():
|
||||
'status': 'done'
|
||||
})
|
||||
|
||||
# Send complete event
|
||||
yield _sse_event({
|
||||
'status': 'complete',
|
||||
'audit_id': audit_id,
|
||||
'performance': ps_scores.get('pagespeed_performance'),
|
||||
'seo': ps_scores.get('pagespeed_seo'),
|
||||
'performance': result.get('scores', {}).get('pagespeed_performance'),
|
||||
'seo': result.get('scores', {}).get('pagespeed_seo'),
|
||||
'errors': result.get('errors', []),
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
logger.error(f'Portal SEO save failed: {e}', exc_info=True)
|
||||
yield _sse_event({
|
||||
'step': 9, 'total': TOTAL,
|
||||
@ -437,8 +512,6 @@ def admin_portal_seo_run_stream():
|
||||
'status': 'error'
|
||||
})
|
||||
yield _sse_event({'status': 'error', 'message': str(e)[:200]})
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f'Portal SEO audit stream failed: {e}', exc_info=True)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user