1401 lines
56 KiB
Python
1401 lines
56 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
SEO Report Generator for Norda Biznes
|
|
=====================================
|
|
|
|
Generates HTML reports (single company or batch) and JSON exports from SEO audit data.
|
|
Designed for offline viewing, sharing with clients, and archiving audit results.
|
|
|
|
Usage:
|
|
python seo_report_generator.py --company-id 26 --html
|
|
python seo_report_generator.py --all --html --output ./reports
|
|
python seo_report_generator.py --batch 1-10 --json
|
|
python seo_report_generator.py --all --json --output ./exports
|
|
|
|
Output:
|
|
- HTML: Styled, standalone reports suitable for viewing in browsers
|
|
- JSON: Machine-readable exports for integration with other tools
|
|
|
|
Author: Claude Code
|
|
Date: 2026-01-08
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Optional, Dict, List, Any, Tuple
|
|
from pathlib import Path
|
|
from html import escape
|
|
|
|
from sqlalchemy import create_engine, text
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
# Logging: INFO and above, each record prefixed with a timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Database connection string: the DATABASE_URL environment variable wins,
# otherwise the literal below is used.
# NOTE(review): the fallback embeds a username and password in source control.
# Consider rotating that credential and requiring DATABASE_URL to be set.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://nordabiz_app:NordaBiz2025Secure@127.0.0.1:5432/nordabiz',
)

# Version string stamped into the generated reports and exports.
REPORT_VERSION = '1.0.0'
|
|
|
|
|
|
class SEOReportGenerator:
|
|
"""
|
|
Generates HTML and JSON reports from SEO audit data stored in database.
|
|
"""
|
|
|
|
def __init__(self, database_url: str = DATABASE_URL):
    """
    Set up database access for the report generator.

    Args:
        database_url: Database connection string passed to
            ``create_engine``.
    """
    engine = create_engine(database_url)
    self.engine = engine
    # Session factory bound to the engine; each query opens its own session.
    self.Session = sessionmaker(bind=engine)
|
|
|
|
def get_companies_with_seo_data(
    self,
    company_ids: Optional[List[int]] = None,
    batch_start: Optional[int] = None,
    batch_end: Optional[int] = None
) -> List[Dict[str, Any]]:
    """
    Load active companies together with their SEO analysis columns.

    Exactly one selection mode is applied, checked in this order:
    a non-empty ``company_ids`` list, then a ``batch_start``/``batch_end``
    pair (both must be given), otherwise every active company.

    Args:
        company_ids: Specific company IDs to fetch.
        batch_start: First position of the batch (1-indexed, ordered by id).
        batch_end: Last position of the batch (1-indexed, inclusive).

    Returns:
        One dict per result row, keyed by the selected column names.
        ``seo_issues`` and ``seo_audit_errors`` are JSON-decoded when they
        arrive as strings; undecodable values become an empty list.
    """
    select_sql = """
        SELECT
            c.id, c.name, c.slug, c.website, c.address_city,
            c.nip, c.email, c.phone,
            cat.name as category_name,
            wa.analyzed_at, wa.website_url, wa.final_url,
            wa.http_status_code, wa.load_time_ms,
            wa.pagespeed_seo_score, wa.pagespeed_performance_score,
            wa.pagespeed_accessibility_score, wa.pagespeed_best_practices_score,
            wa.meta_title, wa.meta_description, wa.meta_keywords,
            wa.h1_count, wa.h2_count, wa.h3_count, wa.h1_text,
            wa.total_images, wa.images_without_alt, wa.images_with_alt,
            wa.internal_links_count, wa.external_links_count,
            wa.has_structured_data, wa.structured_data_types,
            wa.has_canonical, wa.canonical_url, wa.is_indexable, wa.noindex_reason,
            wa.has_sitemap, wa.has_robots_txt,
            wa.viewport_configured, wa.is_mobile_friendly,
            wa.largest_contentful_paint_ms, wa.first_input_delay_ms, wa.cumulative_layout_shift,
            wa.has_og_tags, wa.og_title, wa.og_description, wa.og_image,
            wa.has_twitter_cards, wa.html_lang, wa.has_hreflang,
            wa.word_count_homepage,
            wa.seo_audit_version, wa.seo_audited_at, wa.seo_audit_errors,
            wa.seo_overall_score, wa.seo_health_score, wa.seo_issues,
            wa.has_ssl, wa.ssl_expires_at
        FROM companies c
        LEFT JOIN company_website_analysis wa ON c.id = wa.company_id
        LEFT JOIN categories cat ON c.category_id = cat.id
        WHERE c.is_active = TRUE
    """
    order_clause = " ORDER BY c.id"

    # Pick the SQL suffix and bind parameters for the requested mode.
    if company_ids:
        statement = select_sql + " AND c.id = ANY(:ids)" + order_clause
        params = {'ids': company_ids}
    elif batch_start is not None and batch_end is not None:
        statement = select_sql + order_clause + " OFFSET :offset LIMIT :limit"
        # Convert the 1-indexed inclusive range into OFFSET/LIMIT.
        params = {
            'offset': batch_start - 1,
            'limit': batch_end - batch_start + 1,
        }
    else:
        statement = select_sql + order_clause
        params = None

    json_fields = ('seo_issues', 'seo_audit_errors')
    companies = []
    with self.Session() as session:
        for row in session.execute(text(statement), params):
            record = dict(row._mapping)
            for field in json_fields:
                raw = record.get(field)
                # Only non-empty strings need decoding; other values are kept as-is.
                if raw and isinstance(raw, str):
                    try:
                        record[field] = json.loads(raw)
                    except json.JSONDecodeError:
                        record[field] = []
            companies.append(record)

    return companies
|
|
|
|
def generate_html_report(
    self,
    company: Dict[str, Any],
    include_recommendations: bool = True
) -> str:
    """
    Generate a standalone HTML report for a single company.

    The page contains the score cards, on-page and technical details, and
    the Core Web Vitals, issues and recommendations sections produced by
    the corresponding helper methods. All company-supplied text is
    HTML-escaped before it is placed in the markup.

    Args:
        company: Company data dict with SEO analysis.
        include_recommendations: Whether to include improvement recommendations.

    Returns:
        HTML string of the complete report.
    """
    def safe(value):
        """HTML-escape any value; None becomes an empty string."""
        return '' if value is None else escape(str(value))

    def score_color(score):
        """Map a 0-100 score to its display colour."""
        if score is None:
            return '#6c757d'  # gray
        if score >= 90:
            return '#28a745'  # green
        if score >= 50:
            return '#ffc107'  # yellow
        return '#dc3545'  # red

    def score_label(score):
        """Map a 0-100 score to its (Polish) verbal rating."""
        if score is None:
            return 'Brak danych'
        if score >= 90:
            return 'Doskonały'
        if score >= 70:
            return 'Dobry'
        if score >= 50:
            return 'Średni'
        return 'Wymaga poprawy'

    def shown(key):
        """Escaped value of company[key], or a dash when it is missing."""
        value = company.get(key)
        return '—' if value is None else safe(value)

    def flag_badge(key, off_class):
        """Tak/Nie badge for a boolean field; off_class styles the 'Nie' case."""
        if company.get(key):
            return '<span class="badge badge-success">Tak</span>'
        return f'<span class="badge {off_class}">Nie</span>'

    def score_card(label, key):
        """One card of the score grid."""
        score = company.get(key)
        return f'''<div class="score-card">
                        <div class="label">{label}</div>
                        <div class="score" style="color: {score_color(score)}">
                            {shown(key)}
                        </div>
                        <div class="status">{score_label(score)}</div>
                    </div>'''

    def item(label, value_html, value_attrs='class="value"'):
        """One label/value row of an info card; value_html is already safe."""
        return f'''<div class="item">
                            <span class="label">{label}</span>
                            <span {value_attrs}>{value_html}</span>
                        </div>'''

    def info_card(heading, *rows):
        """An info card with a heading and pre-rendered rows."""
        body = '\n                        '.join(rows)
        return f'''<div class="info-card">
                        <h3>{heading}</h3>
                        {body}
                    </div>'''

    # Generate recommendations based on issues
    recommendations = []
    if include_recommendations:
        recommendations = self._generate_recommendations(company)
    recommendations_html = (
        self._recommendations_section(recommendations) if recommendations else ''
    )

    # --- Header values -------------------------------------------------
    name = safe(company.get('name'))
    website = safe(company.get('website') or company.get('website_url') or 'Brak')
    category = company.get('category_name')
    category_html = f'<div>Kategoria: {safe(category)}</div>' if category else ''
    generated_at = datetime.now().strftime('%d.%m.%Y %H:%M')
    audited = company.get('seo_audited_at')
    audited_at = audited.strftime('%d.%m.%Y %H:%M') if audited else 'Brak danych'

    # --- Meta tags -----------------------------------------------------
    meta_title = company.get('meta_title')
    # Truncate the raw text first and escape afterwards; truncating the
    # escaped text could cut an entity such as "&amp;" in half.
    raw_title = '' if meta_title is None else str(meta_title)
    short_title = safe(self._truncate(raw_title, 40)) or '—'
    description = company.get('meta_description')
    if description:
        description_badge = (
            f'<span class="badge badge-success">Tak ({len(description)} zn.)</span>'
        )
    else:
        description_badge = '<span class="badge badge-danger">Brak</span>'

    # --- Indexability --------------------------------------------------
    if company.get('is_indexable'):
        indexable_badge = '<span class="badge badge-success">Tak</span>'
    else:
        reason = safe(company.get('noindex_reason') or '')
        # Only show the parentheses when there is a reason to put in them.
        reason_suffix = f' ({reason})' if reason else ''
        indexable_badge = f'<span class="badge badge-danger">Nie{reason_suffix}</span>'

    # --- Structured data types -----------------------------------------
    sd_types = company.get('structured_data_types') or []
    if isinstance(sd_types, str):
        # A plain string would otherwise be joined character by character.
        sd_types = [sd_types]
    # Type names are not under our control, so escape them.
    types_text = safe(', '.join(str(t) for t in sd_types)) or '—'

    score_cards = '\n                    '.join(
        score_card(label, key) for label, key in (
            ('Ogolny wynik SEO', 'seo_overall_score'),
            ('PageSpeed SEO', 'pagespeed_seo_score'),
            ('Wydajnosc', 'pagespeed_performance_score'),
            ('Dostepnosc', 'pagespeed_accessibility_score'),
        )
    )

    detail_cards = '\n                    '.join((
        info_card(
            'Meta tagi',
            item(
                'Tytul strony',
                short_title,
                f'class="value truncate" title="{safe(meta_title)}"',
            ),
            item('Opis meta', description_badge),
            item('Canonical URL', flag_badge('has_canonical', 'badge-warning')),
        ),
        info_card(
            'Struktura naglowkow',
            item('H1', self._h1_badge(company.get('h1_count'))),
            item('H2', shown('h2_count')),
            item('H3', shown('h3_count')),
        ),
        info_card(
            'Obrazy',
            item('Liczba obrazow', shown('total_images')),
            item(
                'Bez alt',
                self._images_alt_badge(
                    company.get('images_without_alt'),
                    company.get('total_images'),
                ),
            ),
            item('Z alt', shown('images_with_alt')),
        ),
        info_card(
            'Linki',
            item('Wewnetrzne', shown('internal_links_count')),
            item('Zewnetrzne', shown('external_links_count')),
        ),
    ))

    technical_cards = '\n                    '.join((
        info_card(
            'Pliki i indeksowanie',
            item('robots.txt', flag_badge('has_robots_txt', 'badge-warning')),
            item('sitemap.xml', flag_badge('has_sitemap', 'badge-warning')),
            item('Indeksowalnosc', indexable_badge),
        ),
        info_card(
            'Bezpieczenstwo i mobilnosc',
            item('SSL/HTTPS', flag_badge('has_ssl', 'badge-danger')),
            item('Viewport', flag_badge('viewport_configured', 'badge-warning')),
            item('Mobile-friendly', flag_badge('is_mobile_friendly', 'badge-warning')),
        ),
        info_card(
            'Dane strukturalne',
            item('Schema.org', flag_badge('has_structured_data', 'badge-secondary')),
            item('Typy', types_text, 'class="value truncate"'),
        ),
        info_card(
            'Social Media',
            item('Open Graph', flag_badge('has_og_tags', 'badge-secondary')),
            item('Twitter Cards', flag_badge('has_twitter_cards', 'badge-secondary')),
            item('Jezyk (lang)', safe(company.get('html_lang')) or '—'),
        ),
    ))

    # Plain (non-f) string so the CSS braces need no doubling.
    css = '''
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6; color: #333; background: #f8f9fa; padding: 20px;
        }
        .report-container {
            max-width: 1000px; margin: 0 auto; background: white; border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1); overflow: hidden;
        }
        .header {
            background: linear-gradient(135deg, #1a5276 0%, #2e86ab 100%);
            color: white; padding: 30px;
        }
        .header h1 { font-size: 1.8em; margin-bottom: 10px; }
        .header .meta { opacity: 0.9; font-size: 0.9em; }
        .content { padding: 30px; }
        .section { margin-bottom: 30px; }
        .section h2 {
            color: #1a5276; font-size: 1.3em; margin-bottom: 15px;
            padding-bottom: 10px; border-bottom: 2px solid #e9ecef;
        }
        .scores-grid {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px; margin-bottom: 20px;
        }
        .score-card { background: #f8f9fa; border-radius: 8px; padding: 20px; text-align: center; }
        .score-card .score { font-size: 2.5em; font-weight: bold; margin: 10px 0; }
        .score-card .label {
            color: #6c757d; font-size: 0.85em; text-transform: uppercase; letter-spacing: 0.5px;
        }
        .score-card .status { font-size: 0.8em; margin-top: 5px; }
        .info-grid {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 15px;
        }
        .info-card { background: #f8f9fa; border-radius: 8px; padding: 15px; }
        .info-card h3 { color: #1a5276; font-size: 1em; margin-bottom: 10px; }
        .info-card .item {
            display: flex; justify-content: space-between; padding: 8px 0;
            border-bottom: 1px solid #e9ecef;
        }
        .info-card .item:last-child { border-bottom: none; }
        .info-card .item .label { color: #6c757d; }
        .info-card .item .value { font-weight: 500; }
        .badge {
            display: inline-block; padding: 3px 8px; border-radius: 4px;
            font-size: 0.75em; font-weight: 600;
        }
        .badge-success { background: #d4edda; color: #155724; }
        .badge-warning { background: #fff3cd; color: #856404; }
        .badge-danger { background: #f8d7da; color: #721c24; }
        .badge-secondary { background: #e9ecef; color: #6c757d; }
        .issues-list { list-style: none; }
        .issues-list li {
            padding: 10px 15px; margin-bottom: 8px; border-radius: 4px; border-left: 4px solid;
        }
        .issues-list li.error { background: #f8d7da; border-color: #dc3545; }
        .issues-list li.warning { background: #fff3cd; border-color: #ffc107; }
        .issues-list li.info { background: #d1ecf1; border-color: #17a2b8; }
        .recommendations { background: #e8f4f8; border-radius: 8px; padding: 20px; }
        .recommendations h3 { color: #1a5276; margin-bottom: 15px; }
        .recommendations ol { margin-left: 20px; }
        .recommendations li { margin-bottom: 10px; }
        .footer {
            background: #f8f9fa; padding: 20px 30px; text-align: center; color: #6c757d;
            font-size: 0.85em; border-top: 1px solid #e9ecef;
        }
        .truncate {
            white-space: nowrap; overflow: hidden; text-overflow: ellipsis; max-width: 300px;
        }
        @media print {
            body { background: white; padding: 0; }
            .report-container { box-shadow: none; }
        }
    '''

    # Build HTML
    html = f'''<!DOCTYPE html>
<html lang="pl">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Raport SEO - {name}</title>
    <style>{css}</style>
</head>
<body>
    <div class="report-container">
        <div class="header">
            <h1>{name}</h1>
            <div class="meta">
                <div>Raport SEO wygenerowany: {generated_at}</div>
                <div>Strona: {website}</div>
                {category_html}
            </div>
        </div>

        <div class="content">
            <!-- Overall Scores -->
            <div class="section">
                <h2>Wyniki SEO</h2>
                <div class="scores-grid">
                    {score_cards}
                </div>
            </div>

            <!-- Technical Details -->
            <div class="section">
                <h2>Szczegoly techniczne</h2>
                <div class="info-grid">
                    {detail_cards}
                </div>
            </div>

            <!-- Technical SEO -->
            <div class="section">
                <h2>Techniczne SEO</h2>
                <div class="info-grid">
                    {technical_cards}
                </div>
            </div>

            <!-- Core Web Vitals -->
            {self._core_web_vitals_section(company)}

            <!-- Issues -->
            {self._issues_section(company)}

            <!-- Recommendations -->
            {recommendations_html}

        </div>

        <div class="footer">
            <div>Raport wygenerowany przez Norda Biznes SEO Auditor v{REPORT_VERSION}</div>
            <div>Data audytu: {audited_at}</div>
            <div>https://nordabiznes.pl</div>
        </div>
    </div>
</body>
</html>'''

    return html
|
|
|
|
def _truncate(self, text: str, length: int) -> str:
|
|
"""Truncate text with ellipsis."""
|
|
if not text:
|
|
return ''
|
|
if len(text) <= length:
|
|
return text
|
|
return text[:length] + '...'
|
|
|
|
def _h1_badge(self, count: Optional[int]) -> str:
|
|
"""Generate badge for H1 count."""
|
|
if count is None:
|
|
return '<span class="badge badge-secondary">—</span>'
|
|
if count == 1:
|
|
return f'<span class="badge badge-success">{count}</span>'
|
|
if count == 0:
|
|
return '<span class="badge badge-danger">0 (brak!)</span>'
|
|
return f'<span class="badge badge-warning">{count} (za duzo)</span>'
|
|
|
|
def _images_alt_badge(self, without_alt: Optional[int], total: Optional[int]) -> str:
|
|
"""Generate badge for images without alt."""
|
|
if without_alt is None:
|
|
return '<span class="badge badge-secondary">—</span>'
|
|
if without_alt == 0:
|
|
return '<span class="badge badge-success">0</span>'
|
|
if total and without_alt / total > 0.5:
|
|
return f'<span class="badge badge-danger">{without_alt}</span>'
|
|
return f'<span class="badge badge-warning">{without_alt}</span>'
|
|
|
|
def _core_web_vitals_section(self, company: Dict[str, Any]) -> str:
|
|
"""Generate Core Web Vitals section HTML."""
|
|
lcp = company.get('largest_contentful_paint_ms')
|
|
fid = company.get('first_input_delay_ms')
|
|
cls = company.get('cumulative_layout_shift')
|
|
|
|
if lcp is None and fid is None and cls is None:
|
|
return ''
|
|
|
|
def lcp_status(val):
|
|
if val is None:
|
|
return ('—', 'badge-secondary')
|
|
if val <= 2500:
|
|
return (f'{val}ms', 'badge-success')
|
|
if val <= 4000:
|
|
return (f'{val}ms', 'badge-warning')
|
|
return (f'{val}ms', 'badge-danger')
|
|
|
|
def fid_status(val):
|
|
if val is None:
|
|
return ('—', 'badge-secondary')
|
|
if val <= 100:
|
|
return (f'{val}ms', 'badge-success')
|
|
if val <= 300:
|
|
return (f'{val}ms', 'badge-warning')
|
|
return (f'{val}ms', 'badge-danger')
|
|
|
|
def cls_status(val):
|
|
if val is None:
|
|
return ('—', 'badge-secondary')
|
|
if val <= 0.1:
|
|
return (f'{val:.3f}', 'badge-success')
|
|
if val <= 0.25:
|
|
return (f'{val:.3f}', 'badge-warning')
|
|
return (f'{val:.3f}', 'badge-danger')
|
|
|
|
lcp_val, lcp_class = lcp_status(lcp)
|
|
fid_val, fid_class = fid_status(fid)
|
|
cls_val, cls_class = cls_status(cls)
|
|
|
|
return f'''
|
|
<div class="section">
|
|
<h2>Core Web Vitals</h2>
|
|
<div class="info-grid">
|
|
<div class="info-card">
|
|
<h3>LCP (Largest Contentful Paint)</h3>
|
|
<div class="item">
|
|
<span class="label">Wynik</span>
|
|
<span class="value"><span class="badge {lcp_class}">{lcp_val}</span></span>
|
|
</div>
|
|
<div class="item">
|
|
<span class="label">Cel</span>
|
|
<span class="value">< 2500ms</span>
|
|
</div>
|
|
</div>
|
|
<div class="info-card">
|
|
<h3>FID (First Input Delay)</h3>
|
|
<div class="item">
|
|
<span class="label">Wynik</span>
|
|
<span class="value"><span class="badge {fid_class}">{fid_val}</span></span>
|
|
</div>
|
|
<div class="item">
|
|
<span class="label">Cel</span>
|
|
<span class="value">< 100ms</span>
|
|
</div>
|
|
</div>
|
|
<div class="info-card">
|
|
<h3>CLS (Cumulative Layout Shift)</h3>
|
|
<div class="item">
|
|
<span class="label">Wynik</span>
|
|
<span class="value"><span class="badge {cls_class}">{cls_val}</span></span>
|
|
</div>
|
|
<div class="item">
|
|
<span class="label">Cel</span>
|
|
<span class="value">< 0.1</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
'''
|
|
|
|
def _issues_section(self, company: Dict[str, Any]) -> str:
|
|
"""Generate issues section HTML."""
|
|
issues = company.get('seo_issues') or []
|
|
errors = company.get('seo_audit_errors') or []
|
|
|
|
if not issues and not errors:
|
|
return ''
|
|
|
|
items_html = ''
|
|
for issue in issues:
|
|
if isinstance(issue, dict):
|
|
severity = issue.get('severity', 'info')
|
|
message = escape(issue.get('message', ''))
|
|
else:
|
|
severity = 'info'
|
|
message = escape(str(issue))
|
|
items_html += f'<li class="{severity}">{message}</li>\n'
|
|
|
|
for error in errors:
|
|
items_html += f'<li class="error">{escape(str(error))}</li>\n'
|
|
|
|
return f'''
|
|
<div class="section">
|
|
<h2>Wykryte problemy</h2>
|
|
<ul class="issues-list">
|
|
{items_html}
|
|
</ul>
|
|
</div>
|
|
'''
|
|
|
|
def _recommendations_section(self, recommendations: List[str]) -> str:
|
|
"""Generate recommendations section HTML."""
|
|
if not recommendations:
|
|
return ''
|
|
|
|
items_html = ''.join(f'<li>{escape(rec)}</li>\n' for rec in recommendations)
|
|
|
|
return f'''
|
|
<div class="section">
|
|
<h2>Rekomendacje</h2>
|
|
<div class="recommendations">
|
|
<ol>
|
|
{items_html}
|
|
</ol>
|
|
</div>
|
|
</div>
|
|
'''
|
|
|
|
def _generate_recommendations(self, company: Dict[str, Any]) -> List[str]:
|
|
"""Generate SEO improvement recommendations based on audit data."""
|
|
recommendations = []
|
|
|
|
# Meta tags
|
|
if not company.get('meta_title'):
|
|
recommendations.append(
|
|
'Dodaj znacznik <title> do strony. Powinien miec 50-60 znakow i zawierac slowa kluczowe.'
|
|
)
|
|
if not company.get('meta_description'):
|
|
recommendations.append(
|
|
'Dodaj meta description (150-160 znakow). Dobry opis zwieksza CTR w wynikach wyszukiwania.'
|
|
)
|
|
|
|
# Headings
|
|
h1_count = company.get('h1_count')
|
|
if h1_count == 0:
|
|
recommendations.append(
|
|
'Dodaj naglowek H1 do strony. Kazda strona powinna miec dokladnie jeden H1.'
|
|
)
|
|
elif h1_count and h1_count > 1:
|
|
recommendations.append(
|
|
f'Strona ma {h1_count} naglowkow H1. Pozostaw tylko jeden glowny naglowek H1.'
|
|
)
|
|
|
|
# Images
|
|
images_without_alt = company.get('images_without_alt')
|
|
if images_without_alt and images_without_alt > 0:
|
|
recommendations.append(
|
|
f'Dodaj atrybuty alt do {images_without_alt} obrazow. Alt poprawia SEO i dostepnosc.'
|
|
)
|
|
|
|
# Technical SEO
|
|
if not company.get('has_robots_txt'):
|
|
recommendations.append(
|
|
'Utworz plik robots.txt w glownym katalogu strony.'
|
|
)
|
|
if not company.get('has_sitemap'):
|
|
recommendations.append(
|
|
'Utworz i zglos mape strony (sitemap.xml) w Google Search Console.'
|
|
)
|
|
if not company.get('has_canonical'):
|
|
recommendations.append(
|
|
'Dodaj znacznik canonical URL aby uniknac problemow z duplikacja tresci.'
|
|
)
|
|
if not company.get('has_ssl'):
|
|
recommendations.append(
|
|
'Wlacz certyfikat SSL (HTTPS). Google premiuje strony z bezpiecznym polaczeniem.'
|
|
)
|
|
|
|
# Mobile
|
|
if not company.get('viewport_configured'):
|
|
recommendations.append(
|
|
'Dodaj znacznik viewport meta dla prawidlowego wyswietlania na urzadzeniach mobilnych.'
|
|
)
|
|
|
|
# Structured data
|
|
if not company.get('has_structured_data'):
|
|
recommendations.append(
|
|
'Dodaj dane strukturalne (Schema.org) - np. LocalBusiness dla lepszej widocznosci w Google.'
|
|
)
|
|
|
|
# Open Graph
|
|
if not company.get('has_og_tags'):
|
|
recommendations.append(
|
|
'Dodaj znaczniki Open Graph dla lepszego wygladu przy udostepnianiu w mediach spolecznosciowych.'
|
|
)
|
|
|
|
# Performance
|
|
lcp = company.get('largest_contentful_paint_ms')
|
|
if lcp and lcp > 2500:
|
|
recommendations.append(
|
|
f'Popraw LCP (obecnie {lcp}ms). Zoptymalizuj obrazy i skrypty dla szybszego ladowania.'
|
|
)
|
|
|
|
cls = company.get('cumulative_layout_shift')
|
|
if cls and cls > 0.1:
|
|
recommendations.append(
|
|
f'Popraw CLS (obecnie {cls:.3f}). Zdefiniuj wymiary obrazow i unikaj dynamicznego dodawania tresci.'
|
|
)
|
|
|
|
return recommendations
|
|
|
|
def generate_batch_html_report(
    self,
    companies: List[Dict[str, Any]],
    title: str = "Raport SEO - Norda Biznes"
) -> str:
    """
    Generate a batch HTML report summarizing multiple companies.

    The page shows summary statistics (totals, average overall score and a
    breakdown by score band) followed by one table row per company, sorted
    by overall SEO score from highest to lowest. Companies without a score
    sort as if their score were 0.

    Args:
        companies: List of company data dicts with SEO analysis.
        title: Report title.

    Returns:
        HTML string of the batch summary report.
    """
    def safe(value):
        """HTML-escape any value; None becomes an empty string."""
        return '' if value is None else escape(str(value))

    def score_color(score):
        """Map a 0-100 score to its display colour."""
        if score is None:
            return '#6c757d'
        if score >= 90:
            return '#28a745'
        if score >= 50:
            return '#ffc107'
        return '#dc3545'

    def shown(score):
        """Escaped score, or a dash when it is missing."""
        return '—' if score is None else safe(score)

    def flag_badge(flag, off_class):
        """Tak/Nie badge; off_class styles the 'Nie' case."""
        if flag:
            return '<span class="badge badge-success">Tak</span>'
        return f'<span class="badge {off_class}">Nie</span>'

    def render_row(company):
        """One table row for a company."""
        overall = company.get('seo_overall_score')
        perf = company.get('pagespeed_performance_score')
        seo = company.get('pagespeed_seo_score')
        acc = company.get('pagespeed_accessibility_score')
        return f'''
                    <tr>
                        <td><strong>{safe(company.get('name'))}</strong></td>
                        <td>{safe(company.get('category_name') or '—')}</td>
                        <td style="color: {score_color(overall)}; font-weight: bold;">{shown(overall)}</td>
                        <td style="color: {score_color(seo)};">{shown(seo)}</td>
                        <td style="color: {score_color(perf)};">{shown(perf)}</td>
                        <td style="color: {score_color(acc)};">{shown(acc)}</td>
                        <td>{flag_badge(company.get('has_ssl'), 'badge-danger')}</td>
                        <td>{flag_badge(company.get('is_mobile_friendly'), 'badge-warning')}</td>
                    </tr>
        '''

    # Calculate statistics
    total = len(companies)
    audited = sum(1 for c in companies if c.get('seo_audited_at'))
    scores = [
        c.get('seo_overall_score') for c in companies
        if c.get('seo_overall_score') is not None
    ]
    # With no scored company an average of "0.0" would be misleading.
    avg_display = f'{sum(scores) / len(scores):.1f}' if scores else '—'
    excellent = sum(1 for s in scores if s >= 90)
    good = sum(1 for s in scores if 70 <= s < 90)
    fair = sum(1 for s in scores if 50 <= s < 70)
    poor = sum(1 for s in scores if s < 50)

    # Generate table rows, best overall score first.
    ranked = sorted(
        companies,
        key=lambda c: c.get('seo_overall_score') or 0,
        reverse=True,
    )
    rows_html = ''.join(render_row(company) for company in ranked)

    generated_at = datetime.now().strftime('%d.%m.%Y %H:%M')
    page_title = safe(title)

    # Plain (non-f) string so the CSS braces need no doubling.
    css = '''
        * { box-sizing: border-box; margin: 0; padding: 0; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            line-height: 1.6; color: #333; background: #f8f9fa; padding: 20px;
        }
        .container { max-width: 1200px; margin: 0 auto; }
        .header {
            background: linear-gradient(135deg, #1a5276 0%, #2e86ab 100%);
            color: white; padding: 30px; border-radius: 8px 8px 0 0;
        }
        .header h1 { font-size: 1.8em; margin-bottom: 5px; }
        .header .meta { opacity: 0.9; font-size: 0.9em; }
        .content { background: white; padding: 30px; border-radius: 0 0 8px 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
        .stats-grid {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 15px; margin-bottom: 30px;
        }
        .stat-card { background: #f8f9fa; border-radius: 8px; padding: 20px; text-align: center; }
        .stat-card .value { font-size: 2em; font-weight: bold; color: #1a5276; }
        .stat-card .label { color: #6c757d; font-size: 0.85em; }
        table { width: 100%; border-collapse: collapse; margin-top: 20px; }
        th, td { padding: 12px; text-align: left; border-bottom: 1px solid #e9ecef; }
        th { background: #f8f9fa; color: #1a5276; font-weight: 600; }
        tr:hover { background: #f8f9fa; }
        .badge { display: inline-block; padding: 3px 8px; border-radius: 4px; font-size: 0.75em; font-weight: 600; }
        .badge-success { background: #d4edda; color: #155724; }
        .badge-warning { background: #fff3cd; color: #856404; }
        .badge-danger { background: #f8d7da; color: #721c24; }
        .footer { text-align: center; color: #6c757d; font-size: 0.85em; margin-top: 20px; }
        @media print { body { background: white; } .container { box-shadow: none; } }
    '''

    html = f'''<!DOCTYPE html>
<html lang="pl">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{page_title}</title>
    <style>{css}</style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>{page_title}</h1>
            <div class="meta">Wygenerowano: {generated_at}</div>
        </div>
        <div class="content">
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="value">{total}</div>
                    <div class="label">Firm w raporcie</div>
                </div>
                <div class="stat-card">
                    <div class="value">{audited}</div>
                    <div class="label">Przebadanych</div>
                </div>
                <div class="stat-card">
                    <div class="value">{avg_display}</div>
                    <div class="label">Sredni wynik</div>
                </div>
                <div class="stat-card">
                    <div class="value" style="color: #28a745;">{excellent}</div>
                    <div class="label">Doskonaly (90+)</div>
                </div>
                <div class="stat-card">
                    <div class="value" style="color: #28a745;">{good}</div>
                    <div class="label">Dobry (70-89)</div>
                </div>
                <div class="stat-card">
                    <div class="value" style="color: #ffc107;">{fair}</div>
                    <div class="label">Sredni (50-69)</div>
                </div>
                <div class="stat-card">
                    <div class="value" style="color: #dc3545;">{poor}</div>
                    <div class="label">Slaby (&lt;50)</div>
                </div>
            </div>

            <h2>Wyniki poszczegolnych firm</h2>
            <table>
                <thead>
                    <tr>
                        <th>Firma</th>
                        <th>Kategoria</th>
                        <th>Wynik SEO</th>
                        <th>PS SEO</th>
                        <th>Wydajnosc</th>
                        <th>Dostepnosc</th>
                        <th>SSL</th>
                        <th>Mobile</th>
                    </tr>
                </thead>
                <tbody>
                    {rows_html}
                </tbody>
            </table>
        </div>
        <div class="footer">
            <p>Raport wygenerowany przez Norda Biznes SEO Auditor v{REPORT_VERSION}</p>
            <p>https://nordabiznes.pl</p>
        </div>
    </div>
</body>
</html>'''

    return html
|
|
|
|
def generate_json_export(
    self,
    companies: List[Dict[str, Any]],
    include_raw_data: bool = False
) -> Dict[str, Any]:
    """
    Generate JSON export of SEO audit data.

    Every company gets an entry with its identity fields and a minimal
    ``seo_audit`` section. The detailed sub-sections (``pagespeed``,
    ``on_page``, ``technical``, ``core_web_vitals``, ``social``, ``issues``,
    ``errors``) are added only when the company has an audit timestamp.

    Args:
        companies: List of company data dicts.
        include_raw_data: Whether to include all raw data fields.
            NOTE: this flag is accepted for interface compatibility but is
            not read by the current implementation.

    Returns:
        Dict ready for JSON serialization.
    """
    export = {
        'report_version': REPORT_VERSION,
        'generated_at': datetime.now().isoformat(),
        'total_companies': len(companies),
        'audited_companies': sum(1 for c in companies if c.get('seo_audited_at')),
        'statistics': self._calculate_statistics(companies),
        'companies': []
    }

    for company in companies:
        audited_at = company.get('seo_audited_at')
        if audited_at and hasattr(audited_at, 'isoformat'):
            audited_at_text = audited_at.isoformat()
        else:
            # Already-formatted timestamps pass through; missing ones become None.
            audited_at_text = audited_at or None

        company_data = {
            'id': company.get('id'),
            'name': company.get('name'),
            'slug': company.get('slug'),
            'website': company.get('website') or company.get('website_url'),
            'category': company.get('category_name'),
            'nip': company.get('nip'),
            'city': company.get('address_city'),
            'seo_audit': {
                'audited_at': audited_at_text,
                'audit_version': company.get('seo_audit_version'),
                'overall_score': company.get('seo_overall_score'),
                'health_score': company.get('seo_health_score'),
            }
        }

        if audited_at:
            audit = company_data['seo_audit']
            audit['pagespeed'] = {
                'seo_score': company.get('pagespeed_seo_score'),
                'performance_score': company.get('pagespeed_performance_score'),
                'accessibility_score': company.get('pagespeed_accessibility_score'),
                'best_practices_score': company.get('pagespeed_best_practices_score'),
            }
            audit['on_page'] = {
                'meta_title': company.get('meta_title'),
                'meta_description': company.get('meta_description'),
                'h1_count': company.get('h1_count'),
                'h2_count': company.get('h2_count'),
                'h3_count': company.get('h3_count'),
                'total_images': company.get('total_images'),
                'images_without_alt': company.get('images_without_alt'),
                'internal_links': company.get('internal_links_count'),
                'external_links': company.get('external_links_count'),
                'has_structured_data': company.get('has_structured_data'),
                'structured_data_types': company.get('structured_data_types'),
            }
            audit['technical'] = {
                'has_ssl': company.get('has_ssl'),
                'has_sitemap': company.get('has_sitemap'),
                'has_robots_txt': company.get('has_robots_txt'),
                'has_canonical': company.get('has_canonical'),
                'is_indexable': company.get('is_indexable'),
                'is_mobile_friendly': company.get('is_mobile_friendly'),
                'viewport_configured': company.get('viewport_configured'),
                'http_status': company.get('http_status_code'),
                'load_time_ms': company.get('load_time_ms'),
            }

            # Compare against None, not truthiness: a layout shift of 0 is a
            # valid (and the best possible) measurement and must be exported.
            cls_value = company.get('cumulative_layout_shift')
            audit['core_web_vitals'] = {
                'lcp_ms': company.get('largest_contentful_paint_ms'),
                'fid_ms': company.get('first_input_delay_ms'),
                'cls': float(cls_value) if cls_value is not None else None,
            }
            audit['social'] = {
                'has_og_tags': company.get('has_og_tags'),
                'og_title': company.get('og_title'),
                'has_twitter_cards': company.get('has_twitter_cards'),
            }
            audit['issues'] = company.get('seo_issues') or []
            audit['errors'] = company.get('seo_audit_errors') or []

        export['companies'].append(company_data)

    return export
|
|
|
|
def _calculate_statistics(self, companies: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Summarise SEO scores, performance scores and technical flags.

    Args:
        companies: List of company data dicts.

    Returns:
        Dict with ``seo_scores``, ``performance_scores`` and ``technical``
        sections. Averages, min and max are None when no score is present.
    """
    def present(field):
        # Values of `field` for the companies that actually have one.
        return [row.get(field) for row in companies if row.get(field) is not None]

    def mean(values):
        return round(sum(values) / len(values), 1) if values else None

    def tally(flag):
        # Number of companies whose `flag` is truthy.
        return sum(1 for row in companies if row.get(flag))

    seo_values = present('seo_overall_score')
    perf_values = present('pagespeed_performance_score')

    distribution = {
        'excellent_90_100': 0,
        'good_70_89': 0,
        'fair_50_69': 0,
        'poor_0_49': 0,
    }
    for score in seo_values:
        if score >= 90:
            distribution['excellent_90_100'] += 1
        elif score >= 70:
            distribution['good_70_89'] += 1
        elif score >= 50:
            distribution['fair_50_69'] += 1
        elif score < 50:
            distribution['poor_0_49'] += 1

    seo_section = {
        'average': mean(seo_values),
        'min': min(seo_values) if seo_values else None,
        'max': max(seo_values) if seo_values else None,
        'count': len(seo_values),
        'distribution': distribution,
    }
    performance_section = {
        'average': mean(perf_values),
        'count': len(perf_values),
    }
    technical_section = {
        'with_ssl': tally('has_ssl'),
        'with_sitemap': tally('has_sitemap'),
        'with_robots_txt': tally('has_robots_txt'),
        'mobile_friendly': tally('is_mobile_friendly'),
        'with_structured_data': tally('has_structured_data'),
        'with_og_tags': tally('has_og_tags'),
    }

    return {
        'seo_scores': seo_section,
        'performance_scores': performance_section,
        'technical': technical_section,
    }
|
|
|
|
def save_html_report(
    self,
    html: str,
    output_path: str
) -> str:
    """
    Write an HTML report to disk as UTF-8.

    Missing parent directories of the target are created first.

    Args:
        html: HTML content string.
        output_path: Path to save file.

    Returns:
        Absolute path of the written file, as a string.
    """
    destination = Path(output_path)
    os.makedirs(destination.parent, exist_ok=True)
    destination.write_text(html, encoding='utf-8')
    return os.fspath(destination.absolute())
|
|
|
|
def save_json_export(
    self,
    data: Dict[str, Any],
    output_path: str
) -> str:
    """
    Serialize an export dict to an indented UTF-8 JSON file.

    Missing parent directories of the target are created first. Values the
    json module cannot encode natively are converted with ``isoformat()``
    when they offer it, otherwise with ``float()`` when they define
    ``__float__``; anything else raises TypeError.

    Args:
        data: Data dict to serialize.
        output_path: Path to save file.

    Returns:
        Absolute path of the written file, as a string.
    """
    def fallback(value):
        # isoformat() is checked before __float__, so date-like values win.
        if hasattr(value, 'isoformat'):
            return value.isoformat()
        if hasattr(value, '__float__'):
            return float(value)
        raise TypeError(f'Object of type {type(value)} is not JSON serializable')

    destination = Path(output_path)
    os.makedirs(destination.parent, exist_ok=True)

    with destination.open('w', encoding='utf-8') as stream:
        json.dump(data, stream, indent=2, ensure_ascii=False, default=fallback)

    return str(destination.absolute())
|
|
|
|
|
|
def parse_batch_argument(batch_str: str) -> Tuple[int, int]:
    """
    Parse a batch range given as 'START-END'.

    Args:
        batch_str: Range text such as '1-10'; whitespace around each number
            is ignored.

    Returns:
        Tuple (start, end) of integers with 1 <= start <= end.

    Raises:
        ValueError: If the text is not exactly two dash-separated integers,
            if START is below 1, or if END is below START.
    """
    # No dash yields no pieces and extra dashes yield three or more; both are
    # reported as the same format error.
    pieces = batch_str.split('-') if '-' in batch_str else []
    if len(pieces) != 2:
        raise ValueError(f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)")

    try:
        start, end = (int(piece.strip()) for piece in pieces)
    except ValueError:
        raise ValueError(f"Invalid batch values '{batch_str}'. START and END must be numbers")

    if start < 1:
        raise ValueError(f"Invalid batch start '{start}'. Must be >= 1")
    if end < start:
        raise ValueError(f"Invalid batch range '{start}-{end}'. END must be >= START")

    return start, end
|
|
|
|
|
|
def _write_company_html_report(generator, company, output_dir, include_recommendations):
    """Render one company's HTML report into output_dir and return the saved path."""
    # `or` instead of a dict.get default: a row may carry an explicit slug of
    # None, which would otherwise produce the file name 'seo_report_None.html'.
    slug = company.get('slug') or f"company_{company.get('id')}"
    html = generator.generate_html_report(
        company,
        include_recommendations=include_recommendations
    )
    filename = f"seo_report_{slug}.html"
    return generator.save_html_report(html, output_dir / filename)


def main():
    """
    Main entry point for CLI usage.

    Parses the command line, loads the selected companies through
    SEOReportGenerator, writes the requested HTML and/or JSON files into the
    output directory and prints a summary of the generated files.

    Exits with status 1 when the arguments are invalid, when the generator
    cannot be initialized, when fetching data fails, or when no company
    matches the selection.
    """
    parser = argparse.ArgumentParser(
        description='Generate SEO reports from Norda Biznes audit data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python seo_report_generator.py --company-id 26 --html
python seo_report_generator.py --all --html --output ./reports
python seo_report_generator.py --batch 1-10 --json
python seo_report_generator.py --all --json --output ./exports
python seo_report_generator.py --all --html --json --output ./output

Output formats:
--html Generate styled HTML reports
--json Generate JSON exports for integration

File naming:
Single company HTML: seo_report_{slug}.html
Batch HTML summary: seo_report_batch_{timestamp}.html
JSON export: seo_export_{timestamp}.json
"""
    )

    # Selection arguments
    selection = parser.add_argument_group('Company Selection (choose one)')
    selection.add_argument('--company-id', type=int, metavar='ID',
                           help='Generate report for single company by ID')
    selection.add_argument('--company-ids', type=str, metavar='IDS',
                           help='Generate reports for multiple companies (comma-separated IDs)')
    selection.add_argument('--batch', type=str, metavar='RANGE',
                           help='Generate reports for batch of companies (e.g., 1-10)')
    selection.add_argument('--all', action='store_true',
                           help='Generate reports for all companies')

    # Output format arguments
    output_group = parser.add_argument_group('Output Format')
    output_group.add_argument('--html', action='store_true',
                              help='Generate HTML reports')
    output_group.add_argument('--json', action='store_true',
                              help='Generate JSON export')

    # Options
    options = parser.add_argument_group('Options')
    options.add_argument('--output', '-o', type=str, metavar='DIR', default='.',
                         help='Output directory (default: current directory)')
    options.add_argument('--no-recommendations', action='store_true',
                         help='Exclude recommendations from HTML reports')
    options.add_argument('--batch-summary', action='store_true',
                         help='Generate batch summary HTML instead of individual reports')
    options.add_argument('--verbose', '-v', action='store_true',
                         help='Verbose output')
    options.add_argument('--database-url', type=str, metavar='URL',
                         help='Database connection URL')

    args = parser.parse_args()

    # Configure logging
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate selection
    selection_count = sum([
        args.company_id is not None,
        args.company_ids is not None,
        args.batch is not None,
        args.all
    ])

    if selection_count == 0:
        parser.print_help()
        print("\nError: Please specify one of --company-id, --company-ids, --batch, or --all")
        sys.exit(1)

    if selection_count > 1:
        print("Error: Please specify only one selection method")
        sys.exit(1)

    # Validate output format
    if not args.html and not args.json:
        parser.print_help()
        print("\nError: Please specify at least one output format: --html or --json")
        sys.exit(1)

    # Parse selection arguments.
    # The checks use `is not None`, matching the validation above. Testing
    # truthiness here would let `--company-id 0` or `--company-ids ""` pass
    # validation and then fall through with no filter, selecting every company.
    company_ids = None
    batch_start, batch_end = None, None

    if args.company_id is not None:
        company_ids = [args.company_id]
    elif args.company_ids is not None:
        try:
            company_ids = [int(x.strip()) for x in args.company_ids.split(',')]
        except ValueError:
            print("Error: Invalid --company-ids format. Use comma-separated integers")
            sys.exit(1)
    elif args.batch is not None:
        try:
            batch_start, batch_end = parse_batch_argument(args.batch)
        except ValueError as e:
            print(f"Error: {e}")
            sys.exit(1)

    # Initialize generator
    database_url = args.database_url or DATABASE_URL
    try:
        generator = SEOReportGenerator(database_url=database_url)
    except Exception as e:
        logger.error(f"Failed to initialize: {e}")
        print(f"Error: {e}")
        sys.exit(1)

    # Fetch data
    logger.info("Fetching company data from database...")
    try:
        companies = generator.get_companies_with_seo_data(
            company_ids=company_ids,
            batch_start=batch_start,
            batch_end=batch_end
        )
    except Exception as e:
        logger.error(f"Failed to fetch data: {e}")
        print(f"Error fetching data: {e}")
        sys.exit(1)

    if not companies:
        print("No companies found matching the criteria")
        sys.exit(1)

    logger.info(f"Found {len(companies)} companies")

    # Create output directory
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    generated_files = []
    include_recommendations = not args.no_recommendations

    # Generate HTML reports
    if args.html:
        if args.batch_summary or len(companies) > 1:
            # Generate batch summary
            logger.info("Generating batch HTML summary...")
            html = generator.generate_batch_html_report(companies)
            filename = f"seo_report_batch_{timestamp}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

            # Also generate individual reports if not only summary
            if not args.batch_summary:
                for company in companies:
                    filepath = _write_company_html_report(
                        generator, company, output_dir, include_recommendations
                    )
                    generated_files.append(filepath)
                    logger.debug(f"Saved: {filepath}")

                logger.info(f"Generated {len(companies)} individual HTML reports")
        else:
            # Single company report
            filepath = _write_company_html_report(
                generator, companies[0], output_dir, include_recommendations
            )
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

    # Generate JSON export
    if args.json:
        logger.info("Generating JSON export...")
        data = generator.generate_json_export(companies)
        filename = f"seo_export_{timestamp}.json"
        filepath = generator.save_json_export(data, output_dir / filename)
        generated_files.append(filepath)
        logger.info(f"Saved: {filepath}")

    # Summary
    print("\n" + "=" * 60)
    print("REPORT GENERATION COMPLETE")
    print("=" * 60)
    print(f"Companies processed: {len(companies)}")
    print(f"Files generated: {len(generated_files)}")
    print(f"Output directory: {output_dir.absolute()}")
    print("\nGenerated files:")
    for f in generated_files:
        print(f" - {f}")
    print("=" * 60)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|