nordabiz/scripts/generate_audit_report.py
Maciej Pienczyn ef39ebf8a3
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
refactor(seo): Rename FID to INP across entire codebase
Google replaced First Input Delay (FID) with Interaction to Next Paint
(INP) as a Core Web Vital in March 2024. This renames the DB column
from first_input_delay_ms to interaction_to_next_paint_ms, updates the
PageSpeed client to prefer the INP audit key, and fixes all references
across routes, services, scripts, and report generators. Updated INP
thresholds: good ≤200ms, needs improvement ≤500ms.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 12:58:41 +01:00

327 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Unified Audit Report Generator for NordaBiz
=============================================
Generates comprehensive audit reports combining:
- Social Media audit data
- Google Business Profile audit data
- SEO audit data
- Competitor monitoring data (if available)
Usage:
python generate_audit_report.py --company-id 26
python generate_audit_report.py --all
python generate_audit_report.py --company-id 26 --type social
Author: NordaBiz Development Team
Created: 2026-02-06
"""
import os
import sys
import json
import argparse
import logging
from datetime import datetime, date, timedelta
from typing import Optional, Dict, List, Any
from pathlib import Path
# Bootstrap: load the project's .env (when python-dotenv is installed) and
# make the project root importable so `database` resolves below.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
try:
    from dotenv import load_dotenv
except ImportError:
    # dotenv is optional — without it we rely on real environment variables.
    pass
else:
    _env_file = _PROJECT_ROOT / '.env'
    if _env_file.exists():
        load_dotenv(_env_file)
sys.path.insert(0, str(_PROJECT_ROOT))
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
from database import (
Company, CompanySocialMedia, GBPAudit, CompanyWebsiteAnalysis,
CompanyCompetitor, CompetitorSnapshot, AuditReport, SessionLocal
)
# Module-wide logging: timestamped, level-tagged messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Database connection string; the environment variable overrides the
# local-development default.
DATABASE_URL = os.environ.get(
    'DATABASE_URL',
    'postgresql://nordabiz_app:CHANGE_ME@127.0.0.1:5432/nordabiz',
)
class AuditReportGenerator:
    """Builds and persists unified audit reports (social / GBP / SEO / competitors)."""

    def __init__(self, database_url: str = DATABASE_URL):
        # Each generator owns its engine and session factory so instances
        # can be used standalone (e.g. from cron or an interactive shell).
        self.engine = create_engine(database_url)
        self.Session = sessionmaker(bind=self.engine)

    def generate_report(self, company_id: int, report_type: str = 'full') -> Dict[str, Any]:
        """Assemble an audit report for one company, persist it as a draft, and return it.

        Raises:
            ValueError: if no company with ``company_id`` exists.
        """
        with self.Session() as session:
            company = session.query(Company).filter(Company.id == company_id).first()
            if not company:
                raise ValueError(f"Company {company_id} not found")

            logger.info(f"Generating {report_type} report for: {company.name} (ID: {company_id})")

            payload: Dict[str, Any] = {
                'company': {
                    'id': company.id,
                    'name': company.name,
                    'slug': company.slug,
                    'website': company.website,
                    'city': company.address_city,
                    'category': company.category.name if company.category else None,
                },
                'generated_at': datetime.now().isoformat(),
                'report_type': report_type,
                'sections': {},
                'scores': {},
            }
            included = {'social': False, 'gbp': False, 'seo': False, 'competitors': False}

            # Scored sections, keyed by name; the third element is the field
            # inside each section dict that supplies that section's score.
            loaders = (
                ('social', self._get_social_data, 'average_completeness'),
                ('gbp', self._get_gbp_data, 'completeness_score'),
                ('seo', self._get_seo_data, 'overall_score'),
            )
            for name, loader, score_key in loaders:
                if report_type not in ('full', name):
                    continue
                section = loader(session, company_id)
                if section:
                    payload['sections'][name] = section
                    payload['scores'][name] = section.get(score_key, 0)
                    included[name] = True

            # Competitors have no score and are only part of the full report.
            if report_type == 'full':
                competitors = self._get_competitor_data(session, company_id)
                if competitors:
                    payload['sections']['competitors'] = competitors
                    included['competitors'] = True

            # Overall score: mean of the strictly-positive section scores
            # (0 when no section produced a score).
            positive = [s for s in payload['scores'].values() if s and s > 0]
            overall = int(sum(positive) / len(positive)) if positive else 0
            payload['scores']['overall'] = overall

            # Persist as a draft covering the trailing 30 days.
            report = AuditReport(
                company_id=company_id,
                report_type=report_type,
                period_start=date.today() - timedelta(days=30),
                period_end=date.today(),
                overall_score=overall,
                social_score=payload['scores'].get('social'),
                gbp_score=payload['scores'].get('gbp'),
                seo_score=payload['scores'].get('seo'),
                sections=included,
                data=payload,
                generated_by='system',
                status='draft',
            )
            session.add(report)
            session.commit()
            session.refresh(report)

            # Added after commit on purpose: the stored JSON does not contain
            # the report's own id, only the returned dict does.
            payload['report_id'] = report.id
            logger.info(f"Report #{report.id} generated. Overall score: {overall}/100")
            return payload

    def _get_social_data(self, session, company_id: int) -> Optional[Dict]:
        """Summarize all valid social profiles; None when the company has none."""
        profiles = session.query(CompanySocialMedia).filter(
            CompanySocialMedia.company_id == company_id,
            CompanySocialMedia.is_valid == True
        ).all()
        if not profiles:
            return None

        platforms = [
            {
                'platform': p.platform,
                'url': p.url,
                'page_name': p.page_name,
                'followers_count': p.followers_count,
                'has_bio': p.has_bio,
                'has_profile_photo': p.has_profile_photo,
                'completeness_score': p.profile_completeness_score or 0,
                'last_checked': p.last_checked_at.isoformat() if p.last_checked_at else None,
            }
            for p in profiles
        ]
        total = sum(entry['completeness_score'] for entry in platforms)
        return {
            'platforms_found': len(platforms),
            'platforms': platforms,
            'average_completeness': int(total / len(platforms)),
            'missing_platforms': self._find_missing_platforms(profiles),
        }

    @staticmethod
    def _find_missing_platforms(profiles) -> List[str]:
        """Names of the standard platforms with no profile, sorted alphabetically."""
        expected = {'facebook', 'instagram', 'linkedin', 'youtube', 'twitter', 'tiktok'}
        present = {p.platform for p in profiles}
        return sorted(expected - present)

    def _get_gbp_data(self, session, company_id: int) -> Optional[Dict]:
        """Most recent Google Business Profile audit; None when never audited."""
        audit = (
            session.query(GBPAudit)
            .filter(GBPAudit.company_id == company_id)
            .order_by(GBPAudit.audit_date.desc())
            .first()
        )
        if audit is None:
            return None
        # NOTE(review): the truthiness checks map a 0 rating / 0 response rate
        # to None — presumably 0 means "no data"; confirm with the data model.
        return {
            'completeness_score': audit.completeness_score,
            'score_category': audit.score_category,
            'audit_date': audit.audit_date.isoformat() if audit.audit_date else None,
            'fields_status': audit.fields_status,
            'review_count': audit.review_count,
            'average_rating': float(audit.average_rating) if audit.average_rating else None,
            'photo_count': audit.photo_count,
            'nap_consistent': audit.nap_consistent,
            'review_response_rate': float(audit.review_response_rate) if audit.review_response_rate else None,
            'review_sentiment': audit.review_sentiment,
            'recommendations': audit.recommendations or [],
        }

    def _get_seo_data(self, session, company_id: int) -> Optional[Dict]:
        """Website/SEO analysis snapshot; None when the site was never analyzed."""
        # NOTE(review): no ORDER BY here — assumes at most one analysis row
        # per company; verify against the schema.
        analysis = (
            session.query(CompanyWebsiteAnalysis)
            .filter(CompanyWebsiteAnalysis.company_id == company_id)
            .first()
        )
        if analysis is None:
            return None
        return {
            'overall_score': analysis.seo_overall_score,
            'pagespeed_seo': analysis.pagespeed_seo_score,
            'pagespeed_performance': analysis.pagespeed_performance_score,
            'pagespeed_accessibility': analysis.pagespeed_accessibility_score,
            'meta_title': analysis.meta_title,
            'has_structured_data': analysis.has_structured_data,
            'has_sitemap': analysis.has_sitemap,
            'has_robots_txt': analysis.has_robots_txt,
            'is_mobile_friendly': analysis.is_mobile_friendly,
            'local_seo_score': analysis.local_seo_score,
            'has_local_business_schema': analysis.has_local_business_schema,
            'citations_count': analysis.citations_count,
            'content_freshness_score': analysis.content_freshness_score,
            'core_web_vitals': {
                'lcp_ms': analysis.largest_contentful_paint_ms,
                'inp_ms': analysis.interaction_to_next_paint_ms,
                'cls': float(analysis.cumulative_layout_shift) if analysis.cumulative_layout_shift else None,
            },
            'seo_issues': analysis.seo_issues,
        }

    def _get_competitor_data(self, session, company_id: int) -> Optional[Dict]:
        """Active tracked competitors with their newest snapshot; None if none tracked."""
        competitors = session.query(CompanyCompetitor).filter(
            CompanyCompetitor.company_id == company_id,
            CompanyCompetitor.is_active == True
        ).all()
        if not competitors:
            return None

        entries = []
        for comp in competitors:
            # Newest snapshot per competitor (one query each; competitor
            # lists are small, so N+1 is acceptable here).
            newest = (
                session.query(CompetitorSnapshot)
                .filter(CompetitorSnapshot.competitor_id == comp.id)
                .order_by(CompetitorSnapshot.snapshot_date.desc())
                .first()
            )
            entries.append({
                'name': comp.competitor_name,
                'rating': float(comp.competitor_rating) if comp.competitor_rating else None,
                'review_count': comp.competitor_review_count,
                'category': comp.competitor_category,
                'latest_changes': newest.changes if newest else None,
            })
        return {
            'total_tracked': len(competitors),
            'competitors': entries,
        }
def main() -> None:
    """CLI entry point: generate an audit report for one company or all active ones.

    Exits with status 1 (after printing help) when neither --company-id
    nor --all is supplied.
    """
    parser = argparse.ArgumentParser(description='Generate Unified Audit Report')
    parser.add_argument('--company-id', type=int, help='Generate report for specific company')
    parser.add_argument('--all', action='store_true', help='Generate for all active companies')
    parser.add_argument('--type', choices=['full', 'social', 'gbp', 'seo'], default='full')
    parser.add_argument('--json', action='store_true', help='Output JSON to stdout')
    parser.add_argument('--verbose', '-v', action='store_true')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    generator = AuditReportGenerator()

    # Compare against None, not truthiness: a company id of 0 must still be honored.
    if args.company_id is not None:
        report = generator.generate_report(args.company_id, args.type)
        if args.json:
            # default=str stringifies non-JSON types (dates, Decimals) on purpose.
            print(json.dumps(report, default=str, indent=2, ensure_ascii=False))
        else:
            print(f"\nReport #{report.get('report_id')} generated")
            print(f"Overall score: {report['scores'].get('overall', 0)}/100")
            for section in report.get('sections', {}):
                print(f" {section}: included")
    elif args.all:
        # Collect the ids with a short-lived session, then generate outside it
        # so each report uses its own fresh session.
        engine = create_engine(DATABASE_URL)
        Session = sessionmaker(bind=engine)
        with Session() as session:
            companies = session.query(Company).filter(Company.status == 'active').all()
            company_ids = [c.id for c in companies]
        for cid in company_ids:
            try:
                report = generator.generate_report(cid, args.type)
                print(f"Company {cid}: score={report['scores'].get('overall', 0)}")
            except Exception as e:
                # Best-effort batch: log the failure and keep going.
                logger.error(f"Company {cid} failed: {e}")
    else:
        parser.print_help()
        sys.exit(1)


if __name__ == '__main__':
    main()