auto-claude: 4.2 - Add CLI argument parsing, progress logging, and error handling
Enhanced scripts/seo_audit.py with comprehensive CLI improvements:

CLI Arguments:
- --company-id: Audit single company by ID
- --company-ids: Audit multiple companies (comma-separated)
- --batch: Audit range of companies (e.g., 1-10)
- --all: Audit all companies
- --dry-run: Print results without database writes
- --verbose/-v: Debug output
- --quiet/-q: Suppress progress output
- --json: JSON output for scripting
- --database-url: Override DATABASE_URL env var

Progress Logging:
- ETA calculation based on average time per company
- Progress counter [X/Y] for each company
- Status indicators (SUCCESS/SKIPPED/FAILED/TIMEOUT)

Summary Reporting:
- Detailed breakdown by result category
- Edge case counts (no_website, unavailable, timeout, ssl_errors)
- PageSpeed API quota tracking (start/used/remaining)
- Visual score distribution with bar charts
- Failed audits listing with error messages

Error Handling:
- Proper exit codes (0-5) for different scenarios
- Categorization of errors (timeout, connection, SSL, unavailable)
- Database connection error handling
- Quota exceeded handling
- Batch argument validation with helpful error messages

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
2bebb46f02
commit
c8eb0829d9
@ -16,6 +16,14 @@ Usage:
|
||||
python seo_audit.py --all
|
||||
python seo_audit.py --company-id 26 --dry-run
|
||||
|
||||
Exit codes:
|
||||
0 - All audits completed successfully
|
||||
1 - Argument error or invalid input
|
||||
2 - Partial failures (some audits failed)
|
||||
3 - All audits failed
|
||||
4 - Database connection error
|
||||
5 - API quota exceeded
|
||||
|
||||
Author: Claude Code
|
||||
Date: 2026-01-08
|
||||
"""
|
||||
@ -25,11 +33,13 @@ import sys
|
||||
import json
|
||||
import argparse
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, List, Any
|
||||
import time as time_module
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, Dict, List, Any, Tuple
|
||||
|
||||
import requests
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
# Import SEO analysis components
|
||||
@ -50,10 +60,19 @@ from seo_analyzer import (
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
datefmt='%Y-%m-%d %H:%M:%S'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Exit codes
|
||||
EXIT_SUCCESS = 0
|
||||
EXIT_ARGUMENT_ERROR = 1
|
||||
EXIT_PARTIAL_FAILURES = 2
|
||||
EXIT_ALL_FAILED = 3
|
||||
EXIT_DATABASE_ERROR = 4
|
||||
EXIT_QUOTA_EXCEEDED = 5
|
||||
|
||||
# Database configuration
|
||||
DATABASE_URL = os.getenv(
|
||||
'DATABASE_URL',
|
||||
@ -192,14 +211,13 @@ class SEOAuditor:
|
||||
|
||||
try:
|
||||
logger.info(f" Fetching page: {website_url}")
|
||||
import time
|
||||
start_time = time.time()
|
||||
start_time = time_module.time()
|
||||
response = self.session.get(
|
||||
website_url,
|
||||
timeout=REQUEST_TIMEOUT,
|
||||
allow_redirects=True
|
||||
)
|
||||
load_time_ms = int((time.time() - start_time) * 1000)
|
||||
load_time_ms = int((time_module.time() - start_time) * 1000)
|
||||
http_status = response.status_code
|
||||
final_url = response.url
|
||||
|
||||
@ -710,74 +728,394 @@ class SEOAuditor:
|
||||
Returns:
|
||||
Summary dict with success/failed counts and results.
|
||||
"""
|
||||
start_time = time_module.time()
|
||||
companies = self.get_companies(company_ids, batch_start, batch_end)
|
||||
|
||||
if not companies:
|
||||
logger.warning("No companies found matching the specified criteria")
|
||||
return {
|
||||
'total': 0,
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0,
|
||||
'no_website': 0,
|
||||
'unavailable': 0,
|
||||
'timeout': 0,
|
||||
'quota_remaining': self.pagespeed_client.get_remaining_quota(),
|
||||
'duration_seconds': 0,
|
||||
'results': [],
|
||||
}
|
||||
|
||||
summary = {
|
||||
'total': len(companies),
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0,
|
||||
'no_website': 0, # Companies without website URL
|
||||
'unavailable': 0, # Websites that returned 4xx/5xx
|
||||
'timeout': 0, # Websites that timed out
|
||||
'ssl_errors': 0, # SSL certificate issues
|
||||
'connection_errors': 0, # Connection refused/DNS errors
|
||||
'quota_exceeded': False,
|
||||
'quota_remaining': self.pagespeed_client.get_remaining_quota(),
|
||||
'quota_start': self.pagespeed_client.get_remaining_quota(),
|
||||
'results': [],
|
||||
}
|
||||
|
||||
logger.info(f"Starting SEO audit for {len(companies)} companies")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"SEO AUDIT STARTING")
|
||||
logger.info("=" * 60)
|
||||
logger.info(f"Companies to audit: {len(companies)}")
|
||||
logger.info(f"Mode: {'DRY RUN (no database writes)' if dry_run else 'LIVE'}")
|
||||
logger.info(f"PageSpeed API quota remaining: {summary['quota_remaining']}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
for i, company in enumerate(companies, 1):
|
||||
logger.info(f"\n[{i}/{len(companies)}] Processing company ID: {company['id']}")
|
||||
# Progress estimation
|
||||
elapsed = time_module.time() - start_time
|
||||
if i > 1:
|
||||
avg_time_per_company = elapsed / (i - 1)
|
||||
remaining_companies = len(companies) - i + 1
|
||||
eta_seconds = avg_time_per_company * remaining_companies
|
||||
eta_str = str(timedelta(seconds=int(eta_seconds)))
|
||||
else:
|
||||
eta_str = "calculating..."
|
||||
|
||||
logger.info("")
|
||||
logger.info(f"[{i}/{len(companies)}] {company['name']} (ID: {company['id']}) - ETA: {eta_str}")
|
||||
|
||||
# Check for quota before proceeding
|
||||
current_quota = self.pagespeed_client.get_remaining_quota()
|
||||
if current_quota <= 0:
|
||||
logger.warning(f" PageSpeed quota exhausted, skipping PageSpeed analysis")
|
||||
summary['quota_exceeded'] = True
|
||||
|
||||
try:
|
||||
result = self.audit_company(company)
|
||||
|
||||
if not dry_run:
|
||||
if self.save_audit_result(result):
|
||||
# Categorize the result based on errors
|
||||
result_status = self._categorize_result(result)
|
||||
|
||||
if result_status == 'no_website':
|
||||
summary['no_website'] += 1
|
||||
summary['skipped'] += 1
|
||||
logger.info(f" → SKIPPED: No website URL configured")
|
||||
elif result_status == 'unavailable':
|
||||
summary['unavailable'] += 1
|
||||
summary['failed'] += 1
|
||||
logger.warning(f" → UNAVAILABLE: HTTP {result.get('http_status')}")
|
||||
elif result_status == 'timeout':
|
||||
summary['timeout'] += 1
|
||||
summary['failed'] += 1
|
||||
logger.warning(f" → TIMEOUT: Website did not respond")
|
||||
elif result_status == 'ssl_error':
|
||||
summary['ssl_errors'] += 1
|
||||
# Still count as success if we got data via HTTP fallback
|
||||
if result.get('onpage'):
|
||||
summary['success'] += 1
|
||||
logger.info(f" → SUCCESS (with SSL warning)")
|
||||
else:
|
||||
summary['failed'] += 1
|
||||
logger.warning(f" → FAILED: SSL error, no fallback data")
|
||||
elif result_status == 'connection_error':
|
||||
summary['connection_errors'] += 1
|
||||
summary['failed'] += 1
|
||||
logger.warning(f" → FAILED: Connection error")
|
||||
else:
|
||||
summary['success'] += 1
|
||||
# Print result in dry run mode
|
||||
print("\n" + "=" * 60)
|
||||
print(f"Company: {company['name']} (ID: {company['id']})")
|
||||
print(f"Website: {result.get('website_url')}")
|
||||
print(f"HTTP Status: {result.get('http_status')}")
|
||||
print(f"Load Time: {result.get('load_time_ms')}ms")
|
||||
print(f"\nScores:")
|
||||
scores = result.get('scores', {})
|
||||
print(f" PageSpeed SEO: {scores.get('pagespeed_seo')}")
|
||||
print(f" PageSpeed Performance: {scores.get('pagespeed_performance')}")
|
||||
print(f" Overall SEO: {scores.get('overall_seo')}")
|
||||
if result.get('errors'):
|
||||
print(f"\nErrors:")
|
||||
for err in result['errors']:
|
||||
print(f" - {err}")
|
||||
print("=" * 60)
|
||||
score = result.get('scores', {}).get('overall_seo')
|
||||
logger.info(f" → SUCCESS: Overall SEO score: {score}")
|
||||
|
||||
# Save to database or print in dry-run mode
|
||||
if not dry_run:
|
||||
if result_status not in ('no_website',):
|
||||
if self.save_audit_result(result):
|
||||
logger.debug(f" Saved to database")
|
||||
else:
|
||||
logger.error(f" Failed to save to database")
|
||||
else:
|
||||
self._print_dry_run_result(company, result)
|
||||
|
||||
# Build result entry
|
||||
summary['results'].append({
|
||||
'company_id': company['id'],
|
||||
'company_name': company['name'],
|
||||
'status': 'success',
|
||||
'status': result_status,
|
||||
'overall_score': result.get('scores', {}).get('overall_seo'),
|
||||
'pagespeed_seo': result.get('scores', {}).get('pagespeed_seo'),
|
||||
'http_status': result.get('http_status'),
|
||||
'load_time_ms': result.get('load_time_ms'),
|
||||
'errors_count': len(result.get('errors', [])),
|
||||
'errors': result.get('errors', []),
|
||||
})
|
||||
|
||||
except QuotaExceededError:
|
||||
logger.error(f" PageSpeed API quota exceeded!")
|
||||
summary['quota_exceeded'] = True
|
||||
summary['skipped'] += 1
|
||||
summary['results'].append({
|
||||
'company_id': company['id'],
|
||||
'company_name': company['name'],
|
||||
'status': 'quota_exceeded',
|
||||
'error': 'PageSpeed API quota exceeded',
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Audit failed for company {company['id']}: {e}")
|
||||
logger.error(f" Unexpected error: {e}")
|
||||
summary['failed'] += 1
|
||||
summary['results'].append({
|
||||
'company_id': company['id'],
|
||||
'company_name': company['name'],
|
||||
'status': 'failed',
|
||||
'status': 'error',
|
||||
'error': str(e),
|
||||
})
|
||||
|
||||
# Update final quota
|
||||
# Final summary
|
||||
summary['quota_remaining'] = self.pagespeed_client.get_remaining_quota()
|
||||
summary['quota_used'] = summary['quota_start'] - summary['quota_remaining']
|
||||
summary['duration_seconds'] = int(time_module.time() - start_time)
|
||||
|
||||
return summary
|
||||
|
||||
def _categorize_result(self, result: Dict[str, Any]) -> str:
    """
    Categorize an audit result based on the errors it recorded.

    Checks run in a fixed priority order and the first match wins:
    missing website URL, timeout, connection error, SSL error,
    HTTP status >= 400, and finally "errors present but neither
    on-page nor technical data collected".

    Args:
        result: Audit result dict. The 'errors', 'http_status', 'onpage'
            and 'technical' keys are read; all of them are optional.

    Returns:
        One of: 'success', 'no_website', 'unavailable', 'timeout',
        'ssl_error', 'connection_error', 'error'
    """
    # Tolerate an explicit None and non-string entries (e.g. exception
    # objects) so that categorization itself can never raise.
    errors = result.get('errors') or []
    error_text = ' '.join(str(err) for err in errors).lower()

    # (substrings, category) pairs, in priority order.
    text_rules = (
        (('no website url',), 'no_website'),
        (('timeout',), 'timeout'),
        (('connection error', 'connection refused'), 'connection_error'),
        (('ssl error',), 'ssl_error'),
    )
    for needles, category in text_rules:
        if any(needle in error_text for needle in needles):
            return category

    # HTTP errors (4xx, 5xx)
    http_status = result.get('http_status')
    if http_status and http_status >= 400:
        return 'unavailable'

    # Errors alongside collected data count as a (partial) success; errors
    # with no data at all are a plain failure.
    if errors and not result.get('onpage') and not result.get('technical'):
        return 'error'

    return 'success'
|
||||
|
||||
def _print_dry_run_result(self, company: Dict, result: Dict[str, Any]) -> None:
    """
    Print a formatted audit result to stdout in dry-run mode.

    Args:
        company: Company record; the 'name' and 'id' keys are required.
        result: Audit result dict. Reads 'website_url', 'http_status',
            'load_time_ms', 'final_url', 'scores', 'onpage', 'technical'
            and 'errors'; every key is optional.
    """
    print("\n" + "-" * 60)
    print(f"Company: {company['name']} (ID: {company['id']})")
    print(f"Website: {result.get('website_url') or 'Not configured'}")

    if result.get('http_status'):
        print(f"HTTP Status: {result.get('http_status')}")
    if result.get('load_time_ms'):
        print(f"Load Time: {result.get('load_time_ms')}ms")
    final_url = result.get('final_url')
    if final_url and final_url != result.get('website_url'):
        print(f"Final URL (after redirects): {final_url}")

    # `or {}` guards against keys that are present but explicitly None.
    scores = result.get('scores') or {}
    # A score of 0 is a legitimate value, so test for presence rather than
    # truthiness; otherwise an all-zero result would hide the whole section.
    if any(value is not None for value in scores.values()):
        print("\nScores:")
        score_labels = (
            ('overall_seo', 'Overall SEO'),
            ('pagespeed_seo', 'PageSpeed SEO'),
            ('pagespeed_performance', 'PageSpeed Performance'),
            ('pagespeed_accessibility', 'PageSpeed Accessibility'),
            ('pagespeed_best_practices', 'PageSpeed Best Practices'),
        )
        for key, label in score_labels:
            if scores.get(key) is not None:
                print(f" {label}: {scores.get(key)}")

    # On-page summary
    onpage = result.get('onpage') or {}
    if onpage:
        print("\nOn-Page SEO:")
        meta = onpage.get('meta_tags') or {}
        title = meta.get('title')
        if title:
            title = str(title)
            # Only show an ellipsis when the title was actually cut.
            ellipsis = "..." if len(title) > 60 else ""
            print(f" Title: {title[:60]}{ellipsis}")
        headings = onpage.get('headings') or {}
        print(f" H1 count: {headings.get('h1_count', 0)}")
        images = onpage.get('images') or {}
        if images.get('total_images'):
            print(f" Images: {images.get('total_images')} total, {images.get('images_without_alt', 0)} missing alt")
        structured = onpage.get('structured_data') or {}
        print(f" Structured Data: {'Yes' if structured.get('has_structured_data') else 'No'}")

    # Technical SEO summary
    technical = result.get('technical') or {}
    if technical:
        print("\nTechnical SEO:")
        robots = technical.get('robots_txt') or {}
        print(f" robots.txt: {'Yes' if robots.get('exists') else 'No'}")
        sitemap = technical.get('sitemap') or {}
        print(f" sitemap.xml: {'Yes' if sitemap.get('exists') else 'No'}")
        indexability = technical.get('indexability') or {}
        # A missing flag is reported as indexable (default True).
        print(f" Indexable: {'Yes' if indexability.get('is_indexable', True) else 'No'}")

    issues = result.get('errors')
    if issues:
        print(f"\nIssues ({len(issues)}):")
        for err in issues[:5]:  # Show first 5 errors
            print(f" ⚠ {err}")
        if len(issues) > 5:
            print(f" ... and {len(issues) - 5} more")

    print("-" * 60)
|
||||
|
||||
|
||||
def parse_batch_argument(batch_str: str) -> Tuple[int, int]:
    """
    Parse batch argument in format 'START-END'.

    Args:
        batch_str: String like '1-10' or '5-20'. Whitespace around each
            number is ignored.

    Returns:
        Tuple of (start, end) integers, with 1 <= start <= end.

    Raises:
        ValueError: If the string is not exactly two '-'-separated parts,
            either part is not an integer, START is below 1, or END is
            below START.
    """
    # A string without '-' splits into one part and one with extra dashes
    # (including negative numbers) into three or more, so a single length
    # check covers every malformed shape.
    parts = batch_str.split('-')
    if len(parts) != 2:
        raise ValueError(f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)")

    try:
        start = int(parts[0].strip())
        end = int(parts[1].strip())
    except ValueError as err:
        # Chain the original conversion error so tracebacks keep the cause.
        raise ValueError(
            f"Invalid batch values '{batch_str}'. START and END must be numbers"
        ) from err

    if start < 1:
        raise ValueError(f"Invalid batch start '{start}'. Must be >= 1")

    if end < start:
        raise ValueError(f"Invalid batch range '{start}-{end}'. END must be >= START")

    return start, end
|
||||
|
||||
|
||||
def print_summary(summary: Dict[str, Any], dry_run: bool = False) -> None:
    """
    Print a formatted audit summary to stdout.

    Sections, in order: run mode and duration, results breakdown, PageSpeed
    quota, SEO score distribution (only when at least one result carries an
    'overall_score'), and a listing of up to ten failed audits.

    Args:
        summary: Summary dict. 'total', 'success', 'failed' and 'skipped'
            are required; every other key is read with a default.
        dry_run: Whether the run was a dry run; only affects the printed
            mode label.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    duration = summary.get('duration_seconds', 0)
    duration_str = str(timedelta(seconds=duration))

    print("\n")
    print(heavy_rule)
    print(" SEO AUDIT COMPLETE")
    print(heavy_rule)
    print("")
    print(f" Mode: {'DRY RUN' if dry_run else 'LIVE'}")
    print(f" Duration: {duration_str}")
    print("")
    print(light_rule)
    print(" RESULTS BREAKDOWN")
    print(light_rule)
    print(f" Total companies: {summary['total']}")
    print(f" ✓ Successful: {summary['success']}")
    print(f" ✗ Failed: {summary['failed']}")
    print(f" ○ Skipped: {summary['skipped']}")
    print("")

    # Edge case breakdown: only categories that actually occurred are shown.
    edge_cases = (
        ('no_website', 'No website'),
        ('unavailable', 'Unavailable'),
        ('timeout', 'Timeout'),
        ('ssl_errors', 'SSL errors'),
        ('connection_errors', 'Connection errors'),
    )
    for key, label in edge_cases:
        if summary.get(key, 0) > 0:
            print(f" - {label}: {summary[key]}")

    print("")
    print(light_rule)
    print(" PAGESPEED API QUOTA")
    print(light_rule)
    print(f" Quota at start: {summary.get('quota_start', 'N/A')}")
    print(f" Quota used: {summary.get('quota_used', 'N/A')}")
    print(f" Quota remaining: {summary.get('quota_remaining', 'N/A')}")

    if summary.get('quota_exceeded'):
        print(" ⚠ WARNING: Quota was exceeded during this run!")

    # Score distribution
    results = summary.get('results', [])
    scores = [r.get('overall_score') for r in results if r.get('overall_score') is not None]

    if scores:
        avg_score = sum(scores) / len(scores)
        print("")
        print(light_rule)
        print(" SEO SCORE DISTRIBUTION")
        print(light_rule)
        print(f" Companies with scores: {len(scores)}")
        print(f" Average SEO score: {avg_score:.1f}")
        print(f" Highest score: {max(scores)}")
        print(f" Lowest score: {min(scores)}")
        print("")

        # Score ranges with visual bars
        excellent = sum(1 for s in scores if s >= 90)
        good = sum(1 for s in scores if 70 <= s < 90)
        fair = sum(1 for s in scores if 50 <= s < 70)
        poor = sum(1 for s in scores if s < 50)

        total = len(scores)

        def bar(count, total, max_bar=30):
            """Return a fixed-width bar with `count / total` of it filled."""
            if total == 0:
                return ""
            width = int((count / total) * max_bar)
            return "█" * width + "░" * (max_bar - width)

        print(f" Excellent (90-100): {excellent:3d} {bar(excellent, total)}")
        print(f" Good (70-89): {good:3d} {bar(good, total)}")
        print(f" Fair (50-69): {fair:3d} {bar(fair, total)}")
        print(f" Poor (<50): {poor:3d} {bar(poor, total)}")

    # List failed companies
    failed_statuses = ('unavailable', 'timeout', 'connection_error', 'error')
    failed_results = [r for r in results if r.get('status') in failed_statuses]
    if failed_results:
        status_icons = {
            'unavailable': '🔴',
            'timeout': '⏱',
            'connection_error': '🔌',
            'error': '❌',
        }
        print("")
        print(light_rule)
        print(" FAILED AUDITS")
        print(light_rule)
        for r in failed_results[:10]:  # Show first 10
            status_icon = status_icons.get(r['status'], '?')
            errors = r.get('errors') or []
            if errors:
                error_msg = str(errors[0])[:50]
            else:
                # Entries recorded from an unexpected exception carry one
                # 'error' string instead of an 'errors' list; prefer it over
                # the bare status so the real cause is shown.
                error_msg = str(r.get('error') or r.get('status', 'Unknown'))[:50]
            print(f" {status_icon} {r['company_name'][:30]:<30} - {error_msg}")
        if len(failed_results) > 10:
            print(f" ... and {len(failed_results) - 10} more")

    print("")
    print(heavy_rule)
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point for CLI usage."""
|
||||
@ -790,83 +1128,143 @@ Examples:
|
||||
python seo_audit.py --batch 1-10 # Audit companies 1-10
|
||||
python seo_audit.py --all # Audit all companies
|
||||
python seo_audit.py --company-id 26 --dry-run # Test without saving
|
||||
python seo_audit.py --all --json > report.json # Export to JSON
|
||||
|
||||
Exit codes:
|
||||
0 - All audits completed successfully
|
||||
1 - Argument error or invalid input
|
||||
2 - Partial failures (some audits failed)
|
||||
3 - All audits failed
|
||||
4 - Database connection error
|
||||
5 - API quota exceeded
|
||||
"""
|
||||
)
|
||||
parser.add_argument('--company-id', type=int,
|
||||
help='Audit single company by ID')
|
||||
parser.add_argument('--batch', type=str,
|
||||
help='Audit batch of companies (e.g., 1-10)')
|
||||
parser.add_argument('--all', action='store_true',
|
||||
help='Audit all companies')
|
||||
parser.add_argument('--dry-run', action='store_true',
|
||||
help='Print results without saving to database')
|
||||
parser.add_argument('--verbose', '-v', action='store_true',
|
||||
help='Enable verbose output')
|
||||
parser.add_argument('--json', action='store_true',
|
||||
help='Output results as JSON')
|
||||
|
||||
# Selection arguments (mutually exclusive in practice)
|
||||
selection = parser.add_argument_group('Company Selection (choose one)')
|
||||
selection.add_argument('--company-id', type=int, metavar='ID',
|
||||
help='Audit single company by ID')
|
||||
selection.add_argument('--company-ids', type=str, metavar='IDS',
|
||||
help='Audit multiple companies by IDs (comma-separated, e.g., 1,5,10)')
|
||||
selection.add_argument('--batch', type=str, metavar='RANGE',
|
||||
help='Audit batch of companies by row offset (e.g., 1-10)')
|
||||
selection.add_argument('--all', action='store_true',
|
||||
help='Audit all companies')
|
||||
|
||||
# Options
|
||||
options = parser.add_argument_group('Options')
|
||||
options.add_argument('--dry-run', action='store_true',
|
||||
help='Print results without saving to database')
|
||||
options.add_argument('--verbose', '-v', action='store_true',
|
||||
help='Enable verbose/debug output')
|
||||
options.add_argument('--quiet', '-q', action='store_true',
|
||||
help='Suppress progress output (only show summary)')
|
||||
options.add_argument('--json', action='store_true',
|
||||
help='Output results as JSON (for scripting)')
|
||||
options.add_argument('--database-url', type=str, metavar='URL',
|
||||
help='Database connection URL (overrides DATABASE_URL env var)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
# Configure logging level
|
||||
if args.quiet:
|
||||
logging.getLogger().setLevel(logging.WARNING)
|
||||
elif args.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
|
||||
# Check that at least one selection method is provided
|
||||
if not (args.company_id or args.batch or args.all):
|
||||
# Validate that at least one selection method is provided
|
||||
selection_count = sum([
|
||||
args.company_id is not None,
|
||||
args.company_ids is not None,
|
||||
args.batch is not None,
|
||||
args.all
|
||||
])
|
||||
|
||||
if selection_count == 0:
|
||||
parser.print_help()
|
||||
print("\nError: Please specify --company-id, --batch, or --all")
|
||||
sys.exit(1)
|
||||
print("\n❌ Error: Please specify one of --company-id, --company-ids, --batch, or --all")
|
||||
sys.exit(EXIT_ARGUMENT_ERROR)
|
||||
|
||||
if selection_count > 1:
|
||||
print("❌ Error: Please specify only one selection method (--company-id, --company-ids, --batch, or --all)")
|
||||
sys.exit(EXIT_ARGUMENT_ERROR)
|
||||
|
||||
# Parse batch argument if provided
|
||||
batch_start, batch_end = None, None
|
||||
if args.batch:
|
||||
try:
|
||||
batch_start, batch_end = parse_batch_argument(args.batch)
|
||||
except ValueError as e:
|
||||
print(f"❌ Error: {e}")
|
||||
sys.exit(EXIT_ARGUMENT_ERROR)
|
||||
|
||||
# Parse company IDs if provided
|
||||
company_ids = None
|
||||
if args.company_id:
|
||||
company_ids = [args.company_id]
|
||||
elif args.company_ids:
|
||||
try:
|
||||
company_ids = [int(x.strip()) for x in args.company_ids.split(',')]
|
||||
if not company_ids:
|
||||
raise ValueError("Empty list")
|
||||
except ValueError:
|
||||
print(f"❌ Error: Invalid --company-ids format. Use comma-separated integers (e.g., 1,5,10)")
|
||||
sys.exit(EXIT_ARGUMENT_ERROR)
|
||||
|
||||
# Determine database URL
|
||||
database_url = args.database_url or DATABASE_URL
|
||||
|
||||
# Initialize auditor
|
||||
auditor = SEOAuditor()
|
||||
try:
|
||||
auditor = SEOAuditor(database_url=database_url)
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Failed to connect to database: {e}")
|
||||
print(f"❌ Error: Database connection failed: {e}")
|
||||
sys.exit(EXIT_DATABASE_ERROR)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize auditor: {e}")
|
||||
print(f"❌ Error: Failed to initialize SEO auditor: {e}")
|
||||
sys.exit(EXIT_DATABASE_ERROR)
|
||||
|
||||
# Run audit based on arguments
|
||||
if args.company_id:
|
||||
summary = auditor.run_audit(company_ids=[args.company_id], dry_run=args.dry_run)
|
||||
elif args.batch:
|
||||
try:
|
||||
start, end = map(int, args.batch.split('-'))
|
||||
except ValueError:
|
||||
print("Error: --batch must be in format START-END (e.g., 1-10)")
|
||||
sys.exit(1)
|
||||
summary = auditor.run_audit(batch_start=start, batch_end=end, dry_run=args.dry_run)
|
||||
elif args.all:
|
||||
summary = auditor.run_audit(dry_run=args.dry_run)
|
||||
else:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
# Run audit
|
||||
try:
|
||||
summary = auditor.run_audit(
|
||||
company_ids=company_ids,
|
||||
batch_start=batch_start,
|
||||
batch_end=batch_end,
|
||||
dry_run=args.dry_run
|
||||
)
|
||||
except QuotaExceededError:
|
||||
logger.error("PageSpeed API quota exceeded")
|
||||
print("❌ Error: PageSpeed API quota exceeded. Try again tomorrow.")
|
||||
sys.exit(EXIT_QUOTA_EXCEEDED)
|
||||
except SQLAlchemyError as e:
|
||||
logger.error(f"Database error during audit: {e}")
|
||||
print(f"❌ Error: Database error: {e}")
|
||||
sys.exit(EXIT_DATABASE_ERROR)
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error during audit: {e}")
|
||||
print(f"❌ Error: Unexpected error: {e}")
|
||||
sys.exit(EXIT_ALL_FAILED)
|
||||
|
||||
# Output results
|
||||
if args.json:
|
||||
print(json.dumps(summary, default=str, indent=2))
|
||||
else:
|
||||
print("\n" + "=" * 60)
|
||||
print("SEO AUDIT SUMMARY")
|
||||
print("=" * 60)
|
||||
print(f"Total companies: {summary['total']}")
|
||||
print(f"Successful: {summary['success']}")
|
||||
print(f"Failed: {summary['failed']}")
|
||||
print(f"PageSpeed quota remaining: {summary['quota_remaining']}")
|
||||
print("=" * 60)
|
||||
print_summary(summary, dry_run=args.dry_run)
|
||||
|
||||
# Print score distribution
|
||||
if summary['results']:
|
||||
scores = [r.get('overall_score') for r in summary['results'] if r.get('overall_score') is not None]
|
||||
if scores:
|
||||
avg_score = sum(scores) / len(scores)
|
||||
print(f"\nScore distribution:")
|
||||
print(f" Average SEO score: {avg_score:.1f}")
|
||||
print(f" Highest: {max(scores)}")
|
||||
print(f" Lowest: {min(scores)}")
|
||||
|
||||
# Score ranges
|
||||
excellent = sum(1 for s in scores if s >= 90)
|
||||
good = sum(1 for s in scores if 70 <= s < 90)
|
||||
fair = sum(1 for s in scores if 50 <= s < 70)
|
||||
poor = sum(1 for s in scores if s < 50)
|
||||
print(f"\n Excellent (90+): {excellent}")
|
||||
print(f" Good (70-89): {good}")
|
||||
print(f" Fair (50-69): {fair}")
|
||||
print(f" Poor (<50): {poor}")
|
||||
# Determine exit code
|
||||
if summary['total'] == 0:
|
||||
logger.warning("No companies found to audit")
|
||||
sys.exit(EXIT_ARGUMENT_ERROR)
|
||||
elif summary.get('quota_exceeded'):
|
||||
sys.exit(EXIT_QUOTA_EXCEEDED)
|
||||
elif summary['failed'] == summary['total'] - summary['skipped']:
|
||||
sys.exit(EXIT_ALL_FAILED)
|
||||
elif summary['failed'] > 0:
|
||||
sys.exit(EXIT_PARTIAL_FAILURES)
|
||||
else:
|
||||
sys.exit(EXIT_SUCCESS)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Loading…
Reference in New Issue
Block a user