- Add normalize_social_url() function to database.py to prevent www vs non-www duplicates in social media records - Update update_social_media.py to normalize URLs before insert - Update social_media_audit.py to normalize URLs before insert - Add inline GBP Audit section to company profile - Add inline Social Media Audit section to company profile - Add inline IT Audit section to company profile Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
194 lines
6.7 KiB
Python
194 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Update Social Media profiles in NordaBiz database.
|
|
Reads from social_media_found.json and updates company_social_media table.
|
|
|
|
Usage:
|
|
python update_social_media.py [--dry-run]
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import sys
|
|
from datetime import datetime
|
|
|
|
# Database connection: Use environment variable DATABASE_URL
|
|
# WARNING: DATABASE_URL must be set before running this script.
|
|
# Example: export DATABASE_URL='postgresql://nordabiz_app:PASSWORD@localhost:5432/nordabiz'
|
|
# The database module will fall back to a safe placeholder if not set.
|
|
# NEVER commit real credentials to version control (CWE-798).
|
|
|
|
from database import SessionLocal, Company, CompanySocialMedia, normalize_social_url
|
|
from sqlalchemy import func
|
|
|
|
PLATFORMS = ['facebook', 'instagram', 'youtube', 'linkedin', 'tiktok', 'twitter']
|
|
|
|
def load_social_media_data():
|
|
"""Load social media data from JSON file."""
|
|
with open('social_media_found.json', 'r') as f:
|
|
data = json.load(f)
|
|
return data['results']
|
|
|
|
def update_social_media(dry_run=False):
|
|
"""Update social media profiles in database."""
|
|
db = SessionLocal()
|
|
|
|
try:
|
|
results = load_social_media_data()
|
|
|
|
stats = {
|
|
'companies_processed': 0,
|
|
'profiles_added': 0,
|
|
'profiles_updated': 0,
|
|
'profiles_unchanged': 0,
|
|
'errors': 0
|
|
}
|
|
|
|
changes = []
|
|
|
|
for item in results:
|
|
company_id = item['company_id']
|
|
company_name = item['name']
|
|
stats['companies_processed'] += 1
|
|
|
|
# Check if company exists
|
|
company = db.query(Company).filter(Company.id == company_id).first()
|
|
if not company:
|
|
print(f" WARNING: Company ID {company_id} ({company_name}) not found in database")
|
|
stats['errors'] += 1
|
|
continue
|
|
|
|
for platform in PLATFORMS:
|
|
url = item.get(platform)
|
|
if not url:
|
|
continue
|
|
|
|
# Normalize URL to prevent www vs non-www duplicates
|
|
url = normalize_social_url(url, platform)
|
|
|
|
# Check if profile already exists for this platform
|
|
existing = db.query(CompanySocialMedia).filter(
|
|
CompanySocialMedia.company_id == company_id,
|
|
CompanySocialMedia.platform == platform
|
|
).first()
|
|
|
|
if existing:
|
|
# Normalize existing URL for comparison
|
|
existing_normalized = normalize_social_url(existing.url, platform)
|
|
if existing_normalized != url:
|
|
old_url = existing.url
|
|
if not dry_run:
|
|
existing.url = url
|
|
existing.verified_at = datetime.now()
|
|
existing.source = 'web_search_2025-12-29'
|
|
changes.append({
|
|
'company_id': company_id,
|
|
'company_name': company_name,
|
|
'platform': platform,
|
|
'action': 'updated',
|
|
'old_url': old_url,
|
|
'new_url': url
|
|
})
|
|
stats['profiles_updated'] += 1
|
|
else:
|
|
stats['profiles_unchanged'] += 1
|
|
else:
|
|
if not dry_run:
|
|
new_profile = CompanySocialMedia(
|
|
company_id=company_id,
|
|
platform=platform,
|
|
url=url,
|
|
verified_at=datetime.now(),
|
|
source='web_search_2025-12-29',
|
|
is_valid=True
|
|
)
|
|
db.add(new_profile)
|
|
changes.append({
|
|
'company_id': company_id,
|
|
'company_name': company_name,
|
|
'platform': platform,
|
|
'action': 'added',
|
|
'url': url
|
|
})
|
|
stats['profiles_added'] += 1
|
|
|
|
if not dry_run:
|
|
db.commit()
|
|
print("\n=== Changes committed to database ===")
|
|
else:
|
|
print("\n=== DRY RUN - No changes made ===")
|
|
|
|
# Print statistics
|
|
print(f"\n=== Statistics ===")
|
|
print(f"Companies processed: {stats['companies_processed']}")
|
|
print(f"Profiles added: {stats['profiles_added']}")
|
|
print(f"Profiles updated: {stats['profiles_updated']}")
|
|
print(f"Profiles unchanged: {stats['profiles_unchanged']}")
|
|
print(f"Errors: {stats['errors']}")
|
|
|
|
# Print changes summary
|
|
if changes:
|
|
print(f"\n=== Changes Summary ===")
|
|
for change in changes:
|
|
if change['action'] == 'added':
|
|
print(f" + {change['company_name']} | {change['platform']}: {change['url']}")
|
|
else:
|
|
print(f" ~ {change['company_name']} | {change['platform']}: {change['old_url']} -> {change['new_url']}")
|
|
|
|
return stats, changes
|
|
|
|
except Exception as e:
|
|
db.rollback()
|
|
print(f"ERROR: {e}")
|
|
raise
|
|
finally:
|
|
db.close()
|
|
|
|
def print_summary_table(changes):
|
|
"""Print a summary table of all social media by platform."""
|
|
print("\n" + "="*80)
|
|
print("SOCIAL MEDIA SUMMARY - NordaBiz 2025-12-29")
|
|
print("="*80)
|
|
|
|
with open('social_media_found.json', 'r') as f:
|
|
data = json.load(f)
|
|
|
|
results = data['results']
|
|
|
|
# Count by platform
|
|
platform_counts = {p: 0 for p in PLATFORMS}
|
|
for item in results:
|
|
for platform in PLATFORMS:
|
|
if item.get(platform):
|
|
platform_counts[platform] += 1
|
|
|
|
print("\nPlatform Statistics:")
|
|
print("-" * 40)
|
|
for platform, count in sorted(platform_counts.items(), key=lambda x: -x[1]):
|
|
bar = "#" * (count // 2)
|
|
print(f" {platform:12} | {count:2} | {bar}")
|
|
|
|
total_profiles = sum(platform_counts.values())
|
|
companies_with_any = sum(1 for r in results if any(r.get(p) for p in PLATFORMS))
|
|
companies_without = len(results) - companies_with_any
|
|
|
|
print(f"\nTotal profiles found: {total_profiles}")
|
|
print(f"Companies with social media: {companies_with_any}")
|
|
print(f"Companies without social media: {companies_without}")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
dry_run = '--dry-run' in sys.argv
|
|
|
|
print("="*60)
|
|
print("NordaBiz Social Media Update")
|
|
print("="*60)
|
|
|
|
if dry_run:
|
|
print("MODE: Dry run (no changes will be made)")
|
|
else:
|
|
print("MODE: Live update")
|
|
|
|
stats, changes = update_social_media(dry_run=dry_run)
|
|
print_summary_table(changes)
|