#!/usr/bin/env python3
"""
Backfill data quality scores for all companies.

Loads every ``Company`` row, calls ``update_company_data_quality`` on each
one, and commits in batches of ``BATCH_SIZE``. A per-level summary
(basic / enhanced / complete) is printed at the end.

Usage:
    python3 scripts/backfill_data_quality_scores.py
"""

import os
import sys
from collections import Counter

# Make the project root (the parent of this script's directory) importable.
# This must run before the project-local imports below.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)

from database import SessionLocal, Company  # noqa: E402
from utils.data_quality import update_company_data_quality  # noqa: E402

# Number of companies processed between intermediate commits.
BATCH_SIZE = 50


def main():
    """Recompute data quality for every company and print a summary.

    Work is committed every ``BATCH_SIZE`` companies and once more at the
    end, so an interrupted run keeps the batches that were already
    committed. If anything raises, the uncommitted remainder is rolled
    back and the exception is re-raised. The session is always closed.
    """
    db = SessionLocal()
    try:
        companies = db.query(Company).all()
        total = len(companies)
        print(f"Backfilling data quality for {total} companies...")

        for i, company in enumerate(companies, 1):
            # Return value is not needed here; the helper is called for its
            # effect on `company` (presumably sets `data_quality` — confirm
            # against utils.data_quality).
            update_company_data_quality(company, db)
            if i % BATCH_SIZE == 0:
                db.commit()
                print(f" [{i}/{total}] committed batch")

        # Commit whatever is left over from the last, partial batch.
        db.commit()
        print(f"Done. {total} companies updated.")

        # Summary: tally all levels in a single pass. Counter yields 0 for
        # levels that never occur, matching the original per-level sums.
        counts = Counter(c.data_quality for c in companies)
        print(
            f"\nSummary: basic={counts['basic']}, "
            f"enhanced={counts['enhanced']}, complete={counts['complete']}"
        )
    except Exception:
        # Discard the uncommitted partial batch explicitly before
        # re-raising; batches committed earlier stay in place.
        db.rollback()
        raise
    finally:
        db.close()


if __name__ == '__main__':
    main()