Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Extract 12-field completeness scoring to utils/data_quality.py service - Auto-update data_quality_score and data_quality label on company data changes - Add /admin/data-quality dashboard with field coverage stats, quality distribution, and sortable company table - Add bulk enrichment with background processing, step selection, and progress tracking - Flow GBP phone/website to Company record when company fields are empty - Display Google opening hours on public company profile - Add BulkEnrichmentJob model and migration 075 - Refactor arm_company.py to support selective steps and progress callbacks Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
49 lines
1.3 KiB
Python
49 lines
1.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Backfill data quality scores for all companies.
|
|
|
|
Usage:
|
|
python3 scripts/backfill_data_quality_scores.py
|
|
"""
|
|
import sys
|
|
import os
|
|
|
|
# Absolute path of the directory two levels up from this file, i.e. the parent
# of the directory that contains this script.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
# Put BASE_DIR at the front of the import search path. Presumably this lets the
# project-level imports that follow (`database`, `utils`) resolve when the
# script is run directly -- confirm against the repository layout.
sys.path.insert(0, BASE_DIR)
|
|
|
|
from database import SessionLocal, Company
|
|
from utils.data_quality import update_company_data_quality
|
|
|
|
# Number of companies processed between intermediate commits in main().
BATCH_SIZE = 50
|
|
|
|
|
|
def main():
    """Recompute data quality for every company and print a summary.

    Loads all ``Company`` rows, calls ``update_company_data_quality`` on each
    one, and commits after every ``BATCH_SIZE`` companies plus once more at
    the end for the remainder. Progress is printed after each batch commit.
    Finally prints how many companies carry each of the ``'basic'``,
    ``'enhanced'`` and ``'complete'`` labels in ``data_quality``.

    If anything raises, the uncommitted work is rolled back and the exception
    propagates; batches committed before the failure stay committed. The
    session is closed in every case.
    """
    db = SessionLocal()
    try:
        companies = db.query(Company).all()
        total = len(companies)
        print(f"Backfilling data quality for {total} companies...")

        for i, company in enumerate(companies, 1):
            # The helper's return value is not used by this script.
            update_company_data_quality(company, db)

            # Commit in batches so a late failure does not discard everything.
            if i % BATCH_SIZE == 0:
                db.commit()
                print(f" [{i}/{total}] committed batch")

        # Flush the final partial batch (no-op when total is a multiple of
        # BATCH_SIZE).
        db.commit()
        print(f"Done. {total} companies updated.")

        # Summary: tally the three labels in one pass over the companies.
        # Any other value (including None) is intentionally not counted.
        basic = enhanced = complete = 0
        for company in companies:
            label = company.data_quality
            if label == 'basic':
                basic += 1
            elif label == 'enhanced':
                enhanced += 1
            elif label == 'complete':
                complete += 1
        print(f"\nSummary: basic={basic}, enhanced={enhanced}, complete={complete}")
    except Exception:
        # Discard the in-flight batch explicitly before re-raising.
        db.rollback()
        raise
    finally:
        db.close()
|
|
|
|
|
|
# Script entry point: run the backfill only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|