Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Add clickable field coverage bars to filter companies missing specific data - Add quick-action buttons (Registry/SEO/GBP) per company in dashboard table - Add stale data detection (>6 months) with yellow badges - Implement weighted priority score (contacts 34%, audits 17%) - Add data hints in admin company detail showing where to find missing data - Add "Available data" section showing Google Business data ready to apply - Add POST /api/company/<id>/apply-hint endpoint for one-click data fill - Extend website content updater with phone/email extraction (AI + regex) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
105 lines
2.9 KiB
Python
105 lines
2.9 KiB
Python
"""
|
|
Data Quality Service
|
|
====================
|
|
|
|
Computes and updates company data quality scores.
|
|
Extracted from inline completeness logic in admin routes.
|
|
"""
|
|
|
|
import os
|
|
|
|
from database import CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
|
|
|
|
|
|
# Relative importance of each completeness field. The keys are the same
# labels that compute_data_quality_score uses in its ``fields`` dict.
FIELD_WEIGHTS = {
    'NIP': 10, 'Adres': 8, 'Telefon': 12, 'Email': 12,
    'Strona WWW': 10, 'Opis': 10, 'Kategoria': 5,
    'Logo': 8, 'Dane urzędowe': 8,
    'Audyt SEO': 5, 'Audyt Social': 5, 'Audyt GBP': 7,
}

# Denominator of the percentage: the weight of a record with every field set.
MAX_WEIGHT = sum(FIELD_WEIGHTS.values())


def compute_weighted_score(fields):
    """Compute weighted score from a fields dict.

    Args:
        fields: Mapping of field label -> truthy/falsy "is filled" flag.
            Labels that are not present in FIELD_WEIGHTS contribute 0.

    Returns:
        int 0-100: the share of MAX_WEIGHT covered by the filled fields,
        rounded down.
    """
    weighted = sum(
        FIELD_WEIGHTS.get(name, 0)
        for name, is_filled in fields.items()
        if is_filled
    )
    # Multiply before dividing and use floor division so the result is exact.
    # The float form int(weighted / MAX_WEIGHT * 100) under-reports some
    # totals (e.g. 29 / 100 * 100 == 28.999999999999996 -> 28).
    return int(weighted * 100 // MAX_WEIGHT)
|
|
|
|
|
|
def _has_record(db, model, company_id):
    """Return True when at least one ``model`` row has this ``company_id``."""
    return db.query(model).filter(model.company_id == company_id).first() is not None


def compute_data_quality_score(company, db):
    """Compute data quality score for a company.

    Args:
        company: Object whose attributes are inspected: slug, id, nip,
            address_city, phone, email, website, description_short,
            category_id, ceidg_fetched_at and krs_fetched_at.
        db: Object exposing ``query(model)``; presumably a SQLAlchemy
            session -- confirm against callers.

    Returns:
        dict with 'score' (0-100), 'fields' (name->bool), 'total' (number of
        checked fields) and 'filled' (number of fields that are True).
    """
    # Logo check (webp or svg). The path is relative, so it is resolved
    # against the process working directory.
    logo_exists = any(
        os.path.isfile(os.path.join('static', 'img', 'companies', f'{company.slug}.{ext}'))
        for ext in ('webp', 'svg')
    )

    # Registry data: either registry fetch timestamp counts.
    registry_done = bool(company.ceidg_fetched_at or company.krs_fetched_at)

    # Audits: each one counts as done when any row exists for the company.
    # All three use the same "first row exists" probe; the social media check
    # previously used count() > 0, which counts every row just to test
    # for existence.
    seo_done = _has_record(db, CompanyWebsiteAnalysis, company.id)
    social_done = _has_record(db, CompanySocialMedia, company.id)
    gbp_done = _has_record(db, GBPAudit, company.id)

    fields = {
        'NIP': bool(company.nip),
        'Adres': bool(company.address_city),
        'Telefon': bool(company.phone),
        'Email': bool(company.email),
        'Strona WWW': bool(company.website),
        'Opis': bool(company.description_short),
        'Kategoria': bool(company.category_id),
        'Logo': logo_exists,
        'Dane urzędowe': registry_done,
        'Audyt SEO': seo_done,
        'Audyt Social': social_done,
        'Audyt GBP': gbp_done,
    }

    return {
        'score': compute_weighted_score(fields),
        'fields': fields,
        'total': len(fields),
        # Every value is a bool, so summing counts the True entries.
        'filled': sum(fields.values()),
    }
|
|
|
|
|
|
def compute_data_quality_label(score):
    """Translate a numeric quality score into its tier name.

    Scores below 34 are 'basic', scores from 34 up to (but not including) 67
    are 'enhanced', and anything else is 'complete'.
    """
    # Exclusive upper bounds, checked in ascending order.
    tiers = ((34, 'basic'), (67, 'enhanced'))
    for upper_bound, label in tiers:
        if score < upper_bound:
            return label
    return 'complete'
|
|
|
|
|
|
def update_company_data_quality(company, db):
    """Score a company and store the outcome on the company object.

    Sets ``company.data_quality_score`` to the numeric score and
    ``company.data_quality`` to the matching label. ``db`` is only handed to
    compute_data_quality_score; no commit or flush is issued here.

    Returns the result dict from compute_data_quality_score.
    """
    quality = compute_data_quality_score(company, db)
    numeric_score = quality['score']
    company.data_quality_score = numeric_score
    company.data_quality = compute_data_quality_label(numeric_score)
    return quality
|