feat: add data quality dashboard, auto-scoring, bulk enrichment and GBP data flow
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Extract 12-field completeness scoring to utils/data_quality.py service
- Auto-update data_quality_score and data_quality label on company data changes
- Add /admin/data-quality dashboard with field coverage stats, quality distribution, and sortable company table
- Add bulk enrichment with background processing, step selection, and progress tracking
- Flow GBP phone/website to Company record when company fields are empty
- Display Google opening hours on public company profile
- Add BulkEnrichmentJob model and migration 075
- Refactor arm_company.py to support selective steps and progress callbacks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-21 07:02:45 +01:00
parent a311dcf0f1
commit 93e90b2c72
15 changed files with 1403 additions and 28 deletions

View File

@ -29,3 +29,5 @@ from . import routes_membership # noqa: E402, F401
from . import routes_benefits # noqa: E402, F401
from . import routes_competitors # noqa: E402, F401
from . import routes_social_publisher # noqa: E402, F401
from . import routes_data_quality # noqa: E402, F401
from . import routes_bulk_enrichment # noqa: E402, F401

View File

@ -0,0 +1,193 @@
"""
Admin Bulk Enrichment Routes
=============================
Batch enrichment operations for multiple companies at once.
"""
import logging
import threading
import time
from datetime import datetime
from flask import request, jsonify
from flask_login import login_required, current_user
from . import bp
from database import SessionLocal, Company, BulkEnrichmentJob, SystemRole
from utils.decorators import role_required
logger = logging.getLogger(__name__)
def _run_bulk_enrichment(job_id, company_ids, steps):
    """Background worker for bulk enrichment. Runs in a separate thread.

    Enriches the given companies one at a time via ``arm_company`` and, after
    each company, stores the processed count and the accumulated results on
    the ``BulkEnrichmentJob`` row. When the loop finishes the job is marked
    ``completed``; any unexpected error marks it ``failed``.

    Args:
        job_id: ID of the BulkEnrichmentJob row to update.
        company_ids: Company IDs to enrich, processed in the given order.
        steps: Enrichment step names passed through to ``arm_company``.
    """
    import sys
    import os

    # Make the project root and its scripts/ directory importable.
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    if base_dir not in sys.path:
        sys.path.insert(0, base_dir)
    scripts_dir = os.path.join(base_dir, 'scripts')
    if scripts_dir not in sys.path:
        sys.path.insert(0, scripts_dir)

    db = SessionLocal()
    try:
        # Imported inside the try block so that an import failure is recorded
        # on the job instead of killing the thread with the job left 'running'.
        from scripts.arm_company import arm_company

        job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
        if not job:
            logger.error(f"Bulk enrichment job {job_id} not found")
            return

        all_results = {}
        last_index = len(company_ids) - 1
        for i, cid in enumerate(company_ids):
            company = db.query(Company).filter_by(id=cid).first()
            company_name = company.name if company else f"ID {cid}"

            try:
                result = arm_company(cid, force=False, steps=steps)
                if isinstance(result, dict):
                    outcome = result
                else:
                    # Non-dict return value: a falsy one is reported as
                    # "company not found", anything else as an unknown error.
                    outcome = {'error': 'Firma nie znaleziona' if not result else 'Nieznany błąd'}
            except Exception as e:
                logger.exception(f"Bulk enrichment error for company {cid}: {e}")
                outcome = {'error': str(e)[:200]}

            all_results[str(cid)] = {
                'name': company_name,
                'results': outcome,
            }

            # Update progress. A fresh dict is assigned each time so the ORM
            # always sees a new value rather than the same object mutated in
            # place (plain JSON columns do not track in-place mutation).
            job.processed_companies = i + 1
            job.results = dict(all_results)
            db.commit()

            # Delay between companies to respect API limits
            if i < last_index:
                time.sleep(2)

        job.status = 'completed'
        job.completed_at = datetime.now()
        db.commit()
        logger.info(f"Bulk enrichment job {job_id} completed: {len(company_ids)} companies")
    except Exception as e:
        logger.exception(f"Bulk enrichment job {job_id} failed: {e}")
        try:
            # Clear any failed transaction first; otherwise the session cannot
            # be used to record the failure.
            db.rollback()
            job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
            if job:
                job.status = 'failed'
                job.completed_at = datetime.now()
                db.commit()
        except Exception:
            # Best effort only, but leave a trace instead of failing silently.
            logger.exception(f"Could not mark bulk enrichment job {job_id} as failed")
    finally:
        db.close()
def _normalize_company_ids(raw_ids):
    """Convert submitted company IDs to a de-duplicated list of ints.

    Integers and digit-only strings are accepted (so clients that submit IDs
    as text keep working); first-seen order is preserved. Returns ``None``
    when any entry is of another kind (bool, float, dict, ...).
    """
    normalized = []
    for raw in raw_ids:
        if isinstance(raw, bool):
            # bool is a subclass of int; True/False are never valid IDs.
            return None
        if isinstance(raw, int):
            normalized.append(raw)
        elif isinstance(raw, str) and raw.strip().isdecimal():
            normalized.append(int(raw.strip()))
        else:
            return None
    return list(dict.fromkeys(normalized))


@bp.route('/data-quality/bulk-enrich', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def bulk_enrich():
    """Start a bulk enrichment job.

    Expects a JSON object with ``company_ids`` (1-50 company IDs) and an
    optional ``steps`` list (defaults to all five steps). Creates a
    BulkEnrichmentJob row, starts the background worker thread and returns
    ``{'job_id': ..., 'total': ...}``. Invalid input yields a JSON ``error``
    with HTTP 400.
    """
    data = request.get_json()
    if not data or not isinstance(data, dict):
        return jsonify({'error': 'Brak danych'}), 400

    company_ids = data.get('company_ids', [])
    steps = data.get('steps', ['registry', 'seo', 'social', 'gbp', 'logo'])

    if not company_ids:
        return jsonify({'error': 'Nie wybrano firm'}), 400
    if not isinstance(company_ids, list):
        return jsonify({'error': 'Nieprawidłowa lista firm'}), 400
    if len(company_ids) > 50:
        return jsonify({'error': 'Maksymalnie 50 firm na raz'}), 400

    # Normalise and de-duplicate: duplicates would otherwise make the
    # existence check below fail with an empty "missing" set.
    company_ids = _normalize_company_ids(company_ids)
    if not company_ids:
        return jsonify({'error': 'Nieprawidłowa lista firm'}), 400

    valid_steps = {'registry', 'seo', 'social', 'gbp', 'logo'}
    if not isinstance(steps, list):
        steps = []
    steps = [s for s in steps if isinstance(s, str) and s in valid_steps]
    if not steps:
        return jsonify({'error': 'Nie wybrano kroków'}), 400

    db = SessionLocal()
    try:
        # Validate company IDs
        existing = db.query(Company.id).filter(Company.id.in_(company_ids)).all()
        existing_ids = [r[0] for r in existing]
        if len(existing_ids) != len(company_ids):
            missing = set(company_ids) - set(existing_ids)
            return jsonify({'error': f'Nie znaleziono firm: {missing}'}), 400

        # Create job
        job = BulkEnrichmentJob(
            started_by=current_user.id,
            total_companies=len(company_ids),
            steps=steps,
            results={},
        )
        db.add(job)
        db.commit()
        job_id = job.id

        # Start background thread; it opens its own DB session.
        thread = threading.Thread(
            target=_run_bulk_enrichment,
            args=(job_id, company_ids, steps),
            daemon=True,
        )
        thread.start()

        logger.info(f"Bulk enrichment job {job_id} started by {current_user.email}: {len(company_ids)} companies, steps={steps}")
        return jsonify({'job_id': job_id, 'total': len(company_ids)})
    finally:
        db.close()
@bp.route('/data-quality/bulk-enrich/status')
@login_required
@role_required(SystemRole.ADMIN)
def bulk_enrich_status():
    """Return progress information for one bulk enrichment job.

    Reads ``job_id`` from the query string. Responds with the job status,
    processed/total counters, a one-line summary of the last stored company
    result, and the full results once the job is no longer running.
    """
    job_id = request.args.get('job_id', type=int)
    if not job_id:
        return jsonify({'error': 'Brak job_id'}), 400

    session = SessionLocal()
    try:
        job = session.query(BulkEnrichmentJob).filter_by(id=job_id).first()
        if not job:
            return jsonify({'error': 'Job nie znaleziony'}), 404

        # Summary line for the progress log: the entry that comes last when
        # iterating the stored results wins.
        # NOTE(review): whether that entry is the most recently processed
        # company depends on the key order of the stored JSON -- confirm.
        latest_result = None
        if job.results and job.processed_companies > 0:
            for company_key, entry in job.results.items():
                display_name = entry.get('name', company_key)
                step_outcomes = entry.get('results', {})
                ok_steps = 0
                for outcome in step_outcomes.values():
                    if isinstance(outcome, str) and outcome.startswith(('OK', 'SKIP (done)')):
                        ok_steps += 1
                step_count = len(step_outcomes)
                latest_result = f"{display_name}: {ok_steps}/{step_count} kroków OK"

        # Full results are withheld while the job is still running.
        if job.status == 'running':
            final_results = None
        else:
            final_results = job.results

        payload = {
            'job_id': job.id,
            'status': job.status,
            'processed': job.processed_companies,
            'total': job.total_companies,
            'latest_result': latest_result,
            'results': final_results,
        }
        return jsonify(payload)
    finally:
        session.close()

View File

@ -21,6 +21,7 @@ from database import (
CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
)
from utils.decorators import role_required
from utils.data_quality import compute_data_quality_score, update_company_data_quality
# Logger
logger = logging.getLogger(__name__)
@ -157,6 +158,9 @@ def admin_company_add():
db.commit()
db.refresh(new_company)
update_company_data_quality(new_company, db)
db.commit()
logger.info(f"Admin {current_user.email} created new company: {name} (ID: {new_company.id})")
return jsonify({
@ -263,6 +267,9 @@ def admin_company_update(company_id):
company.last_updated = datetime.utcnow()
db.commit()
update_company_data_quality(company, db)
db.commit()
logger.info(f"Admin {current_user.email} updated company {company.name} (ID: {company_id})")
return jsonify({
@ -726,27 +733,7 @@ def admin_company_detail(company_id):
}
# --- Completeness score (12 fields) ---
fields = {
'NIP': bool(company.nip),
'Adres': bool(company.address_city),
'Telefon': bool(company.phone),
'Email': bool(company.email),
'Strona WWW': bool(company.website),
'Opis': bool(company.description_short),
'Kategoria': bool(company.category_id),
'Logo': enrichment['logo']['done'],
'Dane urzędowe': enrichment['registry']['done'],
'Audyt SEO': enrichment['seo']['done'],
'Audyt Social': enrichment['social']['done'],
'Audyt GBP': enrichment['gbp']['done'],
}
completeness = {
'score': int(sum(fields.values()) / len(fields) * 100),
'fields': fields,
'total': len(fields),
'filled': sum(fields.values()),
}
completeness = compute_data_quality_score(company, db)
logger.info(f"Admin {current_user.email} viewed company detail: {company.name} (ID: {company_id})")

View File

@ -0,0 +1,184 @@
"""
Admin Data Quality Dashboard
=============================
Aggregate view of company data quality and completeness across all companies.
"""
import os
import logging
from datetime import datetime
from flask import render_template
from flask_login import login_required
from sqlalchemy import func
from . import bp
from database import (
SessionLocal, Company, CompanyWebsiteAnalysis,
CompanySocialMedia, GBPAudit, SystemRole
)
from utils.decorators import role_required
logger = logging.getLogger(__name__)
LOGO_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'static', 'img', 'companies')
def _check_logo_exists(slug):
"""Check if company logo file exists on disk."""
if not slug:
return False
for ext in ('webp', 'svg'):
if os.path.isfile(os.path.join(LOGO_DIR, f'{slug}.{ext}')):
return True
return False
def _label_for_score(score):
    """Map a 0-100 completeness score to 'basic', 'enhanced' or 'complete'."""
    if score >= 67:
        return 'complete'
    if score >= 34:
        return 'enhanced'
    return 'basic'


def _bucket_for_score(score):
    """Map a 0-100 completeness score to its quartile bucket key."""
    if score > 75:
        return '76-100'
    if score > 50:
        return '51-75'
    if score > 25:
        return '26-50'
    return '0-25'


@bp.route('/data-quality')
@login_required
@role_required(SystemRole.ADMIN)
def admin_data_quality():
    """Render the data quality dashboard with aggregate statistics.

    For every active/pending company a 12-field completeness check is
    evaluated; the view aggregates per-field coverage, the quality-label
    distribution, the score-bucket distribution and the average score, and
    passes a per-company table (least complete first) to the template.
    """
    db = SessionLocal()
    try:
        now = datetime.now()

        # All active/pending companies, alphabetically.
        companies = (
            db.query(Company)
            .filter(Company.status.in_(['active', 'pending']))
            .order_by(Company.name)
            .all()
        )
        total = len(companies)

        if total == 0:
            return render_template(
                'admin/data_quality_dashboard.html',
                total=0, field_stats={}, quality_dist={},
                score_dist={}, avg_score=0, companies_table=[],
                now=now,
            )

        # Batch lookups so the per-company loop issues no further queries.
        seo_rows = db.query(CompanyWebsiteAnalysis.company_id).all()
        seo_company_ids = {row[0] for row in seo_rows}

        social_rows = (
            db.query(CompanySocialMedia.company_id, func.count(CompanySocialMedia.id))
            .group_by(CompanySocialMedia.company_id)
            .all()
        )
        social_counts = dict(social_rows)

        gbp_rows = db.query(GBPAudit.company_id).distinct().all()
        gbp_company_ids = {row[0] for row in gbp_rows}

        # Per-field coverage counters (insertion order is the display order).
        field_counters = dict.fromkeys(
            (
                'NIP', 'Adres', 'Telefon', 'Email', 'Strona WWW', 'Opis',
                'Kategoria', 'Logo', 'Dane urzędowe', 'Audyt SEO',
                'Audyt Social', 'Audyt GBP',
            ),
            0,
        )

        quality_dist = {'basic': 0, 'enhanced': 0, 'complete': 0}
        score_dist = {'0-25': 0, '26-50': 0, '51-75': 0, '76-100': 0}
        score_sum = 0
        companies_table = []

        for company in companies:
            # 12-field completeness check for this company.
            checks = {
                'NIP': bool(company.nip),
                'Adres': bool(company.address_city),
                'Telefon': bool(company.phone),
                'Email': bool(company.email),
                'Strona WWW': bool(company.website),
                'Opis': bool(company.description_short),
                'Kategoria': bool(company.category_id),
                'Logo': _check_logo_exists(company.slug),
                'Dane urzędowe': bool(company.ceidg_fetched_at or company.krs_fetched_at),
                'Audyt SEO': company.id in seo_company_ids,
                'Audyt Social': social_counts.get(company.id, 0) > 0,
                'Audyt GBP': company.id in gbp_company_ids,
            }
            field_total = len(checks)
            filled = sum(checks.values())
            score = int(filled / field_total * 100)

            for field_name in checks:
                if checks[field_name]:
                    field_counters[field_name] += 1

            label = _label_for_score(score)
            quality_dist[label] += 1
            score_dist[_bucket_for_score(score)] += 1
            score_sum += score

            row = {
                'id': company.id,
                'name': company.name,
                'slug': company.slug,
                'score': score,
                'filled': filled,
                'total': field_total,
                'label': label,
                'data_quality': company.data_quality or 'basic',
                'fields': checks,
                'status': company.status,
            }
            companies_table.append(row)

        # Most incomplete first; the sort is stable, so ties stay alphabetical.
        companies_table.sort(key=lambda entry: entry['score'])

        # Field coverage as absolute count plus rounded percentage.
        field_stats = {}
        for field_name, count in field_counters.items():
            field_stats[field_name] = {'count': count, 'pct': round(count / total * 100)}

        avg_score = round(score_sum / total) if total > 0 else 0

        return render_template(
            'admin/data_quality_dashboard.html',
            total=total,
            field_stats=field_stats,
            quality_dist=quality_dist,
            score_dist=score_dist,
            avg_score=avg_score,
            companies_table=companies_table,
            now=now,
        )
    finally:
        db.close()

View File

@ -23,6 +23,7 @@ from database import (
from datetime import timedelta
import gemini_service
import krs_api_service
from utils.data_quality import update_company_data_quality
from ceidg_api_service import fetch_ceidg_by_nip
from . import bp
@ -589,6 +590,9 @@ def api_enrich_company_registry(company_id):
db.commit()
update_company_data_quality(company, db)
db.commit()
logger.info(f"Registry enrichment for company {company.id} ({company.name}) from {source} by {current_user.email}")
return jsonify({

View File

@ -11,6 +11,7 @@ from blueprints.public import bp
from sqlalchemy import or_
from database import SessionLocal, Company, CompanyContact, CompanySocialMedia, CompanyWebsite, Category
from utils.helpers import sanitize_input, sanitize_html, validate_email, ensure_url
from utils.data_quality import update_company_data_quality
from datetime import datetime
import logging
@ -180,6 +181,10 @@ def company_edit_save(company_id=None):
_save_social_media(db, company)
db.commit()
update_company_data_quality(company, db)
db.commit()
flash('Dane firmy zostały zaktualizowane.', 'success')
return redirect(url_for('public.company_detail', company_id=company.id))

View File

@ -1320,6 +1320,24 @@ class CompanyQualityTracking(Base):
company = relationship('Company', back_populates='quality_tracking')
class BulkEnrichmentJob(Base):
    """Tracks bulk enrichment jobs started from admin dashboard.

    One row per job. The row stores who started the job, which steps were
    requested, how many companies have been processed so far, the per-company
    results and the final status.
    """
    __tablename__ = 'bulk_enrichment_jobs'

    id = Column(Integer, primary_key=True)
    started_at = Column(DateTime, default=datetime.now)
    started_by = Column(Integer, ForeignKey('users.id'))
    status = Column(String(20), default='running')  # running, completed, failed
    total_companies = Column(Integer, default=0)
    processed_companies = Column(Integer, default=0)
    steps = Column(PG_JSONB)  # ['registry', 'seo', 'social', 'gbp', 'logo']
    # {company_id: {'name': ..., 'results': {step: result, ...}}, ...}
    # Callable default so every new row gets its own dict instead of all rows
    # sharing a single mutable literal.
    results = Column(PG_JSONB, default=dict)
    completed_at = Column(DateTime)

    # Relationship
    user = relationship('User')
class CompanyWebsiteContent(Base):
"""Scraped website content for companies"""
__tablename__ = 'company_website_content'

View File

@ -0,0 +1,17 @@
-- 075: Create bulk_enrichment_jobs table for tracking batch enrichment operations
-- Run: python3 scripts/run_migration.py database/migrations/075_bulk_enrichment_jobs.sql
-- Safe to re-run: the table is only created when it does not exist yet.
CREATE TABLE IF NOT EXISTS bulk_enrichment_jobs (
id SERIAL PRIMARY KEY,
-- When the job was started and by which user.
started_at TIMESTAMP DEFAULT NOW(),
started_by INTEGER REFERENCES users(id),
-- Job state; new rows start as 'running'.
status VARCHAR(20) DEFAULT 'running',
-- Progress counters: companies in the job / companies handled so far.
total_companies INTEGER DEFAULT 0,
processed_companies INTEGER DEFAULT 0,
-- Requested enrichment steps (JSON array).
steps JSONB,
-- Accumulated results (JSON object), empty until results are stored.
results JSONB DEFAULT '{}'::jsonb,
-- NULL until a completion time is recorded.
completed_at TIMESTAMP
);
-- Application role needs access to the table and to the sequence behind the SERIAL id.
GRANT ALL ON TABLE bulk_enrichment_jobs TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE bulk_enrichment_jobs_id_seq TO nordabiz_app;

View File

@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Any
from sqlalchemy.orm import Session
from database import Company, GBPAudit, GBPReview, CompanyWebsiteAnalysis, SessionLocal, OAuthToken
from utils.data_quality import update_company_data_quality
import gemini_service
try:
@ -2029,6 +2030,18 @@ def fetch_google_business_data(
db.commit()
# Flow GBP phone/website to Company if Company fields are empty
try:
if analysis.google_phone and not company.phone:
company.phone = analysis.google_phone
if analysis.google_website and not company.website:
company.website = analysis.google_website
update_company_data_quality(company, db)
db.commit()
except Exception as flow_err:
logger.warning(f"Failed to flow GBP data to Company {company_id}: {flow_err}")
db.rollback()
result['steps'][-1]['status'] = 'complete'
result['steps'][-1]['message'] = 'Dane zapisane pomyślnie'
result['success'] = True

View File

@ -24,12 +24,27 @@ if scripts_dir not in sys.path:
from database import SessionLocal, Company, CompanyWebsiteAnalysis, CompanySocialMedia, CompanyPKD, CompanyPerson
from database import GBPAudit
from utils.data_quality import update_company_data_quality
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('arm_company')
def arm_company(company_id, force=False):
ALL_STEPS = ['registry', 'seo', 'social', 'gbp', 'logo']
def arm_company(company_id, force=False, steps=None, progress_callback=None):
"""Arm a company with enrichment data.
Args:
company_id: Company ID to enrich
force: Force re-execution of all steps
steps: List of steps to run (default: all). Options: registry, seo, social, gbp, logo
progress_callback: Optional callback(company_id, step, result_text) for bulk tracking
"""
if steps is None:
steps = ALL_STEPS
db = SessionLocal()
try:
company = db.query(Company).filter_by(id=company_id).first()
@ -41,13 +56,17 @@ def arm_company(company_id, force=False):
print("Uzbrajam: %s (ID: %d)" % (company.name, company.id))
print("NIP: %s | WWW: %s" % (company.nip or '-', company.website or '-'))
print("Tryb: %s" % ("FORCE (wszystkie kroki)" if force else "SMART (tylko brakujące)"))
if steps != ALL_STEPS:
print("Kroki: %s" % ', '.join(steps))
print("=" * 60)
results = {}
# --- Krok 1: Dane urzędowe ---
registry_done = bool(company.krs_fetched_at or company.ceidg_fetched_at)
if force or not registry_done:
if 'registry' not in steps:
results['registry'] = 'SKIP (nie wybrano)'
elif force or not registry_done:
if company.nip:
print("\n[1/5] Pobieranie danych urzędowych...")
try:
@ -194,7 +213,9 @@ def arm_company(company_id, force=False):
# --- Krok 2: Audyt SEO ---
seo_done = db.query(CompanyWebsiteAnalysis).filter_by(company_id=company.id).first() is not None
if force or not seo_done:
if 'seo' not in steps:
results['seo'] = 'SKIP (nie wybrano)'
elif force or not seo_done:
if company.website:
print("\n[2/5] Audyt SEO...")
try:
@ -224,7 +245,9 @@ def arm_company(company_id, force=False):
# --- Krok 3: Social Media ---
social_done = db.query(CompanySocialMedia).filter_by(company_id=company.id).count() > 0
if force or not social_done:
if 'social' not in steps:
results['social'] = 'SKIP (nie wybrano)'
elif force or not social_done:
print("\n[3/5] Audyt Social Media...")
try:
from social_media_audit import SocialMediaAuditor
@ -254,7 +277,9 @@ def arm_company(company_id, force=False):
# --- Krok 4: GBP ---
gbp_done = db.query(GBPAudit).filter_by(company_id=company.id).first() is not None
if force or not gbp_done:
if 'gbp' not in steps:
results['gbp'] = 'SKIP (nie wybrano)'
elif force or not gbp_done:
print("\n[4/5] Audyt GBP...")
try:
from gbp_audit_service import GBPAuditService
@ -284,7 +309,9 @@ def arm_company(company_id, force=False):
logo_done = True
break
if force or not logo_done:
if 'logo' not in steps:
results['logo'] = 'SKIP (nie wybrano)'
elif force or not logo_done:
if company.website:
print("\n[5/5] Pobieranie logo...")
try:
@ -322,7 +349,15 @@ def arm_company(company_id, force=False):
print(" Wynik: %d/5 kroków zaliczonych" % ok_count)
print("=" * 60)
return True
# Update data quality score
dq = update_company_data_quality(company, db)
db.commit()
print("\n Data quality: %s (%d%%)" % (company.data_quality, dq['score']))
if progress_callback:
progress_callback(company_id, results)
return results
except Exception as e:
logger.error("Błąd uzbrajania firmy %d: %s" % (company_id, str(e)))

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""
Backfill data quality scores for all companies.
Usage:
python3 scripts/backfill_data_quality_scores.py
"""
import sys
import os
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)
from database import SessionLocal, Company
from utils.data_quality import update_company_data_quality
BATCH_SIZE = 50
def main():
    """Recompute data quality for every company and print a label summary.

    Changes are committed every BATCH_SIZE companies and once more at the
    end; the session is always closed.
    """
    session = SessionLocal()
    try:
        companies = session.query(Company).all()
        total = len(companies)
        print(f"Backfilling data quality for {total} companies...")

        for position, company in enumerate(companies, start=1):
            update_company_data_quality(company, session)
            if position % BATCH_SIZE:
                continue
            # A full batch has been processed: persist it.
            session.commit()
            print(f" [{position}/{total}] committed batch")

        # Persist the final (possibly partial) batch.
        session.commit()
        print(f"Done. {total} companies updated.")

        # Summary: count companies per quality label.
        tally = {'basic': 0, 'enhanced': 0, 'complete': 0}
        for company in companies:
            if company.data_quality in tally:
                tally[company.data_quality] += 1
        basic, enhanced, complete = (tally[key] for key in ('basic', 'enhanced', 'complete'))
        print(f"\nSummary: basic={basic}, enhanced={enhanced}, complete={complete}")
    finally:
        session.close()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,717 @@
{% extends "base.html" %}
{% block title %}Jakość danych - Admin{% endblock %}
{% block extra_css %}
<style>
.dq-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: var(--spacing-xl);
flex-wrap: wrap;
gap: var(--spacing-md);
}
.dq-header h1 {
font-size: var(--font-size-2xl);
color: var(--text-primary);
margin-bottom: var(--spacing-xs);
}
.dq-header p {
color: var(--text-secondary);
}
.dq-timestamp {
text-align: right;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-lg);
padding: var(--spacing-md) var(--spacing-lg);
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
/* --- Stat Cards --- */
.dq-stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: var(--spacing-lg);
margin-bottom: var(--spacing-xl);
}
.dq-stat-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-lg);
text-align: center;
position: relative;
overflow: hidden;
}
.dq-stat-card::before {
content: '';
position: absolute;
top: 0; left: 0; right: 0;
height: 4px;
}
.dq-stat-card.total::before { background: linear-gradient(90deg, #3b82f6, #8b5cf6); }
.dq-stat-card.avg::before { background: linear-gradient(90deg, #10b981, #14b8a6); }
.dq-stat-card.complete::before { background: linear-gradient(90deg, #22c55e, #16a34a); }
.dq-stat-card.incomplete::before { background: linear-gradient(90deg, #f59e0b, #f97316); }
.dq-stat-value {
font-size: var(--font-size-3xl);
font-weight: 700;
color: var(--text-primary);
line-height: 1;
margin-bottom: var(--spacing-xs);
}
.dq-stat-label {
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
/* --- Coverage Bars --- */
.dq-section {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-xl);
margin-bottom: var(--spacing-xl);
}
.dq-section-title {
font-size: var(--font-size-lg);
font-weight: 600;
color: var(--text-primary);
margin-bottom: var(--spacing-lg);
}
.dq-bar-row {
display: flex;
align-items: center;
gap: var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.dq-bar-label {
width: 140px;
font-size: var(--font-size-sm);
color: var(--text-secondary);
text-align: right;
flex-shrink: 0;
}
.dq-bar-track {
flex: 1;
height: 24px;
background: var(--background);
border-radius: var(--radius);
overflow: hidden;
position: relative;
}
.dq-bar-fill {
height: 100%;
border-radius: var(--radius);
transition: width 0.5s ease;
display: flex;
align-items: center;
justify-content: flex-end;
padding-right: var(--spacing-sm);
font-size: var(--font-size-xs);
font-weight: 600;
color: white;
min-width: 40px;
}
.dq-bar-fill.high { background: linear-gradient(90deg, #22c55e, #16a34a); }
.dq-bar-fill.medium { background: linear-gradient(90deg, #f59e0b, #d97706); }
.dq-bar-fill.low { background: linear-gradient(90deg, #ef4444, #dc2626); }
.dq-bar-count {
width: 80px;
font-size: var(--font-size-sm);
color: var(--text-secondary);
flex-shrink: 0;
}
/* --- Distribution --- */
.dq-dist-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: var(--spacing-md);
}
.dq-dist-card {
padding: var(--spacing-lg);
border-radius: var(--radius-lg);
text-align: center;
}
.dq-dist-card.basic { background: #fef2f2; border: 1px solid #fecaca; }
.dq-dist-card.enhanced { background: #fffbeb; border: 1px solid #fde68a; }
.dq-dist-card.complete { background: #f0fdf4; border: 1px solid #bbf7d0; }
.dq-dist-value {
font-size: var(--font-size-2xl);
font-weight: 700;
}
.dq-dist-card.basic .dq-dist-value { color: #dc2626; }
.dq-dist-card.enhanced .dq-dist-value { color: #d97706; }
.dq-dist-card.complete .dq-dist-value { color: #16a34a; }
.dq-dist-label {
font-size: var(--font-size-sm);
color: var(--text-secondary);
margin-top: var(--spacing-xs);
}
/* --- Score Distribution --- */
.dq-score-dist {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: var(--spacing-md);
margin-top: var(--spacing-lg);
}
.dq-score-bucket {
text-align: center;
padding: var(--spacing-md);
background: var(--background);
border-radius: var(--radius-lg);
}
.dq-score-bucket-value {
font-size: var(--font-size-xl);
font-weight: 700;
color: var(--text-primary);
}
.dq-score-bucket-label {
font-size: var(--font-size-xs);
color: var(--text-secondary);
}
/* --- Companies Table --- */
.dq-table-controls {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-md);
flex-wrap: wrap;
gap: var(--spacing-sm);
}
.dq-filter-select {
padding: var(--spacing-xs) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
background: var(--surface);
font-size: var(--font-size-sm);
color: var(--text-primary);
}
.dq-table {
width: 100%;
border-collapse: collapse;
}
.dq-table th {
text-align: left;
padding: var(--spacing-sm) var(--spacing-md);
font-size: var(--font-size-xs);
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
border-bottom: 2px solid var(--border);
cursor: pointer;
user-select: none;
}
.dq-table th:hover {
color: var(--text-primary);
}
.dq-table td {
padding: var(--spacing-sm) var(--spacing-md);
font-size: var(--font-size-sm);
border-bottom: 1px solid var(--border);
vertical-align: middle;
}
.dq-table tr:hover {
background: var(--background);
}
.dq-score-badge {
display: inline-flex;
align-items: center;
gap: var(--spacing-xs);
padding: 2px 10px;
border-radius: 999px;
font-size: var(--font-size-xs);
font-weight: 600;
}
.dq-score-badge.high { background: #dcfce7; color: #166534; }
.dq-score-badge.medium { background: #fef9c3; color: #854d0e; }
.dq-score-badge.low { background: #fee2e2; color: #991b1b; }
.dq-quality-badge {
display: inline-block;
padding: 2px 8px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
font-weight: 500;
}
.dq-quality-badge.basic { background: #fee2e2; color: #991b1b; }
.dq-quality-badge.enhanced { background: #fef9c3; color: #854d0e; }
.dq-quality-badge.complete { background: #dcfce7; color: #166534; }
.dq-field-dots {
display: flex;
gap: 3px;
flex-wrap: wrap;
}
.dq-field-dot {
width: 8px;
height: 8px;
border-radius: 50%;
flex-shrink: 0;
}
.dq-field-dot.filled { background: #22c55e; }
.dq-field-dot.empty { background: #e5e7eb; }
.dq-company-link {
color: var(--primary);
text-decoration: none;
font-weight: 500;
}
.dq-company-link:hover {
text-decoration: underline;
}
.dq-bulk-bar {
display: none;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-md) var(--spacing-lg);
background: var(--primary);
color: white;
border-radius: var(--radius-lg);
margin-bottom: var(--spacing-md);
}
.dq-bulk-bar.active {
display: flex;
}
.dq-bulk-btn {
padding: var(--spacing-xs) var(--spacing-md);
background: white;
color: var(--primary);
border: none;
border-radius: var(--radius);
font-size: var(--font-size-sm);
font-weight: 600;
cursor: pointer;
}
.dq-bulk-btn:hover {
background: #f0f0f0;
}
/* Pagination */
.dq-pagination {
display: flex;
justify-content: center;
gap: var(--spacing-xs);
margin-top: var(--spacing-lg);
}
.dq-page-btn {
padding: var(--spacing-xs) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
background: var(--surface);
color: var(--text-primary);
font-size: var(--font-size-sm);
cursor: pointer;
}
.dq-page-btn.active {
background: var(--primary);
color: white;
border-color: var(--primary);
}
.dq-page-btn:hover:not(.active) {
background: var(--background);
}
/* Responsive */
@media (max-width: 768px) {
.dq-bar-label { width: 100px; font-size: var(--font-size-xs); }
.dq-stats-grid { grid-template-columns: repeat(2, 1fr); }
.dq-score-dist { grid-template-columns: repeat(2, 1fr); }
.dq-table { font-size: var(--font-size-xs); }
.dq-table td, .dq-table th { padding: var(--spacing-xs); }
}
</style>
{% endblock %}
{% block content %}
<div class="dq-header">
<div>
<h1>Jakość danych firm</h1>
<p>Przegląd kompletności i jakości danych {{ total }} firm w katalogu</p>
</div>
<div class="dq-timestamp">
Stan na {{ now.strftime('%d.%m.%Y, %H:%M') }}
</div>
</div>
<!-- Stat Cards -->
<div class="dq-stats-grid">
<div class="dq-stat-card total">
<div class="dq-stat-value">{{ total }}</div>
<div class="dq-stat-label">Firm w katalogu</div>
</div>
<div class="dq-stat-card avg">
<div class="dq-stat-value">{{ avg_score }}%</div>
<div class="dq-stat-label">Średnia kompletność</div>
</div>
<div class="dq-stat-card complete">
<div class="dq-stat-value">{{ quality_dist.get('complete', 0) }}</div>
<div class="dq-stat-label">Kompletnych (67%+)</div>
</div>
<div class="dq-stat-card incomplete">
<div class="dq-stat-value">{{ quality_dist.get('basic', 0) }}</div>
<div class="dq-stat-label">Podstawowych (&lt;34%)</div>
</div>
</div>
<!-- Field Coverage -->
<div class="dq-section">
<div class="dq-section-title">Pokrycie danych per pole</div>
{% for field_name, stats in field_stats.items() %}
<div class="dq-bar-row">
<div class="dq-bar-label">{{ field_name }}</div>
<div class="dq-bar-track">
<div class="dq-bar-fill {% if stats.pct >= 70 %}high{% elif stats.pct >= 40 %}medium{% else %}low{% endif %}"
style="width: {{ stats.pct }}%">
{{ stats.pct }}%
</div>
</div>
<div class="dq-bar-count">{{ stats.count }}/{{ total }}</div>
</div>
{% endfor %}
</div>
<!-- Quality Distribution -->
<div class="dq-section">
<div class="dq-section-title">Rozkład jakości danych</div>
<div class="dq-dist-grid">
<div class="dq-dist-card basic">
<div class="dq-dist-value">{{ quality_dist.get('basic', 0) }}</div>
<div class="dq-dist-label">Podstawowe (&lt;34%)</div>
</div>
<div class="dq-dist-card enhanced">
<div class="dq-dist-value">{{ quality_dist.get('enhanced', 0) }}</div>
<div class="dq-dist-label">Rozszerzone (34-66%)</div>
</div>
<div class="dq-dist-card complete">
<div class="dq-dist-value">{{ quality_dist.get('complete', 0) }}</div>
<div class="dq-dist-label">Kompletne (67%+)</div>
</div>
</div>
<div class="dq-score-dist">
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('0-25', 0) }}</div>
<div class="dq-score-bucket-label">0-25%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('26-50', 0) }}</div>
<div class="dq-score-bucket-label">26-50%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('51-75', 0) }}</div>
<div class="dq-score-bucket-label">51-75%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('76-100', 0) }}</div>
<div class="dq-score-bucket-label">76-100%</div>
</div>
</div>
</div>
<!-- Companies Table -->
<div class="dq-section">
<div class="dq-section-title">Firmy wg kompletności danych</div>
<!-- Bulk action bar -->
<div class="dq-bulk-bar" id="bulkBar">
<span id="selectedCount">0</span> zaznaczonych
<button class="dq-bulk-btn" onclick="openBulkEnrich()">Uzbrój zaznaczone</button>
<button class="dq-bulk-btn" onclick="clearSelection()" style="background: transparent; color: white; border: 1px solid rgba(255,255,255,0.5);">Odznacz</button>
</div>
<div class="dq-table-controls">
<div>
<select class="dq-filter-select" id="qualityFilter" onchange="filterTable()">
<option value="all">Wszystkie poziomy</option>
<option value="basic">Podstawowe</option>
<option value="enhanced">Rozszerzone</option>
<option value="complete">Kompletne</option>
</select>
</div>
<div style="font-size: var(--font-size-sm); color: var(--text-secondary);">
Pokazano <span id="shownCount">{{ companies_table|length }}</span> z {{ total }} firm
</div>
</div>
<table class="dq-table" id="companiesTable">
<thead>
<tr>
<th style="width: 30px"><input type="checkbox" id="selectAll" onchange="toggleSelectAll()"></th>
<th onclick="sortTable(1)">Firma</th>
<th onclick="sortTable(2)" style="width: 100px">Score</th>
<th onclick="sortTable(3)" style="width: 80px">Pola</th>
<th style="width: 130px">Kompletność</th>
<th onclick="sortTable(5)" style="width: 100px">Jakość</th>
</tr>
</thead>
<tbody>
{% for c in companies_table %}
<tr data-quality="{{ c.label }}">
<td><input type="checkbox" class="company-cb" value="{{ c.id }}"></td>
<td>
<a href="{{ url_for('admin.admin_company_detail', company_id=c.id) }}" class="dq-company-link">
{{ c.name }}
</a>
</td>
<td>
<span class="dq-score-badge {% if c.score >= 67 %}high{% elif c.score >= 34 %}medium{% else %}low{% endif %}">
{{ c.score }}%
</span>
</td>
<td>{{ c.filled }}/{{ c.total }}</td>
<td>
<div class="dq-field-dots" title="{% for fname, fval in c.fields.items() %}{{ fname }}: {{ 'tak' if fval else 'nie' }}&#10;{% endfor %}">
{% for fname, fval in c.fields.items() %}
<span class="dq-field-dot {{ 'filled' if fval else 'empty' }}" title="{{ fname }}"></span>
{% endfor %}
</div>
</td>
<td>
<span class="dq-quality-badge {{ c.label }}">
{% if c.label == 'basic' %}Podstawowe{% elif c.label == 'enhanced' %}Rozszerzone{% else %}Kompletne{% endif %}
</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<!-- Bulk Enrich Modal -->
<div id="bulkModal" style="display: none; position: fixed; top: 0; left: 0; right: 0; bottom: 0; background: rgba(0,0,0,0.5); z-index: 9999; align-items: center; justify-content: center;">
<div style="background: var(--surface); border-radius: var(--radius-xl); padding: var(--spacing-xl); max-width: 500px; width: 90%; max-height: 80vh; overflow-y: auto;">
<h3 style="margin-bottom: var(--spacing-lg);">Uzbrój zaznaczone firmy</h3>
<p style="color: var(--text-secondary); margin-bottom: var(--spacing-lg);">
Wybierz kroki enrichmentu do wykonania dla <strong id="modalCount">0</strong> firm:
</p>
<div style="display: flex; flex-direction: column; gap: var(--spacing-sm); margin-bottom: var(--spacing-xl);">
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-registry" checked> Dane z rejestrów (CEIDG/KRS)
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-seo" checked> Audyt SEO
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-social" checked> Audyt Social Media
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-gbp" checked> Audyt GBP
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-logo"> Pobierz logo
</label>
</div>
<div style="display: flex; gap: var(--spacing-md); justify-content: flex-end;">
<button onclick="closeBulkModal()" style="padding: var(--spacing-sm) var(--spacing-lg); border: 1px solid var(--border); border-radius: var(--radius); background: var(--surface); cursor: pointer;">Anuluj</button>
<button onclick="startBulkEnrich()" style="padding: var(--spacing-sm) var(--spacing-lg); border: none; border-radius: var(--radius); background: var(--primary); color: white; font-weight: 600; cursor: pointer;">Rozpocznij</button>
</div>
<!-- Progress section -->
<div id="bulkProgress" style="display: none; margin-top: var(--spacing-xl); padding-top: var(--spacing-lg); border-top: 1px solid var(--border);">
<div style="display: flex; justify-content: space-between; margin-bottom: var(--spacing-sm);">
<span style="font-weight: 600;">Postęp</span>
<span id="progressText">0/0</span>
</div>
<div style="height: 8px; background: var(--background); border-radius: 4px; overflow: hidden;">
<div id="progressBar" style="height: 100%; background: var(--primary); border-radius: 4px; transition: width 0.3s; width: 0%;"></div>
</div>
<div id="progressLog" style="margin-top: var(--spacing-md); max-height: 200px; overflow-y: auto; font-size: var(--font-size-xs); font-family: monospace; color: var(--text-secondary);"></div>
</div>
</div>
</div>
{% endblock %}
{% block extra_js %}
// Data Quality Dashboard JS
// Show only the company rows whose data-quality attribute matches the value
// chosen in the #qualityFilter select ("all" shows every row), then write the
// number of visible rows into #shownCount.
function filterTable() {
    var wanted = document.getElementById('qualityFilter').value;
    var tableRows = document.querySelectorAll('#companiesTable tbody tr');
    var visibleCount = 0;
    for (var i = 0; i < tableRows.length; i++) {
        var tr = tableRows[i];
        var matches = wanted === 'all' || tr.dataset.quality === wanted;
        // An empty display value restores the row's default rendering.
        tr.style.display = matches ? '' : 'none';
        if (matches) {
            visibleCount++;
        }
    }
    document.getElementById('shownCount').textContent = visibleCount;
}
// Sort the rows of #companiesTable by the text of the cell at index colIdx.
//
// The first click on a column sorts descending; each further click on the same
// column toggles the direction. The current column and direction are kept in
// the table's data-sort-col / data-sort-dir attributes.
//
// A column is compared numerically only when EVERY cell in it parses as a
// number (after trimming and dropping the first '%'); otherwise all cells are
// compared as text using the Polish locale. Deciding this once per column keeps
// the comparator consistent: choosing per pair would mix numeric and text
// ordering when only some values start with a digit.
function sortTable(colIdx) {
    var table = document.getElementById('companiesTable');
    var tbody = table.querySelector('tbody');

    // Loose == is intentional: dataset values are strings, colIdx is a number.
    var asc = table.dataset.sortCol == colIdx && table.dataset.sortDir !== 'asc';
    table.dataset.sortCol = colIdx;
    table.dataset.sortDir = asc ? 'asc' : 'desc';

    // Extract each sort key once instead of re-reading the DOM in the comparator.
    var keyed = Array.from(tbody.querySelectorAll('tr'), function(row) {
        var text = row.cells[colIdx].textContent.trim().replace('%', '');
        return {row: row, text: text, num: parseFloat(text)};
    });
    var numericColumn = keyed.every(function(entry) { return !isNaN(entry.num); });

    keyed.sort(function(a, b) {
        var cmp = numericColumn ? a.num - b.num : a.text.localeCompare(b.text, 'pl');
        return asc ? cmp : -cmp;
    });

    // Re-appending an existing node moves it, so this reorders the rows in place.
    keyed.forEach(function(entry) { tbody.appendChild(entry.row); });
}
// Checkbox selection
// Copy the state of the header #selectAll checkbox onto every company checkbox
// whose table row is not hidden (display: none), then refresh the bulk bar.
function toggleSelectAll() {
    var state = document.getElementById('selectAll').checked;
    var boxes = document.querySelectorAll('.company-cb');
    for (var i = 0; i < boxes.length; i++) {
        var box = boxes[i];
        // Rows hidden by the quality filter keep their current selection.
        if (box.closest('tr').style.display === 'none') {
            continue;
        }
        box.checked = state;
    }
    updateBulkBar();
}
// Delegated listener: any change event coming from a .company-cb checkbox
// refreshes the bulk action bar.
document.addEventListener('change', function(evt) {
    var fromCompanyCheckbox = evt.target.classList.contains('company-cb');
    if (!fromCompanyCheckbox) {
        return;
    }
    updateBulkBar();
});
// Write the number of checked company checkboxes into #selectedCount and give
// #bulkBar the "active" class only while at least one checkbox is checked.
function updateBulkBar() {
    var checkedCount = document.querySelectorAll('.company-cb:checked').length;
    var bulkBar = document.getElementById('bulkBar');
    document.getElementById('selectedCount').textContent = checkedCount;
    // The second argument forces the class on (true) or off (false).
    bulkBar.classList.toggle('active', checkedCount > 0);
}
// Uncheck every company checkbox (including those in hidden rows) and the
// header #selectAll checkbox, then refresh the bulk bar.
function clearSelection() {
    var boxes = document.querySelectorAll('.company-cb');
    for (var i = 0; i < boxes.length; i++) {
        boxes[i].checked = false;
    }
    document.getElementById('selectAll').checked = false;
    updateBulkBar();
}
// Bulk enrich modal
// Open the bulk-enrichment modal: show the number of checked companies in
// #modalCount, display #bulkModal as a flex container and hide the progress
// section.
function openBulkEnrich() {
    var byId = function(id) { return document.getElementById(id); };
    var checkedCount = document.querySelectorAll('.company-cb:checked').length;
    byId('modalCount').textContent = checkedCount;
    byId('bulkModal').style.display = 'flex';
    byId('bulkProgress').style.display = 'none';
}
// Hide the bulk-enrichment modal.
function closeBulkModal() {
    var modal = document.getElementById('bulkModal');
    modal.style.display = 'none';
}
// Start a bulk enrichment job for the checked companies.
//
// Collects the ids of all checked .company-cb checkboxes and the enrichment
// steps ticked in the modal (#step-registry, #step-seo, #step-social,
// #step-gbp, #step-logo). Does nothing when either list is empty. Otherwise it
// reveals and resets the progress section, POSTs {company_ids, steps} as JSON
// to /admin/data-quality/bulk-enrich and, when the response contains a job_id,
// starts polling via pollProgress(). Failures are written to #progressLog.
function startBulkEnrich() {
    var log = document.getElementById('progressLog');

    // Append a red error line. textContent is used so that the message is
    // never parsed as HTML.
    function logError(message) {
        var line = document.createElement('div');
        line.style.color = '#ef4444';
        line.textContent = 'Błąd: ' + message;
        log.appendChild(line);
    }

    var companyIds = Array.from(document.querySelectorAll('.company-cb:checked'), function(cb) {
        // Explicit radix: the ids are decimal.
        return parseInt(cb.value, 10);
    });
    var steps = ['registry', 'seo', 'social', 'gbp', 'logo'].filter(function(step) {
        return document.getElementById('step-' + step).checked;
    });
    if (companyIds.length === 0 || steps.length === 0) return;

    document.getElementById('bulkProgress').style.display = 'block';
    document.getElementById('progressText').textContent = '0/' + companyIds.length;
    log.innerHTML = '';

    fetch('/admin/data-quality/bulk-enrich', {
        method: 'POST',
        headers: {'Content-Type': 'application/json', 'X-CSRFToken': document.querySelector('meta[name=csrf-token]')?.content || ''},
        body: JSON.stringify({company_ids: companyIds, steps: steps})
    })
    .then(function(r) { return r.json(); })
    .then(function(data) {
        if (data.job_id) {
            pollProgress(data.job_id, companyIds.length);
        } else {
            // Without a job_id there is nothing to poll; report it instead of
            // leaving the progress section stuck at 0/N.
            // NOTE(review): assumes the endpoint reports failures in an
            // "error" field - confirm against the route implementation.
            logError(data.error || 'Nie udało się uruchomić zadania');
        }
    })
    .catch(function(err) {
        logError(err.message);
    });
}
// Poll the status endpoint of a bulk enrichment job and update the progress UI.
//
// jobId - identifier returned by the bulk-enrich endpoint.
// total - number of companies submitted; used as the progress denominator.
//
// Each response updates #progressBar and #progressText; a non-empty
// latest_result is appended to #progressLog. While the reported status is
// 'running' the function re-schedules itself every 2 seconds; any other status
// ends polling with a completion line. A failed request logs the error and
// stops polling.
function pollProgress(jobId, total) {
    var log = document.getElementById('progressLog');

    // Append one line to the log. textContent is used because the text comes
    // from the server and must not be interpreted as HTML.
    function appendLine(text, color, bold) {
        var line = document.createElement('div');
        if (color) line.style.color = color;
        if (bold) line.style.fontWeight = '600';
        line.textContent = text;
        log.appendChild(line);
    }

    fetch('/admin/data-quality/bulk-enrich/status?job_id=' + encodeURIComponent(jobId))
        .then(function(r) { return r.json(); })
        .then(function(data) {
            var processed = data.processed || 0;
            // Guard the division so a zero total cannot yield NaN/Infinity.
            var pct = total > 0 ? Math.round(processed / total * 100) : 0;
            document.getElementById('progressBar').style.width = pct + '%';
            document.getElementById('progressText').textContent = processed + '/' + total;
            if (data.latest_result) {
                appendLine(data.latest_result);
                log.scrollTop = log.scrollHeight;
            }
            if (data.status === 'running') {
                setTimeout(function() { pollProgress(jobId, total); }, 2000);
            } else {
                appendLine('Zakończono!', '#22c55e', true);
            }
        })
        .catch(function(err) {
            // Surface the failure; otherwise the progress bar would just freeze.
            appendLine('Błąd: ' + err.message, '#ef4444', false);
        });
}
{% endblock %}

View File

@ -1478,6 +1478,12 @@
</svg>
Firmy
</a>
<a href="{{ url_for('admin.admin_data_quality') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>
</svg>
Jakość danych
</a>
{% if current_user.has_role(SystemRole.ADMIN) %}
<a href="{{ url_for('admin.admin_users') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">

View File

@ -1164,6 +1164,64 @@
</div>
{% endif %}
<!-- Godziny otwarcia z Google Business Profile -->
{% if website_analysis and website_analysis.google_opening_hours and website_analysis.google_opening_hours.weekday_text %}
{% set day_translations = {
'Monday': 'Poniedziałek',
'Tuesday': 'Wtorek',
'Wednesday': 'Środa',
'Thursday': 'Czwartek',
'Friday': 'Piątek',
'Saturday': 'Sobota',
'Sunday': 'Niedziela'
} %}
<div class="company-section">
<h2 class="section-title">Godziny otwarcia</h2>
<div style="background: var(--background); border-radius: var(--radius-lg); padding: var(--spacing-lg); border: 1px solid var(--border);">
<div style="display: flex; align-items: flex-start; gap: var(--spacing-md);">
<div style="width: 48px; height: 48px; border-radius: 12px; background: linear-gradient(135deg, #10b981, #059669); display: flex; align-items: center; justify-content: center; flex-shrink: 0;">
<svg width="24" height="24" fill="none" stroke="white" stroke-width="2" viewBox="0 0 24 24">
<circle cx="12" cy="12" r="10"/>
<polyline points="12 6 12 12 16 14"/>
</svg>
</div>
<div style="flex: 1;">
<div style="display: grid; gap: 4px;">
{% for line in website_analysis.google_opening_hours.weekday_text %}
{% set parts = line.split(': ', 1) %}
{% set day_en = parts[0] if parts|length > 1 else '' %}
{% set hours_text = parts[1] if parts|length > 1 else line %}
{% set day_pl = day_translations.get(day_en, day_en) %}
{% set is_closed = 'Closed' in hours_text %}
<div style="display: flex; justify-content: space-between; align-items: center; padding: 6px 0; {% if not loop.last %}border-bottom: 1px solid var(--border);{% endif %}">
<span style="font-weight: 600; color: var(--text-primary); min-width: 120px;">{{ day_pl }}</span>
<span style="color: {% if is_closed %}#ef4444{% else %}var(--text-secondary){% endif %};">
{% if is_closed %}Zamknięte{% else %}{{ hours_text }}{% endif %}
</span>
</div>
{% endfor %}
</div>
{% if website_analysis.google_opening_hours.open_now is not none %}
<div style="margin-top: var(--spacing-sm); padding-top: var(--spacing-sm); border-top: 1px solid var(--border);">
{% if website_analysis.google_opening_hours.open_now %}
<span style="display: inline-flex; align-items: center; gap: 6px; color: #10b981; font-weight: 600; font-size: var(--font-size-sm);">
<span style="width: 8px; height: 8px; border-radius: 50%; background: #10b981; display: inline-block;"></span>
Teraz otwarte
</span>
{% else %}
<span style="display: inline-flex; align-items: center; gap: 6px; color: #ef4444; font-weight: 600; font-size: var(--font-size-sm);">
<span style="width: 8px; height: 8px; border-radius: 50%; background: #ef4444; display: inline-block;"></span>
Teraz zamknięte
</span>
{% endif %}
</div>
{% endif %}
</div>
</div>
</div>
</div>
{% endif %}
<!-- O firmie - Single Description (prioritized sources) -->
{% set about_description = company.description_full or (ai_insights.business_summary if ai_insights else none) or (website_analysis.content_summary if website_analysis else none) %}
{% set _about_hidden = company.is_section_hidden('about') %}

88
utils/data_quality.py Normal file
View File

@ -0,0 +1,88 @@
"""
Data Quality Service
====================
Computes and updates company data quality scores.
Extracted from inline completeness logic in admin routes.
"""
import os
from database import CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
def compute_data_quality_score(company, db):
    """Compute the data completeness score for a company.

    Twelve boolean checks are evaluated: seven attributes read directly from
    ``company``, the presence of a logo file, the presence of a registry fetch
    timestamp, and the existence of SEO, social media and GBP audit rows
    linked to the company.

    Args:
        company: Company record. The attributes read are ``id``, ``slug``,
            ``nip``, ``address_city``, ``phone``, ``email``, ``website``,
            ``description_short``, ``category_id``, ``ceidg_fetched_at`` and
            ``krs_fetched_at``.
        db: Database session exposing ``query()`` (presumably a SQLAlchemy
            session -- confirm against callers).

    Returns:
        dict with ``'score'`` (int 0-100, percentage of satisfied checks,
        rounded down), ``'fields'`` (check name -> bool, in display order),
        ``'total'`` (number of checks) and ``'filled'`` (number satisfied).
    """
    def has_row(model):
        # Single existence probe shared by all audit checks so that they
        # cannot drift apart (one of them used count() before).
        return db.query(model).filter(
            model.company_id == company.id
        ).first() is not None

    # Logo check (webp or svg).
    # NOTE(review): the path is relative to the process working directory,
    # so the result depends on where the application was started.
    logo_dir = os.path.join('static', 'img', 'companies')
    logo_exists = any(
        os.path.isfile(os.path.join(logo_dir, f'{company.slug}.{ext}'))
        for ext in ('webp', 'svg')
    )

    # Registry data counts as present when either source has been fetched.
    registry_done = bool(company.ceidg_fetched_at or company.krs_fetched_at)

    seo_done = has_row(CompanyWebsiteAnalysis)
    social_done = has_row(CompanySocialMedia)
    gbp_done = has_row(GBPAudit)

    fields = {
        'NIP': bool(company.nip),
        'Adres': bool(company.address_city),
        'Telefon': bool(company.phone),
        'Email': bool(company.email),
        'Strona WWW': bool(company.website),
        'Opis': bool(company.description_short),
        'Kategoria': bool(company.category_id),
        'Logo': logo_exists,
        'Dane urzędowe': registry_done,
        'Audyt SEO': seo_done,
        'Audyt Social': social_done,
        'Audyt GBP': gbp_done,
    }

    filled = sum(fields.values())
    total = len(fields)
    # Integer arithmetic: floors exactly, without a float round-trip.
    score = filled * 100 // total

    return {
        'score': score,
        'fields': fields,
        'total': total,
        'filled': filled,
    }
def compute_data_quality_label(score):
    """Translate a numeric quality score into its label.

    Scores below 34 map to ``'basic'``, scores below 67 to ``'enhanced'``
    and everything else to ``'complete'``.
    """
    # Bands are checked in ascending order of their exclusive upper bound.
    for upper_bound, label in ((34, 'basic'), (67, 'enhanced')):
        if score < upper_bound:
            return label
    return 'complete'
def update_company_data_quality(company, db):
    """Recompute a company's data quality and store it on the record.

    Assigns ``company.data_quality_score`` and ``company.data_quality``;
    this function itself does not commit the session.

    Returns:
        The dict produced by ``compute_data_quality_score``.
    """
    quality = compute_data_quality_score(company, db)
    score = quality['score']
    company.data_quality_score = score
    company.data_quality = compute_data_quality_label(score)
    return quality