From e0bb6b718a1e98aca07ba436b15ad2a183fd2a6c Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Sat, 21 Feb 2026 07:25:39 +0100 Subject: [PATCH] feat: enhance data quality dashboard with filters, hints, weighted scores and contact scraping - Add clickable field coverage bars to filter companies missing specific data - Add quick-action buttons (Registry/SEO/GBP) per company in dashboard table - Add stale data detection (>6 months) with yellow badges - Implement weighted priority score (contacts 34%, audits 17%) - Add data hints in admin company detail showing where to find missing data - Add "Available data" section showing Google Business data ready to apply - Add POST /api/company//apply-hint endpoint for one-click data fill - Extend website content updater with phone/email extraction (AI + regex) Co-Authored-By: Claude Opus 4.6 --- blueprints/admin/routes_companies.py | 36 ++ blueprints/admin/routes_data_quality.py | 40 ++- blueprints/api/routes_company.py | 50 +++ scripts/website_content_updater.py | 107 +++++- templates/admin/company_detail.html | 56 +++- templates/admin/data_quality_dashboard.html | 350 +++++++++++++++++++- utils/data_quality.py | 18 +- 7 files changed, 636 insertions(+), 21 deletions(-) diff --git a/blueprints/admin/routes_companies.py b/blueprints/admin/routes_companies.py index 2666e8e..e1d716f 100644 --- a/blueprints/admin/routes_companies.py +++ b/blueprints/admin/routes_companies.py @@ -703,6 +703,8 @@ def admin_company_detail(company_id): GBPAudit.company_id == company_id ).order_by(GBPAudit.audit_date.desc()).first() + registry_stale = registry_done and registry_date and (datetime.now() - registry_date).days > 180 + enrichment = { 'registry': { 'done': registry_done, @@ -710,6 +712,7 @@ def admin_company_detail(company_id): 'date': registry_date, 'has_krs': bool(company.krs), 'has_nip': bool(company.nip), + 'stale': registry_stale, }, 'logo': { 'done': logo_exists, @@ -735,6 +738,38 @@ def admin_company_detail(company_id): # --- Completeness score (12 fields) --- completeness = compute_data_quality_score(company, db) + # --- Hints: where to find missing data --- + hints = {} + analysis = seo_analysis # CompanyWebsiteAnalysis object or None + + if not company.phone: + if analysis and analysis.google_phone: + hints['Telefon'] = {'source': 'Google Business', 'value': analysis.google_phone, 'action': 'apply'} + elif analysis and analysis.nap_on_website: + nap = analysis.nap_on_website if isinstance(analysis.nap_on_website, dict) else {} + if nap.get('phone'): + hints['Telefon'] = {'source': 'Strona WWW (NAP)', 'value': nap['phone'], 'action': 'apply'} + elif company.nip: + hints['Telefon'] = {'source': 'CEIDG/KRS', 'value': None, 'action': 'fetch_registry'} + + if not company.email: + if analysis and analysis.nap_on_website: + nap = analysis.nap_on_website if isinstance(analysis.nap_on_website, dict) else {} + if nap.get('email'): + hints['Email'] = {'source': 'Strona WWW (NAP)', 'value': nap['email'], 'action': 'apply'} + + if not company.website: + if analysis and analysis.google_website: + hints['Strona WWW'] = {'source': 'Google Business', 'value': analysis.google_website, 'action': 'apply'} + + if not company.address_city: + if analysis and analysis.google_address: + hints['Adres'] = {'source': 'Google Business', 'value': analysis.google_address, 'action': 'apply'} + + if not company.description_short: + if analysis and analysis.content_summary: + hints['Opis'] = {'source': 'Analiza strony WWW', 'value': analysis.content_summary[:200], 'action': 'apply'} + logger.info(f"Admin {current_user.email} viewed company detail: {company.name} (ID: {company_id})") return render_template( @@ -743,6 +778,7 @@ def admin_company_detail(company_id): enrichment=enrichment, completeness=completeness, users=users, + hints=hints, ) finally: db.close() diff --git a/blueprints/admin/routes_data_quality.py b/blueprints/admin/routes_data_quality.py index 8906b64..72bd04b 100644 --- a/blueprints/admin/routes_data_quality.py +++ b/blueprints/admin/routes_data_quality.py @@ -19,6 +19,7 @@ from database import ( CompanySocialMedia, GBPAudit, SystemRole ) from utils.decorators import role_required +from utils.data_quality import compute_weighted_score logger = logging.getLogger(__name__) @@ -118,7 +119,7 @@ def admin_data_quality(): } filled = sum(fields.values()) - score = int(filled / len(fields) * 100) + score = compute_weighted_score(fields) # Update counters for field_name, has_value in fields.items(): @@ -146,6 +147,13 @@ def admin_data_quality(): score_sum += score + # Stale data detection + registry_done = fields['Dane urzędowe'] + registry_date = c.krs_fetched_at or c.ceidg_fetched_at + registry_stale = registry_done and ( + (not registry_date) or ((now - registry_date).days > 180) + ) + companies_table.append({ 'id': c.id, 'name': c.name, @@ -157,6 +165,10 @@ def admin_data_quality(): 'data_quality': c.data_quality or 'basic', 'fields': fields, 'status': c.status, + 'nip': c.nip or '', + 'website': c.website or '', + 'registry_stale': registry_stale, + 'registry_date': registry_date, }) # Sort by score ascending (most incomplete first) @@ -170,6 +182,31 @@ def admin_data_quality(): avg_score = round(score_sum / total) if total > 0 else 0 + # Available data: companies where Google has data but company profile is empty + available_data = [] + analyses = db.query(CompanyWebsiteAnalysis).all() + company_map = {c.id: c for c in companies} + + for a in analyses: + comp = company_map.get(a.company_id) + if not comp: + continue + if a.google_phone and not comp.phone: + available_data.append({ + 'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug, + 'field': 'Telefon', 'source': 'Google Business', 'value': a.google_phone + }) + if a.google_website and not comp.website: + available_data.append({ + 'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug, + 'field': 'Strona WWW', 'source': 'Google Business', 'value': a.google_website + }) + if a.google_address and not comp.address_city: + available_data.append({ + 'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug, + 'field': 'Adres', 'source': 'Google Business', 'value': a.google_address + }) + return render_template( 'admin/data_quality_dashboard.html', total=total, @@ -178,6 +215,7 @@ def admin_data_quality(): score_dist=score_dist, avg_score=avg_score, companies_table=companies_table, + available_data=available_data, now=now, ) finally: diff --git a/blueprints/api/routes_company.py b/blueprints/api/routes_company.py index f6ba210..5a2912d 100644 --- a/blueprints/api/routes_company.py +++ b/blueprints/api/routes_company.py @@ -1346,3 +1346,53 @@ def test_sanitization(): except Exception as e: logger.error(f"Error testing sanitization: {e}") return jsonify({'success': False, 'error': str(e)}), 500 + + +@bp.route('/company//apply-hint', methods=['POST']) +@login_required +def api_apply_hint(company_id): + """Apply a data hint to fill a missing company field.""" + if not current_user.is_admin: + return jsonify({'success': False, 'error': 'Tylko administrator'}), 403 + + db = SessionLocal() + try: + company = db.query(Company).filter_by(id=company_id).first() + if not company: + return jsonify({'success': False, 'error': 'Firma nie znaleziona'}), 404 + + data = request.get_json() or {} + field = data.get('field', '') + value = data.get('value', '').strip() + + if not field or not value: + return jsonify({'success': False, 'error': 'Brak pola lub wartości'}), 400 + + # Map display names to model attributes + FIELD_MAP = { + 'Telefon': 'phone', + 'Email': 'email', + 'Strona WWW': 'website', + 'Adres': 'address_city', + 'Opis': 'description_short', + } + + attr = FIELD_MAP.get(field) + if not attr: + return jsonify({'success': False, 'error': f'Niedozwolone pole: {field}'}), 400 + + setattr(company, attr, value) + db.commit() + + update_company_data_quality(company, db) + db.commit() + + logger.info(f"Hint applied: {field}={value[:50]} for company {company.id} by {current_user.email}") + + return jsonify({'success': True, 'message': f'Pole "{field}" uzupełnione'}) + except Exception as e: + db.rollback() + logger.error(f"Error applying hint for company {company_id}: {e}") + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() diff --git a/scripts/website_content_updater.py b/scripts/website_content_updater.py index 33d2fba..7f9a2b5 100644 --- a/scripts/website_content_updater.py +++ b/scripts/website_content_updater.py @@ -54,7 +54,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # Load .env from project root load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.env')) -from database import Company, CompanyWebsiteAnalysis, SessionLocal +from database import Company, CompanyWebsiteAnalysis, CompanyContact, SessionLocal # Configure logging logging.basicConfig( @@ -205,7 +205,9 @@ Zwróć odpowiedź w formacie JSON (tylko JSON, bez markdown): "specializations": ["specjalizacja 1", "specjalizacja 2", ...], "target_customers": ["klient docelowy 1", "klient docelowy 2", ...], "regions": ["region 1", "region 2", ...], - "summary": "Szczegółowe podsumowanie działalności firmy (2-3 zdania)" + "summary": "Szczegółowe podsumowanie działalności firmy (2-3 zdania)", + "contact_phone": "numer telefonu firmy jeśli widoczny na stronie, w formacie +48XXXXXXXXX lub oryginalnym", + "contact_email": "adres email kontaktowy firmy jeśli widoczny na stronie" }} ZASADY - WYODRĘBNIJ WSZYSTKO, BEZ LIMITÓW: @@ -217,6 +219,8 @@ ZASADY - WYODRĘBNIJ WSZYSTKO, BEZ LIMITÓW: 6. target_customers: Typy klientów (np. "MŚP", "korporacje", "sektor publiczny") 7. regions: Obszar działania geograficzny (miasta, regiony) 8. summary: Pełne podsumowanie czym zajmuje się firma +9. contact_phone: Numer telefonu firmy (najlepiej główny/biurowy) +10. contact_email: Adres email firmy (najlepiej ogólny/biurowy, nie osobisty) WAŻNE: - Wyodrębnij WSZYSTKIE informacje bez ograniczeń ilościowych @@ -261,10 +265,15 @@ ODPOWIEDŹ (tylko JSON):""" # Merge keywords + brands + target_customers + regions into main_keywords merged_keywords = list(dict.fromkeys(all_keywords + all_brands + all_target_customers + all_regions)) + contact_phone = data.get('contact_phone', '') + contact_email = data.get('contact_email', '') + return { 'services': merged_services, # No limit 'keywords': merged_keywords, # No limit 'summary': data.get('summary', '')[:1000] if data.get('summary') else None, + 'contact_phone': contact_phone, + 'contact_email': contact_email, 'raw_data': { 'services': all_services, 'products': all_products, @@ -284,6 +293,41 @@ ODPOWIEDŹ (tylko JSON):""" logger.error(f"Gemini extraction error: {e}") return {'services': [], 'keywords': [], 'summary': None, 'error': str(e)[:100]} + def extract_contacts_regex(self, html_text: str) -> Dict[str, List[str]]: + """Extract phone numbers and emails from raw website text using regex.""" + contacts = {'phones': [], 'emails': []} + + # Email extraction + email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' + emails = re.findall(email_pattern, html_text) + # Filter out common non-contact emails + skip_domains = {'example.com', 'sentry.io', 'wixpress.com', 'wordpress.org', 'w3.org', 'schema.org', 'googleapis.com'} + contacts['emails'] = list(dict.fromkeys( + e.lower() for e in emails + if not any(d in e.lower() for d in skip_domains) + ))[:5] # Max 5 emails + + # Phone extraction - Polish patterns + phone_patterns = [ + r'(?:\+48\s?)?\d{2}[\s-]?\d{3}[\s-]?\d{2}[\s-]?\d{2}', # +48 XX XXX XX XX + r'(?:\+48\s?)?\d{3}[\s-]?\d{3}[\s-]?\d{3}', # +48 XXX XXX XXX + r'\(\d{2}\)\s?\d{3}[\s-]?\d{2}[\s-]?\d{2}', # (XX) XXX XX XX + r'(?:tel|phone|telefon)[.:]\s*[\+]?\d[\d\s\-]{7,14}', # tel: +48... + ] + + for pattern in phone_patterns: + matches = re.findall(pattern, html_text, re.IGNORECASE) + for m in matches: + # Clean up + clean = re.sub(r'(?:tel|phone|telefon)[.:]?\s*', '', m, flags=re.IGNORECASE).strip() + digits = re.sub(r'\D', '', clean) + if 9 <= len(digits) <= 12: + contacts['phones'].append(clean) + + contacts['phones'] = list(dict.fromkeys(contacts['phones']))[:5] + + return contacts + def update_company(self, company: Company) -> bool: """ Aktualizuje dane jednej firmy. @@ -366,6 +410,65 @@ ODPOWIEDŹ (tylko JSON):""" self.db.commit() self.stats['updated'] += 1 + # --- Contact extraction --- + all_phones = [] + all_emails = [] + + # From Gemini + if extracted.get('contact_phone'): + all_phones.append(extracted['contact_phone']) + if extracted.get('contact_email'): + all_emails.append(extracted['contact_email']) + + # From regex fallback + regex_contacts = self.extract_contacts_regex(text) + all_phones.extend(regex_contacts.get('phones', [])) + all_emails.extend(regex_contacts.get('emails', [])) + + # Deduplicate + all_phones = list(dict.fromkeys(all_phones)) + all_emails = list(dict.fromkeys(all_emails)) + + # Save to CompanyContact (source='website') + contacts_added = 0 + for phone in all_phones[:3]: # Max 3 phones + existing = self.db.query(CompanyContact).filter_by( + company_id=company.id, contact_type='phone', value=phone + ).first() + if not existing: + self.db.add(CompanyContact( + company_id=company.id, + contact_type='phone', + value=phone, + source='website', + source_url=company.website, + source_date=datetime.now().date(), + is_verified=False, + )) + contacts_added += 1 + logger.info(f" [{company.id}] Found phone: {phone}") + + for email in all_emails[:3]: # Max 3 emails + existing = self.db.query(CompanyContact).filter_by( + company_id=company.id, contact_type='email', value=email + ).first() + if not existing: + self.db.add(CompanyContact( + company_id=company.id, + contact_type='email', + value=email, + source='website', + source_url=company.website, + source_date=datetime.now().date(), + is_verified=False, + )) + contacts_added += 1 + logger.info(f" [{company.id}] Found email: {email}") + + if contacts_added > 0: + self.db.commit() + logger.info(f" [{company.id}] Saved {contacts_added} new contacts") + logger.info(f"[{company.id}] {company.name}: ✓ Zaktualizowano") return True diff --git a/templates/admin/company_detail.html b/templates/admin/company_detail.html index 70b983e..3396b3f 100644 --- a/templates/admin/company_detail.html +++ b/templates/admin/company_detail.html @@ -473,6 +473,18 @@ .check-ok { color: var(--success); } .check-missing { color: var(--error); } + .hint-apply-btn { + padding: 1px 8px; + font-size: var(--font-size-xs); + background: var(--primary); + color: white; + border: none; + border-radius: var(--radius); + cursor: pointer; + white-space: nowrap; + } + .hint-apply-btn:hover { opacity: 0.9; } + /* Toast */ .toast-container { position: fixed; @@ -701,6 +713,11 @@ Nie wykonano {% endif %} + {% if enrichment.registry.stale %} +
+ Dane z rejestru pobrane ponad 6 mcy temu — odśwież +
+ {% endif %} {% if enrichment.registry.source %}
Źródło: {{ enrichment.registry.source }}
{% endif %} @@ -815,10 +832,24 @@
{% if is_filled %} + {{ field_name }} {% else %} - {% endif %} {{ field_name }} + {% if hints and hints.get(field_name) %} +
+ + {{ hints[field_name].source }}{% if hints[field_name].value %}: {{ hints[field_name].value[:40] }}{% endif %} + + {% if hints[field_name].action == 'apply' and hints[field_name].value %} + + {% elif hints[field_name].action == 'fetch_registry' %} + + {% endif %} +
+ {% endif %} + {% endif %}
{% endfor %} @@ -847,6 +878,29 @@ }, 5000); } + function applyHint(companyId, fieldName, value) { + if (!confirm('Uzupełnić pole "' + fieldName + '" wartością: ' + value + '?')) return; + + fetch('/api/company/' + companyId + '/apply-hint', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRFToken': csrfToken + }, + body: JSON.stringify({field: fieldName, value: value}) + }) + .then(function(r) { return r.json(); }) + .then(function(data) { + if (data.success) { + showToast('Pole "' + fieldName + '" uzupełnione', 'success'); + setTimeout(function() { location.reload(); }, 1000); + } else { + showToast('Błąd: ' + data.error, 'error'); + } + }) + .catch(function(err) { showToast('Błąd: ' + err.message, 'error'); }); + } + var _skipReload = false; function runEnrichAction(btn, url, body) { diff --git a/templates/admin/data_quality_dashboard.html b/templates/admin/data_quality_dashboard.html index 4e20e83..202a44f 100644 --- a/templates/admin/data_quality_dashboard.html +++ b/templates/admin/data_quality_dashboard.html @@ -360,6 +360,101 @@ background: var(--background); } + /* Bar row click & active state */ + .dq-bar-row:hover { + background: var(--background); + border-radius: var(--radius); + } + + .dq-bar-row.dq-bar-active { + background: var(--background); + border-radius: var(--radius); + box-shadow: inset 3px 0 0 var(--primary); + } + + /* Stale data badge */ + .dq-stale-badge { + background: #fef9c3; + color: #854d0e; + font-size: var(--font-size-xs); + padding: 1px 6px; + border-radius: var(--radius); + } + + /* Quick action buttons */ + .dq-action-btn { + display: inline-flex; + align-items: center; + justify-content: center; + width: 24px; + height: 24px; + border: 1px solid var(--border); + border-radius: var(--radius); + background: var(--surface); + color: var(--text-secondary); + cursor: pointer; + padding: 0; + transition: all 0.15s; + } + + .dq-action-btn:hover:not(:disabled) { + border-color: var(--primary); + color: var(--primary); + background: #eff6ff; + } + + .dq-action-btn:disabled { + opacity: 0.3; + cursor: not-allowed; + } + + .dq-action-btn.loading { + animation: dq-spin 1s linear infinite; + } + + @keyframes dq-spin { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } + } + + .dq-actions-cell { + display: flex; + gap: 4px; + } + + /* Field filter reset */ + .dq-field-filter-info { + display: none; + align-items: center; + gap: var(--spacing-sm); + padding: var(--spacing-sm) var(--spacing-md); + background: #eff6ff; + border: 1px solid #bfdbfe; + border-radius: var(--radius); + margin-bottom: var(--spacing-md); + font-size: var(--font-size-sm); + color: #1e40af; + } + + .dq-field-filter-info.active { + display: flex; + } + + .dq-field-filter-reset { + margin-left: auto; + padding: 2px 8px; + border: 1px solid #93c5fd; + border-radius: var(--radius); + background: white; + color: #2563eb; + cursor: pointer; + font-size: var(--font-size-xs); + } + + .dq-field-filter-reset:hover { + background: #dbeafe; + } + /* Responsive */ @media (max-width: 768px) { .dq-bar-label { width: 100px; font-size: var(--font-size-xs); } @@ -406,7 +501,7 @@
Pokrycie danych per pole
{% for field_name, stats in field_stats.items() %} -
+
{{ field_name }}
+ +{% if available_data %} +
+
Dane gotowe do uzupełnienia ({{ available_data|length }})
+

+ Poniższe dane zostały znalezione w Google Business Profile, ale nie są jeszcze w profilu firmy. +

+ +
+ +
+ + + + + + + + + + + + + {% for item in available_data %} + + + + + + + + {% endfor %} + +
FirmaPoleŹródłoWartośćAkcja
{{ item.company_name }}{{ item.field }}{{ item.source }}{{ item.value[:50] }} + +
+
+{% endif %} +
Firmy wg kompletności danych
@@ -468,6 +608,11 @@
+
+ Filtr pola: — firmy bez tego pola + +
+
@@ -506,12 +652,15 @@ {{ c.score }}% + {% if c.registry_stale %} + Dane stare + {% endif %} {{ c.filled }}/{{ c.total }}
{% for fname, fval in c.fields.items() %} - + {% endfor %}
@@ -520,6 +669,37 @@ {% if c.label == 'basic' %}Podstawowe{% elif c.label == 'enhanced' %}Rozszerzone{% else %}Kompletne{% endif %} + +
+ {% if not c.fields['Dane urzędowe'] and c.nip %} + + {% else %} + + {% endif %} + {% if not c.fields['Audyt SEO'] and c.website %} + + {% else %} + + {% endif %} + {% if not c.fields['Audyt GBP'] %} + + {% else %} + + {% endif %} +
+ {% endfor %} @@ -574,18 +754,7 @@ // Data Quality Dashboard JS function filterTable() { - var filter = document.getElementById('qualityFilter').value; - var rows = document.querySelectorAll('#companiesTable tbody tr'); - var shown = 0; - rows.forEach(function(row) { - if (filter === 'all' || row.dataset.quality === filter) { - row.style.display = ''; - shown++; - } else { - row.style.display = 'none'; - } - }); - document.getElementById('shownCount').textContent = shown; + applyFilters(); } function sortTable(colIdx) { @@ -692,6 +861,115 @@ }); } + // --- A1: Filter by field --- + var activeFieldFilter = null; + + function filterByField(fieldName) { + // Toggle: if same field clicked again, reset + if (activeFieldFilter === fieldName) { + resetFieldFilter(); + return; + } + activeFieldFilter = fieldName; + + // Highlight active bar + document.querySelectorAll('.dq-bar-row').forEach(function(row) { + row.classList.toggle('dq-bar-active', row.dataset.field === fieldName); + }); + + // Show filter info + document.getElementById('fieldFilterName').textContent = fieldName; + document.getElementById('fieldFilterInfo').classList.add('active'); + + applyFilters(); + } + + function resetFieldFilter() { + activeFieldFilter = null; + document.querySelectorAll('.dq-bar-row').forEach(function(row) { + row.classList.remove('dq-bar-active'); + }); + document.getElementById('fieldFilterInfo').classList.remove('active'); + applyFilters(); + } + + function applyFilters() { + var qualityFilter = document.getElementById('qualityFilter').value; + var rows = document.querySelectorAll('#companiesTable tbody tr'); + var shown = 0; + rows.forEach(function(row) { + var qualityMatch = (qualityFilter === 'all' || row.dataset.quality === qualityFilter); + var fieldMatch = true; + if (activeFieldFilter) { + try { + var fields = JSON.parse(row.dataset.fields); + // Show only companies MISSING this field + fieldMatch = !fields[activeFieldFilter]; + } catch(e) { fieldMatch = true; } + } + if (qualityMatch && fieldMatch) { + row.style.display = ''; + shown++; + } else { + row.style.display = 'none'; + } + }); + document.getElementById('shownCount').textContent = shown; + } + + // --- A2: Quick action buttons --- + function quickAction(btn, type, companyId) { + if (btn.disabled || btn.classList.contains('loading')) return; + var originalHTML = btn.innerHTML; + btn.classList.add('loading'); + btn.innerHTML = ''; + var csrf = document.querySelector('meta[name=csrf-token]')?.content || ''; + var url, body; + if (type === 'registry') { + url = '/api/company/' + companyId + '/enrich-registry'; + body = null; + } else if (type === 'seo') { + url = '/api/seo/audit'; + body = JSON.stringify({company_id: companyId}); + } else if (type === 'gbp') { + url = '/api/gbp/audit'; + body = JSON.stringify({company_id: companyId}); + } + + var opts = { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-CSRFToken': csrf} + }; + if (body) opts.body = body; + + fetch(url, opts) + .then(function(r) { return r.json().then(function(d) { return {ok: r.ok, data: d}; }); }) + .then(function(result) { + btn.classList.remove('loading'); + if (result.ok) { + btn.innerHTML = ''; + btn.disabled = true; + btn.title = 'Wykonano'; + // Update corresponding dot + var row = btn.closest('tr'); + var fieldName = type === 'registry' ? 'Dane urzędowe' : (type === 'seo' ? 'Audyt SEO' : 'Audyt GBP'); + var dot = row.querySelector('.dq-field-dot[data-field="' + fieldName + '"]'); + if (dot) { + dot.classList.remove('empty'); + dot.classList.add('filled'); + } + } else { + btn.innerHTML = originalHTML; + btn.title = 'Błąd: ' + (result.data.error || 'nieznany'); + } + }) + .catch(function(err) { + btn.classList.remove('loading'); + btn.innerHTML = originalHTML; + btn.title = 'Błąd: ' + err.message; + }); + } + function pollProgress(jobId, total) { fetch('/admin/data-quality/bulk-enrich/status?job_id=' + jobId) .then(function(r) { return r.json(); }) @@ -714,4 +992,44 @@ } }); } + + function applyAvailableHint(companyId, field, value, rowId) { + var btn = event.target; + btn.disabled = true; + btn.textContent = '...'; + + fetch('/api/company/' + companyId + '/apply-hint', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRFToken': document.querySelector('meta[name=csrf-token]')?.content || '' + }, + body: JSON.stringify({field: field, value: value}) + }) + .then(function(r) { return r.json(); }) + .then(function(data) { + if (data.success) { + var row = document.getElementById(rowId); + if (row) row.style.opacity = '0.3'; + btn.textContent = 'OK'; + btn.style.background = '#22c55e'; + } else { + btn.textContent = 'Błąd'; + btn.style.background = '#ef4444'; + } + }) + .catch(function() { + btn.textContent = 'Błąd'; + btn.style.background = '#ef4444'; + }); + } + + function applyAllAvailableHints() { + if (!confirm('Uzupełnić wszystkie dane z Google Business?')) return; + var rows = document.querySelectorAll('#availableDataTable tbody tr'); + rows.forEach(function(row) { + var btn = row.querySelector('.hint-apply-btn'); + if (btn && !btn.disabled) btn.click(); + }); + } {% endblock %} diff --git a/utils/data_quality.py b/utils/data_quality.py index e398d1c..570332f 100644 --- a/utils/data_quality.py +++ b/utils/data_quality.py @@ -11,6 +11,22 @@ import os from database import CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit +FIELD_WEIGHTS = { + 'NIP': 10, 'Adres': 8, 'Telefon': 12, 'Email': 12, + 'Strona WWW': 10, 'Opis': 10, 'Kategoria': 5, + 'Logo': 8, 'Dane urzędowe': 8, + 'Audyt SEO': 5, 'Audyt Social': 5, 'Audyt GBP': 7, +} + +MAX_WEIGHT = sum(FIELD_WEIGHTS.values()) + + +def compute_weighted_score(fields): + """Compute weighted score from fields dict. Returns int 0-100.""" + weighted = sum(FIELD_WEIGHTS.get(f, 0) for f, v in fields.items() if v) + return int(weighted / MAX_WEIGHT * 100) + + def compute_data_quality_score(company, db): """Compute data quality score for a company. @@ -58,7 +74,7 @@ def compute_data_quality_score(company, db): filled = sum(fields.values()) total = len(fields) - score = int(filled / total * 100) + score = compute_weighted_score(fields) return { 'score': score,