nordabiz/blueprints/api/routes_company.py
Maciej Pienczyn 93e90b2c72
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat: add data quality dashboard, auto-scoring, bulk enrichment and GBP data flow
- Extract 12-field completeness scoring to utils/data_quality.py service
- Auto-update data_quality_score and data_quality label on company data changes
- Add /admin/data-quality dashboard with field coverage stats, quality distribution, and sortable company table
- Add bulk enrichment with background processing, step selection, and progress tracking
- Flow GBP phone/website to Company record when company fields are empty
- Display Google opening hours on public company profile
- Add BulkEnrichmentJob model and migration 075
- Refactor arm_company.py to support selective steps and progress callbacks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 07:02:45 +01:00

1349 lines
50 KiB
Python

"""
Company API Routes - API blueprint
Migrated from app.py as part of the blueprint refactoring.
Contains API routes for company data, validation, and AI enrichment.
"""
import json
import logging
import os
import re
import time
from datetime import datetime, date
import requests
from bs4 import BeautifulSoup
from flask import jsonify, request, current_app
from flask_login import current_user, login_required
from database import (
SessionLocal, Company, User, Person, CompanyPerson, CompanyPKD, CompanyAIInsights, AiEnrichmentProposal
)
from datetime import timedelta
import gemini_service
import krs_api_service
from utils.data_quality import update_company_data_quality
from ceidg_api_service import fetch_ceidg_by_nip
from . import bp
logger = logging.getLogger(__name__)
# ============================================================
# COMPANY DATA API ROUTES
# ============================================================
@bp.route('/companies')
def api_companies():
    """API: list every active company.

    Returns:
        JSON ``{'success': True, 'companies': [...]}``; each entry holds the
        company's id, name, category name (``None`` when the company has no
        category), short description, website, phone and email.
    """
    def _serialize(company):
        # Flatten one Company row into the public JSON shape.
        category = company.category
        return {
            'id': company.id,
            'name': company.name,
            'category': category.name if category else None,
            'description': company.description_short,
            'website': company.website,
            'phone': company.phone,
            'email': company.email
        }

    session = SessionLocal()
    try:
        active_companies = session.query(Company).filter_by(status='active').all()
        payload = [_serialize(company) for company in active_companies]
        return jsonify({'success': True, 'companies': payload})
    finally:
        # Always release the session, also when serialization raises.
        session.close()
@bp.route('/connections')
def api_connections():
    """
    API: Company-person connection graph for the D3.js visualization.

    Returns:
        JSON with ``nodes`` (active companies followed by people that have at
        least one company relationship), ``links`` (one per role held in an
        active company) and ``stats`` (node and link counts).
    """
    def _in_active_company(role):
        # A role only counts when its company exists and is active.
        return role.company and role.company.status == 'active'

    session = SessionLocal()
    try:
        active_companies = session.query(Company).filter_by(status='active').all()
        # Only people joined to at least one CompanyPerson row.
        linked_people = session.query(Person).join(CompanyPerson).distinct().all()

        company_nodes = [
            {
                'id': f'company_{company.id}',
                'name': company.name,
                'type': 'company',
                'category': company.category.name if company.category else 'Other',
                'slug': company.slug,
                'has_krs': bool(company.krs),
                'city': company.address_city or ''
            }
            for company in active_companies
        ]

        person_nodes = [
            {
                'id': f'person_{person.id}',
                'name': f'{person.imiona} {person.nazwisko}',
                'type': 'person',
                # Distinct companies, not roles: one person may hold several
                # roles in the same company.
                'company_count': len({
                    role.company_id
                    for role in person.company_roles
                    if _in_active_company(role)
                })
            }
            for person in linked_people
        ]

        links = [
            {
                'source': f'person_{person.id}',
                'target': f'company_{role.company_id}',
                'role': role.role,
                'category': role.role_category
            }
            for person in linked_people
            for role in person.company_roles
            if _in_active_company(role)
        ]

        return jsonify({
            'success': True,
            'nodes': company_nodes + person_nodes,
            'links': links,
            'stats': {
                'companies': len(company_nodes),
                'people': len(person_nodes),
                'connections': len(links)
            }
        })
    finally:
        session.close()
# ============================================================
# VALIDATION API ROUTES
# ============================================================
def validate_email(email):
    """Return True when *email* looks like a syntactically valid address.

    The check is deliberately simple: ``local@domain.tld`` with a TLD of at
    least two ASCII letters. It does not verify that the address exists.

    Args:
        email: Candidate address. Anything that is not a ``str`` is rejected
            instead of raising.

    Returns:
        bool: True if the whole string matches the pattern, otherwise False.
    """
    if not isinstance(email, str):
        return False
    # fullmatch (rather than match + '$') so a trailing newline such as
    # "a@b.com\n" is rejected: '$' alone also matches just before a final "\n".
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    return re.fullmatch(pattern, email) is not None
@bp.route('/check-email', methods=['POST'])
def api_check_email():
    """API: Check whether an email address is still available.

    Expects a JSON body ``{"email": "..."}``. The address is trimmed and
    lower-cased before validation and lookup.

    Returns:
        200 with ``{'available': bool, 'email': str}`` for a well-formed
        address, or 400 with ``{'available': False, 'error': ...}`` when the
        body is missing/malformed or the address fails validation.
    """
    # silent=True: a missing or malformed JSON body becomes None instead of an
    # error raised by Flask; anything that is not a JSON object is treated as
    # an empty payload so the request ends in the regular 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_email = data.get('email')
    email = raw_email.strip().lower() if isinstance(raw_email, str) else ''
    # Validate email format
    if not email or not validate_email(email):
        return jsonify({
            'available': False,
            'error': 'Nieprawidłowy format email'
        }), 400
    db = SessionLocal()
    try:
        # Available only when no user row already holds this address.
        existing_user = db.query(User).filter_by(email=email).first()
        return jsonify({
            'available': existing_user is None,
            'email': email
        })
    finally:
        db.close()
@bp.route('/verify-nip', methods=['POST'])
def api_verify_nip():
    """API: Verify a NIP and report whether it belongs to an active company.

    Expects a JSON body ``{"nip": "<10 digits>"}``.

    Returns:
        200 with ``success``, ``is_member``, ``company_name`` and
        ``company_id`` (the last two are ``None`` when no active company has
        this NIP), or 400 when the body is missing/malformed or the NIP is not
        exactly ten digits.
    """
    # silent=True: missing or malformed JSON yields None instead of an error
    # raised by Flask; non-object payloads are treated as empty so the caller
    # gets the regular 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_nip = data.get('nip')
    nip = raw_nip.strip() if isinstance(raw_nip, str) else ''
    # Validate NIP format
    if not nip or not re.match(r'^\d{10}$', nip):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format NIP'
        }), 400
    db = SessionLocal()
    try:
        # Check if NIP exists in companies database
        company = db.query(Company).filter_by(nip=nip, status='active').first()
        if company:
            return jsonify({
                'success': True,
                'is_member': True,
                'company_name': company.name,
                'company_id': company.id
            })
        return jsonify({
            'success': True,
            'is_member': False,
            'company_name': None,
            'company_id': None
        })
    finally:
        db.close()
@bp.route('/verify-krs', methods=['GET', 'POST'])
def api_verify_krs():
    """
    API: Verify company data from KRS Open API (prs.ms.gov.pl).

    GET /api/verify-krs?krs=0000817317
    POST /api/verify-krs with JSON body: {"krs": "0000817317"}

    Returns official KRS data including:
    - Company name, NIP, REGON
    - Address
    - Capital
    - Registration date
    - Management board (anonymized in Open API)
    - Shareholders (anonymized in Open API)

    Status codes: 400 for a missing or malformed KRS number, 404 when the
    registry lookup returns nothing, 500 when the lookup raises.
    """
    # Get KRS from query params (GET) or JSON body (POST)
    if request.method == 'GET':
        raw_krs = request.args.get('krs', '')
    else:
        data = request.get_json(silent=True)
        # A body that is not a JSON object carries no usable 'krs' key.
        raw_krs = data.get('krs') if isinstance(data, dict) else None
    # Non-string values (null, numbers) are rejected by the format check
    # below instead of raising AttributeError on .strip().
    krs = raw_krs.strip() if isinstance(raw_krs, str) else ''
    # Validate KRS format (7-10 digits)
    if not krs or not re.match(r'^\d{7,10}$', krs):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format KRS (wymagane 7-10 cyfr)'
        }), 400
    # Normalize to 10 digits
    krs_normalized = krs.zfill(10)
    try:
        # Fetch data from KRS Open API
        krs_data = krs_api_service.get_company_from_krs(krs_normalized)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {krs_normalized} w rejestrze',
                'krs': krs_normalized
            }), 404
        # Check if company exists in our database
        db = SessionLocal()
        try:
            our_company = db.query(Company).filter_by(krs=krs_normalized).first()
            is_member = our_company is not None
            company_id = our_company.id if our_company else None
        finally:
            db.close()
        return jsonify({
            'success': True,
            'krs': krs_normalized,
            'is_norda_member': is_member,
            'company_id': company_id,
            'data': krs_data.to_dict(),
            'formatted_address': krs_api_service.format_address(krs_data),
            'source': 'KRS Open API (prs.ms.gov.pl)',
            'note': 'Dane osobowe (imiona, nazwiska) są zanonimizowane w Open API'
        })
    except Exception as e:
        # Log the failure so it is traceable server-side, matching the other
        # routes in this module that log before returning a 500.
        logger.error(f"KRS verification error for {krs_normalized}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas pobierania danych z KRS: {str(e)}'
        }), 500
@bp.route('/company/<int:company_id>/refresh-krs', methods=['POST'])
@login_required
def api_refresh_company_krs(company_id):
    """
    API: Refresh company data from KRS Open API.

    Compares NIP, REGON (first 9 characters), formatted address and city from
    the registry with the stored company and overwrites the fields that
    differ, then stamps ``krs_verified_at`` and commits.

    Requires login.

    Returns:
        200 with the dict of changed fields and the raw KRS payload; 404 when
        the company or its registry entry is missing; 400 when the company has
        no KRS number; 500 (after rollback) on any other error.
    """
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404
        if not company.krs:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma numeru KRS'
            }), 400
        # Fetch data from KRS
        krs_data = krs_api_service.get_company_from_krs(company.krs)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {company.krs} w rejestrze'
            }), 404
        # Update company data (only non-personal data)
        updates = {}
        if krs_data.nip and krs_data.nip != company.nip:
            updates['nip'] = krs_data.nip
            company.nip = krs_data.nip
        if krs_data.regon:
            # Only the first 9 characters of the registry REGON are stored.
            regon_9 = krs_data.regon[:9]
            if regon_9 != company.regon:
                updates['regon'] = regon_9
                company.regon = regon_9
        # Update address if it differs from the stored one.
        # NOTE(review): other routes in this module use address_street /
        # address_city / address_full; confirm that Company really exposes
        # `address` and `city`.
        new_address = krs_api_service.format_address(krs_data)
        if new_address and new_address != company.address:
            updates['address'] = new_address
            company.address = new_address
        if krs_data.miejscowosc and krs_data.miejscowosc != company.city:
            updates['city'] = krs_data.miejscowosc
            company.city = krs_data.miejscowosc
        # NOTE(review): the share capital is reported in `updates` whenever it
        # is present but is never written to the company record, so it also
        # counts toward the "updated fields" message — confirm this is intended.
        if krs_data.kapital_zakladowy:
            updates['kapital_zakladowy'] = krs_data.kapital_zakladowy
        # Update verification timestamp
        company.krs_verified_at = datetime.utcnow()
        db.commit()
        return jsonify({
            'success': True,
            'company_id': company_id,
            'updates': updates,
            'krs_data': krs_data.to_dict(),
            'message': f'Zaktualizowano {len(updates)} pól' if updates else 'Dane są aktualne'
        })
    except Exception as e:
        db.rollback()
        # Log before answering so the failure is visible server-side, like the
        # other enrichment routes in this module.
        logger.error(f"KRS refresh error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas aktualizacji: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# REGISTRY ENRICHMENT API ROUTE
# ============================================================
@bp.route('/company/<int:company_id>/enrich-registry', methods=['POST'])
@login_required
def api_enrich_company_registry(company_id):
    """
    API: Enrich company data from official registries (KRS, CEIDG, Biała Lista VAT).
    Only accessible by administrators.

    Lookup order:
    1. The company has (or inherits from its parent) a KRS number: enrich
       directly from KRS.
    2. Otherwise, with a NIP: ask Biała Lista VAT for a KRS number and enrich
       from KRS when one is found.
    3. Otherwise fall back to CEIDG by NIP and copy identifiers, owner, PKD
       codes, addresses and (only when empty) contact data.

    After a successful enrichment the data quality score is recalculated.

    Returns:
        200 with ``message``, ``source`` and ``details``; 403 for non-admins;
        404 when the company or registry data is missing; 400 when neither NIP
        nor KRS is available; 500 (after rollback) on any other error.
    """
    if not current_user.is_admin:
        return jsonify({
            'success': False,
            'error': 'Tylko administrator może pobierać dane z rejestrów'
        }), 403
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({'success': False, 'error': 'Firma nie znaleziona'}), 404
        # Accumulators for the final response.
        source = None
        message_parts = []
        details = {}
        # For child brands (parent_company_id set, no own NIP/KRS) — inherit from parent
        effective_nip = company.nip
        effective_krs = company.krs
        if not effective_nip and not effective_krs and company.parent_company_id:
            parent = db.query(Company).filter_by(id=company.parent_company_id).first()
            if parent:
                effective_nip = parent.nip
                effective_krs = parent.krs
        # Strategy 1: Company has KRS — fetch directly
        # An inherited KRS number is first copied onto the child record itself.
        if effective_krs and not company.krs:
            company.krs = effective_krs
            db.flush()
        if company.krs:
            # Imported here rather than at module level (presumably to avoid a
            # circular import between blueprints — TODO confirm).
            from blueprints.admin.routes_membership import _enrich_company_from_krs
            success = _enrich_company_from_krs(company, db)
            if success:
                source = 'KRS'
                # Count imported data
                # NOTE: these are totals currently stored for the company, not
                # only the rows added by this call.
                people_count = db.query(CompanyPerson).filter_by(company_id=company.id).count()
                pkd_count = db.query(CompanyPKD).filter_by(company_id=company.id).count()
                details = {
                    'people_imported': people_count,
                    'pkd_codes': pkd_count,
                    'legal_form': company.legal_form or ''
                }
                message_parts.append(f'zarząd ({people_count} osób)')
                message_parts.append(f'kody PKD ({pkd_count})')
                if company.legal_form:
                    message_parts.append(f'forma prawna: {company.legal_form}')
            else:
                return jsonify({
                    'success': False,
                    'error': f'Nie udało się pobrać danych z KRS dla numeru {company.krs}'
                }), 404
        # Strategy 2: No KRS but has NIP — try Biała Lista to find KRS
        elif effective_nip:
            krs_service = krs_api_service.KRSApiService()
            biala_lista_result = krs_service.search_by_nip(effective_nip)
            if biala_lista_result and biala_lista_result.get('krs'):
                # Found KRS via Biała Lista — save it and enrich
                company.krs = biala_lista_result['krs']
                db.flush()
                from blueprints.admin.routes_membership import _enrich_company_from_krs
                success = _enrich_company_from_krs(company, db)
                if success:
                    source = 'KRS (via Biała Lista VAT)'
                    people_count = db.query(CompanyPerson).filter_by(company_id=company.id).count()
                    pkd_count = db.query(CompanyPKD).filter_by(company_id=company.id).count()
                    details = {
                        'krs_found': company.krs,
                        'people_imported': people_count,
                        'pkd_codes': pkd_count,
                        'legal_form': company.legal_form or ''
                    }
                    message_parts.append(f'znaleziono KRS: {company.krs}')
                    message_parts.append(f'zarząd ({people_count} osób)')
                    message_parts.append(f'kody PKD ({pkd_count})')
                else:
                    # The KRS number is kept even though its details could not
                    # be fetched; the response still reports success.
                    source = 'Biała Lista VAT'
                    details = {'krs_found': company.krs}
                    message_parts.append(f'znaleziono KRS: {company.krs} (dane KRS niedostępne)')
            else:
                # No KRS found — try CEIDG (likely JDG)
                ceidg_data = fetch_ceidg_by_nip(effective_nip)
                if ceidg_data:
                    source = 'CEIDG'
                    # Human-readable labels of the fields changed below; they
                    # are echoed back in the response message.
                    updated_fields = []
                    # --- CEIDG identifiers & metadata ---
                    if ceidg_data.get('ceidg_id'):
                        company.ceidg_id = ceidg_data['ceidg_id']
                        updated_fields.append('CEIDG ID')
                    if ceidg_data.get('status'):
                        company.ceidg_status = ceidg_data['status']
                        updated_fields.append('status CEIDG')
                    company.ceidg_raw_data = ceidg_data.get('raw')
                    # NOTE(review): local time is used here while other routes
                    # in this module use datetime.utcnow() — confirm the
                    # intended timezone.
                    company.ceidg_fetched_at = datetime.now()
                    company.data_source = 'CEIDG API'
                    company.last_verified_at = datetime.now()
                    # --- Owner ---
                    # assumes 'wlasciciel' is a dict when present — TODO confirm
                    wlasciciel = ceidg_data.get('wlasciciel', {})
                    if wlasciciel.get('imie'):
                        company.owner_first_name = wlasciciel['imie']
                        updated_fields.append('właściciel')
                    if wlasciciel.get('nazwisko'):
                        company.owner_last_name = wlasciciel['nazwisko']
                    if ceidg_data.get('obywatelstwa'):
                        company.owner_citizenships = ceidg_data['obywatelstwa']
                    # --- Legal name ---
                    # Only filled when missing or still equal to the display name.
                    if ceidg_data.get('firma') and (not company.legal_name or company.legal_name == company.name):
                        company.legal_name = ceidg_data['firma']
                        updated_fields.append('nazwa pełna')
                    # --- REGON ---
                    if not company.regon:
                        regon = ceidg_data.get('regon') or wlasciciel.get('regon')
                        if regon:
                            company.regon = regon
                            updated_fields.append('REGON')
                    # --- Business start date ---
                    if ceidg_data.get('dataRozpoczecia'):
                        try:
                            d = ceidg_data['dataRozpoczecia']
                            if isinstance(d, str):
                                company.business_start_date = date.fromisoformat(d)
                                updated_fields.append('data rozpoczęcia')
                        except (ValueError, TypeError):
                            # Best effort: an unparsable date is skipped.
                            pass
                    # --- Legal form ---
                    # CEIDG lists sole proprietorships, hence this default.
                    if not company.legal_form:
                        company.legal_form = 'JEDNOOSOBOWA DZIAŁALNOŚĆ GOSPODARCZA'
                        updated_fields.append('forma prawna')
                    # --- PKD (main) ---
                    pkd_gl = ceidg_data.get('pkdGlowny', {})
                    if pkd_gl and pkd_gl.get('kod'):
                        company.pkd_code = pkd_gl['kod']
                        company.pkd_description = pkd_gl.get('nazwa')
                        updated_fields.append(f'PKD główny ({pkd_gl["kod"]})')
                    # --- PKD (full list) ---
                    pkd_lista = ceidg_data.get('pkd', [])
                    if pkd_lista:
                        company.ceidg_pkd_list = pkd_lista
                        # Create CompanyPKD records (same pattern as KRS enrichment)
                        pkd_main_code = pkd_gl.get('kod', '') if pkd_gl else ''
                        for pkd_item in pkd_lista:
                            kod = pkd_item.get('kod', '')
                            if not kod:
                                continue
                            # Skip codes already stored for this company.
                            existing_pkd = db.query(CompanyPKD).filter(
                                CompanyPKD.company_id == company.id,
                                CompanyPKD.pkd_code == kod
                            ).first()
                            if not existing_pkd:
                                db.add(CompanyPKD(
                                    company_id=company.id,
                                    pkd_code=kod,
                                    pkd_description=pkd_item.get('nazwa', ''),
                                    is_primary=(kod == pkd_main_code)
                                ))
                        updated_fields.append(f'lista PKD ({len(pkd_lista)} kodów)')
                    # --- Business address ---
                    adres = ceidg_data.get('adresDzialalnosci', {})
                    ulica = adres.get('ulica', '')
                    budynek = adres.get('budynek', '')
                    lokal = adres.get('lokal', '')
                    if ulica or budynek:
                        street_parts = [ulica, budynek]
                        if lokal:
                            # "building/unit", or just the unit when no
                            # building number is given.
                            street_parts[-1] = (budynek + '/' + lokal) if budynek else lokal
                        company.address_street = ' '.join(p for p in street_parts if p)
                        updated_fields.append('adres')
                    # Two alternative key names are accepted for postal code and city.
                    if adres.get('kod') or adres.get('kodPocztowy'):
                        company.address_postal = adres.get('kod') or adres.get('kodPocztowy')
                    if adres.get('miasto') or adres.get('miejscowosc'):
                        company.address_city = adres.get('miasto') or adres.get('miejscowosc')
                    if company.address_street and company.address_postal and company.address_city:
                        company.address_full = f'{company.address_street}, {company.address_postal} {company.address_city}'
                    # --- Correspondence address ---
                    koresp = ceidg_data.get('adresKorespondencyjny', {})
                    if koresp and (koresp.get('ulica') or koresp.get('budynek')):
                        k_ulica = koresp.get('ulica', '')
                        k_budynek = koresp.get('budynek', '')
                        k_lokal = koresp.get('lokal', '')
                        k_parts = [k_ulica, k_budynek]
                        if k_lokal:
                            k_parts[-1] = (k_budynek + '/' + k_lokal) if k_budynek else k_lokal
                        company.correspondence_street = ' '.join(p for p in k_parts if p)
                        # NOTE(review): unlike the business address above, no
                        # 'kodPocztowy' / 'miejscowosc' fallback is applied here
                        # and a missing key overwrites the field with None.
                        company.correspondence_postal = koresp.get('kod')
                        company.correspondence_city = koresp.get('miasto')
                        updated_fields.append('adres korespondencyjny')
                    # --- Contact (only if empty) ---
                    if ceidg_data.get('email') and not company.email:
                        company.email = ceidg_data['email']
                        updated_fields.append('email')
                    if ceidg_data.get('stronaWWW') and not company.website:
                        company.website = ceidg_data['stronaWWW']
                        updated_fields.append('strona www')
                    if ceidg_data.get('telefon') and not company.phone:
                        company.phone = ceidg_data['telefon']
                        updated_fields.append('telefon')
                    details = {'updated_fields': updated_fields}
                    message_parts.append(f'zaktualizowano {len(updated_fields)} pól')
                    if updated_fields:
                        message_parts.append(', '.join(updated_fields))
                else:
                    return jsonify({
                        'success': False,
                        'error': 'Nie znaleziono danych w rejestrach KRS ani CEIDG dla tego NIP'
                    }), 404
        else:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma numeru NIP ani KRS (również w firmie nadrzędnej) — nie można pobrać danych z rejestrów'
            }), 400
        # Persist the registry data first, then recompute and persist the
        # quality score in a second commit.
        db.commit()
        update_company_data_quality(company, db)
        db.commit()
        logger.info(f"Registry enrichment for company {company.id} ({company.name}) from {source} by {current_user.email}")
        return jsonify({
            'success': True,
            'message': f'Pobrano dane z rejestru {source}: {", ".join(message_parts)}',
            'source': source,
            'details': details
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Registry enrichment error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas pobierania danych: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# LOGO FETCH API ROUTE
# ============================================================
@bp.route('/company/<int:company_id>/fetch-logo', methods=['POST'])
@login_required
def api_fetch_company_logo(company_id):
    """
    API: Fetch company logo candidates from their website.

    The JSON body may carry an ``action``:
    - fetch (default): Download candidates, save as temp files
    - confirm: Save chosen candidate as final logo (by ``index``, default 0)
    - cancel: Delete all candidate temp files

    Requires login and either edit rights for the company or admin status.

    Returns:
        The service result for ``fetch`` (extended with ``has_existing_logo``
        and ``existing_logo_ext``), a success/message dict for ``confirm`` and
        ``cancel``; 404 for an unknown company, 403 without permission, 400
        when the company has no website, 500 on unexpected errors.
    """
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({'success': False, 'error': 'Firma nie znaleziona'}), 404
        if not current_user.can_edit_company(company.id) and not current_user.is_admin:
            return jsonify({
                'success': False,
                'error': 'Brak uprawnień do edycji profilu firmy'
            }), 403
        from logo_fetch_service import LogoFetchService
        service = LogoFetchService()
        data = request.get_json(silent=True) or {}
        action = data.get('action', 'fetch')
        if action == 'confirm':
            index = data.get('index', 0)
            ok = service.confirm_candidate(company.slug, index)
            # Only claim a confirmation in the log when one actually happened;
            # a falsy result means the candidate was not found.
            if ok:
                logger.info(f"Logo candidate #{index} confirmed for {company.name} by {current_user.email}")
            else:
                logger.warning(f"Logo candidate #{index} not found for {company.name} (requested by {current_user.email})")
            return jsonify({'success': ok, 'message': 'Logo zapisane' if ok else 'Nie znaleziono kandydata'})
        if action == 'cancel':
            service.cleanup_candidates(company.slug)
            return jsonify({'success': True, 'message': 'Anulowano'})
        # action == 'fetch' (any other value also lands here)
        if not company.website:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma ustawionej strony WWW'
            }), 400
        # Checked before fetching; the value is reported both as a flag
        # (is not None) and verbatim as 'existing_logo_ext'.
        has_logo = service.has_existing_logo(company.slug)
        result = service.fetch_candidates(company.website, company.slug)
        result['has_existing_logo'] = has_logo is not None
        result['existing_logo_ext'] = has_logo
        logger.info(
            f"Logo fetch for company {company.id} ({company.name}): "
            f"success={result['success']}, candidates={len(result.get('candidates', []))}, "
            f"has_existing={has_logo is not None}, by={current_user.email}"
        )
        return jsonify(result)
    except Exception as e:
        logger.error(f"Logo fetch error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas pobierania logo: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# AI ENRICHMENT HELPER FUNCTIONS
# ============================================================
def _brave_fetch_items(endpoint, headers, params, pick_items, company_name):
    """Query one Brave endpoint and return up to five raw result dicts.

    Returns ``None`` when the request fails, raises, or answers with a
    non-200 status, so the caller can tell "no answer" from "empty answer".
    """
    try:
        response = requests.get(endpoint, headers=headers, params=params, timeout=10)
        if response.status_code != 200:
            logger.warning(f"Brave search HTTP {response.status_code} from {endpoint} for '{company_name}'")
            return None
        items = pick_items(response.json()) or []
        # Ignore malformed entries instead of failing the whole search.
        return [item for item in items[:5] if isinstance(item, dict)]
    except Exception as e:
        # Best effort: enrichment continues without this source.
        logger.error(f"Brave search error for '{company_name}': {e}")
        return None


def _search_brave_for_company(company_name: str, city: str = None) -> dict:
    """
    Search Brave API for company information.

    The news and web endpoints are queried independently, so a failure of one
    does not discard the results of the other.

    Args:
        company_name: Name to search for; it is quoted for an exact match.
        city: Optional city appended to the query to narrow the results.

    Returns:
        dict with ``'news'`` (title, description, url, source) and ``'web'``
        (title, description, url) lists, each holding at most five entries.
        Both lists are empty when ``BRAVE_API_KEY`` is not configured.
    """
    brave_api_key = os.getenv('BRAVE_API_KEY')
    if not brave_api_key:
        logger.warning("BRAVE_API_KEY not configured, skipping web search")
        return {'news': [], 'web': []}
    results = {'news': [], 'web': []}
    # Build search query
    query = f'"{company_name}"'
    if city:
        query += f' {city}'
    headers = {
        'Accept': 'application/json',
        'X-Subscription-Token': brave_api_key
    }
    web_params = {
        'q': query,
        'count': 5,
        'country': 'pl',
        'search_lang': 'pl'
    }
    # Same query for news, restricted to the past year.
    news_params = {**web_params, 'freshness': 'py'}

    # Search news
    news_items = _brave_fetch_items(
        'https://api.search.brave.com/res/v1/news/search',
        headers,
        news_params,
        lambda payload: payload.get('results'),
        company_name
    )
    if news_items is not None:
        for item in news_items:
            results['news'].append({
                'title': item.get('title', ''),
                'description': item.get('description', ''),
                'url': item.get('url', ''),
                # 'meta_url' may be absent or null in the payload.
                'source': (item.get('meta_url') or {}).get('hostname', '')
            })
        logger.info(f"Brave News: found {len(results['news'])} items for '{company_name}'")

    # Search web
    web_items = _brave_fetch_items(
        'https://api.search.brave.com/res/v1/web/search',
        headers,
        web_params,
        lambda payload: (payload.get('web') or {}).get('results'),
        company_name
    )
    if web_items is not None:
        for item in web_items:
            results['web'].append({
                'title': item.get('title', ''),
                'description': item.get('description', ''),
                'url': item.get('url', '')
            })
        logger.info(f"Brave Web: found {len(results['web'])} items for '{company_name}'")
    return results
def _fetch_website_content(url: str) -> str:
"""
Fetch and extract text content from company website.
Returns first 3000 chars of text content.
"""
if not url:
return ''
try:
# Ensure URL has protocol
if not url.startswith('http'):
url = 'https://' + url
response = requests.get(url, timeout=10, headers={
'User-Agent': 'Mozilla/5.0 (compatible; NordaBizBot/1.0)'
})
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'html.parser')
# Remove scripts and styles
for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
tag.decompose()
# Get text content
text = soup.get_text(separator=' ', strip=True)
# Clean up whitespace
text = ' '.join(text.split())
logger.info(f"Fetched {len(text)} chars from {url}")
return text[:3000] # Limit to 3000 chars
except Exception as e:
logger.warning(f"Failed to fetch website content from {url}: {e}")
return ''
# ============================================================
# AI ENRICHMENT API ROUTE
# ============================================================
@bp.route('/company/<int:company_id>/enrich-ai', methods=['POST'])
@login_required
def api_enrich_company_ai(company_id):
    """
    API: Enrich company data using AI (Gemini) with web search.

    Process:
    1. Search Brave API for company news and web results
    2. Fetch content from company website
    3. Combine with existing database data
    4. Send to Gemini for AI-powered enrichment
    5. Store the answer as a pending AiEnrichmentProposal (nothing is applied
       to the company until the proposal is approved)

    Generates AI insights including:
    - Business summary
    - Services list
    - Target market
    - Unique selling points
    - Company values
    - Certifications
    - Industry tags

    Requires: login and ``current_user.can_edit_company`` for the company.
    NOTE(review): an earlier version of this docstring stated "Rate limited to
    5 requests per hour per user"; no limiter is applied inside this function —
    confirm it is enforced elsewhere.

    Returns:
        200 with the proposal id and proposed data; 404 for an unknown
        company; 403 without permission; 503 when the Gemini service is
        unavailable; 500 when the AI answer is not valid JSON or on any other
        error (after rollback).
    """
    db = SessionLocal()
    try:
        # Get company
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404
        # Check permissions: user with company edit rights
        logger.info(f"Permission check: user={current_user.email}, role={current_user.role}, user_company_id={current_user.company_id}, target_company_id={company.id}")
        if not current_user.can_edit_company(company.id):
            return jsonify({
                'success': False,
                'error': 'Brak uprawnien. Tylko administrator lub wlasciciel firmy moze wzbogacac dane.'
            }), 403
        # Get Gemini service
        service = gemini_service.get_gemini_service()
        if not service:
            return jsonify({
                'success': False,
                'error': 'Usluga AI jest niedostepna. Skontaktuj sie z administratorem.'
            }), 503
        logger.info(f"AI enrichment triggered by {current_user.email} for company: {company.name} (ID: {company.id})")
        # ============================================
        # STEP 1: Search the web for company info
        # ============================================
        brave_results = _search_brave_for_company(company.name, company.address_city)
        # Format news for prompt (top 3 items, descriptions cut to 200 chars)
        news_text = ""
        if brave_results['news']:
            news_text = "\n".join([
                f"- {item['title']}: {item['description'][:200]}"
                for item in brave_results['news'][:3]
            ])
        # Format web results for prompt (top 3 items, descriptions cut to 200 chars)
        web_text = ""
        if brave_results['web']:
            web_text = "\n".join([
                f"- {item['title']}: {item['description'][:200]}"
                for item in brave_results['web'][:3]
            ])
        # ============================================
        # STEP 2: Fetch company website content
        # ============================================
        website_content = ""
        if company.website:
            website_content = _fetch_website_content(company.website)
        # ============================================
        # STEP 3: Collect existing company data
        # ============================================
        # Prefer the related service records; fall back to the free-text
        # services_offered field when there are none.
        services_list = []
        if company.services:
            services_list = [cs.service.name for cs in company.services if cs.service]
        elif company.services_offered:
            services_list = [company.services_offered]
        competencies_list = []
        if company.competencies:
            competencies_list = [cc.competency.name for cc in company.competencies if cc.competency]
        # Keys are Polish because the values are interpolated into the Polish
        # prompt below.
        existing_data = {
            'nazwa': company.name,
            'opis_krotki': company.description_short or '',
            'opis_pelny': company.description_full or '',
            'kategoria': company.category.name if company.category else '',
            'uslugi': ', '.join(services_list) if services_list else '',
            'kompetencje': ', '.join(competencies_list) if competencies_list else '',
            'wartosci': company.core_values or '',
            'strona_www': company.website or '',
            'miasto': company.address_city or '',
            'branza': company.pkd_description or ''
        }
        # ============================================
        # STEP 4: Build comprehensive prompt for AI
        # ============================================
        # The prompt text is runtime data: it is kept flush-left so no
        # indentation leaks into the string. Website text is cut to 2000 chars.
        prompt = f"""Przeanalizuj wszystkie dostepne dane o polskiej firmie i wygeneruj wzbogacone informacje.
=== DANE Z BAZY DANYCH ===
Nazwa: {existing_data['nazwa']}
Kategoria: {existing_data['kategoria']}
Opis krotki: {existing_data['opis_krotki']}
Opis pelny: {existing_data['opis_pelny']}
Uslugi: {existing_data['uslugi']}
Kompetencje: {existing_data['kompetencje']}
Wartosci firmy: {existing_data['wartosci']}
Strona WWW: {existing_data['strona_www']}
Miasto: {existing_data['miasto']}
Branza (PKD): {existing_data['branza']}
=== INFORMACJE Z INTERNETU (Brave Search) ===
Newsy o firmie:
{news_text if news_text else '(brak znalezionych newsow)'}
Wyniki wyszukiwania:
{web_text if web_text else '(brak wynikow)'}
=== TRESC ZE STRONY WWW FIRMY ===
{website_content[:2000] if website_content else '(nie udalo sie pobrac tresci strony)'}
=== ZADANIE ===
Na podstawie WSZYSTKICH powyzszych danych (baza danych, wyszukiwarka, strona WWW) wygeneruj wzbogacone informacje o firmie.
Wykorzystaj informacje z internetu do uzupelnienia brakujacych danych.
Jesli znalazles nowe uslugi, certyfikaty lub informacje - dodaj je do odpowiedzi.
Odpowiedz WYLACZNIE w formacie JSON (bez dodatkowego tekstu):
{{
"business_summary": "Zwiezly opis dzialalnosci firmy (2-3 zdania) na podstawie wszystkich zrodel",
"services_list": ["usluga1", "usluga2", "usluga3", "usluga4", "usluga5"],
"target_market": "Opis grupy docelowej klientow",
"unique_selling_points": ["wyroznik1", "wyroznik2", "wyroznik3"],
"company_values": ["wartosc1", "wartosc2", "wartosc3"],
"certifications": ["certyfikat1", "certyfikat2"],
"industry_tags": ["tag1", "tag2", "tag3", "tag4", "tag5"],
"recent_news": "Krotkie podsumowanie ostatnich newsow o firmie (jesli sa)",
"suggested_category": "Sugerowana kategoria glowna",
"category_confidence": 0.85,
"data_sources_used": ["database", "brave_search", "website"]
}}
WAZNE:
- Odpowiedz TYLKO JSON, bez markdown, bez ```json
- Wszystkie teksty po polsku
- Listy powinny zawierac 3-5 elementow
- category_confidence to liczba od 0 do 1
- Wykorzystaj maksymalnie informacje z internetu
"""
        # Call Gemini API
        start_time = time.time()
        response_text = service.generate_text(
            prompt=prompt,
            temperature=0.7,
            feature='ai_enrichment',
            user_id=current_user.id,
            company_id=company.id,
            related_entity_type='company',
            related_entity_id=company.id
        )
        # Wall-clock duration of the AI call in milliseconds.
        processing_time = int((time.time() - start_time) * 1000)
        # Parse JSON response
        try:
            # Clean response - remove markdown code blocks if present
            clean_response = response_text.strip()
            if clean_response.startswith('```'):
                # Keep only the content between the first pair of fences.
                clean_response = clean_response.split('```')[1]
                # Drop the "json" language tag that follows the opening fence.
                if clean_response.startswith('json'):
                    clean_response = clean_response[4:]
            clean_response = clean_response.strip()
            ai_data = json.loads(clean_response)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse AI response: {e}\nResponse: {response_text[:500]}")
            return jsonify({
                'success': False,
                'error': 'Blad parsowania odpowiedzi AI. Sprobuj ponownie.'
            }), 500
        # Create AI enrichment PROPOSAL (requires approval before applying)
        # Instead of directly saving, we create a proposal that needs to be reviewed
        # Count sources used (needs to be before creating proposal)
        sources_used = ['database']
        if brave_results['news'] or brave_results['web']:
            sources_used.append('brave_search')
        if website_content:
            sources_used.append('website')
        # Check for existing pending proposals
        existing_pending = db.query(AiEnrichmentProposal).filter_by(
            company_id=company.id,
            status='pending'
        ).first()
        if existing_pending:
            # Update existing pending proposal so a company never has more
            # than one pending proposal created by this route.
            existing_pending.proposed_data = ai_data
            existing_pending.data_source = company.website
            # NOTE(review): the confidence is a fixed constant, not the
            # 'category_confidence' returned by the model — confirm intent.
            existing_pending.confidence_score = 0.85
            existing_pending.ai_explanation = f"AI przeanalizowało dane z {len(sources_used)} źródeł: {', '.join(sources_used)}"
            existing_pending.created_at = datetime.utcnow()
            existing_pending.expires_at = datetime.utcnow() + timedelta(days=30)
            proposal = existing_pending
        else:
            # Create new proposal (expires after 30 days)
            proposal = AiEnrichmentProposal(
                company_id=company.id,
                status='pending',
                proposal_type='ai_enrichment',
                data_source=company.website,
                proposed_data=ai_data,
                ai_explanation=f"AI przeanalizowało dane z {len(sources_used)} źródeł: {', '.join(sources_used)}",
                confidence_score=0.85,
                expires_at=datetime.utcnow() + timedelta(days=30)
            )
            db.add(proposal)
        db.commit()
        proposal_id = proposal.id
        logger.info(f"AI enrichment proposal created for {company.name}. Proposal ID: {proposal_id}. Sources: {sources_used}")
        return jsonify({
            'success': True,
            'message': f'Propozycja wzbogacenia danych dla "{company.name}" została utworzona i oczekuje na akceptację',
            'proposal_id': proposal_id,
            'status': 'pending',
            'processing_time_ms': processing_time,
            'sources_used': sources_used,
            'brave_results_count': len(brave_results['news']) + len(brave_results['web']),
            'website_content_length': len(website_content),
            'proposed_data': ai_data,
            'requires_approval': True
        })
    except Exception as e:
        db.rollback()
        logger.error(f"AI enrichment error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Blad podczas wzbogacania danych: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# AI ENRICHMENT PROPOSALS API ROUTES
# ============================================================
@bp.route('/company/<int:company_id>/proposals', methods=['GET'])
@login_required
def api_get_proposals(company_id):
    """
    API: List the AI enrichment proposals of a company, newest first.

    Proposals of every status are included. Requires login and
    ``current_user.can_edit_company`` for the company.

    Returns:
        200 with ``{'success': True, 'proposals': [...]}``; 404 when the
        company does not exist; 403 without edit rights.
    """
    def _iso_or_none(moment):
        # Timestamps are optional; serialize only the ones that are set.
        return moment.isoformat() if moment else None

    def _serialize(proposal):
        reviewer = proposal.reviewed_by
        score = proposal.confidence_score
        return {
            'id': proposal.id,
            'status': proposal.status,
            'proposal_type': proposal.proposal_type,
            'proposed_data': proposal.proposed_data,
            'ai_explanation': proposal.ai_explanation,
            'confidence_score': float(score) if score else None,
            'created_at': _iso_or_none(proposal.created_at),
            'reviewed_at': _iso_or_none(proposal.reviewed_at),
            'reviewed_by': reviewer.email if reviewer else None,
            'review_comment': proposal.review_comment,
            'approved_fields': proposal.approved_fields
        }

    session = SessionLocal()
    try:
        company = session.query(Company).filter_by(id=company_id).first()
        if company is None:
            return jsonify({'success': False, 'error': 'Firma nie istnieje'}), 404
        # Check permissions - user with company edit rights
        if not current_user.can_edit_company(company.id):
            return jsonify({'success': False, 'error': 'Brak uprawnien'}), 403
        newest_first = AiEnrichmentProposal.created_at.desc()
        rows = (
            session.query(AiEnrichmentProposal)
            .filter_by(company_id=company_id)
            .order_by(newest_first)
            .all()
        )
        return jsonify({
            'success': True,
            'proposals': [_serialize(row) for row in rows]
        })
    finally:
        session.close()
@bp.route('/company/<int:company_id>/proposals/<int:proposal_id>/approve', methods=['POST'])
@login_required
def api_approve_proposal(company_id, proposal_id):
    """
    API: Approve an AI enrichment proposal.

    The optional JSON body may contain:
        fields:  list of field names to approve (partial approval); when
                 missing or empty, every field of the proposal is approved.
        comment: free-text review comment stored on the proposal.

    Approved values are copied to the company's CompanyAIInsights row
    (created when it does not exist yet) and the proposal is marked
    as 'approved'.

    Args:
        company_id: Company primary key taken from the URL.
        proposal_id: Proposal primary key taken from the URL.

    Returns:
        JSON with ``success``, ``message`` and ``approved_fields`` on success;
        400 for an invalid body, invalid ``fields`` or a non-pending proposal;
        403 without edit rights; 404 for an unknown company/proposal;
        500 on any unexpected error (the transaction is rolled back).
    """
    # (field name, is list-valued). List-valued fields default to [] and
    # scalar fields to None when a new insights row is created without them.
    insight_fields = (
        ('business_summary', False),
        ('services_list', True),
        ('target_market', False),
        ('unique_selling_points', True),
        ('company_values', True),
        ('certifications', True),
        ('industry_tags', True),
        ('suggested_category', False),
    )

    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({'success': False, 'error': 'Firma nie istnieje'}), 404

        # Check permissions - user with company edit rights
        if not current_user.can_edit_company(company.id):
            return jsonify({'success': False, 'error': 'Brak uprawnien'}), 403

        proposal = db.query(AiEnrichmentProposal).filter_by(
            id=proposal_id,
            company_id=company_id
        ).first()
        if not proposal:
            return jsonify({'success': False, 'error': 'Propozycja nie istnieje'}), 404
        if proposal.status != 'pending':
            return jsonify({'success': False, 'error': f'Propozycja ma status: {proposal.status}'}), 400

        # silent=True: a missing or malformed JSON body must not raise an HTTP
        # exception that the blanket handler below would turn into a 500.
        data = request.get_json(silent=True)
        if data is None and not request.get_data():
            data = {}  # no body at all: full approval without a comment
        if not isinstance(data, dict):
            # A body was sent but it is not a JSON object. Refuse instead of
            # silently falling back to a full approval.
            return jsonify({'success': False, 'error': 'Nieprawidlowe dane JSON'}), 400

        approved_fields = data.get('fields')  # Optional: only approve specific fields
        comment = data.get('comment', '')
        ai_data = proposal.proposed_data or {}

        # Determine which fields to apply
        if approved_fields:
            # Partial approval. Require a real list of names: a plain string
            # would otherwise be matched by substring.
            if not isinstance(approved_fields, list) or not all(
                isinstance(name, str) for name in approved_fields
            ):
                return jsonify({'success': False, 'error': 'Pole "fields" musi byc lista nazw pol'}), 400
            # Only fields the proposal actually contains can be approved;
            # anything else would overwrite existing insights with empty values.
            fields_to_apply = [name for name in approved_fields if name in ai_data]
            if not fields_to_apply:
                return jsonify({'success': False, 'error': 'Zadne z wybranych pol nie wystepuje w propozycji'}), 400
        else:
            # Full approval - all fields
            fields_to_apply = list(ai_data.keys())

        selected = set(fields_to_apply)
        now = datetime.utcnow()

        # Apply to CompanyAIInsights
        existing_insights = db.query(CompanyAIInsights).filter_by(company_id=company.id).first()
        if existing_insights:
            # Update existing: touch only the approved fields
            for field, _is_list in insight_fields:
                if field in selected:
                    setattr(existing_insights, field, ai_data[field])
            existing_insights.ai_confidence_score = proposal.confidence_score
            existing_insights.analyzed_at = now
        else:
            # Create new: approved fields take the proposed value, the rest
            # start out empty
            values = {}
            for field, is_list in insight_fields:
                if field in selected:
                    values[field] = ai_data[field]
                else:
                    values[field] = [] if is_list else None
            new_insights = CompanyAIInsights(
                company_id=company.id,
                ai_confidence_score=proposal.confidence_score,
                analyzed_at=now,
                **values
            )
            db.add(new_insights)

        # Update proposal status
        proposal.status = 'approved'
        proposal.reviewed_at = now
        proposal.reviewed_by_id = current_user.id
        proposal.review_comment = comment
        proposal.approved_fields = fields_to_apply
        proposal.applied_at = now
        db.commit()

        logger.info(f"AI proposal {proposal_id} approved for company {company.name} by {current_user.email}")
        return jsonify({
            'success': True,
            'message': 'Propozycja została zaakceptowana i dane zastosowane do profilu',
            'approved_fields': fields_to_apply
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error approving proposal {proposal_id}: {str(e)}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/company/<int:company_id>/proposals/<int:proposal_id>/reject', methods=['POST'])
@login_required
def api_reject_proposal(company_id, proposal_id):
    """
    API: Reject an AI enrichment proposal.

    The optional JSON body may contain ``comment``, a free-text review
    comment stored on the proposal. A missing or unparsable body is
    treated as "no comment".

    Args:
        company_id: Company primary key taken from the URL.
        proposal_id: Proposal primary key taken from the URL.

    Returns:
        JSON with ``success`` and ``message`` on success;
        400 when the proposal is not pending; 403 without edit rights;
        404 for an unknown company/proposal;
        500 on any unexpected error (the transaction is rolled back).
    """
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({'success': False, 'error': 'Firma nie istnieje'}), 404

        # Check permissions - user with company edit rights
        if not current_user.can_edit_company(company.id):
            return jsonify({'success': False, 'error': 'Brak uprawnien'}), 403

        proposal = db.query(AiEnrichmentProposal).filter_by(
            id=proposal_id,
            company_id=company_id
        ).first()
        if not proposal:
            return jsonify({'success': False, 'error': 'Propozycja nie istnieje'}), 404
        if proposal.status != 'pending':
            return jsonify({'success': False, 'error': f'Propozycja ma status: {proposal.status}'}), 400

        # silent=True: the comment is optional, so a request without a JSON
        # body must not raise an HTTP exception that the blanket handler
        # below would report as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        comment = data.get('comment', '')

        # Update proposal status
        proposal.status = 'rejected'
        proposal.reviewed_at = datetime.utcnow()
        proposal.reviewed_by_id = current_user.id
        proposal.review_comment = comment
        db.commit()

        logger.info(f"AI proposal {proposal_id} rejected for company {company.name} by {current_user.email}")
        return jsonify({
            'success': True,
            'message': 'Propozycja została odrzucona'
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting proposal {proposal_id}: {str(e)}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
# ============================================================
# UTILITY API ROUTES
# ============================================================
@bp.route('/model-info', methods=['GET'])
def api_model_info():
    """
    API: Report which AI model is currently configured.

    Returns:
        JSON with ``model`` and ``provider`` when
        ``gemini_service.get_gemini_service()`` yields a service object,
        otherwise a 500 response with an error message.
    """
    gemini = gemini_service.get_gemini_service()

    # Guard clause: without a service object there is nothing to report.
    if not gemini:
        failure = {
            'success': False,
            'error': 'AI service not initialized'
        }
        return jsonify(failure), 500

    info = {
        'success': True,
        'model': gemini.model_name,
        'provider': 'Google Gemini'
    }
    return jsonify(info)
@bp.route('/admin/test-sanitization', methods=['POST'])
@login_required
def test_sanitization():
    """
    Admin API: Test sensitive data detection without saving.

    Allows admins to verify what data would be sanitized. Expects a JSON
    body ``{"text": "<string to check>"}``.

    Returns:
        JSON with the original text, the sanitized text and the list of
        detected matches; 400 when ``text`` is missing, empty or not a
        string; 403 for users without admin panel access; 500 when the
        sanitization service cannot be imported or fails unexpectedly.
    """
    if not current_user.can_access_admin_panel():
        return jsonify({'success': False, 'error': 'Admin access required'}), 403

    try:
        from sensitive_data_service import sanitize_message

        # silent=True: a missing or malformed JSON body should produce the
        # 400 below, not an exception that ends up as a 500.
        data = request.get_json(silent=True)
        text = data.get('text', '') if isinstance(data, dict) else ''
        if not text or not isinstance(text, str):
            return jsonify({'success': False, 'error': 'Text is required'}), 400

        sanitized, matches = sanitize_message(text)
        return jsonify({
            'success': True,
            'original': text,
            'sanitized': sanitized,
            'matches': [
                {
                    'type': m.data_type.value,
                    'original': m.original,
                    'masked': m.masked,
                    'confidence': m.confidence
                }
                for m in matches
            ],
            'has_sensitive_data': len(matches) > 0
        })
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Sensitive data service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error testing sanitization: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500