From a67d069f81f3ec9d182d2ff2c4722ebc2d8aa087 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Wed, 18 Feb 2026 14:58:41 +0100 Subject: [PATCH] fix: Fix CEIDG enrichment - two-phase API fetch with correct field mapping CEIDG enrichment was broken due to key mismatches (expected adres_ulica but API returns adresDzialalnosci.ulica), writes to non-existent columns (address_building, address_postal_code), and missing saves for ceidg_id/status/owner/PKD fields. Now fetches full details via /firma/{id} endpoint (Phase 2) for complete data including PKD list, correspondence address, and succession manager. Co-Authored-By: Claude Opus 4.6 --- blueprints/api/routes_company.py | 120 +++++++++++++++++----- ceidg_api_service.py | 171 +++++++++++++++++++++++++------ 2 files changed, 236 insertions(+), 55 deletions(-) diff --git a/blueprints/api/routes_company.py b/blueprints/api/routes_company.py index f445f8f..95cd2bc 100644 --- a/blueprints/api/routes_company.py +++ b/blueprints/api/routes_company.py @@ -10,7 +10,7 @@ import logging import os import re import time -from datetime import datetime +from datetime import datetime, date import requests from bs4 import BeautifulSoup @@ -450,36 +450,110 @@ def api_enrich_company_registry(company_id): source = 'CEIDG' updated_fields = [] - if ceidg_data.get('nazwa') and not company.legal_name: - company.legal_name = ceidg_data['nazwa'] + # --- CEIDG identifiers & metadata --- + if ceidg_data.get('ceidg_id'): + company.ceidg_id = ceidg_data['ceidg_id'] + updated_fields.append('CEIDG ID') + if ceidg_data.get('status'): + company.ceidg_status = ceidg_data['status'] + updated_fields.append('status CEIDG') + company.ceidg_raw_data = ceidg_data.get('raw') + company.ceidg_fetched_at = datetime.now() + company.data_source = 'CEIDG API' + company.last_verified_at = datetime.now() + + # --- Owner --- + wlasciciel = ceidg_data.get('wlasciciel', {}) + if wlasciciel.get('imie'): + company.owner_first_name = wlasciciel['imie'] + updated_fields.append('właściciel') + if wlasciciel.get('nazwisko'): + company.owner_last_name = wlasciciel['nazwisko'] + if ceidg_data.get('obywatelstwa'): + company.owner_citizenships = ceidg_data['obywatelstwa'] + + # --- Legal name --- + if ceidg_data.get('firma') and (not company.legal_name or company.legal_name == company.name): + company.legal_name = ceidg_data['firma'] updated_fields.append('nazwa pełna') - if ceidg_data.get('adres_ulica'): - company.address_street = ceidg_data['adres_ulica'] - updated_fields.append('ulica') - if ceidg_data.get('adres_budynek'): - company.address_building = ceidg_data['adres_budynek'] - updated_fields.append('nr budynku') - if ceidg_data.get('adres_lokal'): - company.address_apartment = ceidg_data['adres_lokal'] - updated_fields.append('nr lokalu') - if ceidg_data.get('adres_kod'): - company.address_postal_code = ceidg_data['adres_kod'] - updated_fields.append('kod pocztowy') - if ceidg_data.get('adres_miasto'): - company.address_city = ceidg_data['adres_miasto'] - updated_fields.append('miasto') + + # --- REGON --- + if not company.regon: + regon = ceidg_data.get('regon') or wlasciciel.get('regon') + if regon: + company.regon = regon + updated_fields.append('REGON') + + # --- Business start date --- + if ceidg_data.get('dataRozpoczecia'): + try: + d = ceidg_data['dataRozpoczecia'] + if isinstance(d, str): + company.business_start_date = date.fromisoformat(d) + updated_fields.append('data rozpoczęcia') + except (ValueError, TypeError): + pass + + # --- Legal form --- + if not company.legal_form: + company.legal_form = 'JEDNOOSOBOWA DZIAŁALNOŚĆ GOSPODARCZA' + updated_fields.append('forma prawna') + + # --- PKD (main) --- + pkd_gl = ceidg_data.get('pkdGlowny', {}) + if pkd_gl and pkd_gl.get('kod'): + company.pkd_code = pkd_gl['kod'] + company.pkd_description = pkd_gl.get('nazwa') + updated_fields.append(f'PKD główny ({pkd_gl["kod"]})') + + # --- PKD (full list) --- + pkd_lista = ceidg_data.get('pkd', []) + if pkd_lista: + company.ceidg_pkd_list = pkd_lista + updated_fields.append(f'lista PKD ({len(pkd_lista)} kodów)') + + # --- Business address --- + adres = ceidg_data.get('adresDzialalnosci', {}) + ulica = adres.get('ulica', '') + budynek = adres.get('budynek', '') + lokal = adres.get('lokal', '') + if ulica or budynek: + street_parts = [ulica, budynek] + if lokal: + street_parts[-1] = (budynek + '/' + lokal) if budynek else lokal + company.address_street = ' '.join(p for p in street_parts if p) + updated_fields.append('adres') + if adres.get('kod') or adres.get('kodPocztowy'): + company.address_postal = adres.get('kod') or adres.get('kodPocztowy') + if adres.get('miasto') or adres.get('miejscowosc'): + company.address_city = adres.get('miasto') or adres.get('miejscowosc') + if company.address_street and company.address_postal and company.address_city: + company.address_full = f'{company.address_street}, {company.address_postal} {company.address_city}' + + # --- Correspondence address --- + koresp = ceidg_data.get('adresKorespondencyjny', {}) + if koresp and (koresp.get('ulica') or koresp.get('budynek')): + k_ulica = koresp.get('ulica', '') + k_budynek = koresp.get('budynek', '') + k_lokal = koresp.get('lokal', '') + k_parts = [k_ulica, k_budynek] + if k_lokal: + k_parts[-1] = (k_budynek + '/' + k_lokal) if k_budynek else k_lokal + company.correspondence_street = ' '.join(p for p in k_parts if p) + company.correspondence_postal = koresp.get('kod') + company.correspondence_city = koresp.get('miasto') + updated_fields.append('adres korespondencyjny') + + # --- Contact (only if empty) --- if ceidg_data.get('email') and not company.email: company.email = ceidg_data['email'] updated_fields.append('email') - if ceidg_data.get('www') and not company.website: - company.website = ceidg_data['www'] + if ceidg_data.get('stronaWWW') and not company.website: + company.website = ceidg_data['stronaWWW'] updated_fields.append('strona www') if ceidg_data.get('telefon') and not company.phone: company.phone = ceidg_data['telefon'] updated_fields.append('telefon') - if ceidg_data.get('regon') and not company.regon: - company.regon = ceidg_data['regon'] - updated_fields.append('REGON') details = {'updated_fields': updated_fields} message_parts.append(f'zaktualizowano {len(updated_fields)} pól') diff --git a/ceidg_api_service.py b/ceidg_api_service.py index a7136cf..30c1d0c 100644 --- a/ceidg_api_service.py +++ b/ceidg_api_service.py @@ -4,10 +4,12 @@ CEIDG API Service ================== Service module for fetching company data from CEIDG (Centralna Ewidencja -i Informacja o Działalności Gospodarczej) using the official API at +i Informacja o Działalności Gospodarczej) using the official API v3 at dane.biznes.gov.pl. -Provides fetch_ceidg_by_nip function for membership application workflow. +Two-phase fetching: + Phase 1: GET /firmy?nip=X → find company ID + Phase 2: GET /firma/{id} → full details (PKD list, correspondence address, etc.) """ import os @@ -22,20 +24,32 @@ load_dotenv() logger = logging.getLogger(__name__) # API Configuration -CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy" +CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3" CEIDG_API_KEY = os.getenv("CEIDG_API_KEY") CEIDG_TIMEOUT = 15 # seconds +def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]: + """Phase 2: Fetch full company details from /firma/{id} endpoint.""" + url = f"{CEIDG_API_BASE}/firma/{firma_id}" + try: + response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT) + if response.status_code == 200: + return response.json() + logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}") + except Exception as e: + logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}") + return None + + def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]: """ - Fetch company data from CEIDG API by NIP. + Fetch company data from CEIDG API by NIP (two-phase). - Args: - nip: NIP number (10 digits, no dashes) + Phase 1: Search by NIP via /firmy?nip=X + Phase 2: Get full details via /firma/{id} - Returns: - Dictionary with company data or None if not found + Returns normalized dict with all available CEIDG data, or None. """ if not CEIDG_API_KEY: logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled") @@ -53,10 +67,10 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]: } try: - logger.info(f"Fetching CEIDG data for NIP {nip}") - + # --- Phase 1: Search by NIP --- + logger.info(f"CEIDG Phase 1: searching for NIP {nip}") response = requests.get( - CEIDG_API_V3_URL, + f"{CEIDG_API_BASE}/firmy", params={"nip": nip}, headers=headers, timeout=CEIDG_TIMEOUT @@ -65,62 +79,148 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]: if response.status_code == 401: logger.error("CEIDG API authentication failed - check CEIDG_API_KEY") return None - if response.status_code == 404: logger.info(f"NIP {nip} not found in CEIDG") return None - if response.status_code != 200: logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}") return None data = response.json() - # Handle response format - can be list or dict + # Handle response format - can be list or dict with 'firmy' key if isinstance(data, list): if not data: logger.info(f"NIP {nip} not found in CEIDG (empty list)") return None - firma = data[0] + firma_summary = data[0] elif isinstance(data, dict): if 'firmy' in data: firmy = data.get('firmy', []) if not firmy: logger.info(f"NIP {nip} not found in CEIDG") return None - firma = firmy[0] + firma_summary = firmy[0] else: - firma = data + firma_summary = data else: logger.error(f"Unexpected CEIDG response format: {type(data)}") return None - # Extract address - adres = firma.get('adresDzialalnosci', {}) or firma.get('adres', {}) or {} - if isinstance(adres, str): - adres = {'full': adres} + # --- Phase 2: Fetch full details --- + firma_id = firma_summary.get('id') + firma = firma_summary # fallback if detail fetch fails + + if firma_id: + logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}") + detail = _fetch_firma_detail(firma_id, headers) + if detail: + firma = detail + logger.info(f"CEIDG Phase 2: got full details for id={firma_id}") + else: + logger.info(f"CEIDG Phase 2: detail unavailable, using summary data") + else: + # Try extracting id from 'link' field + link = firma_summary.get('link', '') + if '/firma/' in link: + firma_id = link.split('/firma/')[-1].strip('/') + detail = _fetch_firma_detail(firma_id, headers) + if detail: + firma = detail + + # --- Build normalized result --- + # Owner info + wlasciciel = firma.get('wlasciciel', {}) or {} + + # Address - handle both nested and flat formats + adres = firma.get('adresDzialalnosci', {}) or {} + if isinstance(adres, str): + adres = {} + + # Correspondence address + adres_koresp = firma.get('adresKorespondencyjny', {}) or {} + if isinstance(adres_koresp, str): + adres_koresp = {} + + # PKD - main and full list + pkd_glowny = firma.get('pkdGlowny', {}) or {} + pkd_lista = firma.get('pkd', []) or [] + # Some responses use 'pkdPozostale' for additional PKD codes + if not pkd_lista: + pkd_pozostale = firma.get('pkdPozostale', []) or [] + if pkd_glowny and pkd_pozostale: + pkd_lista = [pkd_glowny] + pkd_pozostale + elif pkd_glowny: + pkd_lista = [pkd_glowny] + + # Succession manager + zarzadca = firma.get('zarzadcaSukcesyjny', {}) or {} - # Build normalized result result = { - 'firma': firma.get('nazwa') or firma.get('nazwaSkrocona'), - 'nip': firma.get('nip'), - 'regon': firma.get('regon'), + # Identity + 'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'), + 'nip': firma.get('nip') or nip, + 'regon': firma.get('regon') or wlasciciel.get('regon'), + 'ceidg_id': firma_id or firma.get('id'), + 'status': firma.get('status'), + + # Owner + 'wlasciciel': { + 'imie': wlasciciel.get('imie'), + 'nazwisko': wlasciciel.get('nazwisko'), + } if wlasciciel else {}, + 'obywatelstwa': firma.get('obywatelstwa', []) or wlasciciel.get('obywatelstwa', []) or [], + + # Dates + 'dataRozpoczecia': ( + firma.get('dataRozpoczeciaDzialalnosci') + or firma.get('dataWpisuDoCeidg') + ), + + # Business address 'adresDzialalnosci': { - 'kodPocztowy': adres.get('kodPocztowy') or adres.get('kod'), - 'miejscowosc': adres.get('miejscowosc') or adres.get('miasto'), 'ulica': adres.get('ulica'), 'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'), 'lokal': adres.get('lokal') or adres.get('nrLokalu'), + 'kod': adres.get('kodPocztowy') or adres.get('kod'), + 'miasto': adres.get('miejscowosc') or adres.get('miasto'), + 'wojewodztwo': adres.get('wojewodztwo'), + 'powiat': adres.get('powiat'), + 'gmina': adres.get('gmina'), }, + + # Correspondence address + 'adresKorespondencyjny': { + 'ulica': adres_koresp.get('ulica'), + 'budynek': adres_koresp.get('budynek') or adres_koresp.get('nrDomu'), + 'lokal': adres_koresp.get('lokal') or adres_koresp.get('nrLokalu'), + 'kod': adres_koresp.get('kodPocztowy') or adres_koresp.get('kod'), + 'miasto': adres_koresp.get('miejscowosc') or adres_koresp.get('miasto'), + } if adres_koresp else {}, + + # PKD + 'pkdGlowny': pkd_glowny, + 'pkd': pkd_lista, + + # Contact 'email': firma.get('email') or firma.get('adresEmail'), - 'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'), 'telefon': firma.get('telefon'), - 'dataRozpoczeciaDzialalnosci': firma.get('dataRozpoczeciaDzialalnosci') or firma.get('dataWpisuDoCeidg'), - 'status': firma.get('status'), - 'raw': firma + 'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'), + + # Succession + 'zarzadcaSukcesyjny': { + 'imie': zarzadca.get('imie'), + 'nazwisko': zarzadca.get('nazwisko'), + } if zarzadca.get('imie') or zarzadca.get('nazwisko') else None, + + # Electronic delivery address + 'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'), + + # Raw API response for template access + 'raw': firma, } - logger.info(f"CEIDG data found for NIP {nip}: {result['firma']}") + logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(pkd_lista)})") return result except requests.exceptions.Timeout: @@ -152,5 +252,12 @@ if __name__ == '__main__': if data: print(json.dumps(data, indent=2, ensure_ascii=False, default=str)) + print("=" * 60) + pkd = data.get('pkd', []) + print(f"PKD codes: {len(pkd)}") + for p in pkd[:5]: + print(f" - {p.get('kod', '?')}: {p.get('nazwa', '?')}") + if len(pkd) > 5: + print(f" ... i {len(pkd) - 5} więcej") else: print(f"Nie znaleziono firmy o NIP {nip} w CEIDG")