fix: Fix CEIDG enrichment - two-phase API fetch with correct field mapping
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
CEIDG enrichment was broken due to key mismatches (expected adres_ulica but API
returns adresDzialalnosci.ulica), writes to non-existent columns (address_building,
address_postal_code), and missing saves for ceidg_id/status/owner/PKD fields.
Now fetches full details via /firma/{id} endpoint (Phase 2) for complete data
including PKD list, correspondence address, and succession manager.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
79de5521f3
commit
a67d069f81
@ -10,7 +10,7 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from datetime import datetime, date
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@ -450,36 +450,110 @@ def api_enrich_company_registry(company_id):
|
||||
source = 'CEIDG'
|
||||
updated_fields = []
|
||||
|
||||
if ceidg_data.get('nazwa') and not company.legal_name:
|
||||
company.legal_name = ceidg_data['nazwa']
|
||||
# --- CEIDG identifiers & metadata ---
|
||||
if ceidg_data.get('ceidg_id'):
|
||||
company.ceidg_id = ceidg_data['ceidg_id']
|
||||
updated_fields.append('CEIDG ID')
|
||||
if ceidg_data.get('status'):
|
||||
company.ceidg_status = ceidg_data['status']
|
||||
updated_fields.append('status CEIDG')
|
||||
company.ceidg_raw_data = ceidg_data.get('raw')
|
||||
company.ceidg_fetched_at = datetime.now()
|
||||
company.data_source = 'CEIDG API'
|
||||
company.last_verified_at = datetime.now()
|
||||
|
||||
# --- Owner ---
|
||||
wlasciciel = ceidg_data.get('wlasciciel', {})
|
||||
if wlasciciel.get('imie'):
|
||||
company.owner_first_name = wlasciciel['imie']
|
||||
updated_fields.append('właściciel')
|
||||
if wlasciciel.get('nazwisko'):
|
||||
company.owner_last_name = wlasciciel['nazwisko']
|
||||
if ceidg_data.get('obywatelstwa'):
|
||||
company.owner_citizenships = ceidg_data['obywatelstwa']
|
||||
|
||||
# --- Legal name ---
|
||||
if ceidg_data.get('firma') and (not company.legal_name or company.legal_name == company.name):
|
||||
company.legal_name = ceidg_data['firma']
|
||||
updated_fields.append('nazwa pełna')
|
||||
if ceidg_data.get('adres_ulica'):
|
||||
company.address_street = ceidg_data['adres_ulica']
|
||||
updated_fields.append('ulica')
|
||||
if ceidg_data.get('adres_budynek'):
|
||||
company.address_building = ceidg_data['adres_budynek']
|
||||
updated_fields.append('nr budynku')
|
||||
if ceidg_data.get('adres_lokal'):
|
||||
company.address_apartment = ceidg_data['adres_lokal']
|
||||
updated_fields.append('nr lokalu')
|
||||
if ceidg_data.get('adres_kod'):
|
||||
company.address_postal_code = ceidg_data['adres_kod']
|
||||
updated_fields.append('kod pocztowy')
|
||||
if ceidg_data.get('adres_miasto'):
|
||||
company.address_city = ceidg_data['adres_miasto']
|
||||
updated_fields.append('miasto')
|
||||
|
||||
# --- REGON ---
|
||||
if not company.regon:
|
||||
regon = ceidg_data.get('regon') or wlasciciel.get('regon')
|
||||
if regon:
|
||||
company.regon = regon
|
||||
updated_fields.append('REGON')
|
||||
|
||||
# --- Business start date ---
|
||||
if ceidg_data.get('dataRozpoczecia'):
|
||||
try:
|
||||
d = ceidg_data['dataRozpoczecia']
|
||||
if isinstance(d, str):
|
||||
company.business_start_date = date.fromisoformat(d)
|
||||
updated_fields.append('data rozpoczęcia')
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# --- Legal form ---
|
||||
if not company.legal_form:
|
||||
company.legal_form = 'JEDNOOSOBOWA DZIAŁALNOŚĆ GOSPODARCZA'
|
||||
updated_fields.append('forma prawna')
|
||||
|
||||
# --- PKD (main) ---
|
||||
pkd_gl = ceidg_data.get('pkdGlowny', {})
|
||||
if pkd_gl and pkd_gl.get('kod'):
|
||||
company.pkd_code = pkd_gl['kod']
|
||||
company.pkd_description = pkd_gl.get('nazwa')
|
||||
updated_fields.append(f'PKD główny ({pkd_gl["kod"]})')
|
||||
|
||||
# --- PKD (full list) ---
|
||||
pkd_lista = ceidg_data.get('pkd', [])
|
||||
if pkd_lista:
|
||||
company.ceidg_pkd_list = pkd_lista
|
||||
updated_fields.append(f'lista PKD ({len(pkd_lista)} kodów)')
|
||||
|
||||
# --- Business address ---
|
||||
adres = ceidg_data.get('adresDzialalnosci', {})
|
||||
ulica = adres.get('ulica', '')
|
||||
budynek = adres.get('budynek', '')
|
||||
lokal = adres.get('lokal', '')
|
||||
if ulica or budynek:
|
||||
street_parts = [ulica, budynek]
|
||||
if lokal:
|
||||
street_parts[-1] = (budynek + '/' + lokal) if budynek else lokal
|
||||
company.address_street = ' '.join(p for p in street_parts if p)
|
||||
updated_fields.append('adres')
|
||||
if adres.get('kod') or adres.get('kodPocztowy'):
|
||||
company.address_postal = adres.get('kod') or adres.get('kodPocztowy')
|
||||
if adres.get('miasto') or adres.get('miejscowosc'):
|
||||
company.address_city = adres.get('miasto') or adres.get('miejscowosc')
|
||||
if company.address_street and company.address_postal and company.address_city:
|
||||
company.address_full = f'{company.address_street}, {company.address_postal} {company.address_city}'
|
||||
|
||||
# --- Correspondence address ---
|
||||
koresp = ceidg_data.get('adresKorespondencyjny', {})
|
||||
if koresp and (koresp.get('ulica') or koresp.get('budynek')):
|
||||
k_ulica = koresp.get('ulica', '')
|
||||
k_budynek = koresp.get('budynek', '')
|
||||
k_lokal = koresp.get('lokal', '')
|
||||
k_parts = [k_ulica, k_budynek]
|
||||
if k_lokal:
|
||||
k_parts[-1] = (k_budynek + '/' + k_lokal) if k_budynek else k_lokal
|
||||
company.correspondence_street = ' '.join(p for p in k_parts if p)
|
||||
company.correspondence_postal = koresp.get('kod')
|
||||
company.correspondence_city = koresp.get('miasto')
|
||||
updated_fields.append('adres korespondencyjny')
|
||||
|
||||
# --- Contact (only if empty) ---
|
||||
if ceidg_data.get('email') and not company.email:
|
||||
company.email = ceidg_data['email']
|
||||
updated_fields.append('email')
|
||||
if ceidg_data.get('www') and not company.website:
|
||||
company.website = ceidg_data['www']
|
||||
if ceidg_data.get('stronaWWW') and not company.website:
|
||||
company.website = ceidg_data['stronaWWW']
|
||||
updated_fields.append('strona www')
|
||||
if ceidg_data.get('telefon') and not company.phone:
|
||||
company.phone = ceidg_data['telefon']
|
||||
updated_fields.append('telefon')
|
||||
if ceidg_data.get('regon') and not company.regon:
|
||||
company.regon = ceidg_data['regon']
|
||||
updated_fields.append('REGON')
|
||||
|
||||
details = {'updated_fields': updated_fields}
|
||||
message_parts.append(f'zaktualizowano {len(updated_fields)} pól')
|
||||
|
||||
@ -4,10 +4,12 @@ CEIDG API Service
|
||||
==================
|
||||
|
||||
Service module for fetching company data from CEIDG (Centralna Ewidencja
|
||||
i Informacja o Działalności Gospodarczej) using the official API at
|
||||
i Informacja o Działalności Gospodarczej) using the official API v3 at
|
||||
dane.biznes.gov.pl.
|
||||
|
||||
Provides fetch_ceidg_by_nip function for membership application workflow.
|
||||
Two-phase fetching:
|
||||
Phase 1: GET /firmy?nip=X → find company ID
|
||||
Phase 2: GET /firma/{id} → full details (PKD list, correspondence address, etc.)
|
||||
"""
|
||||
|
||||
import os
|
||||
@ -22,20 +24,32 @@ load_dotenv()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# API Configuration
|
||||
CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy"
|
||||
CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
|
||||
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
|
||||
CEIDG_TIMEOUT = 15 # seconds
|
||||
|
||||
|
||||
def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: fetch the full company record from the ``/firma/{id}`` endpoint.

    This is a best-effort call: every failure is logged as a warning and
    reported as ``None`` so the caller can keep using the Phase 1 summary.

    Args:
        firma_id: CEIDG identifier of the company to look up.
        headers: HTTP headers sent with the request (supplied by the caller).

    Returns:
        The decoded JSON object when the endpoint answers HTTP 200 with a
        JSON object; ``None`` on a transport error, a non-200 status, an
        undecodable body, or a body that is not a JSON object.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"

    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
    except requests.exceptions.RequestException as e:
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    if response.status_code != 200:
        logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}")
        return None

    try:
        payload = response.json()
    except ValueError as e:
        # Covers both the stdlib and the requests JSON decode errors.
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    # Callers treat the result as a mapping (``.get(...)``); anything else is
    # rejected here so they fall back to the summary instead of crashing.
    if not isinstance(payload, dict):
        logger.warning(
            f"CEIDG detail endpoint returned unexpected payload type "
            f"{type(payload).__name__} for id={firma_id}"
        )
        return None

    return payload
|
||||
|
||||
|
||||
def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Fetch company data from CEIDG API by NIP.
|
||||
Fetch company data from CEIDG API by NIP (two-phase).
|
||||
|
||||
Args:
|
||||
nip: NIP number (10 digits, no dashes)
|
||||
Phase 1: Search by NIP via /firmy?nip=X
|
||||
Phase 2: Get full details via /firma/{id}
|
||||
|
||||
Returns:
|
||||
Dictionary with company data or None if not found
|
||||
Returns normalized dict with all available CEIDG data, or None.
|
||||
"""
|
||||
if not CEIDG_API_KEY:
|
||||
logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
|
||||
@ -53,10 +67,10 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
|
||||
}
|
||||
|
||||
try:
|
||||
logger.info(f"Fetching CEIDG data for NIP {nip}")
|
||||
|
||||
# --- Phase 1: Search by NIP ---
|
||||
logger.info(f"CEIDG Phase 1: searching for NIP {nip}")
|
||||
response = requests.get(
|
||||
CEIDG_API_V3_URL,
|
||||
f"{CEIDG_API_BASE}/firmy",
|
||||
params={"nip": nip},
|
||||
headers=headers,
|
||||
timeout=CEIDG_TIMEOUT
|
||||
@ -65,62 +79,148 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
|
||||
if response.status_code == 401:
|
||||
logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
|
||||
return None
|
||||
|
||||
if response.status_code == 404:
|
||||
logger.info(f"NIP {nip} not found in CEIDG")
|
||||
return None
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}")
|
||||
return None
|
||||
|
||||
data = response.json()
|
||||
|
||||
# Handle response format - can be list or dict
|
||||
# Handle response format - can be list or dict with 'firmy' key
|
||||
if isinstance(data, list):
|
||||
if not data:
|
||||
logger.info(f"NIP {nip} not found in CEIDG (empty list)")
|
||||
return None
|
||||
firma = data[0]
|
||||
firma_summary = data[0]
|
||||
elif isinstance(data, dict):
|
||||
if 'firmy' in data:
|
||||
firmy = data.get('firmy', [])
|
||||
if not firmy:
|
||||
logger.info(f"NIP {nip} not found in CEIDG")
|
||||
return None
|
||||
firma = firmy[0]
|
||||
firma_summary = firmy[0]
|
||||
else:
|
||||
firma = data
|
||||
firma_summary = data
|
||||
else:
|
||||
logger.error(f"Unexpected CEIDG response format: {type(data)}")
|
||||
return None
|
||||
|
||||
# Extract address
|
||||
adres = firma.get('adresDzialalnosci', {}) or firma.get('adres', {}) or {}
|
||||
if isinstance(adres, str):
|
||||
adres = {'full': adres}
|
||||
# --- Phase 2: Fetch full details ---
|
||||
firma_id = firma_summary.get('id')
|
||||
firma = firma_summary # fallback if detail fetch fails
|
||||
|
||||
if firma_id:
|
||||
logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}")
|
||||
detail = _fetch_firma_detail(firma_id, headers)
|
||||
if detail:
|
||||
firma = detail
|
||||
logger.info(f"CEIDG Phase 2: got full details for id={firma_id}")
|
||||
else:
|
||||
logger.info(f"CEIDG Phase 2: detail unavailable, using summary data")
|
||||
else:
|
||||
# Try extracting id from 'link' field
|
||||
link = firma_summary.get('link', '')
|
||||
if '/firma/' in link:
|
||||
firma_id = link.split('/firma/')[-1].strip('/')
|
||||
detail = _fetch_firma_detail(firma_id, headers)
|
||||
if detail:
|
||||
firma = detail
|
||||
|
||||
# --- Build normalized result ---
|
||||
# Owner info
|
||||
wlasciciel = firma.get('wlasciciel', {}) or {}
|
||||
|
||||
# Address - handle both nested and flat formats
|
||||
adres = firma.get('adresDzialalnosci', {}) or {}
|
||||
if isinstance(adres, str):
|
||||
adres = {}
|
||||
|
||||
# Correspondence address
|
||||
adres_koresp = firma.get('adresKorespondencyjny', {}) or {}
|
||||
if isinstance(adres_koresp, str):
|
||||
adres_koresp = {}
|
||||
|
||||
# PKD - main and full list
|
||||
pkd_glowny = firma.get('pkdGlowny', {}) or {}
|
||||
pkd_lista = firma.get('pkd', []) or []
|
||||
# Some responses use 'pkdPozostale' for additional PKD codes
|
||||
if not pkd_lista:
|
||||
pkd_pozostale = firma.get('pkdPozostale', []) or []
|
||||
if pkd_glowny and pkd_pozostale:
|
||||
pkd_lista = [pkd_glowny] + pkd_pozostale
|
||||
elif pkd_glowny:
|
||||
pkd_lista = [pkd_glowny]
|
||||
|
||||
# Succession manager
|
||||
zarzadca = firma.get('zarzadcaSukcesyjny', {}) or {}
|
||||
|
||||
# Build normalized result
|
||||
result = {
|
||||
'firma': firma.get('nazwa') or firma.get('nazwaSkrocona'),
|
||||
'nip': firma.get('nip'),
|
||||
'regon': firma.get('regon'),
|
||||
# Identity
|
||||
'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
|
||||
'nip': firma.get('nip') or nip,
|
||||
'regon': firma.get('regon') or wlasciciel.get('regon'),
|
||||
'ceidg_id': firma_id or firma.get('id'),
|
||||
'status': firma.get('status'),
|
||||
|
||||
# Owner
|
||||
'wlasciciel': {
|
||||
'imie': wlasciciel.get('imie'),
|
||||
'nazwisko': wlasciciel.get('nazwisko'),
|
||||
} if wlasciciel else {},
|
||||
'obywatelstwa': firma.get('obywatelstwa', []) or wlasciciel.get('obywatelstwa', []) or [],
|
||||
|
||||
# Dates
|
||||
'dataRozpoczecia': (
|
||||
firma.get('dataRozpoczeciaDzialalnosci')
|
||||
or firma.get('dataWpisuDoCeidg')
|
||||
),
|
||||
|
||||
# Business address
|
||||
'adresDzialalnosci': {
|
||||
'kodPocztowy': adres.get('kodPocztowy') or adres.get('kod'),
|
||||
'miejscowosc': adres.get('miejscowosc') or adres.get('miasto'),
|
||||
'ulica': adres.get('ulica'),
|
||||
'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
|
||||
'lokal': adres.get('lokal') or adres.get('nrLokalu'),
|
||||
'kod': adres.get('kodPocztowy') or adres.get('kod'),
|
||||
'miasto': adres.get('miejscowosc') or adres.get('miasto'),
|
||||
'wojewodztwo': adres.get('wojewodztwo'),
|
||||
'powiat': adres.get('powiat'),
|
||||
'gmina': adres.get('gmina'),
|
||||
},
|
||||
|
||||
# Correspondence address
|
||||
'adresKorespondencyjny': {
|
||||
'ulica': adres_koresp.get('ulica'),
|
||||
'budynek': adres_koresp.get('budynek') or adres_koresp.get('nrDomu'),
|
||||
'lokal': adres_koresp.get('lokal') or adres_koresp.get('nrLokalu'),
|
||||
'kod': adres_koresp.get('kodPocztowy') or adres_koresp.get('kod'),
|
||||
'miasto': adres_koresp.get('miejscowosc') or adres_koresp.get('miasto'),
|
||||
} if adres_koresp else {},
|
||||
|
||||
# PKD
|
||||
'pkdGlowny': pkd_glowny,
|
||||
'pkd': pkd_lista,
|
||||
|
||||
# Contact
|
||||
'email': firma.get('email') or firma.get('adresEmail'),
|
||||
'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
|
||||
'telefon': firma.get('telefon'),
|
||||
'dataRozpoczeciaDzialalnosci': firma.get('dataRozpoczeciaDzialalnosci') or firma.get('dataWpisuDoCeidg'),
|
||||
'status': firma.get('status'),
|
||||
'raw': firma
|
||||
'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
|
||||
|
||||
# Succession
|
||||
'zarzadcaSukcesyjny': {
|
||||
'imie': zarzadca.get('imie'),
|
||||
'nazwisko': zarzadca.get('nazwisko'),
|
||||
} if zarzadca.get('imie') or zarzadca.get('nazwisko') else None,
|
||||
|
||||
# Electronic delivery address
|
||||
'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),
|
||||
|
||||
# Raw API response for template access
|
||||
'raw': firma,
|
||||
}
|
||||
|
||||
logger.info(f"CEIDG data found for NIP {nip}: {result['firma']}")
|
||||
logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(pkd_lista)})")
|
||||
return result
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
@ -152,5 +252,12 @@ if __name__ == '__main__':
|
||||
|
||||
if data:
|
||||
print(json.dumps(data, indent=2, ensure_ascii=False, default=str))
|
||||
print("=" * 60)
|
||||
pkd = data.get('pkd', [])
|
||||
print(f"PKD codes: {len(pkd)}")
|
||||
for p in pkd[:5]:
|
||||
print(f" - {p.get('kod', '?')}: {p.get('nazwa', '?')}")
|
||||
if len(pkd) > 5:
|
||||
print(f" ... i {len(pkd) - 5} więcej")
|
||||
else:
|
||||
print(f"Nie znaleziono firmy o NIP {nip} w CEIDG")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user