#!/usr/bin/env python3
"""
CEIDG API Service
==================

Service module for fetching company data from CEIDG (Centralna Ewidencja
i Informacja o Działalności Gospodarczej) using the official API v3 at
dane.biznes.gov.pl.

Two-phase fetching:
    Phase 1: GET /firmy?nip=X  → find company ID
    Phase 2: GET /firma/{id}   → full details (PKD list, correspondence
                                 address, etc.)
"""

import os
import logging
import requests
from typing import Optional, Dict, Any
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

# API Configuration
CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
CEIDG_TIMEOUT = 15  # seconds


def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: Fetch full company details from /firma/{id} endpoint.

    Args:
        firma_id: Company identifier interpolated into the request URL.
        headers: HTTP headers sent with the request.

    Returns:
        The decoded JSON body when the endpoint answers with HTTP 200,
        otherwise None. Failures are logged as warnings and never raised.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"
    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
        if response.status_code == 200:
            return response.json()
        logger.warning(
            "CEIDG detail endpoint returned %s for id=%s",
            response.status_code, firma_id,
        )
    except Exception as e:
        # Deliberately broad: the detail call is best-effort and the caller
        # falls back to the Phase 1 summary when it fails.
        logger.warning("CEIDG detail fetch failed for id=%s: %s", firma_id, e)
    return None


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _unwrap_firma_record(payload: Any) -> Optional[Dict[str, Any]]:
    """Extract a single company record from a detail payload.

    Accepts a flat record dict, a list of records, or a dict wrapping the
    records under a 'firma' list. Returns None when no non-empty dict
    record can be found.
    """
    # NOTE(review): the {'firma': [record]} wrapper is believed to be the
    # API v3 detail response shape - confirm against the official docs.
    # A flat record whose 'firma' value is not a list is returned untouched.
    if isinstance(payload, dict):
        wrapped = payload.get('firma')
        if not isinstance(wrapped, list):
            return payload or None
        payload = wrapped
    if isinstance(payload, list) and payload and isinstance(payload[0], dict):
        return payload[0] or None
    return None


def _normalize_address(adres: Dict[str, Any]) -> Dict[str, Any]:
    """Map an address dict onto the normalized street/building/city keys."""
    return {
        'ulica': adres.get('ulica'),
        'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
        'lokal': adres.get('lokal') or adres.get('nrLokalu'),
        'kod': adres.get('kodPocztowy') or adres.get('kod'),
        'miasto': adres.get('miejscowosc') or adres.get('miasto'),
    }


def _collect_pkd(firma: Dict[str, Any], pkd_glowny: Any) -> list:
    """Return the PKD list, rebuilding it from main + additional codes if needed."""
    pkd_lista = firma.get('pkd') or []
    if pkd_lista:
        return pkd_lista

    # Some responses use 'pkdPozostale' for additional PKD codes
    pkd_pozostale = firma.get('pkdPozostale') or []
    if not isinstance(pkd_pozostale, list):
        pkd_pozostale = []
    # Keep the additional codes even when there is no main code to prepend.
    return ([pkd_glowny] if pkd_glowny else []) + pkd_pozostale


def _build_result(firma: Dict[str, Any], nip: str, firma_id: Any) -> Dict[str, Any]:
    """Build the normalized result dict from a company record.

    Args:
        firma: Company record (detail record, or the Phase 1 summary).
        nip: Cleaned NIP, used when the record carries no 'nip' value.
        firma_id: Identifier used for the detail lookup, if any.
    """
    wlasciciel = _as_dict(firma.get('wlasciciel'))
    # Addresses may arrive as plain strings; only dicts are normalized.
    adres = _as_dict(firma.get('adresDzialalnosci'))
    adres_koresp = _as_dict(firma.get('adresKorespondencyjny'))
    zarzadca = _as_dict(firma.get('zarzadcaSukcesyjny'))
    pkd_glowny = firma.get('pkdGlowny') or {}
    pkd_lista = _collect_pkd(firma, pkd_glowny)

    adres_dzialalnosci = _normalize_address(adres)
    adres_dzialalnosci.update({
        'wojewodztwo': adres.get('wojewodztwo'),
        'powiat': adres.get('powiat'),
        'gmina': adres.get('gmina'),
    })

    return {
        # Identity
        'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
        'nip': firma.get('nip') or nip,
        'regon': firma.get('regon') or wlasciciel.get('regon'),
        'ceidg_id': firma_id or firma.get('id'),
        'status': firma.get('status'),
        # Owner
        'wlasciciel': {
            'imie': wlasciciel.get('imie'),
            'nazwisko': wlasciciel.get('nazwisko'),
        } if wlasciciel else {},
        'obywatelstwa': (
            firma.get('obywatelstwa', [])
            or wlasciciel.get('obywatelstwa', [])
            or []
        ),
        # Dates
        'dataRozpoczecia': (
            firma.get('dataRozpoczeciaDzialalnosci')
            or firma.get('dataWpisuDoCeidg')
        ),
        # Business address
        'adresDzialalnosci': adres_dzialalnosci,
        # Correspondence address
        'adresKorespondencyjny': _normalize_address(adres_koresp) if adres_koresp else {},
        # PKD
        'pkdGlowny': pkd_glowny,
        'pkd': pkd_lista,
        # Contact
        'email': firma.get('email') or firma.get('adresEmail'),
        'telefon': firma.get('telefon'),
        'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
        # Succession
        'zarzadcaSukcesyjny': {
            'imie': zarzadca.get('imie'),
            'nazwisko': zarzadca.get('nazwisko'),
        } if zarzadca.get('imie') or zarzadca.get('nazwisko') else None,
        # Electronic delivery address
        'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),
        # Raw API response for template access
        'raw': firma,
    }


def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
    """
    Fetch company data from CEIDG API by NIP (two-phase).

    Phase 1: Search by NIP via /firmy?nip=X
    Phase 2: Get full details via /firma/{id}

    Args:
        nip: NIP number; surrounding whitespace, dashes and spaces are
            stripped before validation (10 digits required).

    Returns:
        Normalized dict with all available CEIDG data, or None when the API
        key is missing, the NIP is invalid, the company is not found, or
        any request/processing error occurs (errors are logged, not raised).
    """
    if not CEIDG_API_KEY:
        logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
        return None

    # Clean NIP (non-string input is rejected instead of raising)
    if not isinstance(nip, str):
        logger.warning("Invalid NIP format: %s", nip)
        return None
    nip = nip.strip().replace('-', '').replace(' ', '')
    if len(nip) != 10 or not nip.isdigit():
        logger.warning("Invalid NIP format: %s", nip)
        return None

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }

    try:
        # --- Phase 1: Search by NIP ---
        logger.info("CEIDG Phase 1: searching for NIP %s", nip)
        response = requests.get(
            f"{CEIDG_API_BASE}/firmy",
            params={"nip": nip},
            headers=headers,
            timeout=CEIDG_TIMEOUT,
        )

        if response.status_code == 401:
            logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
            return None
        if response.status_code == 404:
            logger.info("NIP %s not found in CEIDG", nip)
            return None
        if response.status_code != 200:
            logger.error(
                "CEIDG API error: %s - %s",
                response.status_code, response.text[:200],
            )
            return None

        data = response.json()

        # Handle response format - can be list or dict with 'firmy' key
        if isinstance(data, list):
            if not data:
                logger.info("NIP %s not found in CEIDG (empty list)", nip)
                return None
            firma_summary = data[0]
        elif isinstance(data, dict):
            if 'firmy' in data:
                firmy = data.get('firmy') or []
                if not firmy:
                    logger.info("NIP %s not found in CEIDG", nip)
                    return None
                firma_summary = firmy[0]
            else:
                firma_summary = data
        else:
            logger.error("Unexpected CEIDG response format: %s", type(data))
            return None

        if not isinstance(firma_summary, dict):
            logger.error("Unexpected CEIDG response format: %s", type(firma_summary))
            return None

        # --- Phase 2: Fetch full details ---
        firma_id = firma_summary.get('id')
        if not firma_id:
            # Try extracting id from 'link' field; the value may be null.
            link = firma_summary.get('link') or ''
            if isinstance(link, str) and '/firma/' in link:
                firma_id = link.split('/firma/')[-1].strip('/')

        firma = firma_summary  # fallback if detail fetch fails
        if firma_id:
            logger.info("CEIDG Phase 2: fetching details for id=%s", firma_id)
            detail = _unwrap_firma_record(_fetch_firma_detail(firma_id, headers))
            if detail:
                firma = detail
                logger.info("CEIDG Phase 2: got full details for id=%s", firma_id)
            else:
                logger.info("CEIDG Phase 2: detail unavailable, using summary data")

        # --- Build normalized result ---
        result = _build_result(firma, nip, firma_id)
        logger.info(
            "CEIDG data found for NIP %s: %s (PKD codes: %s)",
            nip, result['firma'], len(result['pkd']),
        )
        return result

    except requests.exceptions.Timeout:
        logger.error("CEIDG API timeout for NIP %s", nip)
        return None
    except requests.exceptions.RequestException as e:
        logger.error("CEIDG API request error for NIP %s: %s", nip, e)
        return None
    except Exception as e:
        # Boundary handler: callers expect None rather than an exception.
        logger.error("Error fetching CEIDG data for NIP %s: %s", nip, e)
        return None


# For testing
if __name__ == '__main__':
    import sys
    import json

    if len(sys.argv) < 2:
        print("Usage: python ceidg_api_service.py <NIP>")
        print("Example: python ceidg_api_service.py 5881571773")
        sys.exit(1)

    nip = sys.argv[1]
    print(f"Pobieranie danych z CEIDG API dla NIP: {nip}")
    print("=" * 60)

    data = fetch_ceidg_by_nip(nip)
    if data:
        print(json.dumps(data, indent=2, ensure_ascii=False, default=str))
        print("=" * 60)
        pkd = data.get('pkd', [])
        print(f"PKD codes: {len(pkd)}")
        for p in pkd[:5]:
            # PKD entries are normally dicts; print anything else verbatim.
            if isinstance(p, dict):
                print(f"  - {p.get('kod', '?')}: {p.get('nazwa', '?')}")
            else:
                print(f"  - {p}")
        if len(pkd) > 5:
            print(f"  ... i {len(pkd) - 5} więcej")
    else:
        print(f"Nie znaleziono firmy o NIP {nip} w CEIDG")