nordabiz/scripts/fetch_ceidg_api.py
Maciej Pienczyn 3f9273cff6 feat: Add company logos to search results, hide events section
- Add company logo display in search results cards
- Make logo clickable (links to company profile)
- Temporarily hide "Aktualności i wydarzenia" section on company profiles
- Add scripts for KRS PDF download/parsing and CEIDG API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 15:32:53 +01:00

343 lines
10 KiB
Python

#!/usr/bin/env python3
"""
CEIDG API v3 Client - pobiera dane właścicieli JDG
Używa oficjalnego API CEIDG v3 (dane.biznes.gov.pl) do pobierania
danych o jednoosobowych działalnościach gospodarczych.
Usage:
python scripts/fetch_ceidg_api.py --nip 5881571773
python scripts/fetch_ceidg_api.py --all # wszystkie JDG z bazy
python scripts/fetch_ceidg_api.py --all --import # pobierz i importuj do bazy
"""
import os
import sys
import argparse
import json
import time
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass, asdict
from typing import Optional, List
import requests
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
# Load environment
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')
# API Configuration
# Endpoint queried by fetch_ceidg_data(); the NIP is passed as the ``nip``
# query parameter.
CEIDG_API_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firma"
# Token sent as a Bearer credential. os.getenv returns None when the variable
# is not set (the .env file next to the project root is loaded just above),
# in which case fetch_ceidg_data() refuses to call the API.
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
# Output directory for JSON cache
# Located at <parent of this script's directory>/data/ceidg_json; main() uses
# it as the default destination when no --output path is given.
JSON_OUTPUT_DIR = Path(__file__).parent.parent / "data" / "ceidg_json"
@dataclass
class CEIDGOwner:
    """Owner of a sole proprietorship (JDG) as returned by CEIDG.

    Field names stay in Polish because they are used verbatim as keys of the
    serialized JSON output.
    """

    # First name and surname of the owner.
    imie: str
    nazwisko: str
    # Tax identification number (NIP).
    nip: str
    # REGON number; empty string when not provided.
    regon: str = ""

    def to_dict(self):
        """Return the owner's fields as a plain ``dict``."""
        serialized = asdict(self)
        return serialized
@dataclass
class CEIDGData:
    """Company record assembled from a CEIDG API v3 response.

    Field names stay in Polish because they are used verbatim as keys of the
    serialized JSON output. Every field except ``id``, ``nazwa`` and ``nip``
    is optional and defaults to an empty value.
    """

    # Identifier of the entry, company name and tax number (NIP).
    id: str
    nazwa: str
    nip: str
    regon: str = ""
    # Owner details, or None when the response carried no owner section.
    wlasciciel: Optional[CEIDGOwner] = None
    # Business address: city, street line (street + building/unit), postal code.
    adres_miasto: str = ""
    adres_ulica: str = ""
    adres_kod: str = ""
    # Main PKD activity code and its description.
    pkd_glowny: str = ""
    pkd_opis: str = ""
    # Business start date and registry status, as returned by the API.
    data_rozpoczecia: str = ""
    status: str = ""
    # Data source label and the timestamp at which the record was fetched.
    zrodlo: str = "dane.biznes.gov.pl"
    pobrano: str = ""

    def to_dict(self):
        """Return the record as a plain ``dict`` with the owner nested as a dict."""
        payload = asdict(self)
        owner = self.wlasciciel
        if owner:
            payload['wlasciciel'] = owner.to_dict()
        return payload
def _as_mapping(value) -> dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _format_street_line(address: dict) -> str:
    """Join street, building and unit into one line, e.g. ``"Długa 5/2"``."""
    line = str(address.get("ulica") or "")
    building = address.get("budynek")
    unit = address.get("lokal")
    if building:
        # Keep the building number even when the record has no street name,
        # instead of collapsing the whole address line to an empty string.
        line = f"{line} {building}" if line else str(building)
    if unit and line:
        line += f"/{unit}"
    return line


def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
    """
    Fetch the CEIDG API v3 record for the given NIP.

    Sends a single GET request (30 s timeout) to ``CEIDG_API_URL`` with the
    NIP as a query parameter and ``CEIDG_API_KEY`` as a Bearer token, then
    maps the first entry of the ``firma`` list onto a ``CEIDGData`` object.
    Progress and problems are reported with ``print``; nothing is raised for
    network, HTTP or payload problems.

    Args:
        nip: Tax identification number to look up.

    Returns:
        The parsed ``CEIDGData``, or ``None`` when the API key is missing,
        the request fails, the HTTP status is not 200, the body is not valid
        JSON, or the response contains no company entry.
    """
    if not CEIDG_API_KEY:
        print(" [ERROR] Brak CEIDG_API_KEY w .env")
        return None

    print(f" [INFO] Pobieranie danych CEIDG dla NIP {nip}...")
    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }

    try:
        response = requests.get(
            CEIDG_API_URL,
            params={"nip": nip},
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" [ERROR] Błąd połączenia: {e}")
        return None

    if response.status_code == 204:
        print(f" [WARN] Brak danych w CEIDG dla NIP {nip}")
        return None
    if response.status_code == 401:
        print(" [ERROR] Błąd autoryzacji - sprawdź CEIDG_API_KEY")
        return None
    if response.status_code != 200:
        print(f" [ERROR] HTTP {response.status_code}: {response.text[:100]}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        # Every JSON decode error raised by requests (its own, the stdlib's
        # or simplejson's) derives from ValueError. Handling it separately
        # from RequestException keeps it from being reported as a
        # connection problem.
        print(f" [ERROR] Błąd parsowania JSON: {e}")
        return None

    companies = data.get("firma") if isinstance(data, dict) else None
    if (
        not isinstance(companies, list)
        or not companies
        or not isinstance(companies[0], dict)
    ):
        print(" [WARN] Brak danych firmy w odpowiedzi")
        return None
    firma = companies[0]

    # Parse owner data. A JSON null (or any non-object) is treated the same
    # as a missing owner section instead of crashing on ``.get``.
    owner = None
    owner_raw = firma.get("wlasciciel")
    if isinstance(owner_raw, dict):
        owner = CEIDGOwner(
            imie=owner_raw.get("imie") or "",
            nazwisko=owner_raw.get("nazwisko") or "",
            nip=owner_raw.get("nip") or nip,
            regon=owner_raw.get("regon") or "",
        )

    # Parse address and PKD; both sections tolerate null / missing values.
    adres = _as_mapping(firma.get("adresDzialalnosci"))
    pkd_glowny = _as_mapping(firma.get("pkdGlowny"))

    ceidg_data = CEIDGData(
        id=firma.get("id") or "",
        nazwa=firma.get("nazwa") or "",
        nip=nip,
        regon=owner.regon if owner else "",
        wlasciciel=owner,
        adres_miasto=adres.get("miasto") or "",
        adres_ulica=_format_street_line(adres),
        adres_kod=adres.get("kod") or "",
        pkd_glowny=pkd_glowny.get("kod") or "",
        pkd_opis=pkd_glowny.get("nazwa") or "",
        data_rozpoczecia=firma.get("dataRozpoczecia") or "",
        status=firma.get("status") or "",
        pobrano=datetime.now().isoformat(),
    )

    if owner:
        print(f" [OK] {owner.imie} {owner.nazwisko} ({ceidg_data.status})")
    else:
        print(f" [OK] {ceidg_data.nazwa} ({ceidg_data.status})")
    return ceidg_data
def import_to_database(results: List[CEIDGData]) -> dict:
    """
    Import JDG owner data into the database.

    For every record that has an owner, the company is looked up by NIP, the
    person is looked up (or created) by first name and surname, and a
    ``wlasciciel_jdg`` link between them is created or refreshed. All changes
    are committed once, after the whole list has been processed; any
    exception rolls the whole batch back.

    NOTE(review): persons are matched by name only, so two different owners
    sharing the same first name and surname end up on the same Person row.

    Args:
        results: Records previously returned by ``fetch_ceidg_data``.

    Returns:
        dict with the counters ``imported``, ``updated``, ``skipped`` and
        ``errors``. After a rollback ``imported`` and ``updated`` are 0.
    """
    from database import SessionLocal, Company, Person, CompanyPerson

    db = SessionLocal()
    stats = {"imported": 0, "updated": 0, "skipped": 0, "errors": 0}
    try:
        for data in results:
            owner = data.wlasciciel
            # The person lookup below keys on both name parts; an owner with
            # a blank name would match (or create) a meaningless Person row.
            if not owner or not (owner.imie and owner.nazwisko):
                stats["skipped"] += 1
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == data.nip).first()
            if not company:
                print(f" [SKIP] Firma z NIP {data.nip} nie istnieje w bazie")
                stats["skipped"] += 1
                continue

            # Find or create person (by name since JDG owners don't have PESEL in API)
            person = db.query(Person).filter(
                Person.nazwisko == owner.nazwisko,
                Person.imiona == owner.imie
            ).first()
            if not person:
                person = Person(
                    imiona=owner.imie,
                    nazwisko=owner.nazwisko,
                    pesel=None  # CEIDG API doesn't return PESEL
                )
                db.add(person)
                # Flush so the new person gets its id before it is referenced.
                db.flush()
                print(f" [NEW] Utworzono osobę: {owner.imie} {owner.nazwisko}")

            # Check if relationship already exists
            existing = db.query(CompanyPerson).filter(
                CompanyPerson.company_id == company.id,
                CompanyPerson.person_id == person.id,
                CompanyPerson.role_category == "wlasciciel_jdg"
            ).first()
            if existing:
                # Update source if needed
                if existing.source != "dane.biznes.gov.pl":
                    existing.source = "dane.biznes.gov.pl"
                    existing.fetched_at = datetime.now()
                    stats["updated"] += 1
                else:
                    stats["skipped"] += 1
            else:
                # Create new relationship
                company_person = CompanyPerson(
                    company_id=company.id,
                    person_id=person.id,
                    role="WŁAŚCICIEL",
                    role_category="wlasciciel_jdg",
                    source="dane.biznes.gov.pl",
                    fetched_at=datetime.now()
                )
                db.add(company_person)
                stats["imported"] += 1
                print(f" [ADD] {owner.imie} {owner.nazwisko} -> {company.name}")
        db.commit()
    except Exception as e:
        db.rollback()
        print(f" [ERROR] Błąd importu: {e}")
        # The single commit above never ran, so none of the rows counted so
        # far were persisted (assuming SessionLocal is a regular transactional
        # session - confirm). Reset the counters so the summary is truthful.
        stats["imported"] = 0
        stats["updated"] = 0
        stats["errors"] += 1
    finally:
        db.close()
    return stats
def main():
    """
    Command-line entry point.

    Modes (``--nip`` wins when both are given):
        --nip NIP   fetch one record and print its details
        --all       fetch every company from the database that has a NIP but
                    no KRS number

    Fetched records are written to a JSON file (``--output`` or a timestamped
    file in ``JSON_OUTPUT_DIR``) and, with ``--import``, imported into the
    database. Without a mode the help text is printed and nothing else runs.
    """
    parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API v3")
    parser.add_argument("--nip", type=str, help="Single NIP to fetch")
    parser.add_argument("--all", action="store_true", help="Fetch all JDG from database")
    parser.add_argument("--import", dest="do_import", action="store_true",
                        help="Import fetched data to database")
    parser.add_argument("--output", type=str, help="Output JSON file")
    args = parser.parse_args()

    results = []
    if args.nip:
        data = fetch_ceidg_data(args.nip)
        if data:
            results.append(data)
            print(f"\n=== {data.nazwa} ===")
            if data.wlasciciel:
                print(f" Właściciel: {data.wlasciciel.imie} {data.wlasciciel.nazwisko}")
            print(f" Status: {data.status}")
            print(f" PKD: {data.pkd_glowny} - {data.pkd_opis}")
            print(f" Adres: {data.adres_ulica}, {data.adres_kod} {data.adres_miasto}")
    elif args.all:
        from database import SessionLocal, Company

        db = SessionLocal()
        try:
            # Get JDG companies (no KRS)
            jdg_rows = db.query(Company).filter(
                (Company.krs.is_(None)) | (Company.krs == ''),
                Company.nip.isnot(None),
                Company.nip != ''
            ).all()
            # Copy the plain values out so the session can be released before
            # the slow, rate-limited network loop starts.
            jdg_companies = [(row.name, row.nip) for row in jdg_rows]
        finally:
            db.close()

        total = len(jdg_companies)
        print(f"Znaleziono {total} firm JDG\n")
        success = 0
        failed = 0
        for position, (company_name, company_nip) in enumerate(jdg_companies, start=1):
            print(f"[{position}/{total}] {company_name}")
            data = fetch_ceidg_data(company_nip)
            if data:
                results.append(data)
                success += 1
            else:
                failed += 1
            if position < total:
                time.sleep(0.5)  # Rate limiting (not needed after the last request)
        print("\n=== PODSUMOWANIE ===")
        print(f"Pobrano: {success}")
        print(f"Błędy/brak danych: {failed}")
    else:
        parser.print_help()
        return

    # Save to JSON cache
    if results:
        output_file = args.output or str(
            JSON_OUTPUT_DIR / f"ceidg_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        )
        # Create the directory of the file actually being written, so a custom
        # --output path in a not-yet-existing directory works too.
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump([r.to_dict() for r in results], f, ensure_ascii=False, indent=2)
        print(f"\nDane zapisane do: {output_file}")

    # Import to database if requested
    if args.do_import and results:
        print("\n=== IMPORT DO BAZY ===")
        stats = import_to_database(results)
        print(f"\nZaimportowano: {stats['imported']}")
        print(f"Zaktualizowano: {stats['updated']}")
        print(f"Pominięto: {stats['skipped']}")
        print(f"Błędy: {stats['errors']}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()