- Add company logo display in search results cards - Make logo clickable (links to company profile) - Temporarily hide "Aktualności i wydarzenia" section on company profiles - Add scripts for KRS PDF download/parsing and CEIDG API Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
293 lines
8.6 KiB
Python
293 lines
8.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import danych osób z odpisu KRS do bazy danych.
|
|
|
|
Używa parse_krs_pdf.py do wyciągania danych z PDF i importuje je do tabel:
|
|
- people: osoby (zarząd, wspólnicy, prokurenci)
|
|
- company_people: relacje osoba-firma
|
|
|
|
Usage:
|
|
python scripts/import_krs_people.py --file /path/to/odpis.pdf --company-id 26
|
|
python scripts/import_krs_people.py --dir /path/to/pdfs/
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# Add parent directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
# Load environment variables
|
|
from dotenv import load_dotenv
|
|
load_dotenv(Path(__file__).parent.parent / '.env')
|
|
|
|
from database import SessionLocal, Company, Person, CompanyPerson
|
|
from parse_krs_pdf import parse_krs_pdf, KRSData
|
|
|
|
|
|
def get_or_create_person(db, nazwisko: str, imiona: str, pesel: str = None) -> Person:
    """
    Find an existing person or create a new one.

    Lookup order:
      1. By PESEL, when one is supplied.
      2. By surname and given names. When a PESEL is supplied, only a
         namesake that has no PESEL stored yet is accepted, so that two
         different people sharing a name are never merged into one record.

    Args:
        db: Database session used for the queries and for persisting.
        nazwisko: Surname.
        imiona: Given name(s).
        pesel: Optional PESEL number.

    Returns:
        The matching Person, or a newly created one. A new record is flushed
        before it is returned so that its ID is populated.
    """
    if pesel:
        person = db.query(Person).filter(Person.pesel == pesel).first()
        if person:
            return person

    # Fall back to the name (no PESEL given, or PESEL not found above).
    namesakes = db.query(Person).filter(
        Person.nazwisko == nazwisko,
        Person.imiona == imiona,
    ).all()

    # With a PESEL in hand, a namesake that already carries a PESEL must be a
    # different person (an equal PESEL would have matched above), so skip it.
    person = next(
        (candidate for candidate in namesakes if not pesel or not candidate.pesel),
        None,
    )

    if person:
        # Found a record without PESEL and we now know it - fill it in.
        if pesel and not person.pesel:
            person.pesel = pesel
            db.flush()
        return person

    # Nobody matched: create a new person.
    person = Person(
        nazwisko=nazwisko,
        imiona=imiona,
        pesel=pesel,
    )
    db.add(person)
    db.flush()  # Needed so that person.id is available to the caller.
    return person
|
|
|
|
|
|
def find_company_by_krs(db, krs: str) -> Company:
    """
    Look up a company by its KRS number.

    Returns the first matching Company, or None when no row matches.
    """
    matching = db.query(Company).filter(Company.krs == krs)
    return matching.first()
|
|
|
|
|
|
def find_company_by_nip(db, nip: str) -> Company:
    """
    Look up a company by its NIP number.

    Returns the first matching Company, or None when no row matches.
    """
    matching = db.query(Company).filter(Company.nip == nip)
    return matching.first()
|
|
|
|
|
|
def _link_person_to_company(db, company, person, role, role_category,
                            pdf_filename, fetched_at, match_role=False) -> bool:
    """
    Attach ``person`` to ``company`` unless an equivalent relation exists.

    A relation counts as existing when company, person and role category
    match; with ``match_role`` the role itself must match as well.

    Returns:
        True when a new CompanyPerson row was added, False when it was skipped.
    """
    criteria = [
        CompanyPerson.company_id == company.id,
        CompanyPerson.person_id == person.id,
        CompanyPerson.role_category == role_category,
    ]
    if match_role:
        criteria.append(CompanyPerson.role == role)

    if db.query(CompanyPerson).filter(*criteria).first() is not None:
        return False

    db.add(CompanyPerson(
        company_id=company.id,
        person_id=person.id,
        role=role,
        role_category=role_category,
        source='ekrs.ms.gov.pl',
        source_document=pdf_filename,
        fetched_at=fetched_at,
    ))
    # Flush so the duplicate check for later entries of the same document sees
    # this row even when the session is configured without autoflush.
    db.flush()
    return True


def import_krs_data(db, krs_data: KRSData, company: Company, pdf_filename: str) -> dict:
    """
    Import the people listed in a parsed KRS extract into the database.

    Board members, partners and proxies are each resolved to a Person (created
    when missing) and linked to ``company``. Nothing is committed here; the
    caller owns the transaction.

    Args:
        db: Database session.
        krs_data: Parsed extract exposing ``zarzad``, ``wspolnicy`` and
            ``prokurenci`` collections.
        company: Company the people are attached to.
        pdf_filename: Name of the source PDF, stored on every new relation.

    Returns:
        dict with the import summary: ``zarzad_added``, ``wspolnicy_added``,
        ``prokurenci_added`` and ``skipped`` counters. The ``people_created``
        and ``people_updated`` keys are kept in the result but are not
        incremented by this function.
    """
    stats = {
        'zarzad_added': 0,
        'wspolnicy_added': 0,
        'prokurenci_added': 0,
        'people_created': 0,
        'people_updated': 0,
        'skipped': 0
    }

    # One timestamp for the whole document so all rows share fetched_at.
    now = datetime.now()

    # Board members: one person may hold several distinct roles, so the role
    # is part of the duplicate check. The fallback role is resolved BEFORE the
    # check so that the lookup uses the same value that gets stored.
    for p in krs_data.zarzad:
        person = get_or_create_person(db, p.nazwisko, p.imiona, p.pesel)
        role = p.rola or 'CZŁONEK ZARZĄDU'
        added = _link_person_to_company(
            db, company, person, role, 'zarzad', pdf_filename, now,
            match_role=True,
        )
        stats['zarzad_added' if added else 'skipped'] += 1

    # Partners
    for p in krs_data.wspolnicy:
        person = get_or_create_person(db, p.nazwisko, p.imiona, p.pesel)
        added = _link_person_to_company(
            db, company, person, 'WSPÓLNIK', 'wspolnik', pdf_filename, now,
        )
        stats['wspolnicy_added' if added else 'skipped'] += 1

    # Proxies
    for p in krs_data.prokurenci:
        person = get_or_create_person(db, p.nazwisko, p.imiona, p.pesel)
        added = _link_person_to_company(
            db, company, person, 'PROKURENT', 'prokurent', pdf_filename, now,
        )
        stats['prokurenci_added' if added else 'skipped'] += 1

    return stats
|
|
|
|
|
|
def import_from_file(pdf_path: str, company_id: int = None, dry_run: bool = False):
    """
    Import the data from a single KRS extract PDF.

    The PDF is parsed first and a short summary is printed. Unless ``dry_run``
    is set, the target company is then resolved - by ``company_id`` when given,
    otherwise by the KRS number from the PDF, falling back to the NIP - and the
    people found in the document are imported and committed.

    Args:
        pdf_path: Path to the PDF file.
        company_id: Optional company ID that overrides KRS/NIP matching.
        dry_run: When True, only parse and print; the database is not touched.

    Returns:
        The parsed KRS data on success (also in dry-run mode), or None when
        parsing failed or the company could not be found.

    Raises:
        Exception: any error raised during the database import is re-raised
            after the transaction has been rolled back.
    """
    print(f"\n{'='*60}")
    print(f"Przetwarzanie: {pdf_path}")
    print('='*60)

    # Parse the PDF
    try:
        krs_data = parse_krs_pdf(pdf_path)
    except Exception as e:
        print(f" [ERROR] Błąd parsowania: {e}")
        return None

    print(f" Nazwa: {krs_data.nazwa}")
    print(f" KRS: {krs_data.krs}")
    print(f" NIP: {krs_data.nip}")
    print(f" Zarząd: {len(krs_data.zarzad)} osób")
    print(f" Wspólnicy: {len(krs_data.wspolnicy)} osób")
    print(f" Prokurenci: {len(krs_data.prokurenci)} osób")

    if dry_run:
        print(" [DRY-RUN] Pomijam zapis do bazy")
        return krs_data

    db = SessionLocal()
    try:
        # Resolve the company in the database
        company = None

        # Compare against None so that an explicitly passed ID of 0 is looked
        # up (and reported as missing) instead of silently falling back to
        # KRS/NIP auto-detection.
        if company_id is not None:
            company = db.query(Company).filter(Company.id == company_id).first()
            if not company:
                print(f" [ERROR] Firma o ID {company_id} nie istnieje")
                return None
        elif krs_data.krs:
            company = find_company_by_krs(db, krs_data.krs)

        if not company and krs_data.nip:
            company = find_company_by_nip(db, krs_data.nip)

        if not company:
            print(f" [ERROR] Nie znaleziono firmy w bazie (KRS: {krs_data.krs}, NIP: {krs_data.nip})")
            return None

        print(f" Firma w bazie: {company.name} (ID: {company.id})")

        # Import the data; only the bare file name is stored as the source document
        pdf_filename = Path(pdf_path).name
        stats = import_krs_data(db, krs_data, company, pdf_filename)

        db.commit()

        print(f"\n [OK] Import zakończony:")
        print(f" Zarząd: +{stats['zarzad_added']}")
        print(f" Wspólnicy: +{stats['wspolnicy_added']}")
        print(f" Prokurenci: +{stats['prokurenci_added']}")
        print(f" Pominięto (duplikaty): {stats['skipped']}")

        return krs_data

    except Exception as e:
        # Undo any partial writes, report, and let the caller decide what to do.
        db.rollback()
        print(f" [ERROR] Błąd importu: {e}")
        raise
    finally:
        db.close()
|
|
|
|
|
|
def import_from_directory(dir_path: str, dry_run: bool = False):
    """
    Import every KRS extract PDF found in a directory.

    Only files matching ``odpis_*.pdf`` directly inside ``dir_path`` are
    processed, in sorted order. A file counts as a success when
    ``import_from_file`` returns a truthy result; a falsy result or a raised
    exception counts as an error. A summary is printed at the end.

    Args:
        dir_path: Directory to scan.
        dry_run: Passed through to ``import_from_file``.
    """
    candidates = sorted(Path(dir_path).glob("odpis_*.pdf"))
    total = len(candidates)

    print(f"Znaleziono {total} plików PDF")

    succeeded = 0
    failed = 0

    for candidate in candidates:
        try:
            outcome = import_from_file(str(candidate), dry_run=dry_run)
        except Exception as exc:
            # One broken file must not stop the rest of the batch.
            print(f" [ERROR] {exc}")
            failed += 1
            continue

        if outcome:
            succeeded += 1
        else:
            failed += 1

    separator = '=' * 60
    print(f"\n{separator}")
    print("PODSUMOWANIE")
    print(separator)
    print(f" Sukces: {succeeded}")
    print(f" Błędy: {failed}")
    print(f" Łącznie: {total}")
|
|
|
|
|
|
def main():
    """
    Command-line entry point.

    Parses the arguments and dispatches to the single-file or directory
    import. ``--file`` takes precedence when both ``--file`` and ``--dir``
    are given; with neither, the help text is printed.
    """
    cli = argparse.ArgumentParser(description="Import KRS people data to database")
    cli.add_argument("--file", type=str, help="Single PDF file to import")
    cli.add_argument("--dir", type=str, help="Directory with PDF files")
    cli.add_argument("--company-id", type=int, help="Force company ID (for --file only)")
    cli.add_argument("--dry-run", action="store_true", help="Parse only, don't save to database")
    options = cli.parse_args()

    if options.file:
        import_from_file(
            options.file,
            company_id=options.company_id,
            dry_run=options.dry_run,
        )
        return

    if options.dir:
        import_from_directory(options.dir, dry_run=options.dry_run)
        return

    cli.print_help()
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|