- Add pkd_code, pkd_description columns for business activity classification - Add business_start_date column from CEIDG - Add owner_first_name, owner_last_name for JDG companies - Create import script scripts/import_ceidg_to_db.py - Add PKD card display in company profile template - Add owner section for JDG companies without KRS - Track SQL migrations in git (database/migrations/*.sql) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
174 lines
5.5 KiB
Python
174 lines
5.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Import CEIDG data (PKD, owner, start date) to database.
|
|
|
|
Reads cached CEIDG JSON files and updates company records with:
|
|
- PKD code and description
|
|
- Business start date
|
|
- Owner name (for JDG without KRS)
|
|
|
|
Usage:
|
|
python scripts/import_ceidg_to_db.py # Dry run
|
|
python scripts/import_ceidg_to_db.py --apply # Apply changes
|
|
python scripts/import_ceidg_to_db.py --nip 5881571773 # Single company
|
|
"""
|
|
|
|
import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
# Load environment
|
|
from dotenv import load_dotenv
|
|
load_dotenv(Path(__file__).parent.parent / '.env')
|
|
|
|
from database import SessionLocal, Company
|
|
|
|
|
|
def find_latest_ceidg_json() -> Optional[Path]:
    """Find the most recent CEIDG JSON file.

    Looks in ``data/ceidg_json`` (relative to the parent of this script's
    directory) for files matching ``ceidg_data_*.json``.

    Returns:
        Path of the matching file with the newest modification time, or
        ``None`` when the directory is missing or contains no matching file.
    """
    json_dir = Path(__file__).parent.parent / "data" / "ceidg_json"
    if not json_dir.is_dir():
        return None

    # max() picks the newest file in one pass; default=None covers the
    # "no matching files" case without materialising and sorting a list.
    return max(
        json_dir.glob("ceidg_data_*.json"),
        key=lambda f: f.stat().st_mtime,
        default=None,
    )
|
|
|
|
|
|
def load_ceidg_data(json_path: Path) -> list:
    """Read the UTF-8 encoded JSON file at *json_path* and return its parsed content."""
    with open(json_path, encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload
|
|
|
|
|
|
def _title_case_name(value) -> Optional[str]:
    """Return *value* title-cased, or None when it is missing, empty or not a string."""
    if not isinstance(value, str) or not value:
        return None
    return value.title()


def import_ceidg_to_db(apply: bool = False, target_nip: Optional[str] = None) -> None:
    """
    Import CEIDG data to database.

    Loads the newest CEIDG JSON file, looks up each record's company by NIP
    and reports (and optionally stores) differences in PKD code, PKD
    description, business start date and owner name. Only non-empty CEIDG
    values are written; empty values never clear existing company data.

    Args:
        apply: If True, actually apply changes. If False, dry run.
        target_nip: If set, only process this NIP.
    """
    json_path = find_latest_ceidg_json()
    if not json_path:
        print("ERROR: No CEIDG JSON files found in data/ceidg_json/")
        return

    print(f"Loading CEIDG data from: {json_path.name}")
    ceidg_records = load_ceidg_data(json_path)
    print(f"Found {len(ceidg_records)} CEIDG records")

    db = SessionLocal()
    updated = 0
    skipped = 0
    not_found = 0

    try:
        for record in ceidg_records:
            nip = record.get('nip')
            if not nip:
                continue

            # Filter by target NIP if specified
            if target_nip and nip != target_nip:
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == nip).first()
            if not company:
                not_found += 1
                if target_nip:
                    print(f" NOT FOUND: NIP {nip}")
                continue

            # Extract CEIDG data
            pkd_code = record.get('pkd_glowny')
            pkd_description = record.get('pkd_opis')
            start_date_str = record.get('data_rozpoczecia')

            # The owner entry may be absent, null or malformed; treat all of
            # those as "no owner data" instead of failing on attribute access.
            owner = record.get('wlasciciel')
            if not isinstance(owner, dict):
                owner = {}
            owner_first = _title_case_name(owner.get('imie'))
            owner_last = _title_case_name(owner.get('nazwisko'))

            # Parse start date
            business_start_date = None
            if start_date_str:
                try:
                    business_start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date()
                except (ValueError, TypeError):
                    # Keep going with the other fields, but make the bad value visible.
                    print(f" WARNING: invalid start date {start_date_str!r} for NIP {nip}")

            # Check if there are any changes
            changes = []
            if pkd_code and company.pkd_code != pkd_code:
                changes.append(f"PKD: {company.pkd_code} → {pkd_code}")
            if pkd_description and company.pkd_description != pkd_description:
                changes.append(f"PKD opis: {'set' if not company.pkd_description else 'update'}")
            if business_start_date and company.business_start_date != business_start_date:
                changes.append(f"Data rozpoczęcia: {company.business_start_date} → {business_start_date}")

            # A change in either part of the name counts; checking only the
            # first name would miss a record whose last name alone changed.
            first_changed = owner_first and company.owner_first_name != owner_first
            last_changed = owner_last and company.owner_last_name != owner_last
            if first_changed or last_changed:
                owner_label = ' '.join(part for part in (owner_first, owner_last) if part)
                changes.append(f"Właściciel: {owner_label}")

            if not changes:
                skipped += 1
                continue

            # Print changes
            print(f"\n{company.name} (NIP: {nip}):")
            for change in changes:
                print(f" • {change}")

            if apply:
                # Apply updates
                if pkd_code:
                    company.pkd_code = pkd_code
                if pkd_description:
                    company.pkd_description = pkd_description
                if business_start_date:
                    company.business_start_date = business_start_date
                if owner_first:
                    company.owner_first_name = owner_first
                if owner_last:
                    company.owner_last_name = owner_last

            # Counted in dry runs too: there it means "would be updated".
            updated += 1

        if apply:
            # Single commit after the loop, so a failure part-way leaves nothing committed.
            db.commit()
            print(f"\n✅ Zaktualizowano {updated} firm")
        else:
            print(f"\n🔍 DRY RUN: {updated} firm do aktualizacji")
            print(" Użyj --apply aby zapisać zmiany")

        print(f" Pominięto (bez zmian): {skipped}")
        print(f" Nie znaleziono w bazie: {not_found}")

    finally:
        db.close()
|
|
|
|
|
|
def main():
    """Command-line entry point: parse ``--apply`` / ``--nip`` and run the import."""
    cli = argparse.ArgumentParser(description='Import CEIDG data to database')
    cli.add_argument('--apply', action='store_true', help='Apply changes (default: dry run)')
    cli.add_argument('--nip', type=str, help='Process only this NIP')
    options = cli.parse_args()

    # Banner: title framed by two 60-character rules
    rule = "=" * 60
    for banner_line in (rule, "CEIDG → Database Import", rule):
        print(banner_line)

    import_ceidg_to_db(apply=options.apply, target_nip=options.nip)
|
|
|
|
|
|
# Run the importer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|