From abe1cd38a1e25ea36083bf6e7b5ccfe03452a44f Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Tue, 13 Jan 2026 16:07:03 +0100 Subject: [PATCH] feat: Add PKD codes and CEIDG owner data to company profiles - Add pkd_code, pkd_description columns for business activity classification - Add business_start_date column from CEIDG - Add owner_first_name, owner_last_name for JDG companies - Create import script scripts/import_ceidg_to_db.py - Add PKD card display in company profile template - Add owner section for JDG companies without KRS - Track SQL migrations in git (database/migrations/*.sql) Co-Authored-By: Claude Opus 4.5 --- .gitignore | 1 + database.py | 11 ++ database/migrations/011_company_people.sql | 83 +++++++++ database/migrations/012_pkd_integration.sql | 35 ++++ database/migrations/add_google_gbp_fields.sql | 20 ++ scripts/import_ceidg_to_db.py | 173 ++++++++++++++++++ templates/company_detail.html | 70 +++++++ 7 files changed, 393 insertions(+) create mode 100644 database/migrations/011_company_people.sql create mode 100644 database/migrations/012_pkd_integration.sql create mode 100644 database/migrations/add_google_gbp_fields.sql create mode 100644 scripts/import_ceidg_to_db.py diff --git a/.gitignore b/.gitignore index 0220d45..e8e3206 100644 --- a/.gitignore +++ b/.gitignore @@ -73,6 +73,7 @@ venv-py312/ .worktrees/ *.dump *.sql +!database/migrations/*.sql nordabiz_*.dump nordabiz_*.sql diff --git a/database.py b/database.py index ce090a1..0d96909 100644 --- a/database.py +++ b/database.py @@ -291,6 +291,17 @@ class Company(Base): branch_count = Column(Integer) employee_count_range = Column(String(50)) + # PKD (kod działalności gospodarczej) - z CEIDG + pkd_code = Column(String(10)) # np. "6201Z" + pkd_description = Column(Text) # np. "Działalność związana z oprogramowaniem" + + # Data rozpoczęcia działalności - z CEIDG + business_start_date = Column(Date) # np. 2021-02-10 + + # Właściciel JDG - z CEIDG (tylko dla jednoosobowych działalności) + owner_first_name = Column(String(100)) + owner_last_name = Column(String(100)) + # Data source tracking data_source = Column(String(100)) data_quality_score = Column(Integer) diff --git a/database/migrations/011_company_people.sql b/database/migrations/011_company_people.sql new file mode 100644 index 0000000..1b95656 --- /dev/null +++ b/database/migrations/011_company_people.sql @@ -0,0 +1,83 @@ +-- Migration: Tabele dla osób powiązanych z firmami (zarząd, wspólnicy, prokurenci) +-- Data: 2026-01-11 +-- Cel: Przechowywanie danych z odpisów KRS i budowanie mapy powiązań + +-- Tabela osób (unikalna po PESEL lub kombinacji imię+nazwisko) +CREATE TABLE IF NOT EXISTS people ( + id SERIAL PRIMARY KEY, + pesel VARCHAR(11) UNIQUE, -- może być NULL dla osób prawnych + imiona VARCHAR(255) NOT NULL, + nazwisko VARCHAR(255) NOT NULL, + + -- Metadane + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW(), + + -- Constraint dla unikalności gdy brak PESEL + CONSTRAINT people_unique_name UNIQUE NULLS NOT DISTINCT (pesel, imiona, nazwisko) +); + +-- Tabela powiązań osoba-firma +CREATE TABLE IF NOT EXISTS company_people ( + id SERIAL PRIMARY KEY, + company_id INTEGER NOT NULL REFERENCES companies(id) ON DELETE CASCADE, + person_id INTEGER NOT NULL REFERENCES people(id) ON DELETE CASCADE, + + -- Rola w firmie + role VARCHAR(50) NOT NULL, -- PREZES ZARZĄDU, CZŁONEK ZARZĄDU, WSPÓLNIK, PROKURENT + role_category VARCHAR(20) NOT NULL, -- zarzad, wspolnik, prokurent + + -- Dane dodatkowe (dla wspólników) + shares_count INTEGER, -- liczba udziałów + shares_value DECIMAL(12,2), -- wartość udziałów + shares_percent DECIMAL(5,2), -- procent udziałów + + -- Źródło danych + source VARCHAR(100) DEFAULT 'ekrs.ms.gov.pl', + source_document VARCHAR(255), -- np. "odpis_pelny_0000725183.pdf" + fetched_at TIMESTAMP, -- kiedy pobrano dane + + -- Metadane + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW(), + + -- Osoba może mieć tylko jedną rolę danej kategorii w firmie + CONSTRAINT company_people_unique UNIQUE (company_id, person_id, role_category, role) +); + +-- Indeksy dla szybkiego wyszukiwania +CREATE INDEX IF NOT EXISTS idx_people_pesel ON people(pesel); +CREATE INDEX IF NOT EXISTS idx_people_nazwisko ON people(nazwisko); +CREATE INDEX IF NOT EXISTS idx_company_people_company ON company_people(company_id); +CREATE INDEX IF NOT EXISTS idx_company_people_person ON company_people(person_id); +CREATE INDEX IF NOT EXISTS idx_company_people_role ON company_people(role_category); + +-- Widok dla łatwego wyszukiwania powiązań między firmami +CREATE OR REPLACE VIEW company_connections AS +SELECT + p.id as person_id, + p.imiona || ' ' || p.nazwisko as person_name, + p.pesel, + array_agg(DISTINCT c.name ORDER BY c.name) as companies, + array_agg(DISTINCT c.id ORDER BY c.id) as company_ids, + count(DISTINCT c.id) as company_count +FROM people p +JOIN company_people cp ON p.id = cp.person_id +JOIN companies c ON cp.company_id = c.id +GROUP BY p.id, p.imiona, p.nazwisko, p.pesel +HAVING count(DISTINCT c.id) > 1; + +-- Uprawnienia dla aplikacji +GRANT ALL ON TABLE people TO nordabiz_app; +GRANT ALL ON TABLE company_people TO nordabiz_app; +GRANT USAGE, SELECT ON SEQUENCE people_id_seq TO nordabiz_app; +GRANT USAGE, SELECT ON SEQUENCE company_people_id_seq TO nordabiz_app; +GRANT SELECT ON company_connections TO nordabiz_app; + +-- Komentarze +COMMENT ON TABLE people IS 'Osoby powiązane z firmami (zarząd, wspólnicy, prokurenci)'; +COMMENT ON TABLE company_people IS 'Relacja wiele-do-wielu między osobami a firmami'; +COMMENT ON VIEW company_connections IS 'Osoby powiązane z wieloma firmami Norda Biznes'; +COMMENT ON COLUMN people.pesel IS 'PESEL osoby fizycznej (NULL dla osób prawnych)'; +COMMENT ON COLUMN company_people.role IS 'Pełna nazwa funkcji np. PREZES ZARZĄDU'; +COMMENT ON COLUMN company_people.role_category IS 'Kategoria: zarzad, wspolnik, prokurent'; diff --git a/database/migrations/012_pkd_integration.sql b/database/migrations/012_pkd_integration.sql new file mode 100644 index 0000000..b42e312 --- /dev/null +++ b/database/migrations/012_pkd_integration.sql @@ -0,0 +1,35 @@ +-- ============================================================ +-- Migration: 012_pkd_integration.sql +-- Date: 2026-01-13 +-- Description: Add PKD codes and CEIDG owner data to companies +-- ============================================================ + +-- PKD (Polska Klasyfikacja Działalności) - main business activity code +ALTER TABLE companies ADD COLUMN IF NOT EXISTS pkd_code VARCHAR(10); +ALTER TABLE companies ADD COLUMN IF NOT EXISTS pkd_description TEXT; + +-- Business start date from CEIDG +ALTER TABLE companies ADD COLUMN IF NOT EXISTS business_start_date DATE; + +-- JDG owner info from CEIDG (only for sole proprietorships without KRS) +ALTER TABLE companies ADD COLUMN IF NOT EXISTS owner_first_name VARCHAR(100); +ALTER TABLE companies ADD COLUMN IF NOT EXISTS owner_last_name VARCHAR(100); + +-- Add comments for documentation +COMMENT ON COLUMN companies.pkd_code IS 'Primary PKD code from CEIDG (e.g., 6201Z, 4321Z)'; +COMMENT ON COLUMN companies.pkd_description IS 'Polish description of main business activity from CEIDG'; +COMMENT ON COLUMN companies.business_start_date IS 'Business start date from CEIDG (data_rozpoczecia)'; +COMMENT ON COLUMN companies.owner_first_name IS 'JDG owner first name from CEIDG (for sole proprietorships)'; +COMMENT ON COLUMN companies.owner_last_name IS 'JDG owner last name from CEIDG (for sole proprietorships)'; + +-- Create index for PKD code lookup (useful for filtering by industry) +CREATE INDEX IF NOT EXISTS idx_companies_pkd_code ON companies(pkd_code); + +-- Grant permissions +GRANT ALL ON TABLE companies TO nordabiz_app; + +-- ============================================================ +-- Verification query (run after migration): +-- SELECT name, pkd_code, pkd_description, business_start_date, owner_first_name, owner_last_name +-- FROM companies WHERE pkd_code IS NOT NULL LIMIT 5; +-- ============================================================ diff --git a/database/migrations/add_google_gbp_fields.sql b/database/migrations/add_google_gbp_fields.sql new file mode 100644 index 0000000..a81bae1 --- /dev/null +++ b/database/migrations/add_google_gbp_fields.sql @@ -0,0 +1,20 @@ +-- Migration: Add Google Business Profile fields to company_website_analysis +-- Date: 2026-01-09 +-- Purpose: Store all GBP data fetched from Google Places API for accurate auditing + +-- Add new columns for Google data +ALTER TABLE company_website_analysis +ADD COLUMN IF NOT EXISTS google_name VARCHAR(255), +ADD COLUMN IF NOT EXISTS google_address VARCHAR(500), +ADD COLUMN IF NOT EXISTS google_phone VARCHAR(50), +ADD COLUMN IF NOT EXISTS google_website VARCHAR(500), +ADD COLUMN IF NOT EXISTS google_types TEXT[], +ADD COLUMN IF NOT EXISTS google_maps_url VARCHAR(500); + +-- Add comment for documentation +COMMENT ON COLUMN company_website_analysis.google_name IS 'Business name from Google Places API'; +COMMENT ON COLUMN company_website_analysis.google_address IS 'Formatted address from Google Places API'; +COMMENT ON COLUMN company_website_analysis.google_phone IS 'Phone number from Google Places API'; +COMMENT ON COLUMN company_website_analysis.google_website IS 'Website URL from Google Places API'; +COMMENT ON COLUMN company_website_analysis.google_types IS 'Business types/categories from Google Places API'; +COMMENT ON COLUMN company_website_analysis.google_maps_url IS 'Google Maps URL for the business'; diff --git a/scripts/import_ceidg_to_db.py b/scripts/import_ceidg_to_db.py new file mode 100644 index 0000000..a82511d --- /dev/null +++ b/scripts/import_ceidg_to_db.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +""" +Import CEIDG data (PKD, owner, start date) to database. + +Reads cached CEIDG JSON files and updates company records with: +- PKD code and description +- Business start date +- Owner name (for JDG without KRS) + +Usage: + python scripts/import_ceidg_to_db.py # Dry run + python scripts/import_ceidg_to_db.py --apply # Apply changes + python scripts/import_ceidg_to_db.py --nip 5881571773 # Single company +""" + +import os +import sys +import json +import argparse +from pathlib import Path +from datetime import datetime + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Load environment +from dotenv import load_dotenv +load_dotenv(Path(__file__).parent.parent / '.env') + +from database import SessionLocal, Company + + +def find_latest_ceidg_json() -> Path: + """Find the most recent CEIDG JSON file.""" + json_dir = Path(__file__).parent.parent / "data" / "ceidg_json" + if not json_dir.exists(): + return None + + json_files = list(json_dir.glob("ceidg_data_*.json")) + if not json_files: + return None + + # Sort by modification time, newest first + return sorted(json_files, key=lambda f: f.stat().st_mtime, reverse=True)[0] + + +def load_ceidg_data(json_path: Path) -> list: + """Load CEIDG data from JSON file.""" + with open(json_path, 'r', encoding='utf-8') as f: + return json.load(f) + + +def import_ceidg_to_db(apply: bool = False, target_nip: str = None): + """ + Import CEIDG data to database. + + Args: + apply: If True, actually apply changes. If False, dry run. + target_nip: If set, only process this NIP. + """ + json_path = find_latest_ceidg_json() + if not json_path: + print("ERROR: No CEIDG JSON files found in data/ceidg_json/") + return + + print(f"Loading CEIDG data from: {json_path.name}") + ceidg_records = load_ceidg_data(json_path) + print(f"Found {len(ceidg_records)} CEIDG records") + + db = SessionLocal() + updated = 0 + skipped = 0 + not_found = 0 + + try: + for record in ceidg_records: + nip = record.get('nip') + if not nip: + continue + + # Filter by target NIP if specified + if target_nip and nip != target_nip: + continue + + # Find company by NIP + company = db.query(Company).filter(Company.nip == nip).first() + if not company: + not_found += 1 + if target_nip: + print(f" NOT FOUND: NIP {nip}") + continue + + # Extract CEIDG data + pkd_code = record.get('pkd_glowny') + pkd_description = record.get('pkd_opis') + start_date_str = record.get('data_rozpoczecia') + owner = record.get('wlasciciel', {}) + owner_first = owner.get('imie', '').title() if owner else None + owner_last = owner.get('nazwisko', '').title() if owner else None + + # Parse start date + business_start_date = None + if start_date_str: + try: + business_start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date() + except ValueError: + pass + + # Check if there are any changes + changes = [] + if pkd_code and company.pkd_code != pkd_code: + changes.append(f"PKD: {company.pkd_code} → {pkd_code}") + if pkd_description and company.pkd_description != pkd_description: + changes.append(f"PKD opis: {'set' if not company.pkd_description else 'update'}") + if business_start_date and company.business_start_date != business_start_date: + changes.append(f"Data rozpoczęcia: {company.business_start_date} → {business_start_date}") + if owner_first and company.owner_first_name != owner_first: + changes.append(f"Właściciel: {owner_first} {owner_last}") + + if not changes: + skipped += 1 + continue + + # Print changes + print(f"\n{company.name} (NIP: {nip}):") + for change in changes: + print(f" • {change}") + + if apply: + # Apply updates + if pkd_code: + company.pkd_code = pkd_code + if pkd_description: + company.pkd_description = pkd_description + if business_start_date: + company.business_start_date = business_start_date + if owner_first: + company.owner_first_name = owner_first + if owner_last: + company.owner_last_name = owner_last + + updated += 1 + + if apply: + db.commit() + print(f"\n✅ Zaktualizowano {updated} firm") + else: + print(f"\n🔍 DRY RUN: {updated} firm do aktualizacji") + print(" Użyj --apply aby zapisać zmiany") + + print(f" Pominięto (bez zmian): {skipped}") + print(f" Nie znaleziono w bazie: {not_found}") + + finally: + db.close() + + +def main(): + parser = argparse.ArgumentParser(description='Import CEIDG data to database') + parser.add_argument('--apply', action='store_true', help='Apply changes (default: dry run)') + parser.add_argument('--nip', type=str, help='Process only this NIP') + + args = parser.parse_args() + + print("=" * 60) + print("CEIDG → Database Import") + print("=" * 60) + + import_ceidg_to_db(apply=args.apply, target_nip=args.nip) + + +if __name__ == '__main__': + main() diff --git a/templates/company_detail.html b/templates/company_detail.html index a2afe00..6a7fcdc 100755 --- a/templates/company_detail.html +++ b/templates/company_detail.html @@ -1224,6 +1224,49 @@ {% endif %} + + {% if company.business_start_date %} +
+
+
+ + + +
+
+
Data rozpoczęcia
+
{{ company.business_start_date.strftime('%d.%m.%Y') }}
+
+
+
+ Źródło: CEIDG +
+
+ {% endif %} + + + {% if company.pkd_code %} +
+
+
+ + + +
+
+
PKD - Główna działalność
+
{{ company.pkd_code }}
+ {% if company.pkd_description %} +
{{ company.pkd_description }}
+ {% endif %} +
+
+
+ Źródło: CEIDG +
+
+ {% endif %} + {% if company.employees_count or company.employee_count_range %}
@@ -1269,6 +1312,33 @@
+ +{% if company.owner_first_name and not company.krs and not people %} +
+

Właściciel

+ +
+
+ + + +
+
+
+ {{ company.owner_first_name }} {{ company.owner_last_name }} +
+
+ Właściciel jednoosobowej działalności gospodarczej +
+
+
+ +
+ Źródło: CEIDG • Dane publiczne +
+
+{% endif %} + {% if people %}