feat: Add PKD codes and CEIDG owner data to company profiles

- Add pkd_code, pkd_description columns for business activity classification
- Add business_start_date column from CEIDG
- Add owner_first_name, owner_last_name for JDG companies
- Create import script scripts/import_ceidg_to_db.py
- Add PKD card display in company profile template
- Add owner section for JDG companies without KRS
- Track SQL migrations in git (database/migrations/*.sql)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-13 16:07:03 +01:00
parent 93695209d0
commit abe1cd38a1
7 changed files with 393 additions and 0 deletions

1
.gitignore vendored
View File

@ -73,6 +73,7 @@ venv-py312/
.worktrees/
*.dump
*.sql
!database/migrations/*.sql
nordabiz_*.dump
nordabiz_*.sql

View File

@ -291,6 +291,17 @@ class Company(Base):
branch_count = Column(Integer)
employee_count_range = Column(String(50))
# PKD (kod działalności gospodarczej) - z CEIDG
pkd_code = Column(String(10)) # np. "6201Z"
pkd_description = Column(Text) # np. "Działalność związana z oprogramowaniem"
# Data rozpoczęcia działalności - z CEIDG
business_start_date = Column(Date) # np. 2021-02-10
# Właściciel JDG - z CEIDG (tylko dla jednoosobowych działalności)
owner_first_name = Column(String(100))
owner_last_name = Column(String(100))
# Data source tracking
data_source = Column(String(100))
data_quality_score = Column(Integer)

View File

@ -0,0 +1,83 @@
-- Migration: Tabele dla osób powiązanych z firmami (zarząd, wspólnicy, prokurenci)
-- Data: 2026-01-11
-- Cel: Przechowywanie danych z odpisów KRS i budowanie mapy powiązań
-- People table: a row is unique by PESEL, or by the (first names, surname)
-- combination when no PESEL is known.
CREATE TABLE IF NOT EXISTS people (
    id          SERIAL PRIMARY KEY,
    pesel       VARCHAR(11) UNIQUE,        -- may be NULL for legal persons
    imiona      VARCHAR(255) NOT NULL,     -- first name(s)
    nazwisko    VARCHAR(255) NOT NULL,     -- surname

    -- Row metadata
    created_at  TIMESTAMP DEFAULT NOW(),
    updated_at  TIMESTAMP DEFAULT NOW(),

    -- Uniqueness when PESEL is missing: NULLS NOT DISTINCT makes two rows with
    -- a NULL pesel and the same names conflict with each other.
    -- NOTE: the NULLS NOT DISTINCT clause requires PostgreSQL 15 or newer.
    CONSTRAINT people_unique_name UNIQUE NULLS NOT DISTINCT (pesel, imiona, nazwisko)
);
-- Person-to-company link table
CREATE TABLE IF NOT EXISTS company_people (
    id              SERIAL PRIMARY KEY,
    company_id      INTEGER NOT NULL REFERENCES companies(id) ON DELETE CASCADE,
    person_id       INTEGER NOT NULL REFERENCES people(id) ON DELETE CASCADE,

    -- Role held in the company
    role            VARCHAR(50) NOT NULL,  -- PREZES ZARZĄDU, CZŁONEK ZARZĄDU, WSPÓLNIK, PROKURENT
    role_category   VARCHAR(20) NOT NULL,  -- zarzad, wspolnik, prokurent

    -- Extra data (for shareholders)
    shares_count    INTEGER,               -- number of shares
    shares_value    DECIMAL(12,2),         -- value of shares
    shares_percent  DECIMAL(5,2),          -- percentage of shares

    -- Data provenance
    source          VARCHAR(100) DEFAULT 'ekrs.ms.gov.pl',
    source_document VARCHAR(255),          -- e.g. "odpis_pelny_0000725183.pdf"
    fetched_at      TIMESTAMP,             -- when the data was fetched

    -- Row metadata
    created_at      TIMESTAMP DEFAULT NOW(),
    updated_at      TIMESTAMP DEFAULT NOW(),

    -- A given (category, role) pair may be recorded only once per person and
    -- company. Because `role` is part of the key, one person can still hold
    -- several different roles of the same category in the same company.
    CONSTRAINT company_people_unique UNIQUE (company_id, person_id, role_category, role)
);
-- Lookup indexes.
-- No separate index is created on people(pesel): the UNIQUE constraint declared
-- on that column is already backed by a unique index, so a second index on the
-- same column would only add write and storage overhead.
CREATE INDEX IF NOT EXISTS idx_people_nazwisko ON people(nazwisko);
CREATE INDEX IF NOT EXISTS idx_company_people_company ON company_people(company_id);
CREATE INDEX IF NOT EXISTS idx_company_people_person ON company_people(person_id);
CREATE INDEX IF NOT EXISTS idx_company_people_role ON company_people(role_category);
-- View: people linked to more than one company.
--
-- `companies` and `company_ids` are parallel arrays: both are sorted by
-- (company name, company id), so element i of one describes the same company
-- as element i of the other, and both have exactly `company_count` elements.
--
-- company_people can hold several rows for one (person, company) pair (one per
-- role), so the links are de-duplicated in a subquery before aggregating. This
-- replaces DISTINCT inside array_agg, which forced each array to be ordered by
-- its own values and collapsed different companies that share a name.
CREATE OR REPLACE VIEW company_connections AS
SELECT
    p.id AS person_id,
    p.imiona || ' ' || p.nazwisko AS person_name,
    p.pesel,
    array_agg(c.name ORDER BY c.name, c.id) AS companies,
    array_agg(c.id ORDER BY c.name, c.id) AS company_ids,
    count(*) AS company_count
FROM people AS p
INNER JOIN (
    SELECT DISTINCT person_id, company_id
    FROM company_people
) AS cp
    ON cp.person_id = p.id
INNER JOIN companies AS c
    ON c.id = cp.company_id
GROUP BY p.id, p.imiona, p.nazwisko, p.pesel
HAVING count(*) > 1;
-- Privileges for the application role
GRANT ALL ON TABLE people, company_people TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE people_id_seq, company_people_id_seq TO nordabiz_app;
GRANT SELECT ON TABLE company_connections TO nordabiz_app;
-- Catalog descriptions for the tables, the view and selected columns
COMMENT ON TABLE people IS 'Osoby powiązane z firmami (zarząd, wspólnicy, prokurenci)';
COMMENT ON TABLE company_people IS 'Relacja wiele-do-wielu między osobami a firmami';
COMMENT ON VIEW company_connections IS 'Osoby powiązane z wieloma firmami Norda Biznes';
COMMENT ON COLUMN people.pesel IS 'PESEL osoby fizycznej (NULL dla osób prawnych)';
COMMENT ON COLUMN company_people.role IS 'Pełna nazwa funkcji np. PREZES ZARZĄDU';
COMMENT ON COLUMN company_people.role_category IS 'Kategoria: zarzad, wspolnik, prokurent';

View File

@ -0,0 +1,35 @@
-- ============================================================
-- Migration: 012_pkd_integration.sql
-- Date: 2026-01-13
-- Description: Add PKD codes and CEIDG owner data to companies
-- ============================================================
-- New CEIDG-sourced columns on companies, added in a single ALTER:
--   pkd_code, pkd_description         main business activity
--                                     (PKD = Polska Klasyfikacja Działalności)
--   business_start_date               business start date
--   owner_first_name, owner_last_name JDG owner (sole proprietorships without KRS)
ALTER TABLE companies
    ADD COLUMN IF NOT EXISTS pkd_code VARCHAR(10),
    ADD COLUMN IF NOT EXISTS pkd_description TEXT,
    ADD COLUMN IF NOT EXISTS business_start_date DATE,
    ADD COLUMN IF NOT EXISTS owner_first_name VARCHAR(100),
    ADD COLUMN IF NOT EXISTS owner_last_name VARCHAR(100);

-- Catalog descriptions for the new columns
COMMENT ON COLUMN companies.pkd_code IS 'Primary PKD code from CEIDG (e.g., 6201Z, 4321Z)';
COMMENT ON COLUMN companies.pkd_description IS 'Polish description of main business activity from CEIDG';
COMMENT ON COLUMN companies.business_start_date IS 'Business start date from CEIDG (data_rozpoczecia)';
COMMENT ON COLUMN companies.owner_first_name IS 'JDG owner first name from CEIDG (for sole proprietorships)';
COMMENT ON COLUMN companies.owner_last_name IS 'JDG owner last name from CEIDG (for sole proprietorships)';

-- Supports filtering companies by industry (PKD code)
CREATE INDEX IF NOT EXISTS idx_companies_pkd_code ON companies (pkd_code);

-- Privileges for the application role
GRANT ALL ON TABLE companies TO nordabiz_app;
-- ============================================================
-- Verification query (run after migration):
-- SELECT name, pkd_code, pkd_description, business_start_date, owner_first_name, owner_last_name
-- FROM companies WHERE pkd_code IS NOT NULL LIMIT 5;
-- ============================================================

View File

@ -0,0 +1,20 @@
-- Migration: Add Google Business Profile fields to company_website_analysis
-- Date: 2026-01-09
-- Purpose: Store all GBP data fetched from Google Places API for accurate auditing
-- Columns holding the Google Business Profile data for a company
ALTER TABLE company_website_analysis
    ADD COLUMN IF NOT EXISTS google_name VARCHAR(255),
    ADD COLUMN IF NOT EXISTS google_address VARCHAR(500),
    ADD COLUMN IF NOT EXISTS google_phone VARCHAR(50),
    ADD COLUMN IF NOT EXISTS google_website VARCHAR(500),
    ADD COLUMN IF NOT EXISTS google_types TEXT[],
    ADD COLUMN IF NOT EXISTS google_maps_url VARCHAR(500);

-- Catalog descriptions for the new columns
COMMENT ON COLUMN company_website_analysis.google_name IS 'Business name from Google Places API';
COMMENT ON COLUMN company_website_analysis.google_address IS 'Formatted address from Google Places API';
COMMENT ON COLUMN company_website_analysis.google_phone IS 'Phone number from Google Places API';
COMMENT ON COLUMN company_website_analysis.google_website IS 'Website URL from Google Places API';
COMMENT ON COLUMN company_website_analysis.google_types IS 'Business types/categories from Google Places API';
COMMENT ON COLUMN company_website_analysis.google_maps_url IS 'Google Maps URL for the business';

View File

@ -0,0 +1,173 @@
#!/usr/bin/env python3
"""
Import CEIDG data (PKD, owner, start date) to database.
Reads cached CEIDG JSON files and updates company records with:
- PKD code and description
- Business start date
- Owner name (for JDG without KRS)
Usage:
python scripts/import_ceidg_to_db.py # Dry run
python scripts/import_ceidg_to_db.py --apply # Apply changes
python scripts/import_ceidg_to_db.py --nip 5881571773 # Single company
"""
import os
import sys
import json
import argparse
from pathlib import Path
from datetime import datetime
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
# Load environment
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')
from database import SessionLocal, Company
def find_latest_ceidg_json() -> "Path | None":
    """Return the most recently modified CEIDG JSON file, or None.

    Searches ``data/ceidg_json`` under the parent of this script's directory
    for files matching ``ceidg_data_*.json``.

    Returns:
        The matching file with the newest modification time, or None when the
        directory does not exist or holds no matching file.
    """
    json_dir = Path(__file__).parent.parent / "data" / "ceidg_json"
    if not json_dir.exists():
        return None
    # max() with a default picks the newest file without sorting the whole
    # list and covers the "no files" case in the same expression.
    return max(
        json_dir.glob("ceidg_data_*.json"),
        key=lambda f: f.stat().st_mtime,
        default=None,
    )
def load_ceidg_data(json_path: Path) -> list:
    """Read the UTF-8 encoded file at ``json_path`` and return its parsed JSON content."""
    raw_text = Path(json_path).read_text(encoding='utf-8')
    return json.loads(raw_text)
def _clean_owner_name(value):
    """Return ``value`` stripped and title-cased, or None when it is missing or blank."""
    if not isinstance(value, str):
        return None
    return value.strip().title() or None


def _parse_start_date(value, nip):
    """Parse a ``YYYY-MM-DD`` string into a date; warn and return None when it is malformed."""
    if not value:
        return None
    try:
        return datetime.strptime(value, '%Y-%m-%d').date()
    except (ValueError, TypeError):
        print(f"  WARNING: NIP {nip}: invalid data_rozpoczecia {value!r} - date skipped")
        return None


def import_ceidg_to_db(apply: bool = False, target_nip: str = None):
    """
    Import CEIDG data to database.

    Loads the newest CEIDG JSON file, matches each record to a company by NIP
    and reports (and optionally stores) the PKD code, PKD description, business
    start date and owner name. Only non-empty values from the file are used;
    an empty value never overwrites what the company already has.

    Args:
        apply: If True, actually apply changes. If False, dry run.
        target_nip: If set, only process this NIP.

    Returns:
        None. Progress and a summary are printed to stdout.
    """
    json_path = find_latest_ceidg_json()
    if not json_path:
        print("ERROR: No CEIDG JSON files found in data/ceidg_json/")
        return

    print(f"Loading CEIDG data from: {json_path.name}")
    ceidg_records = load_ceidg_data(json_path)
    print(f"Found {len(ceidg_records)} CEIDG records")

    db = SessionLocal()
    updated = 0
    skipped = 0
    not_found = 0

    try:
        for record in ceidg_records:
            nip = record.get('nip')
            if not nip:
                continue

            # Filter by target NIP if specified
            if target_nip and nip != target_nip:
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == nip).first()
            if not company:
                not_found += 1
                if target_nip:
                    print(f"  NOT FOUND: NIP {nip}")
                continue

            # Extract CEIDG data
            pkd_code = record.get('pkd_glowny')
            pkd_description = record.get('pkd_opis')
            business_start_date = _parse_start_date(record.get('data_rozpoczecia'), nip)

            # 'wlasciciel' may be absent, null or (defensively) not an object,
            # and its name fields may themselves be null.
            owner = record.get('wlasciciel')
            if not isinstance(owner, dict):
                owner = {}
            owner_first = _clean_owner_name(owner.get('imie'))
            owner_last = _clean_owner_name(owner.get('nazwisko'))

            # Check if there are any changes
            changes = []
            if pkd_code and company.pkd_code != pkd_code:
                changes.append(f"PKD: {company.pkd_code} → {pkd_code}")
            if pkd_description and company.pkd_description != pkd_description:
                changes.append(f"PKD opis: {'set' if not company.pkd_description else 'update'}")
            if business_start_date and company.business_start_date != business_start_date:
                changes.append(f"Data rozpoczęcia: {company.business_start_date} → {business_start_date}")

            # Compare both name parts so a changed surname alone is detected too.
            first_changed = bool(owner_first) and company.owner_first_name != owner_first
            last_changed = bool(owner_last) and company.owner_last_name != owner_last
            if first_changed or last_changed:
                owner_label = ' '.join(part for part in (owner_first, owner_last) if part)
                changes.append(f"Właściciel: {owner_label}")

            if not changes:
                skipped += 1
                continue

            # Print changes
            print(f"\n{company.name} (NIP: {nip}):")
            for change in changes:
                print(f"{change}")

            if apply:
                # Apply updates
                if pkd_code:
                    company.pkd_code = pkd_code
                if pkd_description:
                    company.pkd_description = pkd_description
                if business_start_date:
                    company.business_start_date = business_start_date
                if owner_first:
                    company.owner_first_name = owner_first
                if owner_last:
                    company.owner_last_name = owner_last

            # Counted in dry-run mode as well: it is the number of companies
            # that have (or would have) changes.
            updated += 1

        if apply:
            db.commit()
            print(f"\n✅ Zaktualizowano {updated} firm")
        else:
            print(f"\n🔍 DRY RUN: {updated} firm do aktualizacji")
            print("   Użyj --apply aby zapisać zmiany")

        print(f"   Pominięto (bez zmian): {skipped}")
        print(f"   Nie znaleziono w bazie: {not_found}")
    finally:
        db.close()
def main():
    """Command-line entry point: parse the flags, print a banner and run the import."""
    banner = "=" * 60

    cli = argparse.ArgumentParser(description='Import CEIDG data to database')
    cli.add_argument('--apply', action='store_true', help='Apply changes (default: dry run)')
    cli.add_argument('--nip', type=str, help='Process only this NIP')
    options = cli.parse_args()

    print(banner)
    print("CEIDG → Database Import")
    print(banner)

    import_ceidg_to_db(apply=options.apply, target_nip=options.nip)
if __name__ == '__main__':
main()

View File

@ -1224,6 +1224,49 @@
</div>
{% endif %}
<!-- Business Start Date Card (from CEIDG); rendered only when the date is set -->
{% if company.business_start_date %}
<div style="background: var(--background); border-radius: var(--radius-lg); padding: var(--spacing-lg); border: 2px solid #059669;">
    <div style="display: flex; align-items: center; gap: var(--spacing-md);">
        <div style="width: 48px; height: 48px; border-radius: 12px; display: flex; align-items: center; justify-content: center; background: #059669; color: white;">
            <svg width="24" height="24" fill="currentColor" viewBox="0 0 24 24">
                <path d="M11.99 2C6.47 2 2 6.48 2 12s4.47 10 9.99 10C17.52 22 22 17.52 22 12S17.52 2 11.99 2zM12 20c-4.42 0-8-3.58-8-8s3.58-8 8-8 8 3.58 8 8-3.58 8-8 8zm.5-13H11v6l5.25 3.15.75-1.23-4.5-2.67z"/>
            </svg>
        </div>
        <div>
            <div style="font-size: var(--font-size-sm); color: var(--text-secondary); text-transform: uppercase; letter-spacing: 0.05em;">Data rozpoczęcia</div>
            <div style="font-size: var(--font-size-xl); font-weight: 700; color: #059669;">{{ company.business_start_date.strftime('%d.%m.%Y') }}</div>
        </div>
    </div>
    <div style="font-size: var(--font-size-sm); color: var(--text-secondary); padding-left: 60px; margin-top: var(--spacing-sm);">
        {# rel="noopener noreferrer" keeps the page opened in the new tab from reaching back via window.opener #}
        Źródło: <a href="https://dane.biznes.gov.pl" target="_blank" rel="noopener noreferrer" style="color: #059669; font-weight: bold;">CEIDG</a>
    </div>
</div>
{% endif %}
<!-- PKD Card (from CEIDG); rendered only when a PKD code is set, description shown when present -->
{% if company.pkd_code %}
<div style="background: var(--background); border-radius: var(--radius-lg); padding: var(--spacing-lg); border: 2px solid #7c3aed;">
    <div style="display: flex; align-items: center; gap: var(--spacing-md);">
        <div style="width: 48px; height: 48px; border-radius: 12px; display: flex; align-items: center; justify-content: center; background: #7c3aed; color: white;">
            <svg width="24" height="24" fill="currentColor" viewBox="0 0 24 24">
                <path d="M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-5 14H7v-2h7v2zm3-4H7v-2h10v2zm0-4H7V7h10v2z"/>
            </svg>
        </div>
        <div style="flex: 1;">
            <div style="font-size: var(--font-size-sm); color: var(--text-secondary); text-transform: uppercase; letter-spacing: 0.05em;">PKD - Główna działalność</div>
            <div style="font-size: var(--font-size-xl); font-weight: 700; color: #7c3aed; font-family: monospace;">{{ company.pkd_code }}</div>
            {% if company.pkd_description %}
            <div style="font-size: var(--font-size-sm); color: var(--text-secondary); margin-top: 4px;">{{ company.pkd_description }}</div>
            {% endif %}
        </div>
    </div>
    <div style="font-size: var(--font-size-sm); color: var(--text-secondary); padding-left: 60px; margin-top: var(--spacing-sm);">
        {# rel="noopener noreferrer" keeps the page opened in the new tab from reaching back via window.opener #}
        Źródło: <a href="https://dane.biznes.gov.pl" target="_blank" rel="noopener noreferrer" style="color: #7c3aed; font-weight: bold;">CEIDG</a>
    </div>
</div>
{% endif %}
<!-- Employees Count Card -->
{% if company.employees_count or company.employee_count_range %}
<div style="background: var(--background); border-radius: var(--radius-lg); padding: var(--spacing-lg); border: 2px solid #06b6d4;">
@ -1269,6 +1312,33 @@
</div>
</div>
<!-- JDG owner section: shown for companies with an owner first name, no KRS number and no `people` entries -->
{% if company.owner_first_name and not company.krs and not people %}
<div class="company-section">
    <h2 class="section-title">Właściciel</h2>
    <div style="display: flex; align-items: center; gap: var(--spacing-lg); padding: var(--spacing-lg); background: var(--background); border-radius: var(--radius-lg); border-left: 4px solid #9b59b6;">
        <div style="width: 64px; height: 64px; border-radius: 50%; background: linear-gradient(135deg, #9b59b6, #3498db); display: flex; align-items: center; justify-content: center; flex-shrink: 0;">
            <svg width="32" height="32" fill="white" viewBox="0 0 24 24">
                <path d="M12 12c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm0 2c-2.67 0-8 1.34-8 4v2h16v-2c0-2.66-5.33-4-8-4z"/>
            </svg>
        </div>
        <div>
            <div style="font-weight: 700; font-size: var(--font-size-xl); color: var(--text-primary);">
                {# owner_last_name is nullable; guard it so a missing surname is not rendered as the text "None" #}
                {{ company.owner_first_name }}{% if company.owner_last_name %} {{ company.owner_last_name }}{% endif %}
            </div>
            <div style="font-size: var(--font-size-base); color: var(--text-secondary); margin-top: 4px;">
                Właściciel jednoosobowej działalności gospodarczej
            </div>
        </div>
    </div>
    <div style="font-size: var(--font-size-sm); color: var(--text-muted); margin-top: var(--spacing-md);">
        {# rel="noopener noreferrer" keeps the page opened in the new tab from reaching back via window.opener #}
        Źródło: <a href="https://dane.biznes.gov.pl" target="_blank" rel="noopener noreferrer" style="color: #9b59b6;">CEIDG</a> &bull; Dane publiczne
    </div>
</div>
{% endif %}
<!-- Zarząd i Wspólnicy Section -->
{% if people %}
<div class="company-section">