#!/usr/bin/env python3
"""
Website NIP Scanner - scans company websites for NIP/REGON numbers.

For companies without a NIP in the database, fetches the company website
(derived from the stored URL or the e-mail domain) and searches the page
text for checksum-valid NIP/REGON numbers.

Usage:
    python scripts/scan_websites_for_nip.py           # Scan all companies
    python scripts/scan_websites_for_nip.py --id 119  # Scan one company
    python scripts/scan_websites_for_nip.py --apply   # Persist found NIPs
"""

import os
import sys
import re
import argparse
import time
import json
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass, field, asdict
from typing import Optional, List, Tuple
import requests
from urllib.parse import urlparse

# Add parent directory to path so project-local modules import from scripts/.
sys.path.insert(0, str(Path(__file__).parent.parent))

from database import SessionLocal, Company

# Output directory for scan results (created eagerly at import time).
RESULTS_DIR = Path(__file__).parent.parent / "data" / "nip_scan_results"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Public e-mail providers: their domains never identify a company website.
SKIP_DOMAINS = {
    'gmail.com', 'wp.pl', 'onet.pl', 'op.pl', 'interia.pl',
    'o2.pl', 'poczta.fm', 'yahoo.com', 'hotmail.com', 'outlook.com'
}

# Per-request timeout in seconds.
REQUEST_TIMEOUT = 15

# Browser-like user agent so sites don't reject the scraper outright.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"


@dataclass
class ScanResult:
    """Result of scanning a single company website for NIP/REGON."""
    company_id: int
    company_name: str
    domain: str
    url_scanned: str
    nip_found: Optional[str] = None
    regon_found: Optional[str] = None
    # All validated NIPs found on the page (a page may list several).
    nips_all: List[str] = field(default_factory=list)
    regons_all: List[str] = field(default_factory=list)
    phone_found: Optional[str] = None
    address_found: Optional[str] = None
    confidence: str = "low"  # low, medium, high
    error: Optional[str] = None
    scanned_at: str = ""

    def __post_init__(self):
        # Defensive: keep accepting explicit None for the list fields
        # (callers may still pass nips_all=None).
        if self.nips_all is None:
            self.nips_all = []
        if self.regons_all is None:
            self.regons_all = []
        if not self.scanned_at:
            self.scanned_at = datetime.now().isoformat()

    def to_dict(self):
        """Return a JSON-serializable dict of this result."""
        return asdict(self)


def extract_domain_from_email(email: str) -> Optional[str]:
    """Extract the domain from an e-mail address.

    Returns None for missing/malformed addresses and for public e-mail
    providers (SKIP_DOMAINS), which don't identify a company website.
    """
    if not email or '@' not in email:
        return None
    domain = email.split('@')[1].lower()
    if domain in SKIP_DOMAINS:
        return None
    return domain


def normalize_nip(nip: str) -> str:
    """Normalize a NIP to bare digits (drops dashes, spaces, etc.)."""
    return re.sub(r'[^0-9]', '', nip)


def validate_nip(nip: str) -> bool:
    """Validate a NIP using its weighted checksum (10 digits)."""
    nip = normalize_nip(nip)
    if len(nip) != 10:
        return False

    weights = [6, 5, 7, 2, 3, 4, 5, 6, 7]
    try:
        checksum = sum(int(nip[i]) * weights[i] for i in range(9)) % 11
        # A checksum of 10 can never equal a single digit, so it fails here.
        return checksum == int(nip[9])
    except (ValueError, IndexError):
        return False


def validate_regon(regon: str) -> bool:
    """Validate a REGON checksum (9- or 14-digit variants)."""
    regon = re.sub(r'[^0-9]', '', regon)

    if len(regon) == 9:
        weights = [8, 9, 2, 3, 4, 5, 6, 7]
        checksum = sum(int(regon[i]) * weights[i] for i in range(8)) % 11
        if checksum == 10:
            checksum = 0
        return checksum == int(regon[8])
    elif len(regon) == 14:
        # A 14-digit REGON embeds a 9-digit one: validate that prefix first.
        weights9 = [8, 9, 2, 3, 4, 5, 6, 7]
        checksum9 = sum(int(regon[i]) * weights9[i] for i in range(8)) % 11
        if checksum9 == 10:
            checksum9 = 0
        if checksum9 != int(regon[8]):
            return False

        # Then validate the full 14-digit checksum.
        weights14 = [2, 4, 8, 5, 0, 9, 7, 3, 6, 1, 2, 4, 8]
        checksum14 = sum(int(regon[i]) * weights14[i] for i in range(13)) % 11
        if checksum14 == 10:
            checksum14 = 0
        return checksum14 == int(regon[13])

    return False


def find_nips_in_text(text: str) -> List[str]:
    """Find all checksum-valid, de-duplicated NIPs in *text*."""
    # Patterns for NIP
    patterns = [
        r'NIP[:\s]*(\d{3}[-\s]?\d{3}[-\s]?\d{2}[-\s]?\d{2})',  # NIP: 123-456-78-90
        r'NIP[:\s]*(\d{10})',  # NIP: 1234567890
        r'numer\s+identyfikacji\s+podatkowej[:\s]*(\d{10})',
    ]

    nips = []
    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        for match in matches:
            nip = normalize_nip(match)
            if validate_nip(nip) and nip not in nips:
                nips.append(nip)

    return nips


def find_regons_in_text(text: str) -> List[str]:
    """Find all checksum-valid, de-duplicated REGONs in *text*."""
    patterns = [
        r'REGON[:\s]*(\d{9,14})',
        r'rejestr\s+gospodarczy[:\s]*(\d{9,14})',
    ]

    regons = []
    for pattern in patterns:
        matches = re.findall(pattern, text, re.IGNORECASE)
        for match in matches:
            regon = re.sub(r'[^0-9]', '', match)
            if validate_regon(regon) and regon not in regons:
                regons.append(regon)

    return regons


def fetch_website(url: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Fetch a web page.

    Returns:
        (content, error) - page text on success, or a short error string.
    """
    headers = {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'pl-PL,pl;q=0.9,en;q=0.8',
    }

    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, allow_redirects=True)
        response.raise_for_status()
        return response.text, None
    except requests.exceptions.Timeout:
        return None, "Timeout"
    except requests.exceptions.ConnectionError:
        return None, "Connection error"
    except requests.exceptions.HTTPError as e:
        return None, f"HTTP {e.response.status_code}"
    except Exception as e:
        return None, str(e)


def scan_company_website(company: Company) -> ScanResult:
    """
    Scan a company's website for NIP/REGON numbers.

    Tries several URL variants (root, /kontakt, /o-nas, ...) and stops at
    the first page that yields a valid NIP or REGON.
    """
    # Derive the domain from the stored website URL, else the e-mail domain.
    domain = None
    if company.website:
        parsed = urlparse(company.website if company.website.startswith('http') else f'https://{company.website}')
        domain = parsed.netloc or parsed.path.split('/')[0]
    elif company.email:
        domain = extract_domain_from_email(company.email)

    if not domain:
        return ScanResult(
            company_id=company.id,
            company_name=company.name,
            domain="",
            url_scanned="",
            error="No domain available"
        )

    # Strip a leading "www." only (a blanket replace would mangle hosts
    # that merely contain the substring, e.g. "mywww.example.com").
    domain = domain.lower()
    if domain.startswith('www.'):
        domain = domain[len('www.'):]

    # Try different URL variants
    urls_to_try = [
        f"https://{domain}",
        f"https://www.{domain}",
        f"https://{domain}/kontakt",
        f"https://{domain}/o-nas",
        f"https://{domain}/contact",
        f"https://{domain}/about",
    ]

    result = ScanResult(
        company_id=company.id,
        company_name=company.name,
        domain=domain,
        url_scanned=""
    )

    all_nips = []
    all_regons = []
    last_fetch_error = None  # remembered so total fetch failure is reported

    for url in urls_to_try:
        print(f"  Scanning: {url}")
        content, error = fetch_website(url)

        if error:
            last_fetch_error = error
            continue

        result.url_scanned = url

        # Find NIPs and REGONs
        nips = find_nips_in_text(content)
        regons = find_regons_in_text(content)

        all_nips.extend([n for n in nips if n not in all_nips])
        all_regons.extend([r for r in regons if r not in all_regons])

        # Stop at the first page that yields anything.
        if nips or regons:
            print(f"    Found NIP: {nips}, REGON: {regons}")
            break

        time.sleep(0.5)  # Rate limiting

    # Set results
    if all_nips:
        result.nips_all = all_nips
        result.nip_found = all_nips[0]  # Primary NIP
        # Multiple distinct NIPs on one site lower our confidence.
        result.confidence = "high" if len(all_nips) == 1 else "medium"

    if all_regons:
        result.regons_all = all_regons
        result.regon_found = all_regons[0]

    if not all_nips and not all_regons and not result.error:
        if not result.url_scanned and last_fetch_error:
            # No URL variant was reachable at all - report the fetch error
            # instead of implying the site was scanned and had no NIP.
            result.error = f"Fetch failed: {last_fetch_error}"
        else:
            result.error = "NIP/REGON not found on website"
        result.confidence = "low"

    return result


def get_companies_without_nip(db, company_id: Optional[int] = None) -> List[Company]:
    """Return companies missing a NIP that have a usable company domain.

    Companies whose only contact is a public e-mail provider (and no
    website) are filtered out, since there is nothing to scan.
    """
    query = db.query(Company).filter(
        Company.nip.is_(None) | (Company.nip == '')
    )

    if company_id:
        query = query.filter(Company.id == company_id)

    companies = query.order_by(Company.name).all()

    # Filter out companies with public email domains
    result = []
    for c in companies:
        domain = extract_domain_from_email(c.email) if c.email else None
        if domain or c.website:
            result.append(c)

    return result


def main():
    """CLI entry point: scan companies, print a summary, save JSON results.

    With --apply, high/medium-confidence NIPs (and any REGON) are written
    back to the database, one commit per company.
    """
    parser = argparse.ArgumentParser(description="Scan websites for NIP/REGON")
    parser.add_argument('--id', type=int, help="Scan specific company ID")
    parser.add_argument('--apply', action='store_true', help="Apply found NIPs to database")
    parser.add_argument('--output', type=str, help="Output JSON file path")
    args = parser.parse_args()

    db = SessionLocal()

    try:
        companies = get_companies_without_nip(db, args.id)
        print(f"\n=== Skanowanie {len(companies)} firm bez NIP ===\n")

        results = []
        found_count = 0

        for i, company in enumerate(companies, 1):
            print(f"[{i}/{len(companies)}] {company.name}")

            result = scan_company_website(company)
            results.append(result)

            if result.nip_found:
                found_count += 1
                print(f"  ✓ NIP: {result.nip_found} (confidence: {result.confidence})")

                if args.apply and result.confidence in ('high', 'medium'):
                    company.nip = result.nip_found
                    if result.regon_found and not company.regon:
                        company.regon = result.regon_found
                    db.commit()
                    print(f"  → Zapisano do bazy")
            elif result.error:
                print(f"  ✗ {result.error}")

            time.sleep(1)  # Rate limiting between companies

        # Save results to JSON
        output_file = args.output or (RESULTS_DIR / f"scan_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump([r.to_dict() for r in results], f, ensure_ascii=False, indent=2)

        print(f"\n=== Podsumowanie ===")
        print(f"Przeskanowano: {len(companies)} firm")
        print(f"Znaleziono NIP: {found_count}")
        print(f"Wyniki zapisane: {output_file}")

        if found_count > 0 and not args.apply:
            print(f"\nUżyj --apply aby zapisać znalezione NIP do bazy")

    finally:
        db.close()


if __name__ == "__main__":
    main()