# nordabiz/scripts/import_membership_fees.py

#!/usr/bin/env python3
"""
Import Membership Fees from Excel
==================================

Imports membership fee data from the Norda Biznes membership fees Excel file.
Matches company names to database records and creates MembershipFee entries.

Usage:
    # From project root on server:
    DATABASE_URL=... python3 scripts/import_membership_fees.py <excel_path> [--year 2026] [--dry-run]

    # All years:
    DATABASE_URL=... python3 scripts/import_membership_fees.py <excel_path> --all
"""

import os
import sys
import argparse
from decimal import Decimal

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import openpyxl
from database import SessionLocal, Company, MembershipFee, MembershipFeeConfig
from sqlalchemy import func


# Sheet name → year mapping.
# Each key is a worksheet name in the Excel workbook and each value is the fee
# year stored on that sheet. Two years (2022 and 2025) have more than one
# sheet; PREFERRED_SHEETS decides which of them is actually imported.
SHEET_YEARS = {
    '2022-przejecie': 2022,
    '2022': 2022,
    '2023': 2023,
    '2024': 2024,
    '2025': 2025,
    '2025 MK': 2025,
    '2026': 2026,
}

# Preferred sheet per year, consulted when several sheets map to the same year.
# The sheet named here replaces any other sheet selected for that year; for a
# year that is not listed, the first matching sheet in SHEET_YEARS order is kept.
PREFERRED_SHEETS = {
    2022: '2022',       # Use main 2022, not 2022-przejecie
    2025: '2025 MK',    # Use Magda's updated version
}

# Manual name mapping: Excel name → Company ID (for names that don't auto-match).
# The importer looks the stripped Excel cell text up here verbatim (the lookup
# is case-sensitive) before trying any automatic matching. Several spellings
# may point at the same company ID.
MANUAL_NAME_MAP = {
    'CRIS TAP': 27,
    'EURA TECH': 34,
    'HEBEL, MASIAK': 80,
    'HILL OB.': 35,
    'ISSRFID': 136,
    'Informatyk 1 Mateusz Kurpet': 112,
    'KANCELARIA -ŁUKASZ GILEWICZ': 81,
    'KBS': 42,
    'Konkol Piekarnia': 132,
    'LITWIC & LITWIC': 47,
    'MIZAK KANCELARIA': 95,
    'NOWAK CHŁOD.': 26,
    'PODRÓRZE I MY': 98,
    'RADCA SKWARŁO': 40,
    'ROUNDTWO': 60,
    'TK CHOPIN': 25,
    'WTBS': 135,
    'Your Welcome/': 118,
    'ORLEX DESIGNE': 96,
    'ORLEX INVEST': 96,
    'PODRÓRZE MY': 98,
    'CUKIERNIA JACEK PLACEK': 132,
    'AGAT': 13,
    'GRULA': 104,
    'KORPOR. BUDOWL KBMS': 43,
    'PERFEKTA': 23,
    'TED': 53,
    'U WITKA': 77,
    'WIENIAWA': 36,
    'PZU': 100,
}


def normalize_name(name):
    """Normalize a company name for matching.

    The name is upper-cased, the legal-form markers ``SP. Z O.O.`` and
    ``S.A.`` are removed, and every run of whitespace is collapsed to a
    single space.

    Args:
        name: Raw company name; any falsy value yields an empty string.
            Non-string values are converted with ``str()``.

    Returns:
        The normalized, upper-case name without leading/trailing whitespace.
    """
    if not name:
        return ''

    normalized = str(name).upper()
    # The text is already upper-case here, so only upper-case markers can match.
    for legal_form in ('SP. Z O.O.', 'S.A.'):
        normalized = normalized.replace(legal_form, '')

    # split()/join collapses runs of any length; a single replace('  ', ' ')
    # pass would leave double spaces behind for three or more blanks.
    return ' '.join(normalized.split())


def find_company(db, name):
    """Find a company by name using progressively looser matching.

    Three case-insensitive lookups are tried in order and the first hit wins:

    1. the stored company name equals ``name``;
    2. the stored company name contains ``name``;
    3. ``name`` contains the stored company name ("reverse contains").

    Args:
        db: SQLAlchemy session used to query ``Company``.
        name: Company name as found in the Excel file.

    Returns:
        The first matching ``Company`` or ``None`` when nothing matches or
        ``name`` is empty / whitespace only.
    """
    if not name:
        return None

    name_upper = str(name).strip().upper()
    if not name_upper:
        # A blank needle would turn the "contains" lookup into "match anything".
        return None

    db_name = func.upper(Company.name)

    # Exact match
    company = db.query(Company).filter(db_name == name_upper).first()
    if company:
        return company

    # Contains match; autoescape keeps '%' and '_' in the Excel name literal.
    company = db.query(Company).filter(
        db_name.contains(name_upper, autoescape=True)
    ).first()
    if company:
        return company

    # Reverse contains: the Excel name must match '%<stored name>%', so the
    # stored name is the LIKE pattern on the right-hand side. Rows with an
    # empty (or NULL) name are excluded because '%%' would match every input.
    return db.query(Company).filter(
        Company.name != '',
        func.upper(name_upper).like(func.concat('%', db_name, '%')),
    ).first()


def _to_decimal(value):
    """Convert a cell value to a finite ``Decimal``; return ``None`` otherwise."""
    from decimal import InvalidOperation

    try:
        number = Decimal(str(value))
    except InvalidOperation:
        # Decimal signals unparsable text with InvalidOperation, which is
        # neither a ValueError nor a TypeError.
        return None
    # 'NaN' / 'Infinity' parse successfully but are useless as money amounts.
    return number if number.is_finite() else None


def _cell_value(row, col_idx):
    """Return ``row[col_idx]``, or ``None`` if the column is unknown or out of range."""
    if col_idx is None or col_idx >= len(row):
        return None
    return row[col_idx]


def _detect_columns(header_cells):
    """Map the known header captions to 0-based column indexes.

    The branch order matters: a caption is assigned to the first rule it
    matches, so the rules for columns that are not imported are kept in place.
    """
    columns = {
        'name': None,
        'month_start': None,
        'monthly_rate': None,
        'notes': None,
        'nota': None,
    }
    for cell in header_cells:
        caption = str(cell.value or '').strip()
        lowered = caption.lower()
        idx = cell.column - 1  # openpyxl columns are 1-based

        if caption == 'Nazwa firmy':
            columns['name'] = idx
        elif caption == 'I' and columns['month_start'] is None:
            # Roman numeral "I" heads the January column; months follow it.
            columns['month_start'] = idx
        elif 'Wpłacono' in caption:
            pass  # total paid: recognised but not imported
        elif 'miesięczna' in lowered:
            columns['monthly_rate'] = idx
        elif 'roczna' in lowered:
            pass  # yearly fee: recognised but not imported
        elif 'Pozostaje' in caption or 'pozostaje' in caption:
            pass  # remaining amount: recognised but not imported
        elif 'Uwagi' in caption:
            columns['notes'] = idx
        elif 'Nota' in caption or 'nota' in caption:
            columns['nota'] = idx
    return columns


def parse_sheet(ws, year):
    """Parse a single Excel sheet and return a list of company fee records.

    The header row is the first of the top five rows that has a cell
    containing ``'Nazwa firmy'`` (row 1 if none does). When no cell is exactly
    ``'Nazwa firmy'`` the sheet is assumed to have the name in the second
    column and January in the third.

    Args:
        ws: openpyxl worksheet to read.
        year: Fee year stored in every returned record.

    Returns:
        A list of dicts with the keys ``name``, ``year``, ``monthly_payments``
        (``{month: Decimal}`` for months 1-12 with a numeric, non-empty
        cell), ``monthly_rate`` (``Decimal`` or ``None``), ``notes`` and
        ``nota_sent`` (both stripped strings, ``''`` when absent).
    """
    # Detect header row: stop at the first row that mentions 'Nazwa firmy'.
    header_row = 1
    for row in ws.iter_rows(min_row=1, max_row=5, values_only=False):
        marker = next(
            (cell for cell in row if cell.value and 'Nazwa firmy' in str(cell.value)),
            None,
        )
        if marker is not None:
            header_row = marker.row
            break

    columns = _detect_columns(ws[header_row])
    name_col = columns['name']
    month_start_col = columns['month_start']

    if name_col is None:
        # Fallback: some sheets don't have 'Nazwa firmy' header, name is col 1
        name_col = 1
        month_start_col = 2

    records = []
    for row in ws.iter_rows(min_row=header_row + 1, values_only=True):
        raw_name = _cell_value(row, name_col)
        if not raw_name or str(raw_name).strip() == '':
            continue

        # Monthly payments: twelve consecutive columns starting at January.
        monthly_payments = {}
        if month_start_col is not None:
            for offset in range(12):
                cell = _cell_value(row, month_start_col + offset)
                if cell and str(cell).strip():
                    amount = _to_decimal(cell)
                    # Non-numeric cells (free-text remarks) are skipped on purpose.
                    if amount is not None:
                        monthly_payments[offset + 1] = amount

        rate_cell = _cell_value(row, columns['monthly_rate'])
        notes_cell = _cell_value(row, columns['notes'])
        nota_cell = _cell_value(row, columns['nota'])

        records.append({
            'name': str(raw_name).strip(),
            'year': year,
            'monthly_payments': monthly_payments,
            'monthly_rate': _to_decimal(rate_cell) if rate_cell else None,
            'notes': str(notes_cell).strip() if notes_cell else '',
            'nota_sent': str(nota_cell).strip() if nota_cell else '',
        })

    return records


def _select_sheets(target_year, all_years):
    """Return ``{year: sheet_name}`` for the sheets that should be imported."""
    selected = {}
    for sheet_name, year in SHEET_YEARS.items():
        if target_year and year != target_year:
            continue
        if not all_years and not target_year and year != 2026:
            continue  # Default: only 2026
        # First sheet for a year is kept unless a later one is the preferred sheet.
        if year not in selected or PREFERRED_SHEETS.get(year) == sheet_name:
            selected[year] = sheet_name
    return selected


def _match_company(db, company_map, excel_name):
    """Resolve an Excel company name to a ``Company`` (or ``None``)."""
    # Manual map first, then the exact (upper-cased) name lookup.
    manual_id = MANUAL_NAME_MAP.get(excel_name)
    if manual_id:
        company = db.query(Company).filter_by(id=manual_id).first()
    else:
        company = company_map.get(excel_name.upper())
    if company:
        return company

    # Partial match: one name must be a prefix of the other, and both must be
    # at least 4 characters long (avoid "AMA" matching "ULTRAMARE").
    excel_upper = excel_name.upper().strip()
    if len(excel_upper) < 4:
        return None
    for key, candidate in company_map.items():
        if len(key) >= 4 and (key.startswith(excel_upper) or excel_upper.startswith(key)):
            return candidate
    return None


def import_fees(excel_path, target_year=None, all_years=False, dry_run=False):
    """Import fees from Excel to database.

    For every matched company a ``MembershipFee`` row is created (or updated
    if one already exists) for each month with a positive payment, and
    ``pending`` rows are created for the remaining months from the first
    listed month to December when the monthly rate is known. A global
    ``MembershipFeeConfig`` with the standard rate is added for each imported
    year that does not have one starting on 1 January of that year. A summary
    is printed at the end.

    Args:
        excel_path: Path to the workbook.
        target_year: Import only this year when given.
        all_years: Import every year in ``SHEET_YEARS``; without this flag
            and without ``target_year`` only 2026 is imported.
        dry_run: Count what would be created without writing to the database.

    Raises:
        Exception: Any error raised while importing is re-raised after the
            session has been rolled back and closed.
    """
    from datetime import date

    # Standard monthly rate per year, used for the global fee configuration.
    standard_rates = {2022: 150, 2023: 150, 2024: 150, 2025: 180, 2026: 200}

    wb = openpyxl.load_workbook(excel_path, data_only=True)
    db = SessionLocal()

    total_imported = 0
    total_skipped = 0
    unmatched = []

    try:
        # Build company name → Company mapping (full name and short variant).
        companies = db.query(Company).filter(Company.status.in_(['active', 'inactive'])).all()
        company_map = {}
        for c in companies:
            if not c.name:
                continue  # nothing to match a nameless company against
            full_name = c.name.upper()
            company_map[full_name] = c
            # Short name variant: the part before the legal form.
            short = full_name.split(' SP.')[0].split(' S.A.')[0].strip()
            if short not in company_map:
                company_map[short] = c

        sheets_to_process = _select_sheets(target_year, all_years)

        for year, sheet_name in sorted(sheets_to_process.items()):
            if sheet_name not in wb.sheetnames:
                print(f'  Sheet "{sheet_name}" not found, skipping')
                continue

            records = parse_sheet(wb[sheet_name], year)
            print(f'\n=== {sheet_name} (rok {year}) — {len(records)} firm ===')

            for rec in records:
                company = _match_company(db, company_map, rec['name'])
                if not company:
                    unmatched.append(f"{rec['name']} ({year})")
                    total_skipped += 1
                    continue

                payments = rec['monthly_payments']
                monthly_rate = rec['monthly_rate'] or Decimal('0')
                first_month = min(payments) if payments else None

                for month, amount in payments.items():
                    if amount <= 0:
                        continue

                    # Without a known rate the paid amount counts as the full fee.
                    amount_due = monthly_rate or amount
                    status = 'paid' if amount >= amount_due else 'partial'

                    # Check if already exists
                    existing = db.query(MembershipFee).filter_by(
                        company_id=company.id,
                        fee_year=year,
                        fee_month=month,
                    ).first()

                    if existing:
                        if not dry_run:
                            existing.amount = amount_due
                            existing.amount_paid = amount
                            existing.status = status
                            existing.notes = rec['notes'] or existing.notes
                        continue

                    fee = MembershipFee(
                        company_id=company.id,
                        fee_year=year,
                        fee_month=month,
                        amount=amount_due,
                        amount_paid=amount,
                        status=status,
                        # Notes are stored once, on the first listed month.
                        notes=rec['notes'] if month == first_month else None,
                    )
                    if not dry_run:
                        db.add(fee)
                    total_imported += 1

                # Create pending entries for unpaid months (if monthly rate known)
                if monthly_rate and monthly_rate > 0:
                    # Start at the first listed month, or January if none.
                    start_month = first_month if first_month is not None else 1
                    for month in range(start_month, 13):
                        if month in payments:
                            continue
                        existing = db.query(MembershipFee).filter_by(
                            company_id=company.id, fee_year=year, fee_month=month,
                        ).first()
                        if existing:
                            continue

                        fee = MembershipFee(
                            company_id=company.id,
                            fee_year=year,
                            fee_month=month,
                            amount=monthly_rate,
                            amount_paid=Decimal('0'),
                            status='pending',
                        )
                        if not dry_run:
                            db.add(fee)
                        total_imported += 1

            # Save MembershipFeeConfig for this year
            if not dry_run and year in standard_rates:
                year_start = date(year, 1, 1)
                # Look for a config starting exactly on this year's 1 January:
                # matching any earlier config would stop later years' changed
                # rates (180, 200) from ever being recorded.
                std_config = db.query(MembershipFeeConfig).filter_by(
                    scope='global', company_id=None, category_id=None,
                ).filter(
                    MembershipFeeConfig.valid_from == year_start,
                ).first()

                if not std_config:
                    db.add(MembershipFeeConfig(
                        scope='global',
                        monthly_amount=Decimal(str(standard_rates[year])),
                        valid_from=year_start,
                        notes=f'Składka standardowa {year}',
                    ))

        if not dry_run:
            db.commit()
    except Exception:
        # Leave the database untouched when anything fails mid-import.
        db.rollback()
        raise
    finally:
        db.close()

    print('\n=== PODSUMOWANIE ===')
    print(f'Zaimportowano: {total_imported} rekordów')
    print(f'Pominięto (brak dopasowania): {total_skipped}')
    if unmatched:
        print('\nNiedopasowane firmy:')
        for name in sorted(set(unmatched)):
            print(f'  - {name}')


if __name__ == '__main__':
    # Command-line entry point: read the options, then run the import once.
    cli = argparse.ArgumentParser(description='Import membership fees from Excel')
    cli.add_argument('excel_path', help='Path to Excel file')
    cli.add_argument('--year', type=int, help='Import specific year only')
    cli.add_argument('--all', action='store_true', help='Import all years')
    cli.add_argument('--dry-run', action='store_true', help='Preview without saving')
    options = cli.parse_args()

    import_fees(
        options.excel_path,
        target_year=options.year,
        all_years=options.all,
        dry_run=options.dry_run,
    )