nordabiz/scripts/import_membership_fees.py
Maciej Pienczyn e97f586311
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
fix(import): add manual name mappings for Podróże i My, Cukiernia Konkol
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 15:04:56 +01:00

384 lines
13 KiB
Python

#!/usr/bin/env python3
"""
Import Membership Fees from Excel
==================================
Imports membership fee data from the Norda Biznes membership fees Excel file.
Matches company names to database records and creates MembershipFee entries.
Usage:
# From project root on server:
DATABASE_URL=... python3 scripts/import_membership_fees.py <excel_path> [--year 2026] [--dry-run]
# All years:
DATABASE_URL=... python3 scripts/import_membership_fees.py <excel_path> --all
"""
import os
import sys
import argparse
from decimal import Decimal
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import openpyxl
from database import SessionLocal, Company, MembershipFee, MembershipFeeConfig
from sqlalchemy import func
# Sheet name → year mapping.
# Only sheets listed here are ever imported. Two sheets may map to the same
# year (2022 and 2025 do); PREFERRED_SHEETS decides which one is used then.
SHEET_YEARS = {
    '2022-przejecie': 2022,
    '2022': 2022,
    '2023': 2023,
    '2024': 2024,
    '2025': 2025,
    '2025 MK': 2025,
    '2026': 2026,
}
# Preferred sheet per year, used when SHEET_YEARS lists several sheets for
# the same year: the sheet named here replaces any sheet selected earlier for
# that year. Without an entry, the first sheet listed in SHEET_YEARS is kept.
PREFERRED_SHEETS = {
    2022: '2022',  # Use main 2022, not 2022-przejecie
    2025: '2025 MK',  # Use Magda's updated version
}
# Manual name mapping: Excel name → Company ID (for names that don't auto-match).
# Keys are looked up exactly (case-sensitive) against the stripped Excel cell
# text, so they must reproduce the spreadsheet spelling verbatim, including
# any typos. Several spellings may point at the same company ID.
MANUAL_NAME_MAP = {
    'CRIS TAP': 27,
    'EURA TECH': 34,
    'HEBEL, MASIAK': 80,
    'HILL OB.': 35,
    'ISSRFID': 136,
    'Informatyk 1 Mateusz Kurpet': 112,
    'KANCELARIA -ŁUKASZ GILEWICZ': 81,
    'KBS': 42,
    'Konkol Piekarnia': 132,
    'LITWIC & LITWIC': 47,
    'MIZAK KANCELARIA': 95,
    'NOWAK CHŁOD.': 26,
    'PODRÓRZE I MY': 98,
    'RADCA SKWARŁO': 40,
    'ROUNDTWO': 60,
    'TK CHOPIN': 25,
    'WTBS': 135,
    'Your Welcome/': 118,
    'ORLEX DESIGNE': 96,
    'ORLEX INVEST': 96,
    'PODRÓRZE MY': 98,
    'CUKIERNIA JACEK PLACEK': 132,
    'AGAT': 13,
    'GRULA': 104,
    'KORPOR. BUDOWL KBMS': 43,
    'PERFEKTA': 23,
    'TED': 53,
    'U WITKA': 77,
    'WIENIAWA': 36,
    'PZU': 100,
}
def normalize_name(name):
    """Normalize a company name for matching.

    The name is stripped, upper-cased, has the legal-form suffixes
    ``SP. Z O.O.`` and ``S.A.`` removed, and has every run of whitespace
    collapsed to a single space.

    Args:
        name: Raw company name. Non-string values are converted with
            ``str()``; falsy values (``None``, ``''``) are treated as missing.

    Returns:
        The normalized name, or ``''`` when ``name`` is falsy.
    """
    if not name:
        return ''
    normalized = str(name).strip().upper()
    # The text is already upper-cased, so only the upper-case spelling of
    # each suffix can occur here.
    for suffix in ('SP. Z O.O.', 'S.A.'):
        normalized = normalized.replace(suffix, '')
    # Removing a suffix from the middle of a name leaves a double space;
    # split/join collapses those and trims both ends.
    return ' '.join(normalized.split())
def find_company(db, name):
    """Find a company by name using progressively looser matching.

    Three case-insensitive lookups are tried in order and the first hit wins:

    1. the company name equals ``name``;
    2. the company name contains ``name``;
    3. ``name`` contains the company name ("reverse contains").

    Args:
        db: SQLAlchemy session used to query ``Company``.
        name: Company name to look for.

    Returns:
        The first matching ``Company``, or ``None`` when ``name`` is empty or
        whitespace-only, or when nothing matches.
    """
    if not name:
        return None
    name_upper = str(name).strip().upper()
    if not name_upper:
        # An empty search term would turn the "contains" lookup into
        # LIKE '%%', which matches an arbitrary company.
        return None

    db_name_upper = func.upper(Company.name)

    # Exact match
    company = db.query(Company).filter(db_name_upper == name_upper).first()
    if company:
        return company

    # Contains match: the DB name contains the searched name
    company = db.query(Company).filter(db_name_upper.contains(name_upper)).first()
    if company:
        return company

    # Reverse contains: the searched name contains the DB name, i.e.
    #   UPPER(:name) LIKE CONCAT('%', UPPER(company.name), '%')
    # The searched name must be the LIKE subject and the DB name the pattern.
    # func.upper() around the already upper-cased value is only there to turn
    # the Python string into a SQL expression that supports .like().
    return db.query(Company).filter(
        Company.name != '',  # an empty DB name would match every input
        func.upper(name_upper).like(func.concat('%', db_name_upper, '%')),
    ).first()
def _parse_decimal(value):
    """Convert a cell value to a finite Decimal, or return None if it is not numeric."""
    try:
        number = Decimal(str(value))
    except (ArithmeticError, ValueError, TypeError):
        # Decimal() reports malformed text with decimal.InvalidOperation,
        # which derives from ArithmeticError, not ValueError.
        return None
    # 'NaN' / 'Infinity' parse successfully but cannot be compared or stored
    # as an amount.
    return number if number.is_finite() else None


def _cell_at(row, index):
    """Return row[index], or None when the column is unknown or beyond the row."""
    if index is None or index >= len(row):
        return None
    return row[index]


def parse_sheet(ws, year):
    """Parse a single Excel sheet and return a list of company fee records.

    The header row is searched for in the first five rows (a row with a cell
    containing 'Nazwa firmy'; if several rows qualify the last one is used,
    and row 1 is assumed when none does). Column positions are then detected
    from the header texts. When no 'Nazwa firmy' column is found, the name is
    read from the second column and the twelve month columns are assumed to
    start at the third.

    Args:
        ws: Worksheet-like object providing ``iter_rows(min_row, max_row,
            values_only)`` and ``ws[row_number]`` returning cells with
            ``value``, ``row`` and ``column`` attributes.
        year: Fee year stored in every returned record.

    Returns:
        A list of dicts with the keys ``name`` (stripped text), ``year``,
        ``monthly_payments`` (``{month 1-12: Decimal}`` for non-empty numeric
        cells), ``monthly_rate`` (``Decimal`` or ``None``), ``notes`` and
        ``nota_sent`` (stripped text, ``''`` when absent). Rows without a
        company name are skipped.
    """
    records = []

    # Detect header row (a cell containing 'Nazwa firmy')
    header_row = 1
    for row in ws.iter_rows(min_row=1, max_row=5, values_only=False):
        for cell in row:
            if cell.value and 'Nazwa firmy' in str(cell.value):
                header_row = cell.row
                break

    # Detect column layout
    name_col = None
    month_start_col = None
    monthly_rate_col = None
    notes_col = None
    nota_col = None
    for cell in ws[header_row]:
        val = str(cell.value or '').strip()
        # cell.column - 1 is used below as an index into the value tuples
        # produced by iter_rows(values_only=True).
        col_idx = cell.column - 1
        if val == 'Nazwa firmy':
            name_col = col_idx
        elif val == 'I' and month_start_col is None:
            month_start_col = col_idx
        elif 'Wpłacono' in val:
            # Recognised but not imported; the branch stays so that such a
            # header cannot be claimed by one of the later checks.
            pass
        elif 'miesięczna' in val.lower():
            monthly_rate_col = col_idx
        elif 'roczna' in val.lower():
            pass  # yearly fee column: recognised, not imported
        elif 'Pozostaje' in val or 'pozostaje' in val:
            pass  # remaining amount column: recognised, not imported
        elif 'Uwagi' in val:
            notes_col = col_idx
        elif 'Nota' in val or 'nota' in val:
            nota_col = col_idx

    if name_col is None:
        # Fallback: some sheets don't have 'Nazwa firmy' header, name is col 1
        name_col = 1
        month_start_col = 2

    for row in ws.iter_rows(min_row=header_row + 1, values_only=True):
        raw_name = _cell_at(row, name_col)
        if not raw_name or str(raw_name).strip() == '':
            continue
        name = str(raw_name).strip()

        # Monthly payments: twelve consecutive columns starting at 'I'.
        monthly_payments = {}
        if month_start_col is not None:
            month_cells = row[month_start_col:month_start_col + 12]
            for month, val in enumerate(month_cells, start=1):
                if val and str(val).strip():
                    amount = _parse_decimal(val)
                    if amount is not None:
                        monthly_payments[month] = amount

        # A non-numeric rate cell (free text) yields None instead of
        # aborting the whole import.
        rate_cell = _cell_at(row, monthly_rate_col)
        monthly_rate = _parse_decimal(rate_cell) if rate_cell else None

        notes_cell = _cell_at(row, notes_col)
        notes = str(notes_cell).strip() if notes_cell else ''

        nota_cell = _cell_at(row, nota_col)
        nota_sent = str(nota_cell).strip() if nota_cell else ''

        records.append({
            'name': name,
            'year': year,
            'monthly_payments': monthly_payments,
            'monthly_rate': monthly_rate,
            'notes': notes,
            'nota_sent': nota_sent,
        })
    return records
def _select_sheets(target_year, all_years):
    """Return {year: sheet name} for the sheets that should be imported."""
    selected = {}
    for sheet_name, year in SHEET_YEARS.items():
        if target_year and year != target_year:
            continue
        if not all_years and not target_year and year != 2026:
            continue  # Default: only 2026
        # The first sheet seen for a year is kept unless a later one is the
        # preferred sheet for that year.
        if year not in selected or PREFERRED_SHEETS.get(year) == sheet_name:
            selected[year] = sheet_name
    return selected


def _build_company_map(companies):
    """Index companies by upper-cased full name and by name without legal suffix."""
    company_map = {}
    for company in companies:
        if not company.name:
            # Without a name there is nothing to match against.
            continue
        full = company.name.upper()
        company_map[full] = company
        # Short name variant: text before ' SP.' / ' S.A.'
        short = full.split(' SP.')[0].split(' S.A.')[0].strip()
        if short not in company_map:
            company_map[short] = company
    return company_map


def _match_company(db, company_map, excel_name):
    """Resolve an Excel company name to a Company, or None when unmatched."""
    manual_id = MANUAL_NAME_MAP.get(excel_name)
    if manual_id:
        company = db.query(Company).filter_by(id=manual_id).first()
    else:
        company = company_map.get(excel_name.upper())
    if company:
        return company

    # Partial match: only when one name is a prefix of the other, and both
    # are at least 4 characters long (avoids "AMA" matching "ULTRAMARE").
    excel_upper = excel_name.upper().strip()
    if len(excel_upper) < 4:
        return None
    for key, candidate in company_map.items():
        if len(key) >= 4 and (key.startswith(excel_upper) or excel_upper.startswith(key)):
            return candidate
    return None


def _find_fee(db, company_id, year, month):
    """Return the existing MembershipFee for a company/year/month, if any."""
    return db.query(MembershipFee).filter_by(
        company_id=company_id,
        fee_year=year,
        fee_month=month,
    ).first()


def import_fees(excel_path, target_year=None, all_years=False, dry_run=False):
    """Import fees from Excel to database.

    For every matched company, each month with a positive payment becomes
    (or updates) a MembershipFee with status 'paid' or 'partial'. When the
    monthly rate is known, months from the first paid month (or January)
    through December that have no payment and no existing record get a
    'pending' entry. A global MembershipFeeConfig is added per processed
    year when the existence check below finds none. A summary, including
    unmatched company names, is printed at the end.

    Args:
        excel_path: Path to the membership fees workbook.
        target_year: Import only this year; ``None`` means no year filter.
        all_years: Import every year listed in SHEET_YEARS. When both this
            and ``target_year`` are unset, only 2026 is imported.
        dry_run: When true, nothing is added, modified or committed; the
            matching and counting still run.

    Raises:
        Exception: Any error raised during the import is re-raised after the
            session has been rolled back; the session is always closed.
    """
    from datetime import date

    # Standard monthly rate per year, used to seed MembershipFeeConfig.
    standard_rates = {2022: 150, 2023: 150, 2024: 150, 2025: 180, 2026: 200}

    wb = openpyxl.load_workbook(excel_path, data_only=True)
    db = SessionLocal()
    total_imported = 0
    total_skipped = 0
    unmatched = []
    try:
        # Build company name → Company mapping
        companies = db.query(Company).filter(
            Company.status.in_(['active', 'inactive'])
        ).all()
        company_map = _build_company_map(companies)

        sheets_to_process = _select_sheets(target_year, all_years)

        for year, sheet_name in sorted(sheets_to_process.items()):
            if sheet_name not in wb.sheetnames:
                print(f' Sheet "{sheet_name}" not found, skipping')
                continue
            records = parse_sheet(wb[sheet_name], year)
            print(f'\n=== {sheet_name} (rok {year}) — {len(records)} firm ===')

            for rec in records:
                company = _match_company(db, company_map, rec['name'])
                if not company:
                    unmatched.append(f"{rec['name']} ({year})")
                    total_skipped += 1
                    continue

                payments = rec['monthly_payments']
                monthly_rate = rec['monthly_rate'] or Decimal('0')
                first_paid_month = min(payments) if payments else None

                for month, amount in payments.items():
                    if amount <= 0:
                        continue
                    # Without a known rate, the paid amount is the amount due.
                    amount_due = monthly_rate or amount
                    status = 'paid' if amount >= amount_due else 'partial'

                    existing = _find_fee(db, company.id, year, month)
                    if existing:
                        if not dry_run:
                            existing.amount = amount_due
                            existing.amount_paid = amount
                            existing.status = status
                            existing.notes = rec['notes'] or existing.notes
                        continue

                    fee = MembershipFee(
                        company_id=company.id,
                        fee_year=year,
                        fee_month=month,
                        amount=amount_due,
                        amount_paid=amount,
                        status=status,
                        # The row's note is stored once, on the first paid month.
                        notes=rec['notes'] if month == first_paid_month else None,
                    )
                    if not dry_run:
                        db.add(fee)
                    total_imported += 1

                # Create pending entries for unpaid months (if monthly rate known)
                if monthly_rate and monthly_rate > 0:
                    # Start at the first paid month, or January if nothing was paid
                    start_month = first_paid_month if payments else 1
                    for month in range(start_month, 13):
                        if month in payments:
                            continue
                        if _find_fee(db, company.id, year, month):
                            continue
                        fee = MembershipFee(
                            company_id=company.id,
                            fee_year=year,
                            fee_month=month,
                            amount=monthly_rate,
                            amount_paid=Decimal('0'),
                            status='pending',
                        )
                        if not dry_run:
                            db.add(fee)
                        total_imported += 1

            # Save MembershipFeeConfig for this year
            if not dry_run:
                # NOTE(review): this treats ANY global config that starts on
                # or before 1 January of `year` as covering the year, so once
                # an earlier year's config exists no later year's rate is
                # added. Confirm whether `valid_from == date(year, 1, 1)` was
                # intended.
                std_config = db.query(MembershipFeeConfig).filter_by(
                    scope='global', company_id=None, category_id=None,
                ).filter(
                    MembershipFeeConfig.valid_from <= date(year, 1, 1),
                ).first()
                if not std_config and year in standard_rates:
                    db.add(MembershipFeeConfig(
                        scope='global',
                        monthly_amount=Decimal(str(standard_rates[year])),
                        valid_from=date(year, 1, 1),
                        notes=f'Składka standardowa {year}',
                    ))

        if not dry_run:
            db.commit()
    except Exception:
        # Leave the database untouched when anything fails mid-import.
        db.rollback()
        raise
    finally:
        db.close()

    print('\n=== PODSUMOWANIE ===')
    print(f'Zaimportowano: {total_imported} rekordów')
    print(f'Pominięto (brak dopasowania): {total_skipped}')
    if unmatched:
        print('\nNiedopasowane firmy:')
        for name in sorted(set(unmatched)):
            print(f' - {name}')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments and run the import.
    cli = argparse.ArgumentParser(description='Import membership fees from Excel')
    cli.add_argument('excel_path', help='Path to Excel file')
    cli.add_argument('--year', type=int, help='Import specific year only')
    cli.add_argument('--all', action='store_true', help='Import all years')
    cli.add_argument('--dry-run', action='store_true', help='Preview without saving')
    options = vars(cli.parse_args())
    import_fees(
        options['excel_path'],
        target_year=options['year'],
        all_years=options['all'],
        dry_run=options['dry_run'],
    )