"""
Board Document Upload Service
=============================

Secure file upload handling for Rada Izby (Board Council) documents.
Supports PDF, DOCX, DOC files up to 50MB.

Features:
- File type validation (magic bytes + extension)
- Size limits
- UUID-based filenames for security
- Date-organized storage structure
- Protected storage outside webroot

Author: Maciej Pienczyn, InPi sp. z o.o.
Created: 2026-02-03
"""

import os
import re
import uuid
import logging
from datetime import datetime
from typing import Tuple, Optional

from werkzeug.datastructures import FileStorage

logger = logging.getLogger(__name__)

# Configuration
ALLOWED_EXTENSIONS = {'pdf', 'docx', 'doc'}
ALLOWED_MIME_TYPES = {
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
}
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB

# Storage path - OUTSIDE webroot for security
UPLOAD_BASE_PATH = '/data/board-docs'

# Magic bytes for document validation
DOCUMENT_SIGNATURES = {
    b'%PDF': 'pdf',  # PDF files
    b'PK\x03\x04': 'docx',  # DOCX (ZIP-based Office format)
    b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1': 'doc',  # DOC (OLE Compound Document)
}

# MIME type mapping
MIME_TYPES = {
    'pdf': 'application/pdf',
    'doc': 'application/msword',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
}

# Only short, lowercase ASCII alphanumeric extensions may become part of a
# stored filename; the extension originates from the client-supplied name.
_SAFE_EXTENSION_RE = re.compile(r'[a-z0-9]{1,16}')


def _file_extension(filename: str) -> str:
    """Return the lowercased text after the last dot, or '' if there is no dot."""
    _, dot, ext = filename.rpartition('.')
    return ext.lower() if dot else ''


class DocumentUploadService:
    """Secure file upload service for board documents"""

    @staticmethod
    def validate_file(file: FileStorage) -> Tuple[bool, str]:
        """
        Validate uploaded document file.

        Checks, in order: a filename is present, the extension is allowed,
        the size is non-zero and within MAX_FILE_SIZE, the leading bytes match
        a known document signature, and that signature agrees with the
        extension. The stream position is reset to 0 before returning from
        the size and signature checks.

        Note: the DOCX check only recognises the ZIP local-file signature, so
        any ZIP archive named ``*.docx`` passes this validation.

        Args:
            file: Werkzeug FileStorage object

        Returns:
            Tuple of (is_valid, error_message); error_message is '' on success
        """
        # Check if file exists (a missing filename may be None as well as '')
        if not file or not file.filename:
            return False, 'Nie wybrano pliku'

        # Check extension
        ext = _file_extension(file.filename)
        if ext not in ALLOWED_EXTENSIONS:
            return False, f'Niedozwolony format pliku. Dozwolone: {", ".join(sorted(ALLOWED_EXTENSIONS))}'

        # Check file size by seeking to the end of the stream
        file.seek(0, os.SEEK_END)
        size = file.tell()
        file.seek(0)  # Reset to beginning

        if size > MAX_FILE_SIZE:
            return False, f'Plik jest za duży (max {MAX_FILE_SIZE // 1024 // 1024}MB)'

        if size == 0:
            return False, 'Plik jest pusty'

        # Verify magic bytes (actual file type)
        header = file.read(16)
        file.seek(0)

        detected_type = next(
            (
                file_type
                for signature, file_type in DOCUMENT_SIGNATURES.items()
                if header.startswith(signature)
            ),
            None,
        )
        if detected_type is None:
            return False, 'Plik nie jest prawidłowym dokumentem (PDF, DOCX lub DOC)'

        # The extension must agree with what the content actually is
        if detected_type != ext:
            return False, f'Rozszerzenie pliku ({ext}) nie odpowiada zawartości ({detected_type})'

        return True, ''

    @staticmethod
    def generate_stored_filename(original_filename: str) -> str:
        """
        Generate secure UUID-based filename preserving extension.

        The extension is taken from the client-supplied name, so it is kept
        only when it is a short lowercase ASCII alphanumeric token. Anything
        else (no extension, empty name, or an "extension" containing path
        separators or other characters) is replaced by 'bin' so the result is
        always a bare filename.

        Args:
            original_filename: Original filename from upload

        Returns:
            UUID-based filename with a sanitized extension
        """
        ext = _file_extension(original_filename or '')
        if not _SAFE_EXTENSION_RE.fullmatch(ext):
            ext = 'bin'
        return f"{uuid.uuid4()}.{ext}"

    @staticmethod
    def get_upload_path() -> str:
        """
        Get upload directory path with date-based organization.

        The directory is ``UPLOAD_BASE_PATH/<year>/<month>`` for the current
        local time (``datetime.now()``) and is created if it does not exist.

        Returns:
            Full path to upload directory
        """
        now = datetime.now()
        path = os.path.join(UPLOAD_BASE_PATH, str(now.year), f"{now.month:02d}")
        os.makedirs(path, exist_ok=True)
        return path

    @staticmethod
    def save_file(file: FileStorage) -> Tuple[str, str, int, str]:
        """
        Save document file securely.

        This method does not validate the upload itself; it only stores it
        under a generated name in the current date directory.

        Args:
            file: Werkzeug FileStorage object

        Returns:
            Tuple of (stored_filename, file_path, file_size, mime_type)
        """
        stored_filename = DocumentUploadService.generate_stored_filename(file.filename)
        upload_dir = DocumentUploadService.get_upload_path()
        file_path = os.path.join(upload_dir, stored_filename)

        # Determine mime type from the (sanitized) stored extension
        mime_type = MIME_TYPES.get(_file_extension(stored_filename), 'application/octet-stream')

        # Save file from the start of the stream
        file.seek(0)
        file.save(file_path)
        file_size = os.path.getsize(file_path)

        logger.info("Saved board document: %s (%s bytes)", stored_filename, file_size)
        return stored_filename, file_path, file_size, mime_type

    @staticmethod
    def _is_safe_stored_filename(stored_filename: str) -> bool:
        """Return True when the name is a bare filename with no directory parts."""
        if not stored_filename or stored_filename in ('.', '..'):
            return False
        if '\x00' in stored_filename:
            return False
        return os.path.basename(stored_filename) == stored_filename

    @staticmethod
    def _try_remove(path: str, stored_filename: str) -> Optional[bool]:
        """Remove path; True on success, False on error, None if it is missing."""
        try:
            os.remove(path)
        except FileNotFoundError:
            return None
        except OSError as e:
            logger.error("Failed to delete %s: %s", stored_filename, e)
            return False
        logger.info("Deleted board document: %s", stored_filename)
        return True

    @staticmethod
    def delete_file(stored_filename: str, uploaded_at: Optional[datetime] = None) -> bool:
        """
        Delete document file from storage.

        The date directory derived from ``uploaded_at`` is tried first; if the
        file is not there (or no timestamp is given) every directory under
        UPLOAD_BASE_PATH is searched. Names containing directory components
        are refused so a crafted name cannot remove files outside the storage
        tree.

        Args:
            stored_filename: UUID-based filename
            uploaded_at: Upload timestamp to determine path

        Returns:
            True if deleted, False otherwise
        """
        if not DocumentUploadService._is_safe_stored_filename(stored_filename):
            logger.warning("Refusing to delete unsafe document name: %r", stored_filename)
            return False

        if uploaded_at:
            # Try exact path first
            outcome = DocumentUploadService._try_remove(
                DocumentUploadService.get_file_path(stored_filename, uploaded_at),
                stored_filename,
            )
            if outcome is not None:
                return outcome

        # Search in all date directories
        for root, _dirs, files in os.walk(UPLOAD_BASE_PATH):
            if stored_filename not in files:
                continue
            outcome = DocumentUploadService._try_remove(
                os.path.join(root, stored_filename), stored_filename
            )
            if outcome is not None:
                return outcome
            # The file vanished between listing and removal; keep searching.

        logger.warning("Document not found for deletion: %s", stored_filename)
        return False

    @staticmethod
    def get_file_path(stored_filename: str, uploaded_at: datetime) -> str:
        """
        Get full path to the stored file.

        The path is built purely from the arguments; nothing is checked on
        disk and ``stored_filename`` is not validated here, so callers should
        pass only names produced by ``generate_stored_filename``.

        NOTE(review): the directory is derived from ``uploaded_at``, while
        ``get_upload_path`` uses local ``datetime.now()`` - confirm the stored
        timestamp uses the same clock, otherwise lookups around a month
        boundary may miss the file.

        Args:
            stored_filename: UUID-based filename
            uploaded_at: Upload timestamp

        Returns:
            Full path to the file
        """
        return os.path.join(
            UPLOAD_BASE_PATH,
            str(uploaded_at.year),
            f"{uploaded_at.month:02d}",
            stored_filename
        )

    @staticmethod
    def file_exists(stored_filename: str, uploaded_at: datetime) -> bool:
        """
        Check if file exists in storage.

        Args:
            stored_filename: UUID-based filename
            uploaded_at: Upload timestamp

        Returns:
            True if file exists, False otherwise
        """
        path = DocumentUploadService.get_file_path(stored_filename, uploaded_at)
        return os.path.exists(path)