# nordabiz/utils/helpers.py

"""
Helper Functions
================

Common utility functions used across blueprints.
"""

import re
import logging

import bleach

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Whitelists for rich-text content (announcements, events, proceedings).
# Tags that may appear in sanitized HTML:
_ALLOWED_TAGS = [
    'p', 'br',
    'strong', 'em', 'b', 'i',
    'a',
    'ul', 'ol', 'li',
    'h3', 'h4',
    'blockquote',
]
# Attributes permitted per tag; only anchors may carry any.
_ALLOWED_ATTRS = {
    'a': ['href', 'target', 'rel'],
}


def sanitize_html(content):
    """
    Clean rich-text HTML so it can be stored without enabling stored XSS.

    Only the tags in ``_ALLOWED_TAGS`` and the attributes in
    ``_ALLOWED_ATTRS`` are kept; ``bleach.clean`` is called with
    ``strip=True``.

    Args:
        content: HTML string to sanitize

    Returns:
        Sanitized HTML string; falsy input (``None``, ``""``) is returned as-is
    """
    if content:
        return bleach.clean(
            content,
            tags=_ALLOWED_TAGS,
            attributes=_ALLOWED_ATTRS,
            strip=True,
        )
    # Nothing to clean: hand the falsy value back untouched.
    return content


def sanitize_input(text, max_length=1000):
    """
    Sanitize user input: drop null bytes, trim whitespace, cap the length.

    Whitespace is stripped *before* the length cap is applied so that leading
    whitespace does not consume the length budget, and trailing whitespace
    exposed by the cut is removed afterwards.

    Args:
        text: Input string to sanitize
        max_length: Maximum allowed length (default 1000). Negative values
            are treated as 0; ``None`` disables the limit.

    Returns:
        Sanitized string (empty string for falsy input)
    """
    if not text:
        return ""

    # Remove null bytes, then surrounding whitespace.
    text = text.replace('\x00', '').strip()

    if max_length is not None:
        # Clamp at 0: a negative slice bound would chop characters off the
        # end instead of limiting the length.
        text = text[:max(max_length, 0)]
        # The cut may land right after inner whitespace.
        text = text.rstrip()

    return text


def validate_email(email):
    """
    Validate email format.

    The check is a simplified shape test (local part, ``@``, domain, and an
    alphabetic top-level domain of at least two letters), not a full
    RFC 5322 parser.

    Args:
        email: Email address to validate

    Returns:
        bool: True if valid, False otherwise (including non-string input,
        empty input, and addresses longer than 255 characters)
    """
    # Non-string values can never be a valid address; report False instead
    # of letting len()/re raise TypeError.
    if not isinstance(email, str):
        return False

    if not email or len(email) > 255:
        return False

    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch rather than match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    return re.fullmatch(pattern, email) is not None


def validate_password(password):
    """
    Validate password strength.

    Requirements:
    - Minimum 8 characters
    - At least one uppercase letter
    - At least one lowercase letter
    - At least one digit

    Args:
        password: Password to validate

    Returns:
        tuple: (is_valid: bool, message: str)
    """
    if not password or len(password) < 8:
        return False, "Hasło musi mieć minimum 8 znaków"

    if not re.search(r'[A-Z]', password):
        return False, "Hasło musi zawierać przynajmniej jedną wielką literę"

    if not re.search(r'[a-z]', password):
        return False, "Hasło musi zawierać przynajmniej jedną małą literę"

    if not re.search(r'\d', password):
        return False, "Hasło musi zawierać przynajmniej jedną cyfrę"

    return True, "OK"


def ensure_url(url):
    """
    Ensure URL has http:// or https:// scheme.

    Surrounding whitespace is removed first. The scheme check is
    case-insensitive, and a protocol-relative URL ("//host/path") is
    completed with "https:" rather than getting a second pair of slashes.

    Args:
        url: URL string

    Returns:
        URL with https:// prefix if no http(s) scheme present; falsy input
        (None, "") is returned unchanged
    """
    if not url:
        return url

    url = url.strip()
    if not url:
        # Whitespace-only input: there is nothing to prefix.
        return url

    # URL schemes are case-insensitive, so "HTTP://..." already has one.
    if url.lower().startswith(('http://', 'https://')):
        return url

    # Protocol-relative form already carries the "//" separator.
    if url.startswith('//'):
        return f'https:{url}'

    return f'https://{url}'