# nordabiz/utils/history_formatter.py

"""
Founding History Formatter
==========================

Converts raw founding_history text (with emoji section markers)
into structured HTML cards. Handles three formats:
1. Emoji-sectioned text (from KRS/AI enrichment)
2. Plain text with newlines
3. HTML (from Quill editor) - passed through unchanged
"""

import re
from markupsafe import Markup, escape


# Section markers: emoji → (css_class, gradient_start_color, gradient_end_color).
# The two colors are interpolated into the section icon's CSS linear-gradient.
SECTION_MAP = {
    '🏢': ('section-board', '#1e3050', '#2E4872'),
    '👥': ('section-shareholders', '#7c3aed', '#6d28d9'),
    '📋': ('section-registry', '#0369a1', '#0284c7'),
    '📊': ('section-finance', '#059669', '#10b981'),
    '📝': ('section-profile', '#d97706', '#f59e0b'),
}

# Section header line: a SECTION_MAP emoji at the very start (group 1),
# optional whitespace, then the non-empty title text (group 2).
EMOJI_PATTERN = re.compile(
    r'^({})\s*(.+)$'.format('|'.join(map(re.escape, SECTION_MAP)))
)


def format_founding_history(text):
    """Render a founding_history value as HTML for templates.

    Dispatches on the shape of the (stripped) input:

    * falsy input            -> empty string
    * contains <p>/<div>/<br> -> treated as ready-made HTML and returned as-is
    * contains a section emoji from SECTION_MAP -> card-based section layout
    * anything else          -> plain-text formatting

    Returns a Markup object for every non-empty input so Jinja2 does not
    escape the generated tags again.

    NOTE(review): the HTML branch returns the input unescaped; this assumes
    the stored HTML was sanitized before it was saved — confirm upstream.
    """
    if not text:
        return ''

    stripped = text.strip()

    # Editor-produced HTML is recognised by a few literal tags and passed through.
    html_markers = ('<p>', '<div>', '<br>')
    if any(marker in stripped for marker in html_markers):
        return Markup(stripped)

    # Any section emoji anywhere in the text selects the sectioned layout.
    if any(emoji in stripped for emoji in SECTION_MAP):
        return Markup(_format_sectioned_text(stripped))

    # Otherwise it is plain text (newlines and optional bullet points).
    return Markup(_format_plain_text(stripped))


def _format_plain_text(text):
    """Format plain text with newlines and bullet points.

    The text is HTML-escaped first. If at least one line starts with the
    bullet marker ("• "), the text is rendered line by line: consecutive
    bullet lines become a single <ul>, other non-empty lines become <p>
    elements, and a blank line ends the current list. If no line starts
    with a bullet, newlines are simply replaced with <br>.

    A "• " that appears in the middle of a line (for example as a
    separator) is ordinary text and does not start a list item.

    Args:
        text: Raw, unescaped plain text.

    Returns:
        A plain str containing HTML markup.
    """
    bullet = '• '
    escaped = str(escape(text))
    raw_lines = escaped.split('\n')

    # Only line-leading bullets count; escaping never alters the marker itself.
    if not any(raw.lstrip().startswith(bullet) for raw in raw_lines):
        return escaped.replace('\n', '<br>')

    formatted = []
    in_list = False
    for raw in raw_lines:
        line = raw.strip()
        is_item = raw.lstrip().startswith(bullet)

        # Anything that is not a bullet (including a blank line) ends the list.
        if in_list and not is_item:
            formatted.append('</ul>')
            in_list = False
        if not line:
            continue

        if is_item:
            if not in_list:
                formatted.append('<ul style="margin: 0.5rem 0; padding-left: 1.2rem;">')
                in_list = True
            # Drop the marker; keep the item text exactly as written after it.
            item = raw.lstrip()[len(bullet):].rstrip()
            formatted.append(f'<li style="margin-bottom: 4px;">{item}</li>')
        else:
            formatted.append(f'<p style="margin: 0.25rem 0;">{line}</p>')

    if in_list:
        formatted.append('</ul>')
    return '\n'.join(formatted)


def _format_sectioned_text(text):
    """Parse emoji-sectioned text into card-based HTML.

    A line that starts with one of the SECTION_MAP emojis opens a new
    section: the rest of that line, minus trailing colons, is the title,
    and the following non-blank lines form the section body. Lines that
    appear before the first section header are kept and rendered as a
    leading card without a header, so no input text is dropped.

    Inside a card, bullet lines ("• ...") are rendered first as one list
    (with known keys highlighted via _highlight_kv), followed by the
    remaining lines as paragraphs. All text is HTML-escaped.

    Args:
        text: Raw text expected to contain section emoji markers.

    Returns:
        A str of HTML. If no line starts with a section emoji, the result
        of _format_plain_text(text) is returned instead.
    """
    preamble = []   # lines seen before the first section header
    sections = []   # (emoji, title, body_lines) in input order

    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        match = EMOJI_PATTERN.match(line)
        if match:
            # Title is shown without its trailing colon(s).
            sections.append((match.group(1), match.group(2).rstrip(':'), []))
        elif sections:
            sections[-1][2].append(line)
        else:
            preamble.append(line)

    if not sections:
        # Emoji occurred only mid-line; there is nothing to split on.
        return _format_plain_text(text)

    html_parts = ['<div class="history-sections">']

    if preamble:
        html_parts.append('<div class="history-section section-default">')
        html_parts.extend(_render_section_body(preamble))
        html_parts.append('</div>')

    default_style = ('section-default', '#6b7280', '#9ca3af')
    for emoji, title, lines in sections:
        css_class, color1, color2 = SECTION_MAP.get(emoji, default_style)

        html_parts.append(f'<div class="history-section {css_class}">')
        html_parts.append(
            f'<div class="history-section-header">'
            f'<span class="history-section-icon" style="background: linear-gradient(135deg, {color1}, {color2});">{emoji}</span>'
            f'<span class="history-section-title">{escape(title)}</span>'
            f'</div>'
        )
        html_parts.extend(_render_section_body(lines))
        html_parts.append('</div>')

    html_parts.append('</div>')
    return '\n'.join(html_parts)


def _render_section_body(lines):
    """Render body lines: all bullets as one <ul>, then the rest as <p> elements."""
    parts = []

    bullets = [line for line in lines if line.startswith('• ')]
    if bullets:
        parts.append('<ul class="history-list">')
        for bullet in bullets:
            # Strip the "• " marker, escape, then bold known "Key:" prefixes.
            content = _highlight_kv(str(escape(bullet[2:])))
            parts.append(f'<li>{content}</li>')
        parts.append('</ul>')

    parts.extend(
        f'<p class="history-text">{escape(line)}</p>'
        for line in lines
        if not line.startswith('• ')
    )
    return parts


def _highlight_kv(text):
    """Highlight key-value pairs like 'KRS: 0000328525' with bold keys."""
    # Match patterns like "Key: value" but only for known keys
    known_keys = [
        'KRS', 'NIP', 'REGON', 'EBITDA', 'EBIT', 'Data rejestracji',
        'Kapitał zakładowy', 'Siedziba', 'Reprezentacja',
        'Wiarygodność płatnicza', 'Działalność'
    ]
    for key in known_keys:
        pattern = re.compile(rf'({re.escape(key)}:\s*)')
        text = pattern.sub(rf'<strong>\1</strong>', text)
    return text


def register_history_filter(app):
    """Expose format_founding_history to templates as the `format_history` filter.

    Args:
        app: Application object whose `jinja_env.filters` mapping receives
            the filter.
    """
    jinja_filters = app.jinja_env.filters
    jinja_filters['format_history'] = format_founding_history