# nordabiz/utils/markdown.py

"""
Simple Markdown Parser for Forum
================================

Converts basic markdown to safe HTML.
Supports: bold, italic, code, links, auto-links, lists, quotes, @mentions
"""

import html
import re
import urllib.parse

from markupsafe import Markup, escape


def _link_display(url):
    """Shorten a URL to a human-friendly label while keeping the full href.

    Google Maps URLs are especially unreadable — a single place link can be
    800 characters of tracking data. Extract the place name (or coordinates)
    and render it as `📍 Name` instead.
    """
    if 'google.' in url and '/maps/' in url:
        m = re.search(r'/maps/place/([^/@?]+)', url)
        if m:
            name = urllib.parse.unquote(m.group(1)).replace('+', ' ').strip()
            if name:
                return f'📍 {name}'
        m = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', url)
        if m:
            return f'📍 Mapa ({m.group(1)}, {m.group(2)})'
        return '📍 Google Maps'
    return url


def _autolink(text):
    """Convert bare URLs to clickable links. Works on escaped text before HTML wrapping."""
    def wrap(m):
        url = m.group(0)
        display = _link_display(url)
        return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'
    return re.sub(r'https?://[^\s<]+', wrap, text)


def parse_forum_markdown(text, current_user_name=None):
    """
    Convert markdown text to safe HTML.

    Supported syntax:
    - **bold** or __bold__
    - *italic* or _italic_
    - `inline code`
    - [link text](url)
    - bare https://... URLs (auto-linked)
    - - list items
    - > quotes
    - @mentions (highlighted)
    """
    if not text:
        return Markup('')

    # Normalize line endings (Windows \r\n -> \n)
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Escape HTML first for security
    text = str(escape(text))

    # Apply inline formatting BEFORE block structure
    # This ensures URLs inside list items get linked

    # Code blocks (``` ... ```)
    text = re.sub(
        r'```(.*?)```',
        r'<pre class="forum-code-block"><code>\1</code></pre>',
        text,
        flags=re.DOTALL
    )

    # Inline code (`code`)
    text = re.sub(r'`([^`]+)`', r'<code class="forum-code">\1</code>', text)

    # Bold (**text** or __text__) — require non-word boundary on `_` form
    # so URLs like `forestry_office` don't get partially bolded.
    text = re.sub(r'\*\*([^*]+)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'(^|\W)__([^_\n]+?)__(?=\W|$)', r'\1<strong>\2</strong>', text)

    # Italic (*text* or _text_) — same boundary rule for `_` to avoid
    # eating underscores inside URLs (e.g. ?g_ep=...) which corrupted forum
    # links. The captured leading char is re-emitted.
    text = re.sub(r'(?<!\*)\*([^*\n]+?)\*(?!\*)', r'<em>\1</em>', text)
    text = re.sub(r'(^|\W)_(?!_)([^_\n]+?)_(?=\W|$)', r'\1<em>\2</em>', text)

    # Links [text](url) - only allow http/https
    def safe_link(match):
        link_text = match.group(1)
        url = match.group(2)
        if url.startswith(('http://', 'https://', '/')):
            return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{link_text}</a>'
        return match.group(0)

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', safe_link, text)

    # Auto-link bare URLs (after [text](url) to avoid doubling).
    # Beautify Google Maps URLs via _link_display so the visible label is
    # a pin + place name instead of an 800-char tracking URL.
    def _wrap_autolink(m):
        url = m.group(0)
        display = _link_display(url)
        return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'
    text = re.sub(r'(?<!href=")(?<!">)https?://[^\s<]+', _wrap_autolink, text)

    # @mentions - highlight them; mark self-mentions with extra class
    self_variants = set()
    if current_user_name:
        norm = current_user_name.strip().lower()
        self_variants = {norm.replace(' ', '.'), norm.replace(' ', '_'), norm.replace(' ', '')}

    def _render_mention(m):
        handle = m.group(1).lower()
        cls = 'forum-mention forum-mention-self' if handle in self_variants else 'forum-mention'
        return f'<span class="{cls}">@{m.group(1)}</span>'

    # Mentions must start with a letter and not be preceded by `/` or another
    # word char — this prevents matching `@54.123` from Google Maps URLs or
    # `email@host` style strings that happen to land in plaintext.
    text = re.sub(r'(?<![/\w])@([a-zA-Z][\w.\-]*)', _render_mention, text)

    # Now process block structure (lists, quotes, paragraphs)
    lines = text.split('\n')
    result_lines = []
    in_list = False
    in_quote = False

    for line in lines:
        stripped = line.strip()

        # Empty line = paragraph break
        if not stripped:
            if in_list:
                result_lines.append('</ul>')
                in_list = False
            if in_quote:
                result_lines.append('</blockquote>')
                in_quote = False
            result_lines.append('<br>')
            continue

        # Quote blocks (> text) — &gt; because already escaped
        if stripped.startswith('&gt; '):
            if not in_quote:
                result_lines.append('<blockquote class="forum-quote">')
                in_quote = True
            result_lines.append(stripped[5:])
            continue
        elif in_quote:
            result_lines.append('</blockquote>')
            in_quote = False

        # List items (- text)
        if stripped.startswith('- '):
            if not in_list:
                result_lines.append('<ul class="forum-list">')
                in_list = True
            result_lines.append(f'<li>{stripped[2:]}</li>')
            continue
        elif in_list:
            result_lines.append('</ul>')
            in_list = False

        result_lines.append(stripped)

    # Close open blocks
    if in_list:
        result_lines.append('</ul>')
    if in_quote:
        result_lines.append('</blockquote>')

    # Join with spaces — no extra <br> between lines within same paragraph
    # Consecutive non-block lines are part of the same paragraph
    output = []
    for i, line in enumerate(result_lines):
        s = line.strip()
        # Block elements get their own line, no extra spacing
        if any(s.startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>', '<br>']):
            output.append(line)
        else:
            # Regular text — join with previous regular text using space
            if output and output[-1] and not any(output[-1].strip().startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>', '<br>']):
                output[-1] = output[-1] + ' ' + line
            else:
                output.append(line)

    return Markup('\n'.join(output))


def register_markdown_filter(app):
    """Install ``parse_forum_markdown`` as the ``forum_markdown`` Jinja filter.

    *app* is expected to be a Flask app; the only thing used here is its
    ``jinja_env.filters`` mapping.
    """
    jinja_filters = app.jinja_env.filters
    jinja_filters['forum_markdown'] = parse_forum_markdown