nordabiz/utils/markdown.py
Maciej Pienczyn 958b967df2
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat(forum): beautify Google Maps URLs + fix edit/quote raw content
Google Maps URLs can be 800+ chars of tracking data that poisoned the
forum UI. Extract the place name from /maps/place/NAME/ (or fall back
to coordinates) and render as '📍 Name'. Full URL remains in the href.

Two secondary fixes:

- Edit/quote modals were reading .innerText of the rendered reply,
  which baked the current render (including any stale/broken HTML from
  older bad renders) back into the textarea. Switched to emitting the
  raw DB content via {{ content|tojson }} so what you edit is what you
  wrote.

- @mention regex was matching '@54.1234' inside Maps URLs and similar.
  Tightened to require a letter start and non-slash/non-word lookbehind
  so coords and email-style strings pass through untouched.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 14:38:59 +02:00

197 lines
7.0 KiB
Python

"""
Simple Markdown Parser for Forum
================================
Converts basic markdown to safe HTML.
Supports: bold, italic, code, links, auto-links, lists, quotes, @mentions
"""
import html
import re
import urllib.parse

from markupsafe import Markup, escape
def _link_display(url):
    """Return a short, HTML-safe label for *url*; the caller keeps the full href.

    Google Maps URLs are especially unreadable — a single place link can be
    800 characters of tracking data. For those, the place name (or, failing
    that, the coordinates) is extracted and rendered as `📍 Name`.

    Args:
        url: The URL as it appears in already HTML-escaped text.

    Returns:
        A string safe to insert as anchor text: the pin label for Google
        Maps URLs, otherwise *url* unchanged.
    """
    if 'google.' in url and '/maps/' in url:
        m = re.search(r'/maps/place/([^/@?]+)', url)
        if m:
            # The input is HTML-escaped, so undo entities first (otherwise
            # `&amp;` would be escaped twice below). `unquote_plus` turns
            # '+' into a space *before* percent-decoding, so an encoded
            # literal plus (%2B) survives as '+'.
            name = urllib.parse.unquote_plus(html.unescape(m.group(1))).strip()
            if name:
                # Percent-decoding can produce `<`, `>` or quotes that the
                # earlier escaping pass never saw; escape again so the label
                # cannot inject markup.
                return f'📍 {html.escape(name)}'
        # No usable place name: fall back to the "@lat,lng" pair if present.
        m = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', url)
        if m:
            return f'📍 Mapa ({m.group(1)}, {m.group(2)})'
        return '📍 Google Maps'
    return url
def _autolink(text):
    """Wrap every bare http(s) URL in *text* in a forum-styled anchor.

    Expects text that has already been HTML-escaped; a URL runs until the
    next whitespace character or `<`. The anchor label comes from
    `_link_display`, while the href keeps the URL exactly as matched.
    """
    bare_url = re.compile(r'https?://[^\s<]+')

    def to_anchor(match):
        url = match.group(0)
        display = _link_display(url)
        return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'

    return bare_url.sub(to_anchor, text)
def parse_forum_markdown(text, current_user_name=None):
    """
    Convert markdown text to safe HTML.

    The input is HTML-escaped first, inline syntax is then rewritten with
    regular expressions, and finally the lines are grouped into lists,
    quotes and paragraphs.

    Supported syntax:
    - **bold** or __bold__
    - *italic* or _italic_
    - `inline code` and ``` fenced code ```
    - [link text](url) for http(s) and site-relative ("/...") URLs
    - bare https://... URLs (auto-linked)
    - - list items
    - > quotes
    - @mentions (highlighted)

    Args:
        text: Raw markdown as written by the user. Falsy input yields an
            empty Markup.
        current_user_name: Optional name of the viewing user. A mention whose
            handle equals that name lower-cased, with spaces replaced by
            '.', '_' or removed, gets the extra `forum-mention-self` class.

    Returns:
        Markup containing the rendered HTML.
    """
    if not text:
        return Markup('')

    # Normalize line endings (Windows \r\n and old Mac \r -> \n)
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Escape HTML first for security; every tag present after this point was
    # generated by this function.
    text = str(escape(text))

    # Apply inline formatting BEFORE block structure.
    # This ensures URLs inside list items get linked.

    # NOTE(review): text inside code spans is still subject to the bold,
    # italic, link and mention rules below — confirm whether that is intended.

    # Code blocks (``` ... ```)
    text = re.sub(
        r'```(.*?)```',
        r'<pre class="forum-code-block"><code>\1</code></pre>',
        text,
        flags=re.DOTALL,
    )

    # Inline code (`code`)
    text = re.sub(r'`([^`]+)`', r'<code class="forum-code">\1</code>', text)

    # Bold (**text** or __text__) — require non-word boundary on `_` form
    # so URLs like `forestry_office` don't get partially bolded.
    text = re.sub(r'\*\*([^*]+)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'(^|\W)__([^_\n]+?)__(?=\W|$)', r'\1<strong>\2</strong>', text)

    # Italic (*text* or _text_) — same boundary rule for `_` to avoid
    # eating underscores inside URLs (e.g. ?g_ep=...) which corrupted forum
    # links. The captured leading char is re-emitted.
    text = re.sub(r'(?<!\*)\*([^*\n]+?)\*(?!\*)', r'<em>\1</em>', text)
    text = re.sub(r'(^|\W)_(?!_)([^_\n]+?)_(?=\W|$)', r'\1<em>\2</em>', text)

    # Links [text](url) — only http(s) and site-relative ("/...") targets
    # become anchors; anything else is left as typed.
    def safe_link(match):
        link_text = match.group(1)
        url = match.group(2)
        if url.startswith(('http://', 'https://', '/')):
            return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{link_text}</a>'
        return match.group(0)

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', safe_link, text)

    # Auto-link bare URLs (after [text](url) to avoid doubling).
    # Beautify Google Maps URLs via _link_display so the visible label is
    # a pin + place name instead of an 800-char tracking URL.
    def _wrap_autolink(m):
        url = m.group(0)
        display = _link_display(url)
        return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'

    # The lookbehinds skip URLs that already sit in an href or directly
    # after an opening tag (i.e. were handled by safe_link above).
    text = re.sub(r'(?<!href=")(?<!">)https?://[^\s<]+', _wrap_autolink, text)

    # @mentions - highlight them; mark self-mentions with extra class
    self_variants = set()
    if current_user_name:
        norm = current_user_name.strip().lower()
        self_variants = {norm.replace(' ', '.'), norm.replace(' ', '_'), norm.replace(' ', '')}

    def _render_mention(m):
        handle = m.group(1)
        if handle is None:
            # Matched an HTML tag, not a mention: emit it untouched.
            return m.group(0)
        cls = 'forum-mention forum-mention-self' if handle.lower() in self_variants else 'forum-mention'
        return f'<span class="{cls}">@{handle}</span>'

    # Mentions must start with a letter and not be preceded by `/` or another
    # word char — this prevents matching `@54.123` from Google Maps URLs or
    # `email@host` style strings that happen to land in plaintext.
    # Two further guards:
    # - the first alternative consumes whole generated tags, so an `@` inside
    #   an attribute (e.g. href="...?q=@john") never gets a <span> injected;
    # - the handle must end on a word character, so sentence punctuation
    #   ("@jan.kowalski.") is not swallowed and self-mentions still match.
    text = re.sub(
        r'<[^>]+>|(?<![/\w])@([a-zA-Z](?:[\w.\-]*\w)?)',
        _render_mention,
        text,
    )

    # Now process block structure (lists, quotes, paragraphs)
    result_lines = []
    in_list = False
    in_quote = False

    for line in text.split('\n'):
        stripped = line.strip()

        # Empty line = paragraph break; it also ends any open list or quote.
        if not stripped:
            if in_list:
                result_lines.append('</ul>')
                in_list = False
            if in_quote:
                result_lines.append('</blockquote>')
                in_quote = False
            result_lines.append('<br>')
            continue

        # Quote blocks (> text) — &gt; because already escaped
        if stripped.startswith('&gt; '):
            # Close a list that runs straight into the quote, otherwise the
            # <blockquote> would be emitted inside an unclosed <ul>.
            if in_list:
                result_lines.append('</ul>')
                in_list = False
            if not in_quote:
                result_lines.append('<blockquote class="forum-quote">')
                in_quote = True
            result_lines.append(stripped[5:])  # drop the '&gt; ' marker
            continue
        elif in_quote:
            result_lines.append('</blockquote>')
            in_quote = False

        # List items (- text)
        if stripped.startswith('- '):
            if not in_list:
                result_lines.append('<ul class="forum-list">')
                in_list = True
            result_lines.append(f'<li>{stripped[2:]}</li>')
            continue
        elif in_list:
            result_lines.append('</ul>')
            in_list = False

        result_lines.append(stripped)

    # Close open blocks (at most one of the two can still be open here)
    if in_list:
        result_lines.append('</ul>')
    if in_quote:
        result_lines.append('</blockquote>')

    # Join with spaces — no extra <br> between lines within same paragraph.
    # Consecutive non-block lines are part of the same paragraph.
    block_prefixes = (
        '<ul', '</ul>', '<li', '</li>',
        '<blockquote', '</blockquote>',
        '<pre', '</pre>', '<br>',
    )
    output = []
    for line in result_lines:
        is_block = line.strip().startswith(block_prefixes)
        prev_is_text = (
            bool(output)
            and bool(output[-1])
            and not output[-1].strip().startswith(block_prefixes)
        )
        if not is_block and prev_is_text:
            # Regular text — join with previous regular text using a space
            output[-1] = output[-1] + ' ' + line
        else:
            # Block elements (and text following one) get their own line
            output.append(line)

    return Markup('\n'.join(output))
def register_markdown_filter(app):
    """Expose `parse_forum_markdown` to the app's Jinja templates.

    After this call, templates can use the filter under the name
    `forum_markdown`.
    """
    jinja_filters = app.jinja_env.filters
    jinja_filters['forum_markdown'] = parse_forum_markdown