Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Google Maps URLs can be 800+ chars of tracking data that poisoned the
forum UI. Extract the place name from /maps/place/NAME/ (or fall back
to coordinates) and render as '📍 Name'. Full URL remains in the href.
Two secondary fixes:
- Edit/quote modals were reading .innerText of the rendered reply,
which baked the current render (including any stale/broken HTML from
older bad renders) back into the textarea. Switched to emitting the
raw DB content via {{ content|tojson }} so what you edit is what you
wrote.
- @mention regex was matching '@54.1234' inside Maps URLs and similar.
Tightened to require a letter start and non-slash/non-word lookbehind
so coords and email-style strings pass through untouched.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
197 lines
7.0 KiB
Python
197 lines
7.0 KiB
Python
"""
|
|
Simple Markdown Parser for Forum
|
|
================================
|
|
|
|
Converts basic markdown to safe HTML.
|
|
Supports: bold, italic, code, links, auto-links, lists, quotes, @mentions
|
|
"""
|
|
|
|
import re
|
|
import urllib.parse
|
|
from markupsafe import Markup, escape
|
|
|
|
|
|
def _link_display(url):
|
|
"""Shorten a URL to a human-friendly label while keeping the full href.
|
|
|
|
Google Maps URLs are especially unreadable — a single place link can be
|
|
800 characters of tracking data. Extract the place name (or coordinates)
|
|
and render it as `📍 Name` instead.
|
|
"""
|
|
if 'google.' in url and '/maps/' in url:
|
|
m = re.search(r'/maps/place/([^/@?]+)', url)
|
|
if m:
|
|
name = urllib.parse.unquote(m.group(1)).replace('+', ' ').strip()
|
|
if name:
|
|
return f'📍 {name}'
|
|
m = re.search(r'@(-?\d+\.\d+),(-?\d+\.\d+)', url)
|
|
if m:
|
|
return f'📍 Mapa ({m.group(1)}, {m.group(2)})'
|
|
return '📍 Google Maps'
|
|
return url
|
|
|
|
|
|
def _autolink(text):
|
|
"""Convert bare URLs to clickable links. Works on escaped text before HTML wrapping."""
|
|
def wrap(m):
|
|
url = m.group(0)
|
|
display = _link_display(url)
|
|
return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'
|
|
return re.sub(r'https?://[^\s<]+', wrap, text)
|
|
|
|
|
|
def parse_forum_markdown(text, current_user_name=None):
    """
    Convert forum markdown to safe HTML.

    The input is HTML-escaped first; every later step only adds markup that
    is generated here, and the result is returned wrapped in ``Markup``.

    Supported syntax:
    - **bold** or __bold__
    - *italic* or _italic_
    - `inline code` and triple-backtick code blocks
    - [link text](url)
    - bare https://... URLs (auto-linked)
    - - list items
    - > quotes
    - @mentions (highlighted)

    Args:
        text: Raw user-supplied markdown. Falsy values yield empty markup.
        current_user_name: Optional name of the viewing user. A mention whose
            handle equals this name lower-cased, with spaces replaced by
            '.', by '_' or removed, gets the extra ``forum-mention-self``
            class.

    Returns:
        Markup: The rendered HTML.
    """
    if not text:
        return Markup('')

    # Normalize line endings (Windows \r\n -> \n)
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # Escape HTML first for security
    text = str(escape(text))

    # Apply inline formatting BEFORE block structure.
    # This ensures URLs inside list items get linked.
    # NOTE(review): the inline rules below also run over the contents of the
    # code spans/blocks produced here — confirm whether that is intended.

    # Code blocks (``` ... ```)
    text = re.sub(
        r'```(.*?)```',
        r'<pre class="forum-code-block"><code>\1</code></pre>',
        text,
        flags=re.DOTALL
    )

    # Inline code (`code`)
    text = re.sub(r'`([^`]+)`', r'<code class="forum-code">\1</code>', text)

    # Bold (**text** or __text__) — require non-word boundary on `_` form
    # so URLs like `forestry_office` don't get partially bolded.
    text = re.sub(r'\*\*([^*]+)\*\*', r'<strong>\1</strong>', text)
    text = re.sub(r'(^|\W)__([^_\n]+?)__(?=\W|$)', r'\1<strong>\2</strong>', text)

    # Italic (*text* or _text_) — same boundary rule for `_` to avoid
    # eating underscores inside URLs (e.g. ?g_ep=...) which corrupted forum
    # links. The captured leading char is re-emitted.
    text = re.sub(r'(?<!\*)\*([^*\n]+?)\*(?!\*)', r'<em>\1</em>', text)
    text = re.sub(r'(^|\W)_(?!_)([^_\n]+?)_(?=\W|$)', r'\1<em>\2</em>', text)

    # Links [text](url) — only http(s) and site-relative ('/...') targets are
    # turned into anchors; anything else is left as typed.
    def safe_link(match):
        link_text = match.group(1)
        url = match.group(2)
        if url.startswith(('http://', 'https://', '/')):
            return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{link_text}</a>'
        return match.group(0)

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', safe_link, text)

    # Auto-link bare URLs (after [text](url) to avoid doubling).
    # Beautify Google Maps URLs via _link_display so the visible label is
    # a pin + place name instead of an 800-char tracking URL.
    def _wrap_autolink(m):
        url = m.group(0)
        display = _link_display(url)
        return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{display}</a>'

    # The lookbehinds skip URLs that already sit inside an anchor emitted by
    # safe_link (as its href value or as its link text).
    text = re.sub(r'(?<!href=")(?<!">)https?://[^\s<]+', _wrap_autolink, text)

    # @mentions - highlight them; mark self-mentions with extra class
    self_variants = set()
    if current_user_name:
        norm = current_user_name.strip().lower()
        self_variants = {norm.replace(' ', '.'), norm.replace(' ', '_'), norm.replace(' ', '')}

    def _render_mention(m):
        handle = m.group(1)
        cls = 'forum-mention forum-mention-self' if handle.lower() in self_variants else 'forum-mention'
        return f'<span class="{cls}">@{handle}</span>'

    # Mentions must start with a letter and not be preceded by `/` or another
    # word char — this prevents matching `@54.123` from Google Maps URLs or
    # `email@host` style strings that happen to land in plaintext.
    # The handle must also END on a word character, so sentence punctuation
    # ("hi @jan.") is neither highlighted nor allowed to defeat the
    # self-mention comparison.
    text = re.sub(r'(?<![/\w])@([a-zA-Z](?:[\w.\-]*\w)?)', _render_mention, text)

    # Now process block structure (lists, quotes, paragraphs)
    # The text was escaped above, so a leading '>' now appears in its entity
    # form. Derive the prefix from escape() instead of hard-coding it so the
    # check and the slice length cannot drift apart.
    quote_prefix = str(escape('>')) + ' '
    list_prefix = '- '

    result_lines = []
    in_list = False
    in_quote = False

    for line in text.split('\n'):
        stripped = line.strip()

        # Empty line = paragraph break
        if not stripped:
            if in_list:
                result_lines.append('</ul>')
                in_list = False
            if in_quote:
                result_lines.append('</blockquote>')
                in_quote = False
            result_lines.append('<br>')
            continue

        # Quote blocks (escaped "> text")
        if stripped.startswith(quote_prefix):
            if not in_quote:
                result_lines.append('<blockquote class="forum-quote">')
                in_quote = True
            result_lines.append(stripped[len(quote_prefix):])
            continue
        elif in_quote:
            result_lines.append('</blockquote>')
            in_quote = False

        # List items (- text)
        if stripped.startswith(list_prefix):
            if not in_list:
                result_lines.append('<ul class="forum-list">')
                in_list = True
            result_lines.append(f'<li>{stripped[len(list_prefix):]}</li>')
            continue
        elif in_list:
            result_lines.append('</ul>')
            in_list = False

        result_lines.append(stripped)

    # Close open blocks
    if in_list:
        result_lines.append('</ul>')
    if in_quote:
        result_lines.append('</blockquote>')

    # Join with spaces — no extra <br> between lines within same paragraph.
    # Consecutive non-block lines are part of the same paragraph.
    block_tags = (
        '<ul', '</ul>', '<li', '</li>',
        '<blockquote', '</blockquote>',
        '<pre', '</pre>', '<br>',
    )
    output = []
    for line in result_lines:
        if line.strip().startswith(block_tags):
            # Block elements get their own line, no extra spacing
            output.append(line)
        elif output and output[-1] and not output[-1].strip().startswith(block_tags):
            # Regular text — join with previous regular text using space
            output[-1] = output[-1] + ' ' + line
        else:
            output.append(line)

    return Markup('\n'.join(output))
|
|
|
|
|
|
def register_markdown_filter(app):
    """Make ``parse_forum_markdown`` available to templates as ``forum_markdown``.

    Args:
        app: Flask application whose Jinja environment receives the filter.
    """
    jinja_filters = app.jinja_env.filters
    jinja_filters['forum_markdown'] = parse_forum_markdown
|