fix: make member name linking work inside HTML tags (strong, em, etc.)

Parse the HTML into tags and text nodes, and process only the text nodes outside <a> tags. Use a \b word boundary instead of the broken lookbehind. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 10:26:26 +01:00 · 2026-03-12 10:26:26 +01:00 · eaf29b0aa2
commit eaf29b0aa2
parent be2c3e030b
1 changed files with 63 additions and 23 deletions
--- a/blueprints/community/calendar/routes.py
+++ b/blueprints/community/calendar/routes.py
@ -121,42 +121,82 @@ def index():


 def _enrich_event_description(db, html):
-    """Enrich event description: link member names and auto-link URLs."""
+    """Enrich event description: link member names and auto-link URLs.
+
+    Processes HTML text nodes only (skips content inside <a> tags and HTML attributes).
+    """
    import re
    from markupsafe import Markup
    from flask import url_for as flask_url_for
    from database import User

-    # 1. Auto-link bare URLs (not already inside href="...")
-    def linkify_urls(text):
-        url_pattern = r'(?<!["\'>=/])(https?://[^\s<>"\']+|(?<!\w)(?:www\.)[^\s<>"\']+|(?<!\w)nordabiznes\.pl[^\s<>"\']*)'
-        def url_replacer(m):
-            url = m.group(0)
-            href = url if url.startswith('http') else 'https://' + url
-            return f'<a href="{href}" target="_blank" style="color:var(--primary);font-weight:500;">{url}</a>'
-        return re.sub(url_pattern, url_replacer, text)
-
-    # 2. Find portal members with person_id (for clickable names)
+    # Build replacement maps
    members = db.query(User.name, User.person_id).filter(
        User.person_id.isnot(None),
        User.name.isnot(None),
    ).all()
-
-    # Sort by name length descending (longer names first to avoid partial matches)
+    # Sort longest first to avoid partial matches
    members = sorted(members, key=lambda m: len(m.name), reverse=True)

-    # Apply URL linkification first
-    html = linkify_urls(html)
+    member_map = {}
+    for m in members:
+        member_map[m.name] = flask_url_for('person_detail', person_id=m.person_id)

-    # 3. Replace member names with links (not inside existing tags)
-    for member in members:
-        name = member.name
-        person_url = flask_url_for('person_detail', person_id=member.person_id)
-        pattern = r'(?<!["\w>])(' + re.escape(name) + r')(?!["\w<])'
-        link = f'<a href="{person_url}" style="color:var(--primary);font-weight:600;text-decoration:none;border-bottom:1px dashed var(--primary);" title="Zobacz profil">{name}</a>'
-        html = re.sub(pattern, link, html)
+    def enrich_text_node(text):
+        """Apply member linking and URL linkification to a plain text fragment."""
+        # 1. Auto-link URLs
+        url_pattern = r'(https?://[^\s<>"\']+|(?<!\w)www\.[^\s<>"\']+|(?<!\w)nordabiznes\.pl[^\s<>"\']*)'
+        def url_replacer(m):
+            url = m.group(0)
+            href = url if url.startswith('http') else 'https://' + url
+            return f'<a href="{href}" target="_blank" style="color:var(--primary);font-weight:500;">{url}</a>'
+        text = re.sub(url_pattern, url_replacer, text)

-    return Markup(html)
+        # 2. Link member names (whole word match)
+        for name, url in member_map.items():
+            pattern = r'\b' + re.escape(name) + r'\b'
+            link = f'<a href="{url}" style="color:var(--primary);font-weight:600;text-decoration:none;border-bottom:1px dashed var(--primary);" title="Zobacz profil">{name}</a>'
+            text = re.sub(pattern, link, text)
+
+        return text
+
+    # Split HTML into tags and text nodes, only process text outside <a> tags
+    # Approach: find each HTML tag with a regex; text between tags is enriched unless an <a> is currently open
+    result = []
+    pos = 0
+    in_a_tag = False
+
+    # Regex to find HTML tags
+    tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
+
+    for match in tag_pattern.finditer(html):
+        start, end = match.start(), match.end()
+        is_closing = match.group(1) == '/'
+        tag_name = match.group(2).lower()
+
+        # Process text before this tag
+        if start > pos:
+            text_chunk = html[pos:start]
+            if in_a_tag:
+                result.append(text_chunk)  # Don't modify text inside <a>
+            else:
+                result.append(enrich_text_node(text_chunk))
+
+        result.append(match.group(0))  # The tag itself
+        pos = end
+
+        if tag_name == 'a':
+            in_a_tag = not is_closing
+
+    # Process remaining text after last tag
+    if pos < len(html):
+        text_chunk = html[pos:]
+        if in_a_tag:
+            result.append(text_chunk)
+        else:
+            result.append(enrich_text_node(text_chunk))
+
+    return Markup(''.join(result))


@bp.route('/<int:event_id>', endpoint='calendar_event')