fix: member name linking works inside HTML tags (strong, em, etc)
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

Parse HTML into tags and text nodes, only process text nodes outside
<a> tags. Uses \b word boundary instead of broken lookbehind.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-12 10:26:26 +01:00
parent be2c3e030b
commit eaf29b0aa2

View File

@ -121,42 +121,82 @@ def index():
def _enrich_event_description(db, html):
    """Enrich an event description: link member names and auto-link bare URLs.

    Member names are read from ``User`` rows that have both a ``name`` and a
    ``person_id``; each name is linked to the ``person_detail`` endpoint.
    Only text between HTML tags is rewritten. Tags (and therefore their
    attributes) are copied through verbatim, and text inside an existing
    ``<a>`` element is left untouched.

    Args:
        db: Object exposing ``query(...).filter(...).all()`` (presumably a
            SQLAlchemy session -- confirm against the caller).
        html: Description markup as a string. A falsy value yields empty markup.

    Returns:
        markupsafe.Markup: The enriched HTML.
    """
    from markupsafe import Markup
    from flask import url_for as flask_url_for
    from database import User

    if not html:
        return Markup('')

    members = db.query(User.name, User.person_id).filter(
        User.person_id.isnot(None),
        User.name.isnot(None),
    ).all()

    member_map = {}
    for member in members:
        # A blank name would produce a pattern that matches at every word
        # boundary, so such rows are ignored.
        if member.name and member.name.strip():
            member_map[member.name] = flask_url_for(
                'person_detail', person_id=member.person_id
            )

    return Markup(_link_members_and_urls(html, member_map))


def _link_members_and_urls(html, member_map):
    """Return *html* with bare URLs and known member names wrapped in links.

    Args:
        html: HTML string to process.
        member_map: Mapping of member name -> profile URL.

    Returns:
        str: The rewritten HTML. Tags are emitted unchanged; text inside
        ``<a>...</a>`` is emitted unchanged.
    """
    import re

    url_alternatives = (
        r'https?://[^\s<>"\']+'
        r'|(?<!\w)www\.[^\s<>"\']+'
        r'|(?<!\w)nordabiznes\.pl[^\s<>"\']*'
    )
    # Longest names first so "Jan Kowalski Nowak" wins over "Jan Kowalski".
    names = sorted(
        (name for name in member_map if name and name.strip()),
        key=len,
        reverse=True,
    )
    alternatives = ['(?P<url>' + url_alternatives + ')']
    if names:
        # Lookarounds instead of \b so names that start or end with a
        # non-word character (e.g. a trailing ".") can still match.
        alternatives.append(
            r'(?<!\w)(?P<member>' + '|'.join(map(re.escape, names)) + r')(?!\w)'
        )
    # URLs and names are matched in ONE pass: text consumed by a match is
    # never rescanned, so a shorter name cannot be linked again inside a
    # link that was just generated, and names inside URLs are left alone.
    enrich_pattern = re.compile('|'.join(alternatives))

    def build_link(match):
        # A function replacement keeps backslashes in the matched text or
        # the URL from being interpreted as regex template escapes.
        matched = match.group(0)
        if match.group('url') is not None:
            href = matched if matched.startswith('http') else 'https://' + matched
            return f'<a href="{href}" target="_blank" style="color:var(--primary);font-weight:500;">{matched}</a>'
        profile_url = member_map[matched]
        return f'<a href="{profile_url}" style="color:var(--primary);font-weight:600;text-decoration:none;border-bottom:1px dashed var(--primary);" title="Zobacz profil">{matched}</a>'

    def enrich_text_node(text):
        return enrich_pattern.sub(build_link, text)

    tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
    pieces = []
    pos = 0
    in_anchor = False
    for tag in tag_pattern.finditer(html):
        if tag.start() > pos:
            chunk = html[pos:tag.start()]
            pieces.append(chunk if in_anchor else enrich_text_node(chunk))
        pieces.append(tag.group(0))
        pos = tag.end()
        if tag.group(2).lower() == 'a':
            # Opening <a ...> enters an anchor, closing </a> leaves it.
            in_anchor = tag.group(1) != '/'

    if pos < len(html):
        tail = html[pos:]
        pieces.append(tail if in_anchor else enrich_text_node(tail))

    return ''.join(pieces)
@bp.route('/<int:event_id>', endpoint='calendar_event')