fix: make member name linking work inside HTML tags (strong, em, etc.)
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Parse the HTML into tags and text nodes, and process only the text nodes that lie outside <a> tags. Use a \b word boundary instead of the broken lookbehind. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
be2c3e030b
commit
eaf29b0aa2
@ -121,42 +121,82 @@ def index():
|
||||
|
||||
|
||||
def _enrich_event_description(db, html):
|
||||
"""Enrich event description: link member names and auto-link URLs."""
|
||||
"""Enrich event description: link member names and auto-link URLs.
|
||||
|
||||
Processes HTML text nodes only (skips content inside <a> tags and HTML attributes).
|
||||
"""
|
||||
import re
|
||||
from markupsafe import Markup
|
||||
from flask import url_for as flask_url_for
|
||||
from database import User
|
||||
|
||||
# 1. Auto-link bare URLs (not already inside href="...")
|
||||
def linkify_urls(text):
|
||||
url_pattern = r'(?<!["\'>=/])(https?://[^\s<>"\']+|(?<!\w)(?:www\.)[^\s<>"\']+|(?<!\w)nordabiznes\.pl[^\s<>"\']*)'
|
||||
def url_replacer(m):
|
||||
url = m.group(0)
|
||||
href = url if url.startswith('http') else 'https://' + url
|
||||
return f'<a href="{href}" target="_blank" style="color:var(--primary);font-weight:500;">{url}</a>'
|
||||
return re.sub(url_pattern, url_replacer, text)
|
||||
|
||||
# 2. Find portal members with person_id (for clickable names)
|
||||
# Build replacement maps
|
||||
members = db.query(User.name, User.person_id).filter(
|
||||
User.person_id.isnot(None),
|
||||
User.name.isnot(None),
|
||||
).all()
|
||||
|
||||
# Sort by name length descending (longer names first to avoid partial matches)
|
||||
# Sort longest first to avoid partial matches
|
||||
members = sorted(members, key=lambda m: len(m.name), reverse=True)
|
||||
|
||||
# Apply URL linkification first
|
||||
html = linkify_urls(html)
|
||||
member_map = {}
|
||||
for m in members:
|
||||
member_map[m.name] = flask_url_for('person_detail', person_id=m.person_id)
|
||||
|
||||
# 3. Replace member names with links (not inside existing tags)
|
||||
for member in members:
|
||||
name = member.name
|
||||
person_url = flask_url_for('person_detail', person_id=member.person_id)
|
||||
pattern = r'(?<!["\w>])(' + re.escape(name) + r')(?!["\w<])'
|
||||
link = f'<a href="{person_url}" style="color:var(--primary);font-weight:600;text-decoration:none;border-bottom:1px dashed var(--primary);" title="Zobacz profil">{name}</a>'
|
||||
html = re.sub(pattern, link, html)
|
||||
def enrich_text_node(text):
|
||||
"""Apply member linking and URL linkification to a plain text fragment."""
|
||||
# 1. Auto-link URLs
|
||||
url_pattern = r'(https?://[^\s<>"\']+|(?<!\w)www\.[^\s<>"\']+|(?<!\w)nordabiznes\.pl[^\s<>"\']*)'
|
||||
def url_replacer(m):
|
||||
url = m.group(0)
|
||||
href = url if url.startswith('http') else 'https://' + url
|
||||
return f'<a href="{href}" target="_blank" style="color:var(--primary);font-weight:500;">{url}</a>'
|
||||
text = re.sub(url_pattern, url_replacer, text)
|
||||
|
||||
return Markup(html)
|
||||
# 2. Link member names (whole word match)
|
||||
for name, url in member_map.items():
|
||||
pattern = r'\b' + re.escape(name) + r'\b'
|
||||
link = f'<a href="{url}" style="color:var(--primary);font-weight:600;text-decoration:none;border-bottom:1px dashed var(--primary);" title="Zobacz profil">{name}</a>'
|
||||
text = re.sub(pattern, link, text)
|
||||
|
||||
return text
|
||||
|
||||
# Split HTML into tags and text nodes, only process text outside <a> tags
|
||||
# The regex below matches HTML tags only; the text between consecutive tags is treated as a text node, and an in_a_tag flag tracks whether that text lies inside an <a> element
|
||||
result = []
|
||||
pos = 0
|
||||
in_a_tag = False
|
||||
|
||||
# Regex to find HTML tags
|
||||
tag_pattern = re.compile(r'<(/?)(\w+)([^>]*)>')
|
||||
|
||||
for match in tag_pattern.finditer(html):
|
||||
start, end = match.start(), match.end()
|
||||
is_closing = match.group(1) == '/'
|
||||
tag_name = match.group(2).lower()
|
||||
|
||||
# Process text before this tag
|
||||
if start > pos:
|
||||
text_chunk = html[pos:start]
|
||||
if in_a_tag:
|
||||
result.append(text_chunk) # Don't modify text inside <a>
|
||||
else:
|
||||
result.append(enrich_text_node(text_chunk))
|
||||
|
||||
result.append(match.group(0)) # The tag itself
|
||||
pos = end
|
||||
|
||||
if tag_name == 'a':
|
||||
in_a_tag = not is_closing
|
||||
|
||||
# Process remaining text after last tag
|
||||
if pos < len(html):
|
||||
text_chunk = html[pos:]
|
||||
if in_a_tag:
|
||||
result.append(text_chunk)
|
||||
else:
|
||||
result.append(enrich_text_node(text_chunk))
|
||||
|
||||
return Markup(''.join(result))
|
||||
|
||||
|
||||
@bp.route('/<int:event_id>', endpoint='calendar_event')
|
||||
|
||||
Loading…
Reference in New Issue
Block a user