fix(security): Resolve 1 HIGH and 7 MEDIUM vulnerabilities from code review

- HIGH: Fix SQL injection in ZOPK knowledge service (3 functions) — replace f-strings with parameterized queries
- MEDIUM: Sanitize tsquery/LIKE input in SearchService to prevent injection
- MEDIUM: Add @login_required + @role_required(ADMIN) to /health/full endpoint
- MEDIUM: Add @role_required(ADMIN) to ZOPK knowledge search API
- MEDIUM: Add bleach HTML sanitization on write for announcements, events, board proceedings (stored XSS via |safe)
- MEDIUM: Remove partial API key from Gemini service logs
- MEDIUM: Remove @csrf.exempt from chat endpoints, add X-CSRFToken headers in JS
- MEDIUM: Add missing CSRF tokens to 3 POST forms (data_request, benefits_form, benefits_list)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 05:25:18 +01:00 · 2026-02-06 05:25:18 +01:00 · e718d96a7d
commit e718d96a7d
parent ef08bebc8c
15 changed files with 72 additions and 30 deletions
--- a/app.py
+++ b/app.py
@ -166,6 +166,8 @@ from database import (
    SystemRole
 )

+from utils.decorators import role_required
+
 # Import services
 import gemini_service
 from nordabiz_chat import NordaBizChatEngine
@ -938,6 +940,8 @@ def test_error_500():


@app.route('/health/full')
+@login_required
+@role_required(SystemRole.ADMIN)
 def health_full():
    """
    Extended health check - verifies all critical endpoints.
--- a/blueprints/admin/routes.py
+++ b/blueprints/admin/routes.py
@ -27,6 +27,7 @@ from database import (
    SystemRole
 )
 from utils.decorators import role_required
+from utils.helpers import sanitize_html
 import gemini_service

 # Logger
@ -847,7 +848,7 @@ def admin_calendar_new():

            event = NordaEvent(
                title=request.form.get('title', '').strip(),
-                description=request.form.get('description', '').strip(),
+                description=sanitize_html(request.form.get('description', '').strip()),
                event_date=datetime.strptime(request.form.get('event_date'), '%Y-%m-%d').date(),
                start_time=request.form.get('start_time') or None,
                end_time=request.form.get('end_time') or None,
--- a/blueprints/admin/routes_announcements.py
+++ b/blueprints/admin/routes_announcements.py
@ -15,6 +15,7 @@ from flask_login import login_required, current_user
 from . import bp
 from database import SessionLocal, Announcement, SystemRole
 from utils.decorators import role_required
+from utils.helpers import sanitize_html

 logger = logging.getLogger(__name__)

@ -98,7 +99,7 @@ def admin_announcements_new():
        try:
            title = request.form.get('title', '').strip()
            excerpt = request.form.get('excerpt', '').strip()
-            content = request.form.get('content', '').strip()
+            content = sanitize_html(request.form.get('content', '').strip())
            categories = request.form.getlist('categories')
            if not categories:
                categories = ['internal']  # Default category
@ -182,7 +183,7 @@ def admin_announcements_edit(id):
        if request.method == 'POST':
            announcement.title = request.form.get('title', '').strip()
            announcement.excerpt = request.form.get('excerpt', '').strip() or None
-            announcement.content = request.form.get('content', '').strip()
+            announcement.content = sanitize_html(request.form.get('content', '').strip())
            categories = request.form.getlist('categories')
            if not categories:
                categories = ['internal']  # Default category
--- a/blueprints/admin/routes_zopk_knowledge.py
+++ b/blueprints/admin/routes_zopk_knowledge.py
@ -352,6 +352,7 @@ def admin_zopk_embeddings_stream():

@bp.route('/zopk-api/knowledge/search', methods=['POST'])
@login_required
+@role_required(SystemRole.ADMIN)
 def api_zopk_knowledge_search():
    """
    Semantic search in ZOPK knowledge base.
--- a/blueprints/board/routes.py
+++ b/blueprints/board/routes.py
@ -27,6 +27,7 @@ from sqlalchemy import desc
 from . import bp
 from database import SessionLocal, BoardMeeting, SystemRole, User
 from utils.decorators import rada_member_required, office_manager_required
+from utils.helpers import sanitize_html
 from datetime import date, time

 try:
@ -452,6 +453,13 @@ def _handle_meeting_form(db, meeting=None):
    except json.JSONDecodeError:
        proceedings = []

+    # Sanitize text fields in proceedings to prevent stored XSS
+    for proc in proceedings:
+        if isinstance(proc, dict):
+            for field in ('discussion', 'discussed', 'title'):
+                if field in proc and isinstance(proc[field], str):
+                    proc[field] = sanitize_html(proc[field])
+
    # Validate
    errors = []
    if not meeting_number:
--- a/blueprints/chat/routes.py
+++ b/blueprints/chat/routes.py
@ -18,7 +18,6 @@ from database import (
    SystemRole
 )
 from nordabiz_chat import NordaBizChatEngine
-from extensions import csrf
 from utils.decorators import member_required

 # Logger
@ -40,7 +39,6 @@ def chat():


@bp.route('/api/chat/settings', methods=['GET', 'POST'])
-@csrf.exempt
@login_required
@member_required
 def chat_settings():
@ -96,7 +94,6 @@ def chat_settings():


@bp.route('/api/chat/start', methods=['POST'])
-@csrf.exempt
@login_required
@member_required
 def chat_start():
@ -123,7 +120,6 @@ def chat_start():


@bp.route('/api/chat/<int:conversation_id>/message', methods=['POST'])
-@csrf.exempt
@login_required
@member_required
 def chat_send_message(conversation_id):
--- a/gemini_service.py
+++ b/gemini_service.py
@ -104,7 +104,7 @@ class GeminiService:

        # Debug: Log API key (masked)
        if self.api_key:
-            logger.info(f"API key loaded: {self.api_key[:10]}...{self.api_key[-4:]}")
+            logger.info("Gemini API key loaded successfully")
        else:
            logger.error("API key is None or empty!")

--- a/requirements.txt
+++ b/requirements.txt
@ -11,6 +11,7 @@ Werkzeug==3.1.5
 Flask-WTF==1.2.2
 Flask-Limiter==4.1.1
 cryptography==46.0.4
+bleach==6.3.0

 # Database
 SQLAlchemy==2.0.46
--- a/search_service.py
+++ b/search_service.py
@ -263,8 +263,12 @@ class SearchService:
        """
        keywords = self._expand_keywords(query)

-        # Build tsquery from keywords
-        tsquery_parts = [f"{kw}:*" for kw in keywords if kw]
+        # Sanitize keywords for tsquery - keep only word characters (alphanumeric + polish chars)
+        sanitized_keywords = [re.sub(r'[^\w]', '', kw, flags=re.UNICODE) for kw in keywords]
+        sanitized_keywords = [kw for kw in sanitized_keywords if kw]
+
+        # Build tsquery from sanitized keywords
+        tsquery_parts = [f"{kw}:*" for kw in sanitized_keywords]
        tsquery = ' | '.join(tsquery_parts)

        # Check if pg_trgm is available
@ -276,7 +280,8 @@ class SearchService:
            has_trgm = False

        # Build ILIKE patterns for each keyword (for multi-word searches)
-        like_patterns = [f'%{kw}%' for kw in keywords if len(kw) > 2]
+        # Escape LIKE wildcards in user input before wrapping with %
+        like_patterns = [f'%{kw.replace("%", r"\\%").replace("_", r"\\_")}%' for kw in sanitized_keywords if len(kw) > 2]

        # Build SQL query with scoring for founding_history matches (owners/founders)
        if has_trgm:
--- a/templates/admin/benefits_form.html
+++ b/templates/admin/benefits_form.html
@ -177,6 +177,7 @@
        </div>

        <form method="POST">
+            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
            <!-- Podstawowe info -->
            <div class="form-section">
                <div class="form-section-title">Podstawowe informacje</div>
--- a/templates/admin/benefits_list.html
+++ b/templates/admin/benefits_list.html
@ -307,6 +307,7 @@
                        <div class="actions-cell">
                            <a href="{{ url_for('admin.admin_benefits_edit', benefit_id=benefit.id) }}" class="btn-small btn-edit">Edytuj</a>
                            <form action="{{ url_for('admin.admin_benefits_toggle', benefit_id=benefit.id) }}" method="POST" style="display:inline;">
+                                <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
                                <button type="submit" class="btn-small btn-toggle">
                                    {% if benefit.is_active %}Wyłącz{% else %}Włącz{% endif %}
                                </button>
--- a/templates/chat.html
+++ b/templates/chat.html
@ -1534,6 +1534,7 @@

 {% block extra_js %}
 // NordaGPT Chat - State
+const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content || '';
 let currentConversationId = null;
 let conversations = [];
 let currentModel = 'flash';  // Default model (flash = ekonomiczny)
@ -1578,7 +1579,7 @@ async function saveModelPreference(model) {
    try {
        await fetch('/api/chat/settings', {
            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
+            headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
            body: JSON.stringify({ model: model })
        });
    } catch (error) {
@ -1832,7 +1833,8 @@ async function deleteConversation(conversationId) {

    try {
        const response = await fetch(`/api/chat/${conversationId}/delete`, {
-            method: 'DELETE'
+            method: 'DELETE',
+            headers: { 'X-CSRFToken': csrfToken }
        });
        const data = await response.json();

@ -1879,7 +1881,7 @@ async function sendMessage() {
        if (!currentConversationId) {
            const startResponse = await fetch('/api/chat/start', {
                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
+                headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
                body: JSON.stringify({
                    title: message.substring(0, 50) + (message.length > 50 ? '...' : '')
                })
@ -1895,7 +1897,7 @@ async function sendMessage() {
        // Send message with model selection
        const response = await fetch(`/api/chat/${currentConversationId}/message`, {
            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
+            headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
            body: JSON.stringify({
                message: message,
                model: currentModel
--- a/templates/membership/data_request.html
+++ b/templates/membership/data_request.html
@ -249,6 +249,7 @@
    <div class="form-section">
        <h2>Wprowadź NIP firmy</h2>
        <form method="POST" id="dataRequestForm">
+            <input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
            <div class="form-group">
                <label>NIP firmy</label>
                <div class="nip-lookup">
--- a/utils/helpers.py
+++ b/utils/helpers.py
@ -8,8 +8,30 @@ Common utility functions used across blueprints.
 import re
 import logging

+import bleach
+
 logger = logging.getLogger(__name__)

+# Allowed HTML tags and attributes for rich-text content (announcements, events, proceedings)
+_ALLOWED_TAGS = ['p', 'br', 'strong', 'em', 'b', 'i', 'a', 'ul', 'ol', 'li', 'h3', 'h4', 'blockquote']
+_ALLOWED_ATTRS = {'a': ['href', 'target', 'rel']}
+
+
+def sanitize_html(content):
+    """
+    Sanitize HTML content to prevent stored XSS.
+    Strips all tags except a safe whitelist.
+
+    Args:
+        content: HTML string to sanitize
+
+    Returns:
+        Sanitized HTML string
+    """
+    if not content:
+        return content
+    return bleach.clean(content, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRS, strip=True)
+

 def sanitize_input(text, max_length=1000):
    """
--- a/zopk_knowledge_service.py
+++ b/zopk_knowledge_service.py
@ -1691,9 +1691,8 @@ def find_duplicate_entities(
    from sqlalchemy import text

    # Build query with pg_trgm similarity
-    type_filter = f"AND e1.entity_type = '{entity_type}'" if entity_type else ""
-
-    query = text(f"""
+    # Use conditional SQL with COALESCE to avoid f-string interpolation
+    query = text("""
        SELECT
            e1.id as id1, e1.name as name1, e1.entity_type as type1,
            e1.mentions_count as mentions1, e1.is_verified as verified1,
@ -1715,7 +1714,7 @@ def find_duplicate_entities(
            OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%'
            OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%'
        )
-        {type_filter}
+        AND (:entity_type IS NULL OR e1.entity_type = :entity_type)
        ORDER BY
            sim DESC,
            e1.entity_type,
@ -1723,7 +1722,8 @@ def find_duplicate_entities(
        LIMIT :limit
    """)

-    result = db_session.execute(query, {'min_sim': min_similarity, 'limit': limit})
+    params = {'min_sim': min_similarity, 'limit': limit, 'entity_type': entity_type}
+    result = db_session.execute(query, params)

    duplicates = []
    for row in result:
@ -1970,12 +1970,10 @@ def find_duplicate_facts(
    """
    from sqlalchemy import text

-    type_filter = f"AND f1.fact_type = '{fact_type}'" if fact_type else ""
-
    # Set similarity threshold and use % operator (uses GiST index)
    db_session.execute(text("SET pg_trgm.similarity_threshold = :threshold"), {'threshold': min_similarity})

-    query = text(f"""
+    query = text("""
        SELECT
            f1.id as id1, f1.full_text as text1, f1.fact_type as type1,
            f1.is_verified as verified1, f1.confidence_score as score1,
@ -1985,12 +1983,13 @@ def find_duplicate_facts(
        FROM zopk_knowledge_facts f1
        JOIN zopk_knowledge_facts f2 ON f1.id < f2.id
        WHERE f1.full_text % f2.full_text
-        {type_filter}
+        AND (:fact_type IS NULL OR f1.fact_type = :fact_type)
        ORDER BY sim DESC, COALESCE(GREATEST(f1.confidence_score, f2.confidence_score), 0) DESC
        LIMIT :limit
    """)

-    result = db_session.execute(query, {'limit': limit})
+    params = {'limit': limit, 'fact_type': fact_type}
+    result = db_session.execute(query, params)

    duplicates = []
    for row in result:
@ -2320,9 +2319,7 @@ def get_timeline_suggestions(

        # Get milestone facts not yet in timeline
        # Prioritize: verified, high confidence, has numeric value (dates/amounts)
-        verified_filter = "AND f.is_verified = TRUE" if only_verified else ""
-
-        suggestions_query = text(f"""
+        suggestions_query = text("""
            SELECT DISTINCT ON (f.id)
                f.id as fact_id,
                f.full_text,
@ -2341,7 +2338,7 @@ def get_timeline_suggestions(
            FROM zopk_knowledge_facts f
            LEFT JOIN zopk_news n ON n.id = f.source_news_id
            WHERE f.fact_type = 'milestone'
-            {verified_filter}
+            AND (:only_verified = FALSE OR f.is_verified = TRUE)
            AND NOT EXISTS (
                SELECT 1 FROM zopk_milestones m
                WHERE m.source_news_id = f.source_news_id
@ -2351,7 +2348,8 @@ def get_timeline_suggestions(
            LIMIT :limit
        """)

-        results = db_session.execute(suggestions_query, {'limit': limit}).fetchall()
+        params = {'limit': limit, 'only_verified': bool(only_verified)}
+        results = db_session.execute(suggestions_query, params).fetchall()

        suggestions = []
        for row in results: