fix(security): Resolve 1 HIGH and 7 MEDIUM vulnerabilities from code review
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- HIGH: Fix SQL injection in ZOPK knowledge service (3 functions) — replace f-strings with parameterized queries
- MEDIUM: Sanitize tsquery/LIKE input in SearchService to prevent injection
- MEDIUM: Add @login_required + @role_required(ADMIN) to /health/full endpoint
- MEDIUM: Add @role_required(ADMIN) to ZOPK knowledge search API
- MEDIUM: Add bleach HTML sanitization on write for announcements, events, board proceedings (stored XSS via |safe)
- MEDIUM: Remove partial API key from Gemini service logs
- MEDIUM: Remove @csrf.exempt from chat endpoints, add X-CSRFToken headers in JS
- MEDIUM: Add missing CSRF tokens to 3 POST forms (data_request, benefits_form, benefits_list)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-06 05:25:18 +01:00
parent ef08bebc8c
commit e718d96a7d
15 changed files with 72 additions and 30 deletions

4
app.py
View File

@ -166,6 +166,8 @@ from database import (
SystemRole SystemRole
) )
from utils.decorators import role_required
# Import services # Import services
import gemini_service import gemini_service
from nordabiz_chat import NordaBizChatEngine from nordabiz_chat import NordaBizChatEngine
@ -938,6 +940,8 @@ def test_error_500():
@app.route('/health/full') @app.route('/health/full')
@login_required
@role_required(SystemRole.ADMIN)
def health_full(): def health_full():
""" """
Extended health check - verifies all critical endpoints. Extended health check - verifies all critical endpoints.

View File

@ -27,6 +27,7 @@ from database import (
SystemRole SystemRole
) )
from utils.decorators import role_required from utils.decorators import role_required
from utils.helpers import sanitize_html
import gemini_service import gemini_service
# Logger # Logger
@ -847,7 +848,7 @@ def admin_calendar_new():
event = NordaEvent( event = NordaEvent(
title=request.form.get('title', '').strip(), title=request.form.get('title', '').strip(),
description=request.form.get('description', '').strip(), description=sanitize_html(request.form.get('description', '').strip()),
event_date=datetime.strptime(request.form.get('event_date'), '%Y-%m-%d').date(), event_date=datetime.strptime(request.form.get('event_date'), '%Y-%m-%d').date(),
start_time=request.form.get('start_time') or None, start_time=request.form.get('start_time') or None,
end_time=request.form.get('end_time') or None, end_time=request.form.get('end_time') or None,

View File

@ -15,6 +15,7 @@ from flask_login import login_required, current_user
from . import bp from . import bp
from database import SessionLocal, Announcement, SystemRole from database import SessionLocal, Announcement, SystemRole
from utils.decorators import role_required from utils.decorators import role_required
from utils.helpers import sanitize_html
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -98,7 +99,7 @@ def admin_announcements_new():
try: try:
title = request.form.get('title', '').strip() title = request.form.get('title', '').strip()
excerpt = request.form.get('excerpt', '').strip() excerpt = request.form.get('excerpt', '').strip()
content = request.form.get('content', '').strip() content = sanitize_html(request.form.get('content', '').strip())
categories = request.form.getlist('categories') categories = request.form.getlist('categories')
if not categories: if not categories:
categories = ['internal'] # Default category categories = ['internal'] # Default category
@ -182,7 +183,7 @@ def admin_announcements_edit(id):
if request.method == 'POST': if request.method == 'POST':
announcement.title = request.form.get('title', '').strip() announcement.title = request.form.get('title', '').strip()
announcement.excerpt = request.form.get('excerpt', '').strip() or None announcement.excerpt = request.form.get('excerpt', '').strip() or None
announcement.content = request.form.get('content', '').strip() announcement.content = sanitize_html(request.form.get('content', '').strip())
categories = request.form.getlist('categories') categories = request.form.getlist('categories')
if not categories: if not categories:
categories = ['internal'] # Default category categories = ['internal'] # Default category

View File

@ -352,6 +352,7 @@ def admin_zopk_embeddings_stream():
@bp.route('/zopk-api/knowledge/search', methods=['POST']) @bp.route('/zopk-api/knowledge/search', methods=['POST'])
@login_required @login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_search(): def api_zopk_knowledge_search():
""" """
Semantic search in ZOPK knowledge base. Semantic search in ZOPK knowledge base.

View File

@ -27,6 +27,7 @@ from sqlalchemy import desc
from . import bp from . import bp
from database import SessionLocal, BoardMeeting, SystemRole, User from database import SessionLocal, BoardMeeting, SystemRole, User
from utils.decorators import rada_member_required, office_manager_required from utils.decorators import rada_member_required, office_manager_required
from utils.helpers import sanitize_html
from datetime import date, time from datetime import date, time
try: try:
@ -452,6 +453,13 @@ def _handle_meeting_form(db, meeting=None):
except json.JSONDecodeError: except json.JSONDecodeError:
proceedings = [] proceedings = []
# Sanitize text fields in proceedings to prevent stored XSS
for proc in proceedings:
if isinstance(proc, dict):
for field in ('discussion', 'discussed', 'title'):
if field in proc and isinstance(proc[field], str):
proc[field] = sanitize_html(proc[field])
# Validate # Validate
errors = [] errors = []
if not meeting_number: if not meeting_number:

View File

@ -18,7 +18,6 @@ from database import (
SystemRole SystemRole
) )
from nordabiz_chat import NordaBizChatEngine from nordabiz_chat import NordaBizChatEngine
from extensions import csrf
from utils.decorators import member_required from utils.decorators import member_required
# Logger # Logger
@ -40,7 +39,6 @@ def chat():
@bp.route('/api/chat/settings', methods=['GET', 'POST']) @bp.route('/api/chat/settings', methods=['GET', 'POST'])
@csrf.exempt
@login_required @login_required
@member_required @member_required
def chat_settings(): def chat_settings():
@ -96,7 +94,6 @@ def chat_settings():
@bp.route('/api/chat/start', methods=['POST']) @bp.route('/api/chat/start', methods=['POST'])
@csrf.exempt
@login_required @login_required
@member_required @member_required
def chat_start(): def chat_start():
@ -123,7 +120,6 @@ def chat_start():
@bp.route('/api/chat/<int:conversation_id>/message', methods=['POST']) @bp.route('/api/chat/<int:conversation_id>/message', methods=['POST'])
@csrf.exempt
@login_required @login_required
@member_required @member_required
def chat_send_message(conversation_id): def chat_send_message(conversation_id):

View File

@ -104,7 +104,7 @@ class GeminiService:
# Debug: Log API key (masked) # Debug: Log API key (masked)
if self.api_key: if self.api_key:
logger.info(f"API key loaded: {self.api_key[:10]}...{self.api_key[-4:]}") logger.info("Gemini API key loaded successfully")
else: else:
logger.error("API key is None or empty!") logger.error("API key is None or empty!")

View File

@ -11,6 +11,7 @@ Werkzeug==3.1.5
Flask-WTF==1.2.2 Flask-WTF==1.2.2
Flask-Limiter==4.1.1 Flask-Limiter==4.1.1
cryptography==46.0.4 cryptography==46.0.4
bleach==6.3.0
# Database # Database
SQLAlchemy==2.0.46 SQLAlchemy==2.0.46

View File

@ -263,8 +263,12 @@ class SearchService:
""" """
keywords = self._expand_keywords(query) keywords = self._expand_keywords(query)
# Build tsquery from keywords # Sanitize keywords for tsquery - keep only word characters (alphanumeric + underscore, including Polish chars)
tsquery_parts = [f"{kw}:*" for kw in keywords if kw] sanitized_keywords = [re.sub(r'[^\w]', '', kw, flags=re.UNICODE) for kw in keywords]
sanitized_keywords = [kw for kw in sanitized_keywords if kw]
# Build tsquery from sanitized keywords
tsquery_parts = [f"{kw}:*" for kw in sanitized_keywords]
tsquery = ' | '.join(tsquery_parts) tsquery = ' | '.join(tsquery_parts)
# Check if pg_trgm is available # Check if pg_trgm is available
@ -276,7 +280,8 @@ class SearchService:
has_trgm = False has_trgm = False
# Build ILIKE patterns for each keyword (for multi-word searches) # Build ILIKE patterns for each keyword (for multi-word searches)
like_patterns = [f'%{kw}%' for kw in keywords if len(kw) > 2] # Escape LIKE wildcards in user input before wrapping with %
like_patterns = [f'%{kw.replace("%", r"\\%").replace("_", r"\\_")}%' for kw in sanitized_keywords if len(kw) > 2]
# Build SQL query with scoring for founding_history matches (owners/founders) # Build SQL query with scoring for founding_history matches (owners/founders)
if has_trgm: if has_trgm:

View File

@ -177,6 +177,7 @@
</div> </div>
<form method="POST"> <form method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<!-- Podstawowe info --> <!-- Podstawowe info -->
<div class="form-section"> <div class="form-section">
<div class="form-section-title">Podstawowe informacje</div> <div class="form-section-title">Podstawowe informacje</div>

View File

@ -307,6 +307,7 @@
<div class="actions-cell"> <div class="actions-cell">
<a href="{{ url_for('admin.admin_benefits_edit', benefit_id=benefit.id) }}" class="btn-small btn-edit">Edytuj</a> <a href="{{ url_for('admin.admin_benefits_edit', benefit_id=benefit.id) }}" class="btn-small btn-edit">Edytuj</a>
<form action="{{ url_for('admin.admin_benefits_toggle', benefit_id=benefit.id) }}" method="POST" style="display:inline;"> <form action="{{ url_for('admin.admin_benefits_toggle', benefit_id=benefit.id) }}" method="POST" style="display:inline;">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="btn-small btn-toggle"> <button type="submit" class="btn-small btn-toggle">
{% if benefit.is_active %}Wyłącz{% else %}Włącz{% endif %} {% if benefit.is_active %}Wyłącz{% else %}Włącz{% endif %}
</button> </button>

View File

@ -1534,6 +1534,7 @@
{% block extra_js %} {% block extra_js %}
// NordaGPT Chat - State // NordaGPT Chat - State
const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content || '';
let currentConversationId = null; let currentConversationId = null;
let conversations = []; let conversations = [];
let currentModel = 'flash'; // Default model (flash = ekonomiczny) let currentModel = 'flash'; // Default model (flash = ekonomiczny)
@ -1578,7 +1579,7 @@ async function saveModelPreference(model) {
try { try {
await fetch('/api/chat/settings', { await fetch('/api/chat/settings', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({ model: model }) body: JSON.stringify({ model: model })
}); });
} catch (error) { } catch (error) {
@ -1832,7 +1833,8 @@ async function deleteConversation(conversationId) {
try { try {
const response = await fetch(`/api/chat/${conversationId}/delete`, { const response = await fetch(`/api/chat/${conversationId}/delete`, {
method: 'DELETE' method: 'DELETE',
headers: { 'X-CSRFToken': csrfToken }
}); });
const data = await response.json(); const data = await response.json();
@ -1879,7 +1881,7 @@ async function sendMessage() {
if (!currentConversationId) { if (!currentConversationId) {
const startResponse = await fetch('/api/chat/start', { const startResponse = await fetch('/api/chat/start', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({ body: JSON.stringify({
title: message.substring(0, 50) + (message.length > 50 ? '...' : '') title: message.substring(0, 50) + (message.length > 50 ? '...' : '')
}) })
@ -1895,7 +1897,7 @@ async function sendMessage() {
// Send message with model selection // Send message with model selection
const response = await fetch(`/api/chat/${currentConversationId}/message`, { const response = await fetch(`/api/chat/${currentConversationId}/message`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({ body: JSON.stringify({
message: message, message: message,
model: currentModel model: currentModel

View File

@ -249,6 +249,7 @@
<div class="form-section"> <div class="form-section">
<h2>Wprowadź NIP firmy</h2> <h2>Wprowadź NIP firmy</h2>
<form method="POST" id="dataRequestForm"> <form method="POST" id="dataRequestForm">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="form-group"> <div class="form-group">
<label>NIP firmy</label> <label>NIP firmy</label>
<div class="nip-lookup"> <div class="nip-lookup">

View File

@ -8,8 +8,30 @@ Common utility functions used across blueprints.
import re import re
import logging import logging
import bleach
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Allowed HTML tags and attributes for rich-text content (announcements, events, proceedings).
# sanitize_html() passes both to bleach.clean() with strip=True, so markup
# that is not listed here is stripped instead of being escaped.
_ALLOWED_TAGS = ['p', 'br', 'strong', 'em', 'b', 'i', 'a', 'ul', 'ol', 'li', 'h3', 'h4', 'blockquote']
# Attributes are whitelisted for <a> only; no other tag has an entry here.
_ALLOWED_ATTRS = {'a': ['href', 'target', 'rel']}
def sanitize_html(content):
    """
    Clean user-supplied HTML before it is stored, to prevent stored XSS.

    Markup outside the module whitelist (``_ALLOWED_TAGS`` /
    ``_ALLOWED_ATTRS``) is stripped rather than escaped.

    Args:
        content: HTML string to sanitize

    Returns:
        The sanitized HTML string; falsy input (``None``, ``''``) is
        returned unchanged.
    """
    if content:
        return bleach.clean(
            content,
            tags=_ALLOWED_TAGS,
            attributes=_ALLOWED_ATTRS,
            strip=True,
        )
    # Nothing to clean: hand back None / '' exactly as received.
    return content
def sanitize_input(text, max_length=1000): def sanitize_input(text, max_length=1000):
""" """

View File

@ -1691,9 +1691,8 @@ def find_duplicate_entities(
from sqlalchemy import text from sqlalchemy import text
# Build query with pg_trgm similarity # Build query with pg_trgm similarity
type_filter = f"AND e1.entity_type = '{entity_type}'" if entity_type else "" # Use a NULL-tolerant bound parameter (:entity_type IS NULL OR ...) to avoid f-string interpolation
query = text("""
query = text(f"""
SELECT SELECT
e1.id as id1, e1.name as name1, e1.entity_type as type1, e1.id as id1, e1.name as name1, e1.entity_type as type1,
e1.mentions_count as mentions1, e1.is_verified as verified1, e1.mentions_count as mentions1, e1.is_verified as verified1,
@ -1715,7 +1714,7 @@ def find_duplicate_entities(
OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%' OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%'
OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%' OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%'
) )
{type_filter} AND (:entity_type IS NULL OR e1.entity_type = :entity_type)
ORDER BY ORDER BY
sim DESC, sim DESC,
e1.entity_type, e1.entity_type,
@ -1723,7 +1722,8 @@ def find_duplicate_entities(
LIMIT :limit LIMIT :limit
""") """)
result = db_session.execute(query, {'min_sim': min_similarity, 'limit': limit}) params = {'min_sim': min_similarity, 'limit': limit, 'entity_type': entity_type}
result = db_session.execute(query, params)
duplicates = [] duplicates = []
for row in result: for row in result:
@ -1970,12 +1970,10 @@ def find_duplicate_facts(
""" """
from sqlalchemy import text from sqlalchemy import text
type_filter = f"AND f1.fact_type = '{fact_type}'" if fact_type else ""
# Set similarity threshold and use % operator (uses GiST index) # Set similarity threshold and use % operator (uses GiST index)
db_session.execute(text("SET pg_trgm.similarity_threshold = :threshold"), {'threshold': min_similarity}) db_session.execute(text("SET pg_trgm.similarity_threshold = :threshold"), {'threshold': min_similarity})
query = text(f""" query = text("""
SELECT SELECT
f1.id as id1, f1.full_text as text1, f1.fact_type as type1, f1.id as id1, f1.full_text as text1, f1.fact_type as type1,
f1.is_verified as verified1, f1.confidence_score as score1, f1.is_verified as verified1, f1.confidence_score as score1,
@ -1985,12 +1983,13 @@ def find_duplicate_facts(
FROM zopk_knowledge_facts f1 FROM zopk_knowledge_facts f1
JOIN zopk_knowledge_facts f2 ON f1.id < f2.id JOIN zopk_knowledge_facts f2 ON f1.id < f2.id
WHERE f1.full_text % f2.full_text WHERE f1.full_text % f2.full_text
{type_filter} AND (:fact_type IS NULL OR f1.fact_type = :fact_type)
ORDER BY sim DESC, COALESCE(GREATEST(f1.confidence_score, f2.confidence_score), 0) DESC ORDER BY sim DESC, COALESCE(GREATEST(f1.confidence_score, f2.confidence_score), 0) DESC
LIMIT :limit LIMIT :limit
""") """)
result = db_session.execute(query, {'limit': limit}) params = {'limit': limit, 'fact_type': fact_type}
result = db_session.execute(query, params)
duplicates = [] duplicates = []
for row in result: for row in result:
@ -2320,9 +2319,7 @@ def get_timeline_suggestions(
# Get milestone facts not yet in timeline # Get milestone facts not yet in timeline
# Prioritize: verified, high confidence, has numeric value (dates/amounts) # Prioritize: verified, high confidence, has numeric value (dates/amounts)
verified_filter = "AND f.is_verified = TRUE" if only_verified else "" suggestions_query = text("""
suggestions_query = text(f"""
SELECT DISTINCT ON (f.id) SELECT DISTINCT ON (f.id)
f.id as fact_id, f.id as fact_id,
f.full_text, f.full_text,
@ -2341,7 +2338,7 @@ def get_timeline_suggestions(
FROM zopk_knowledge_facts f FROM zopk_knowledge_facts f
LEFT JOIN zopk_news n ON n.id = f.source_news_id LEFT JOIN zopk_news n ON n.id = f.source_news_id
WHERE f.fact_type = 'milestone' WHERE f.fact_type = 'milestone'
{verified_filter} AND (:only_verified = FALSE OR f.is_verified = TRUE)
AND NOT EXISTS ( AND NOT EXISTS (
SELECT 1 FROM zopk_milestones m SELECT 1 FROM zopk_milestones m
WHERE m.source_news_id = f.source_news_id WHERE m.source_news_id = f.source_news_id
@ -2351,7 +2348,8 @@ def get_timeline_suggestions(
LIMIT :limit LIMIT :limit
""") """)
results = db_session.execute(suggestions_query, {'limit': limit}).fetchall() params = {'limit': limit, 'only_verified': bool(only_verified)}
results = db_session.execute(suggestions_query, params).fetchall()
suggestions = [] suggestions = []
for row in results: for row in results: