fix(security): Resolve 1 HIGH and 7 MEDIUM vulnerabilities from code review
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- HIGH: Fix SQL injection in ZOPK knowledge service (3 functions) — replace f-strings with parameterized queries
- MEDIUM: Sanitize tsquery/LIKE input in SearchService to prevent injection
- MEDIUM: Add @login_required + @role_required(ADMIN) to /health/full endpoint
- MEDIUM: Add @role_required(ADMIN) to ZOPK knowledge search API
- MEDIUM: Add bleach HTML sanitization on write for announcements, events, board proceedings (stored XSS via |safe)
- MEDIUM: Remove partial API key from Gemini service logs
- MEDIUM: Remove @csrf.exempt from chat endpoints, add X-CSRFToken headers in JS
- MEDIUM: Add missing CSRF tokens to 3 POST forms (data_request, benefits_form, benefits_list)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-06 05:25:18 +01:00
parent ef08bebc8c
commit e718d96a7d
15 changed files with 72 additions and 30 deletions

4
app.py
View File

@ -166,6 +166,8 @@ from database import (
SystemRole
)
from utils.decorators import role_required
# Import services
import gemini_service
from nordabiz_chat import NordaBizChatEngine
@ -938,6 +940,8 @@ def test_error_500():
@app.route('/health/full')
@login_required
@role_required(SystemRole.ADMIN)
def health_full():
"""
Extended health check - verifies all critical endpoints.

View File

@ -27,6 +27,7 @@ from database import (
SystemRole
)
from utils.decorators import role_required
from utils.helpers import sanitize_html
import gemini_service
# Logger
@ -847,7 +848,7 @@ def admin_calendar_new():
event = NordaEvent(
title=request.form.get('title', '').strip(),
description=request.form.get('description', '').strip(),
description=sanitize_html(request.form.get('description', '').strip()),
event_date=datetime.strptime(request.form.get('event_date'), '%Y-%m-%d').date(),
start_time=request.form.get('start_time') or None,
end_time=request.form.get('end_time') or None,

View File

@ -15,6 +15,7 @@ from flask_login import login_required, current_user
from . import bp
from database import SessionLocal, Announcement, SystemRole
from utils.decorators import role_required
from utils.helpers import sanitize_html
logger = logging.getLogger(__name__)
@ -98,7 +99,7 @@ def admin_announcements_new():
try:
title = request.form.get('title', '').strip()
excerpt = request.form.get('excerpt', '').strip()
content = request.form.get('content', '').strip()
content = sanitize_html(request.form.get('content', '').strip())
categories = request.form.getlist('categories')
if not categories:
categories = ['internal'] # Default category
@ -182,7 +183,7 @@ def admin_announcements_edit(id):
if request.method == 'POST':
announcement.title = request.form.get('title', '').strip()
announcement.excerpt = request.form.get('excerpt', '').strip() or None
announcement.content = request.form.get('content', '').strip()
announcement.content = sanitize_html(request.form.get('content', '').strip())
categories = request.form.getlist('categories')
if not categories:
categories = ['internal'] # Default category

View File

@ -352,6 +352,7 @@ def admin_zopk_embeddings_stream():
@bp.route('/zopk-api/knowledge/search', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_search():
"""
Semantic search in ZOPK knowledge base.

View File

@ -27,6 +27,7 @@ from sqlalchemy import desc
from . import bp
from database import SessionLocal, BoardMeeting, SystemRole, User
from utils.decorators import rada_member_required, office_manager_required
from utils.helpers import sanitize_html
from datetime import date, time
try:
@ -452,6 +453,13 @@ def _handle_meeting_form(db, meeting=None):
except json.JSONDecodeError:
proceedings = []
# Sanitize text fields in proceedings to prevent stored XSS
for proc in proceedings:
if isinstance(proc, dict):
for field in ('discussion', 'discussed', 'title'):
if field in proc and isinstance(proc[field], str):
proc[field] = sanitize_html(proc[field])
# Validate
errors = []
if not meeting_number:

View File

@ -18,7 +18,6 @@ from database import (
SystemRole
)
from nordabiz_chat import NordaBizChatEngine
from extensions import csrf
from utils.decorators import member_required
# Logger
@ -40,7 +39,6 @@ def chat():
@bp.route('/api/chat/settings', methods=['GET', 'POST'])
@csrf.exempt
@login_required
@member_required
def chat_settings():
@ -96,7 +94,6 @@ def chat_settings():
@bp.route('/api/chat/start', methods=['POST'])
@csrf.exempt
@login_required
@member_required
def chat_start():
@ -123,7 +120,6 @@ def chat_start():
@bp.route('/api/chat/<int:conversation_id>/message', methods=['POST'])
@csrf.exempt
@login_required
@member_required
def chat_send_message(conversation_id):

View File

@ -104,7 +104,7 @@ class GeminiService:
# Debug: Log API key (masked)
if self.api_key:
logger.info(f"API key loaded: {self.api_key[:10]}...{self.api_key[-4:]}")
logger.info("Gemini API key loaded successfully")
else:
logger.error("API key is None or empty!")

View File

@ -11,6 +11,7 @@ Werkzeug==3.1.5
Flask-WTF==1.2.2
Flask-Limiter==4.1.1
cryptography==46.0.4
bleach==6.3.0
# Database
SQLAlchemy==2.0.46

View File

@ -263,8 +263,12 @@ class SearchService:
"""
keywords = self._expand_keywords(query)
# Build tsquery from keywords
tsquery_parts = [f"{kw}:*" for kw in keywords if kw]
# Sanitize keywords for tsquery - keep only word characters (alphanumeric + polish chars)
sanitized_keywords = [re.sub(r'[^\w]', '', kw, flags=re.UNICODE) for kw in keywords]
sanitized_keywords = [kw for kw in sanitized_keywords if kw]
# Build tsquery from sanitized keywords
tsquery_parts = [f"{kw}:*" for kw in sanitized_keywords]
tsquery = ' | '.join(tsquery_parts)
# Check if pg_trgm is available
@ -276,7 +280,8 @@ class SearchService:
has_trgm = False
# Build ILIKE patterns for each keyword (for multi-word searches)
like_patterns = [f'%{kw}%' for kw in keywords if len(kw) > 2]
# Escape LIKE wildcards in user input before wrapping with %
like_patterns = [f'%{kw.replace("%", r"\\%").replace("_", r"\\_")}%' for kw in sanitized_keywords if len(kw) > 2]
# Build SQL query with scoring for founding_history matches (owners/founders)
if has_trgm:

View File

@ -177,6 +177,7 @@
</div>
<form method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<!-- Podstawowe info -->
<div class="form-section">
<div class="form-section-title">Podstawowe informacje</div>

View File

@ -307,6 +307,7 @@
<div class="actions-cell">
<a href="{{ url_for('admin.admin_benefits_edit', benefit_id=benefit.id) }}" class="btn-small btn-edit">Edytuj</a>
<form action="{{ url_for('admin.admin_benefits_toggle', benefit_id=benefit.id) }}" method="POST" style="display:inline;">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="btn-small btn-toggle">
{% if benefit.is_active %}Wyłącz{% else %}Włącz{% endif %}
</button>

View File

@ -1534,6 +1534,7 @@
{% block extra_js %}
// NordaGPT Chat - State
const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content || '';
let currentConversationId = null;
let conversations = [];
let currentModel = 'flash'; // Default model (flash = ekonomiczny)
@ -1578,7 +1579,7 @@ async function saveModelPreference(model) {
try {
await fetch('/api/chat/settings', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({ model: model })
});
} catch (error) {
@ -1832,7 +1833,8 @@ async function deleteConversation(conversationId) {
try {
const response = await fetch(`/api/chat/${conversationId}/delete`, {
method: 'DELETE'
method: 'DELETE',
headers: { 'X-CSRFToken': csrfToken }
});
const data = await response.json();
@ -1879,7 +1881,7 @@ async function sendMessage() {
if (!currentConversationId) {
const startResponse = await fetch('/api/chat/start', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({
title: message.substring(0, 50) + (message.length > 50 ? '...' : '')
})
@ -1895,7 +1897,7 @@ async function sendMessage() {
// Send message with model selection
const response = await fetch(`/api/chat/${currentConversationId}/message`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken },
body: JSON.stringify({
message: message,
model: currentModel

View File

@ -249,6 +249,7 @@
<div class="form-section">
<h2>Wprowadź NIP firmy</h2>
<form method="POST" id="dataRequestForm">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<div class="form-group">
<label>NIP firmy</label>
<div class="nip-lookup">

View File

@ -8,8 +8,30 @@ Common utility functions used across blueprints.
import re
import logging
import bleach
logger = logging.getLogger(__name__)
# Allowed HTML tags and attributes for rich-text content (announcements, events, proceedings)
_ALLOWED_TAGS = ['p', 'br', 'strong', 'em', 'b', 'i', 'a', 'ul', 'ol', 'li', 'h3', 'h4', 'blockquote']
_ALLOWED_ATTRS = {'a': ['href', 'target', 'rel']}
def sanitize_html(content):
"""
Sanitize HTML content to prevent stored XSS.
Strips all tags except a safe whitelist.
Args:
content: HTML string to sanitize
Returns:
Sanitized HTML string
"""
if not content:
return content
return bleach.clean(content, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRS, strip=True)
def sanitize_input(text, max_length=1000):
"""

View File

@ -1691,9 +1691,8 @@ def find_duplicate_entities(
from sqlalchemy import text
# Build query with pg_trgm similarity
type_filter = f"AND e1.entity_type = '{entity_type}'" if entity_type else ""
query = text(f"""
# Use conditional SQL with COALESCE to avoid f-string interpolation
query = text("""
SELECT
e1.id as id1, e1.name as name1, e1.entity_type as type1,
e1.mentions_count as mentions1, e1.is_verified as verified1,
@ -1715,7 +1714,7 @@ def find_duplicate_entities(
OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%'
OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%'
)
{type_filter}
AND (:entity_type IS NULL OR e1.entity_type = :entity_type)
ORDER BY
sim DESC,
e1.entity_type,
@ -1723,7 +1722,8 @@ def find_duplicate_entities(
LIMIT :limit
""")
result = db_session.execute(query, {'min_sim': min_similarity, 'limit': limit})
params = {'min_sim': min_similarity, 'limit': limit, 'entity_type': entity_type}
result = db_session.execute(query, params)
duplicates = []
for row in result:
@ -1970,12 +1970,10 @@ def find_duplicate_facts(
"""
from sqlalchemy import text
type_filter = f"AND f1.fact_type = '{fact_type}'" if fact_type else ""
# Set similarity threshold and use % operator (uses GiST index)
db_session.execute(text("SET pg_trgm.similarity_threshold = :threshold"), {'threshold': min_similarity})
query = text(f"""
query = text("""
SELECT
f1.id as id1, f1.full_text as text1, f1.fact_type as type1,
f1.is_verified as verified1, f1.confidence_score as score1,
@ -1985,12 +1983,13 @@ def find_duplicate_facts(
FROM zopk_knowledge_facts f1
JOIN zopk_knowledge_facts f2 ON f1.id < f2.id
WHERE f1.full_text % f2.full_text
{type_filter}
AND (:fact_type IS NULL OR f1.fact_type = :fact_type)
ORDER BY sim DESC, COALESCE(GREATEST(f1.confidence_score, f2.confidence_score), 0) DESC
LIMIT :limit
""")
result = db_session.execute(query, {'limit': limit})
params = {'limit': limit, 'fact_type': fact_type}
result = db_session.execute(query, params)
duplicates = []
for row in result:
@ -2320,9 +2319,7 @@ def get_timeline_suggestions(
# Get milestone facts not yet in timeline
# Prioritize: verified, high confidence, has numeric value (dates/amounts)
verified_filter = "AND f.is_verified = TRUE" if only_verified else ""
suggestions_query = text(f"""
suggestions_query = text("""
SELECT DISTINCT ON (f.id)
f.id as fact_id,
f.full_text,
@ -2341,7 +2338,7 @@ def get_timeline_suggestions(
FROM zopk_knowledge_facts f
LEFT JOIN zopk_news n ON n.id = f.source_news_id
WHERE f.fact_type = 'milestone'
{verified_filter}
AND (:only_verified = FALSE OR f.is_verified = TRUE)
AND NOT EXISTS (
SELECT 1 FROM zopk_milestones m
WHERE m.source_news_id = f.source_news_id
@ -2351,7 +2348,8 @@ def get_timeline_suggestions(
LIMIT :limit
""")
results = db_session.execute(suggestions_query, {'limit': limit}).fetchall()
params = {'limit': limit, 'only_verified': bool(only_verified)}
results = db_session.execute(suggestions_query, params).fetchall()
suggestions = []
for row in results: