nordabiz/blueprints/admin/routes_zopk_knowledge.py
Maciej Pienczyn e718d96a7d
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
fix(security): Resolve 1 HIGH and 7 MEDIUM vulnerabilities from code review
- HIGH: Fix SQL injection in ZOPK knowledge service (3 functions) — replace f-strings with parameterized queries
- MEDIUM: Sanitize tsquery/LIKE input in SearchService to prevent injection
- MEDIUM: Add @login_required + @role_required(ADMIN) to /health/full endpoint
- MEDIUM: Add @role_required(ADMIN) to ZOPK knowledge search API
- MEDIUM: Add bleach HTML sanitization on write for announcements, events, board proceedings (stored XSS via |safe)
- MEDIUM: Remove partial API key from Gemini service logs
- MEDIUM: Remove @csrf.exempt from chat endpoints, add X-CSRFToken headers in JS
- MEDIUM: Add missing CSRF tokens to 3 POST forms (data_request, benefits_form, benefits_list)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 05:25:18 +01:00

1027 lines
35 KiB
Python

"""
ZOPK Knowledge Routes - Admin blueprint
Migrated from app.py as part of the blueprint refactoring.
Contains routes for ZOPK knowledge base management, extraction, embeddings, and graph visualization.
"""
import json
import logging
import time
import threading
from dataclasses import asdict
from datetime import datetime
from flask import flash, jsonify, redirect, render_template, request, url_for, Response, stream_with_context
from flask_login import current_user, login_required
from sqlalchemy import text, func, distinct
from database import (
SessionLocal,
SystemRole,
ZOPKNews,
ZOPKKnowledgeChunk,
ZOPKKnowledgeEntity,
ZOPKKnowledgeEntityMention
)
from utils.decorators import role_required
from . import bp
# Module-level logger shared by every route in this file.
logger = logging.getLogger(__name__)
# In-memory cache for the co-occurrence graph endpoint.
# Maps a cache-key string (built from the request parameters) to a
# (response_data, stored_at) tuple, where stored_at is a time.time() value.
_graph_cache = {}
# Maximum age of a cache entry, in seconds (compared against time.time() deltas).
_GRAPH_CACHE_TTL = 300  # 5 minutes
@bp.route('/zopk/knowledge/stats')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_stats():
    """
    Return knowledge extraction statistics as JSON.

    The payload is ``{'success': True}`` merged with whatever mapping
    ``get_knowledge_stats`` returns. On any exception the error is logged
    and a ``{'success': False, 'error': ...}`` body is returned with HTTP 500.
    """
    from zopk_knowledge_service import get_knowledge_stats

    session = SessionLocal()
    try:
        payload = {'success': True}
        # Keys coming from the service win over the 'success' flag, as before.
        payload.update(get_knowledge_stats(session))
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting knowledge stats: {exc}")
        failure = {'success': False, 'error': str(exc)}
        return jsonify(failure), 500
    finally:
        session.close()
@bp.route('/zopk/knowledge/extract', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract():
    """
    Batch extract knowledge from scraped articles.

    Request JSON (body is optional):
        - limit: int (default 50, clamped to 1..100) - max articles to process

    Response JSON:
        - success flag and a human-readable message
        - processed / total / failed counts
        - chunks / facts / entities / relations created

    Returns HTTP 400 when ``limit`` cannot be interpreted as an integer and
    HTTP 500 (after rolling back the session) when extraction raises.
    """
    from zopk_knowledge_service import ZOPKKnowledgeService

    # silent=True: a missing or non-JSON body means "use the defaults" instead
    # of raising an HTTP error that the broad handler below would report as 500.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    try:
        limit = int(data.get('limit', 50))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': "Parametr 'limit' musi być liczbą całkowitą"
        }), 400
    limit = max(1, min(limit, 100))

    db = SessionLocal()
    try:
        service = ZOPKKnowledgeService(db, user_id=current_user.id)
        result = service.batch_extract(limit=limit)
        return jsonify({
            'success': True,
            'message': f"Ekstrakcja zakończona: {result['success']}/{result['total']} artykułów. "
                       f"Utworzono: {result['chunks_created']} chunks, {result['facts_created']} faktów, "
                       f"{result['entities_created']} encji, {result['relations_created']} relacji.",
            'processed': result['success'],
            'total': result['total'],
            'failed': result['failed'],
            'chunks_created': result['chunks_created'],
            'facts_created': result['facts_created'],
            'entities_created': result['entities_created'],
            'relations_created': result['relations_created']
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error in knowledge extraction: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/knowledge/extract/<int:news_id>', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract_single(news_id):
    """
    Run knowledge extraction for one article identified by ``news_id``.

    Responds with the created-item counters on success, HTTP 400 carrying the
    service-reported error when extraction was unsuccessful, and HTTP 500
    (after a session rollback) when an exception is raised.
    """
    from zopk_knowledge_service import ZOPKKnowledgeService

    session = SessionLocal()
    try:
        extractor = ZOPKKnowledgeService(session, user_id=current_user.id)
        outcome = extractor.extract_from_news(news_id)

        # Guard clause: report the service-level failure first.
        if not outcome.success:
            return jsonify({'success': False, 'error': outcome.error}), 400

        summary = (
            f"Wyekstrahowano: {outcome.chunks_created} chunks, "
            f"{outcome.facts_created} faktów, {outcome.entities_created} encji"
        )
        return jsonify({
            'success': True,
            'message': summary,
            'chunks_created': outcome.chunks_created,
            'facts_created': outcome.facts_created,
            'entities_created': outcome.entities_created,
            'relations_created': outcome.relations_created,
            'processing_time': outcome.processing_time
        })
    except Exception as exc:
        session.rollback()
        logger.error(f"Error extracting from news {news_id}: {exc}")
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk/knowledge/embeddings', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_generate_embeddings():
    """
    Generate embeddings for chunks that don't have them.

    Request JSON (body is optional):
        - limit: int (default 100, clamped to 1..500) - max chunks to process

    Response JSON:
        - success flag and a human-readable message
        - generated / total / failed counts

    Returns HTTP 400 when ``limit`` cannot be interpreted as an integer and
    HTTP 500 (after rolling back the session) when generation raises.
    """
    from zopk_knowledge_service import generate_chunk_embeddings

    # silent=True: tolerate a missing or non-JSON body and fall back to the
    # defaults rather than letting the HTTP error be reported as a 500 below.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    try:
        limit = int(data.get('limit', 100))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': "Parametr 'limit' musi być liczbą całkowitą"
        }), 400
    limit = max(1, min(limit, 500))

    db = SessionLocal()
    try:
        result = generate_chunk_embeddings(db, limit=limit, user_id=current_user.id)
        return jsonify({
            'success': True,
            'message': f"Wygenerowano embeddings: {result['success']}/{result['total']}",
            'generated': result['success'],
            'total': result['total'],
            'failed': result['failed']
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error generating embeddings: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/knowledge/extract/stream', methods=['GET'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract_stream():
    """
    SSE endpoint for streaming knowledge extraction progress.

    Query params:
        - limit: int (default 10, clamped to 1..50) - max articles to process.
          A non-numeric value falls back to the default instead of failing.

    Each event is a ``data:`` frame holding a JSON object with ``status``
    ('processing', 'success', 'failed', 'complete' or 'error') and, while
    articles are being processed, progress counters and running totals.
    """
    # type=int makes a malformed value fall back to the default rather than
    # raising ValueError before any error handling is in place.
    requested = request.args.get('limit', 10, type=int)
    limit = max(1, min(requested, 50))
    # Captured before streaming starts; the generator body only runs once the
    # response is being iterated.
    user_id = current_user.id

    def sse(payload):
        """Format ``payload`` as one Server-Sent Events data frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    def generate():
        db = SessionLocal()
        try:
            from zopk_knowledge_service import ZOPKKnowledgeService
            service = ZOPKKnowledgeService(db, user_id=user_id)

            # Find articles ready for extraction
            articles = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved']),
                ZOPKNews.scrape_status == 'scraped',
                ZOPKNews.knowledge_extracted == False  # noqa: E712 (SQL expression)
            ).order_by(
                ZOPKNews.created_at.desc()
            ).limit(limit).all()

            total = len(articles)
            if total == 0:
                yield sse({'status': 'complete', 'message': 'Brak artykułów do ekstrakcji', 'total': 0})
                return

            # Send initial
            yield sse({
                'current': 0, 'total': total, 'percent': 0,
                'stage': 'extracting', 'status': 'processing',
                'message': f'Rozpoczynam ekstrakcję z {total} artykułów...'
            })

            stats = {'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
            start_time = time.time()

            for idx, article in enumerate(articles, 1):
                # A missing title must not abort the whole stream.
                title = article.title or ''

                # Send processing update
                yield sse({
                    'current': idx, 'total': total,
                    'percent': round((idx - 1) / total * 100, 1),
                    'stage': 'extracting', 'status': 'processing',
                    'message': f'Analizuję AI: {title[:50]}...',
                    'article_id': article.id,
                    'article_title': title[:80],
                    'details': stats
                })

                result = service.extract_from_news(article.id)
                percent_done = round(idx / total * 100, 1)

                if result.success:
                    stats['success'] += 1
                    stats['chunks'] += result.chunks_created
                    stats['facts'] += result.facts_created
                    stats['entities'] += result.entities_created
                    yield sse({
                        'current': idx, 'total': total, 'percent': percent_done,
                        'stage': 'extracting', 'status': 'success',
                        'message': f'{result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e',
                        'article_id': article.id,
                        'details': {
                            'new_chunks': result.chunks_created,
                            'new_facts': result.facts_created,
                            'new_entities': result.entities_created,
                            **stats
                        }
                    })
                else:
                    stats['failed'] += 1
                    error_msg = result.error[:50] if result.error else 'Nieznany błąd'
                    yield sse({
                        'current': idx, 'total': total, 'percent': percent_done,
                        'stage': 'extracting', 'status': 'failed',
                        'message': f'{error_msg}',
                        'article_id': article.id,
                        'details': {'error': result.error, **stats}
                    })

            processing_time = round(time.time() - start_time, 2)

            # Send completion
            success_count = stats['success']
            chunks_count = stats['chunks']
            facts_count = stats['facts']
            entities_count = stats['entities']
            complete_msg = f'Zakończono: {success_count}/{total}. Utworzono: {chunks_count}ch, {facts_count}f, {entities_count}e'
            yield sse({
                'current': total, 'total': total, 'percent': 100,
                'stage': 'extracting', 'status': 'complete',
                'message': complete_msg,
                'details': {'processing_time': processing_time, **stats}
            })
        except Exception as e:
            logger.error(f"SSE extraction error: {e}")
            yield sse({'status': 'error', 'message': str(e)})
        finally:
            db.close()

    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
@bp.route('/zopk/knowledge/embeddings/stream', methods=['GET'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_embeddings_stream():
    """
    SSE endpoint for streaming embeddings generation progress.

    Query params:
        - limit: int (default 50, clamped to 1..200) - max chunks to process.
          A non-numeric value falls back to the default instead of failing.

    Every successfully generated embedding is committed immediately, so work
    already done is kept if the client disconnects mid-stream; a failure on
    one chunk rolls back only that chunk's pending change.
    """
    # type=int makes a malformed value fall back to the default rather than
    # raising ValueError before any error handling is in place.
    requested = request.args.get('limit', 50, type=int)
    limit = max(1, min(requested, 200))
    # Captured before streaming starts; the generator body only runs once the
    # response is being iterated.
    user_id = current_user.id

    def sse(payload):
        """Format ``payload`` as one Server-Sent Events data frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    def generate():
        from gemini_service import GeminiService

        db = SessionLocal()
        try:
            gemini = GeminiService()

            # Find chunks without embeddings
            chunks = db.query(ZOPKKnowledgeChunk).filter(
                ZOPKKnowledgeChunk.embedding.is_(None)
            ).limit(limit).all()

            total = len(chunks)
            if total == 0:
                yield sse({'status': 'complete', 'message': 'Brak chunks bez embeddingów', 'total': 0})
                return

            # Send initial
            yield sse({
                'current': 0, 'total': total, 'percent': 0,
                'stage': 'embedding', 'status': 'processing',
                'message': f'Generuję embeddingi dla {total} chunks...'
            })

            stats = {'success': 0, 'failed': 0}
            start_time = time.time()

            for idx, chunk in enumerate(chunks, 1):
                summary_short = chunk.summary[:40] if chunk.summary else f'chunk_{chunk.id}'

                # Send processing update
                yield sse({
                    'current': idx, 'total': total,
                    'percent': round((idx - 1) / total * 100, 1),
                    'stage': 'embedding', 'status': 'processing',
                    'message': f'Embedding {idx}/{total}: {summary_short}...',
                    'details': stats
                })

                stored = False
                error_text = None
                try:
                    embedding = gemini.generate_embedding(
                        text=chunk.content,
                        task_type='retrieval_document',
                        title=chunk.summary,
                        user_id=user_id,
                        feature='zopk_chunk_embedding'
                    )
                    if embedding:
                        chunk.embedding = json.dumps(embedding)
                        # Persist right away so a dropped connection cannot
                        # discard embeddings that were already generated.
                        db.commit()
                        stored = True
                except Exception as e:
                    # Leave the session usable for the remaining chunks.
                    db.rollback()
                    error_text = str(e)

                percent_done = round(idx / total * 100, 1)
                if stored:
                    stats['success'] += 1
                    yield sse({
                        'current': idx, 'total': total, 'percent': percent_done,
                        'stage': 'embedding', 'status': 'success',
                        'message': f'✓ 768 dim: {summary_short}',
                        'details': stats
                    })
                elif error_text is None:
                    # The API call returned nothing usable.
                    stats['failed'] += 1
                    yield sse({
                        'current': idx, 'total': total, 'percent': percent_done,
                        'stage': 'embedding', 'status': 'failed',
                        'message': '✗ Brak odpowiedzi API',
                        'details': stats
                    })
                else:
                    stats['failed'] += 1
                    yield sse({
                        'current': idx, 'total': total, 'percent': percent_done,
                        'stage': 'embedding', 'status': 'failed',
                        'message': f'{error_text[:40]}',
                        'details': {'error': error_text, **stats}
                    })

            processing_time = round(time.time() - start_time, 2)

            # Send completion
            success_count = stats['success']
            complete_msg = f'Zakończono: {success_count}/{total} embeddingów'
            yield sse({
                'current': total, 'total': total, 'percent': 100,
                'stage': 'embedding', 'status': 'complete',
                'message': complete_msg,
                'details': {'processing_time': processing_time, **stats}
            })
        except Exception as e:
            logger.error(f"SSE embedding error: {e}")
            yield sse({'status': 'error', 'message': str(e)})
        finally:
            db.close()

    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
@bp.route('/zopk-api/knowledge/search', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_search():
    """
    Semantic search in ZOPK knowledge base.

    Request JSON:
        - query: str (required, non-blank) - search query
        - limit: int (default 5, clamped to 1..20) - max results

    Response JSON:
        - query: the query as received
        - chunks: result of ``search_knowledge``
        - facts: result of ``get_relevant_facts``

    Returns HTTP 400 for a missing/blank/non-string query or a non-integer
    limit, and HTTP 500 when the search itself raises.
    """
    from zopk_knowledge_service import search_knowledge, get_relevant_facts

    # silent=True: a missing or non-JSON body is treated as an empty request
    # and rejected below with 400, instead of surfacing as a 500.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'success': False, 'error': 'Query wymagane'}), 400

    try:
        limit = int(data.get('limit', 5))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': "Parametr 'limit' musi być liczbą całkowitą"
        }), 400
    limit = max(1, min(limit, 20))

    db = SessionLocal()
    try:
        # Search chunks
        chunks = search_knowledge(
            db,
            query=query,
            limit=limit,
            user_id=current_user.id
        )
        # Get relevant facts
        facts = get_relevant_facts(db, query=query, limit=limit)
        return jsonify({
            'success': True,
            'query': query,
            'chunks': chunks,
            'facts': facts
        })
    except Exception as e:
        logger.error(f"Error in knowledge search: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/knowledge')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_dashboard():
    """
    Render the ZOPK Knowledge Base dashboard page.

    No data is passed to the template from this view.
    """
    template_name = 'admin/zopk_knowledge_dashboard.html'
    return render_template(template_name)
@bp.route('/zopk/knowledge/chunks')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_chunks():
    """
    Render the paginated list of knowledge chunks.

    Query params: ``page`` (default 1), ``per_page`` (default 20),
    ``source_news_id`` (int), and the tri-state flags ``has_embedding`` /
    ``is_verified`` (absent -> no filter, 'true' in any case -> True,
    anything else -> False).
    """
    from zopk_knowledge_service import list_chunks

    def tri_state(param_name):
        """Map an optional query-string flag to None / True / False."""
        raw = request.args.get(param_name)
        return None if raw is None else raw.lower() == 'true'

    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 20, type=int)
    # The same filter values go to the service and are echoed to the template.
    filters = {
        'source_news_id': request.args.get('source_news_id', type=int),
        'has_embedding': tri_state('has_embedding'),
        'is_verified': tri_state('is_verified'),
    }

    session = SessionLocal()
    try:
        listing = list_chunks(session, page=page, per_page=per_page, **filters)
        return render_template(
            'admin/zopk_knowledge_chunks.html',
            chunks=listing['chunks'],
            total=listing['total'],
            page=listing['page'],
            per_page=listing['per_page'],
            pages=listing['pages'],
            **filters
        )
    finally:
        session.close()
@bp.route('/zopk/knowledge/facts')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_facts():
    """
    Render the paginated list of knowledge facts.

    Query params: ``page`` (default 1), ``per_page`` (default 20),
    ``fact_type``, ``source_news_id`` (int) and the tri-state flag
    ``is_verified`` (absent -> no filter, 'true' in any case -> True,
    anything else -> False).
    """
    from zopk_knowledge_service import list_facts

    params = request.args
    page = params.get('page', 1, type=int)
    per_page = params.get('per_page', 20, type=int)
    fact_type = params.get('fact_type')
    source_news_id = params.get('source_news_id', type=int)

    verified_raw = params.get('is_verified')
    is_verified = None if verified_raw is None else verified_raw.lower() == 'true'

    session = SessionLocal()
    try:
        listing = list_facts(
            session,
            page=page,
            per_page=per_page,
            fact_type=fact_type,
            is_verified=is_verified,
            source_news_id=source_news_id
        )
        context = {
            'facts': listing['facts'],
            'total': listing['total'],
            'page': listing['page'],
            'per_page': listing['per_page'],
            'pages': listing['pages'],
            'fact_types': listing['fact_types'],
            'current_fact_type': fact_type,
            'source_news_id': source_news_id,
            'is_verified': is_verified,
        }
        return render_template('admin/zopk_knowledge_facts.html', **context)
    finally:
        session.close()
@bp.route('/zopk/knowledge/entities')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_entities():
    """
    Render the paginated list of knowledge entities.

    Query params: ``page`` (default 1), ``per_page`` (default 20),
    ``entity_type``, ``min_mentions`` (int) and the tri-state flag
    ``is_verified`` (absent -> no filter, 'true' in any case -> True,
    anything else -> False).
    """
    from zopk_knowledge_service import list_entities

    params = request.args
    page = params.get('page', 1, type=int)
    per_page = params.get('per_page', 20, type=int)
    entity_type = params.get('entity_type')
    min_mentions = params.get('min_mentions', type=int)

    verified_raw = params.get('is_verified')
    is_verified = None if verified_raw is None else verified_raw.lower() == 'true'

    session = SessionLocal()
    try:
        listing = list_entities(
            session,
            page=page,
            per_page=per_page,
            entity_type=entity_type,
            is_verified=is_verified,
            min_mentions=min_mentions
        )
        context = {
            'entities': listing['entities'],
            'total': listing['total'],
            'page': listing['page'],
            'per_page': listing['per_page'],
            'pages': listing['pages'],
            'entity_types': listing['entity_types'],
            'current_entity_type': entity_type,
            'min_mentions': min_mentions,
            'is_verified': is_verified,
        }
        return render_template('admin/zopk_knowledge_entities.html', **context)
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_detail(chunk_id):
    """Return the detail record for one chunk as JSON, or HTTP 404 if the lookup yields nothing."""
    from zopk_knowledge_service import get_chunk_detail

    session = SessionLocal()
    try:
        detail = get_chunk_detail(session, chunk_id)
        if detail:
            return jsonify({'success': True, 'chunk': detail})
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_verify(chunk_id):
    """
    Set the verification flag of a chunk.

    Reads ``is_verified`` (default True) from the JSON body and forwards it,
    together with the current user's id, to ``update_chunk_verification``.
    A falsy result maps to HTTP 404; an exception rolls the session back and
    maps to HTTP 500.
    """
    from zopk_knowledge_service import update_chunk_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        verified_flag = body.get('is_verified', True)
        updated = update_chunk_verification(session, chunk_id, verified_flag, current_user.id)
        if updated:
            return jsonify({'success': True, 'is_verified': verified_flag})
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/facts/<int:fact_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_verify(fact_id):
    """
    Set the verification flag of a fact.

    Reads ``is_verified`` (default True) from the JSON body and forwards it
    to ``update_fact_verification``. A falsy result maps to HTTP 404; an
    exception rolls the session back and maps to HTTP 500.
    """
    from zopk_knowledge_service import update_fact_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        verified_flag = body.get('is_verified', True)
        updated = update_fact_verification(session, fact_id, verified_flag)
        if updated:
            return jsonify({'success': True, 'is_verified': verified_flag})
        return jsonify({'success': False, 'error': 'Fakt nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/entities/<int:entity_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_entity_verify(entity_id):
    """
    Set the verification flag of an entity.

    Reads ``is_verified`` (default True) from the JSON body and forwards it
    to ``update_entity_verification``. A falsy result maps to HTTP 404; an
    exception rolls the session back and maps to HTTP 500.
    """
    from zopk_knowledge_service import update_entity_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        verified_flag = body.get('is_verified', True)
        updated = update_entity_verification(session, entity_id, verified_flag)
        if updated:
            return jsonify({'success': True, 'is_verified': verified_flag})
        return jsonify({'success': False, 'error': 'Encja nie znaleziona'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>', methods=['DELETE'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_delete(chunk_id):
    """
    Delete a chunk through ``delete_chunk``.

    A falsy result from the service maps to HTTP 404; an exception rolls the
    session back and maps to HTTP 500.
    """
    from zopk_knowledge_service import delete_chunk

    session = SessionLocal()
    try:
        removed = delete_chunk(session, chunk_id)
        if removed:
            return jsonify({'success': True, 'message': 'Chunk usunięty'})
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk/knowledge/duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_duplicates():
    """
    Admin page for managing duplicate entities.

    Query params:
        - entity_type: str (optional) - restrict the search to one type
        - min_similarity: float (default 0.4) - similarity threshold; a
          value that is not a valid float falls back to the default

    Renders the duplicates template with up to 100 candidate pairs and the
    sorted list of distinct entity types for the filter control.
    """
    from zopk_knowledge_service import find_duplicate_entities

    # Get filter parameters. type=float returns the default on a malformed
    # value instead of raising ValueError (which had no handler here).
    entity_type = request.args.get('entity_type', '')
    min_similarity = request.args.get('min_similarity', 0.4, type=float)

    db = SessionLocal()
    try:
        # Find duplicates
        duplicates = find_duplicate_entities(
            db,
            entity_type=entity_type if entity_type else None,
            min_similarity=min_similarity,
            limit=100
        )
        # Get unique entity types for filter. NULL types are skipped because
        # sorted() raises TypeError when None is mixed with strings.
        entity_types = sorted(
            row[0]
            for row in db.query(distinct(ZOPKKnowledgeEntity.entity_type)).all()
            if row[0] is not None
        )
        return render_template(
            'admin/zopk_knowledge_duplicates.html',
            duplicates=duplicates,
            entity_types=entity_types,
            selected_type=entity_type,
            min_similarity=min_similarity
        )
    finally:
        db.close()
@bp.route('/zopk-api/knowledge/duplicates/preview', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_duplicates_preview():
    """
    Preview what merging two entities would do.

    Expects ``primary_id`` and ``duplicate_id`` in the JSON body (HTTP 400 if
    either is missing or falsy). If the preview returned by the service
    contains an ``'error'`` key, that error is returned with HTTP 404.
    """
    from zopk_knowledge_service import get_entity_merge_preview

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        primary_id, duplicate_id = body.get('primary_id'), body.get('duplicate_id')
        if not (primary_id and duplicate_id):
            return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

        merge_preview = get_entity_merge_preview(session, primary_id, duplicate_id)
        if 'error' in merge_preview:
            return jsonify({'success': False, 'error': merge_preview['error']}), 404
        return jsonify({'success': True, 'preview': merge_preview})
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/duplicates/merge', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_duplicates_merge():
    """
    Merge two entities - keep primary, delete duplicate.

    Request JSON:
        - primary_id: required - entity to keep
        - duplicate_id: required - entity to merge into the primary
        - new_name: optional - forwarded to ``merge_entities``

    Returns the service result as JSON, HTTP 400 when either ID is missing,
    and HTTP 500 (after rolling back the session) when the merge raises, so
    that API clients always receive a JSON body like the sibling endpoints.
    """
    from zopk_knowledge_service import merge_entities

    # silent=True: a missing or non-JSON body is handled by the 400 below.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    primary_id = data.get('primary_id')
    duplicate_id = data.get('duplicate_id')
    new_name = data.get('new_name')
    if not primary_id or not duplicate_id:
        return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

    db = SessionLocal()
    try:
        result = merge_entities(db, primary_id, duplicate_id, new_name)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error merging entities {primary_id} <- {duplicate_id}: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/knowledge/graph')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_graph():
    """Render the entity relations graph page (no template context is passed)."""
    template_name = 'admin/zopk_knowledge_graph.html'
    return render_template(template_name)
@bp.route('/zopk-api/knowledge/graph/data')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_graph_data():
    """Get graph data for entity co-occurrence visualization.

    Query params:
        - entity_type: str (optional) - restrict nodes to one entity type
        - min_cooccurrence: int (default 3, minimum 1) - minimum number of
          shared chunks for a link to be reported
        - limit: int (default 100, clamped to 1..500) - max links returned
      Malformed integers fall back to their defaults.

    Response JSON: ``success``, ``nodes``, ``links``, ``stats`` and ``cached``.

    Uses the module-level in-memory cache with a 5 minute TTL to avoid
    recalculating co-occurrences on every request. Expired entries are
    purged on every cache miss so the cache cannot grow without bound from
    stale keys.
    """
    # type=int returns the default on malformed input instead of raising
    # ValueError outside of any error handler.
    entity_type = request.args.get('entity_type', '')
    min_cooccurrence = max(1, request.args.get('min_cooccurrence', 3, type=int))
    limit = max(1, min(request.args.get('limit', 100, type=int), 500))
    cache_key = f"graph:{entity_type}:{min_cooccurrence}:{limit}"

    # Check cache
    now = time.time()
    cached_entry = _graph_cache.get(cache_key)
    if cached_entry is not None:
        cached_data, cached_time = cached_entry
        if now - cached_time < _GRAPH_CACHE_TTL:
            # Build a new dict so the stored entry itself is never mutated.
            return jsonify({**cached_data, 'cached': True})

    # Cache miss: drop every expired entry (including this key, if stale).
    for stale_key in [
        key for key, (_, stored_at) in list(_graph_cache.items())
        if now - stored_at >= _GRAPH_CACHE_TTL
    ]:
        _graph_cache.pop(stale_key, None)

    db = SessionLocal()
    try:
        # Get top entities by mentions
        entities_query = db.query(ZOPKKnowledgeEntity).filter(
            ZOPKKnowledgeEntity.mentions_count >= 5
        )
        if entity_type:
            entities_query = entities_query.filter(
                ZOPKKnowledgeEntity.entity_type == entity_type
            )
        entities = entities_query.order_by(
            ZOPKKnowledgeEntity.mentions_count.desc()
        ).limit(100).all()

        entity_map = {e.id: e for e in entities}
        entity_ids = list(entity_map)
        if not entity_ids:
            # Same shape as the populated response so clients need no special case.
            return jsonify({
                'success': True,
                'nodes': [],
                'links': [],
                'stats': {'total_nodes': 0, 'total_links': 0},
                'cached': False
            })

        # Get co-occurrences (entities appearing in same chunk)
        cooccur_query = text("""
            SELECT
                m1.entity_id as source,
                m2.entity_id as target,
                COUNT(*) as value
            FROM zopk_knowledge_entity_mentions m1
            JOIN zopk_knowledge_entity_mentions m2
                ON m1.chunk_id = m2.chunk_id
                AND m1.entity_id < m2.entity_id
            WHERE m1.entity_id = ANY(:entity_ids)
                AND m2.entity_id = ANY(:entity_ids)
            GROUP BY m1.entity_id, m2.entity_id
            HAVING COUNT(*) >= :min_cooccurrence
            ORDER BY COUNT(*) DESC
            LIMIT :limit
        """)
        result = db.execute(cooccur_query, {
            'entity_ids': entity_ids,
            'min_cooccurrence': min_cooccurrence,
            'limit': limit
        })

        # Build links and remember which entities take part in at least one.
        used_entity_ids = set()
        links = []
        for row in result:
            links.append({
                'source': row.source,
                'target': row.target,
                'value': row.value
            })
            used_entity_ids.add(row.source)
            used_entity_ids.add(row.target)

        # Build nodes only for entities that have links
        nodes = [
            {
                'id': entity.id,
                'name': entity.name,
                'type': entity.entity_type,
                'mentions': entity.mentions_count,
                'verified': entity.is_verified
            }
            for entity in (entity_map[eid] for eid in used_entity_ids if eid in entity_map)
        ]

        # Build response
        response_data = {
            'success': True,
            'nodes': nodes,
            'links': links,
            'stats': {
                'total_nodes': len(nodes),
                'total_links': len(links)
            },
            'cached': False
        }

        # Save to cache
        _graph_cache[cache_key] = (response_data, time.time())
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"Error getting graph data: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/knowledge/fact-duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_fact_duplicates():
    """Render the fact deduplication panel (no template context is passed)."""
    template_name = 'admin/zopk_fact_duplicates.html'
    return render_template(template_name)
@bp.route('/zopk-api/knowledge/fact-duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_duplicates():
    """
    API - list duplicate facts.

    Query params: ``min_similarity`` (float, default 0.7), ``fact_type``
    (optional) and ``limit`` (int, default 100, capped at 500). Any
    exception, including a malformed number, is reported as HTTP 500.
    """
    from zopk_knowledge_service import find_duplicate_facts

    session = SessionLocal()
    try:
        params = request.args
        similarity_floor = float(params.get('min_similarity', 0.7))
        # An empty fact_type means "no type filter".
        type_filter = params.get('fact_type', '') or None
        max_rows = min(int(params.get('limit', 100)), 500)

        found = find_duplicate_facts(session, similarity_floor, max_rows, type_filter)
        return jsonify({'success': True, 'duplicates': found, 'count': len(found)})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/fact-duplicates/merge', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_merge():
    """
    API - merge duplicate facts.

    Request JSON:
        - primary_id: required - fact to keep
        - duplicate_id: required - fact to merge into the primary
        - new_text: optional - forwarded to ``merge_facts``

    Returns the service result as JSON, HTTP 400 when either ID is missing
    (mirroring the entity merge endpoint), and HTTP 500 (after rolling back
    the session) when the merge raises.
    """
    from zopk_knowledge_service import merge_facts

    # silent=True plus the dict check: a missing, malformed or non-object body
    # is rejected with 400 below instead of failing on ``None.get``.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    primary_id = data.get('primary_id')
    duplicate_id = data.get('duplicate_id')
    new_text = data.get('new_text')
    if not primary_id or not duplicate_id:
        return jsonify({'success': False, 'error': 'Brak ID faktów'}), 400

    db = SessionLocal()
    try:
        result = merge_facts(db, primary_id, duplicate_id, new_text)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error merging facts {primary_id} <- {duplicate_id}: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk-api/knowledge/auto-verify/entities', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_entities():
    """
    Auto-verify entities with a high number of mentions.

    Request JSON (body is optional):
        - min_mentions: int (default 5)
        - limit: int (default 100)

    Returns the result of ``auto_verify_top_entities`` as JSON, HTTP 400 when
    a parameter is not an integer, and HTTP 500 (after rolling back the
    session) when the service raises.
    """
    from zopk_knowledge_service import auto_verify_top_entities

    # silent=True: a missing or non-JSON body means "use the defaults".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    try:
        min_mentions = int(data.get('min_mentions', 5))
        limit = int(data.get('limit', 100))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Nieprawidłowe parametry'}), 400

    db = SessionLocal()
    try:
        result = auto_verify_top_entities(db, min_mentions, limit)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error auto-verifying entities: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk-api/knowledge/auto-verify/facts', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_facts():
    """
    Auto-verify facts with a high importance score.

    Request JSON (body is optional):
        - min_importance: float (default 0.7)
        - limit: int (default 200)

    Returns the result of ``auto_verify_top_facts`` as JSON, HTTP 400 when a
    parameter is not numeric, and HTTP 500 (after rolling back the session)
    when the service raises.
    """
    from zopk_knowledge_service import auto_verify_top_facts

    # silent=True: a missing or non-JSON body means "use the defaults".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    try:
        min_importance = float(data.get('min_importance', 0.7))
        limit = int(data.get('limit', 200))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Nieprawidłowe parametry'}), 400

    db = SessionLocal()
    try:
        result = auto_verify_top_facts(db, min_importance, limit)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error auto-verifying facts: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk-api/knowledge/auto-verify/similar', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_similar():
    """
    Auto-verify facts that are similar to already verified ones.

    Request JSON (body is optional):
        - min_similarity: float (default 0.8)
        - limit: int (default 100)

    Returns the result of ``auto_verify_similar_to_verified`` as JSON, HTTP
    400 when a parameter is not numeric, and HTTP 500 (after rolling back the
    session) when the service raises.
    """
    from zopk_knowledge_service import auto_verify_similar_to_verified

    # silent=True: a missing or non-JSON body means "use the defaults".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    try:
        min_similarity = float(data.get('min_similarity', 0.8))
        limit = int(data.get('limit', 100))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Nieprawidłowe parametry'}), 400

    db = SessionLocal()
    try:
        result = auto_verify_similar_to_verified(db, min_similarity, limit)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error auto-verifying similar facts: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk-api/knowledge/suggest-similar-facts')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_suggest_similar_facts():
    """
    Return suggestions of facts similar to verified ones (no auto-verification).

    Query params: ``min_similarity`` (float, default 0.8) and ``limit``
    (int, default 50). Any exception, including a malformed number, is
    reported as HTTP 500.
    """
    from zopk_knowledge_service import find_similar_to_verified_facts

    session = SessionLocal()
    try:
        params = request.args
        similarity_floor = float(params.get('min_similarity', 0.8))
        max_rows = int(params.get('limit', 50))

        found = find_similar_to_verified_facts(session, similarity_floor, max_rows)
        body = {'success': True, 'suggestions': found, 'count': len(found)}
        return jsonify(body)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@bp.route('/zopk-api/knowledge/dashboard-stats')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_dashboard_stats():
    """
    API - dashboard statistics.

    Responds with ``{'success': True}`` merged with the mapping returned by
    ``get_knowledge_dashboard_stats``; any exception maps to HTTP 500.
    """
    from zopk_knowledge_service import get_knowledge_dashboard_stats

    session = SessionLocal()
    try:
        payload = {'success': True}
        # Keys coming from the service win over the 'success' flag, as before.
        payload.update(get_knowledge_dashboard_stats(session))
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()