Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Remaining scraper messages: Domain/Not HTML/Extraction error → Polish
- Embedding failures shown as skipped (yellow) instead of failed (red)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1041 lines
36 KiB
Python
1041 lines
36 KiB
Python
"""
|
|
ZOPK Knowledge Routes - Admin blueprint
|
|
|
|
Migrated from app.py as part of the blueprint refactoring.
|
|
Contains routes for ZOPK knowledge base management, extraction, embeddings, and graph visualization.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
import threading
|
|
from dataclasses import asdict
|
|
from datetime import datetime
|
|
|
|
from flask import flash, jsonify, redirect, render_template, request, url_for, Response, stream_with_context
|
|
from flask_login import current_user, login_required
|
|
from sqlalchemy import text, func, distinct
|
|
|
|
from database import (
|
|
SessionLocal,
|
|
SystemRole,
|
|
ZOPKNews,
|
|
ZOPKKnowledgeChunk,
|
|
ZOPKKnowledgeEntity,
|
|
ZOPKKnowledgeEntityMention
|
|
)
|
|
from utils.decorators import role_required
|
|
from . import bp
|
|
|
|
# Lifetime of one cached graph payload, in seconds (5 minutes).
_GRAPH_CACHE_TTL: int = 300

# In-memory graph cache with TTL. Each value is a (payload, timestamp) tuple
# keyed by a string built from the request parameters.
_graph_cache: dict = {}

# Module-level logger shared by every route in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@bp.route('/zopk/knowledge/stats')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_stats():
    """
    Return knowledge extraction statistics as JSON.

    The response is ``{'success': True}`` merged with the mapping produced
    by ``get_knowledge_stats``. According to the original notes that
    mapping is expected to carry:
        - articles: stats about articles (approved, scraped, extracted)
        - knowledge_base: stats about chunks, facts, entities, relations
        - top_entities: most mentioned entities

    Any exception is logged and reported as a 500 response with
    ``{'success': False, 'error': <message>}``.
    """
    from zopk_knowledge_service import get_knowledge_stats

    session = SessionLocal()
    try:
        payload = {'success': True}
        # Keys coming from the service are merged after the success flag.
        payload.update(get_knowledge_stats(session))
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting knowledge stats: {exc}")
        failure = {'success': False, 'error': str(exc)}
        return jsonify(failure), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/extract', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract():
    """
    Batch extract knowledge from scraped articles.

    Request JSON (optional):
        - limit: int (default 50, capped at 100) - max articles to process

    Response JSON:
        - success flag and a human-readable summary message
        - processed / total / failed article counts
        - chunks/facts/entities/relations created

    Returns 400 when ``limit`` is not an integer, and 500 (after rolling
    back the session) when the extraction itself raises.
    """
    from zopk_knowledge_service import ZOPKKnowledgeService

    # silent=True: an absent or non-JSON body means "use the defaults"
    # instead of raising an error that the broad handler below would
    # report as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        limit = int(data.get('limit', 50))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': 'Parametr limit musi być liczbą całkowitą'
        }), 400
    # Clamp to 0..100 so a negative value can never reach the service.
    limit = max(0, min(limit, 100))

    db = SessionLocal()
    try:
        service = ZOPKKnowledgeService(db, user_id=current_user.id)
        result = service.batch_extract(limit=limit)

        return jsonify({
            'success': True,
            'message': f"Ekstrakcja zakończona: {result['success']}/{result['total']} artykułów. "
                       f"Utworzono: {result['chunks_created']} chunks, {result['facts_created']} faktów, "
                       f"{result['entities_created']} encji, {result['relations_created']} relacji.",
            'processed': result['success'],
            'total': result['total'],
            'failed': result['failed'],
            'chunks_created': result['chunks_created'],
            'facts_created': result['facts_created'],
            'entities_created': result['entities_created'],
            'relations_created': result['relations_created']
        })

    except Exception as e:
        db.rollback()
        logger.error(f"Error in knowledge extraction: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/extract/<int:news_id>', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract_single(news_id):
    """
    Run knowledge extraction for one article.

    Responds with the created chunk/fact/entity/relation counts and the
    processing time on success, with 400 and the extraction error when the
    service reports a failure, and with 500 (after a rollback) when an
    exception is raised.
    """
    from zopk_knowledge_service import ZOPKKnowledgeService

    session = SessionLocal()
    try:
        extractor = ZOPKKnowledgeService(session, user_id=current_user.id)
        outcome = extractor.extract_from_news(news_id)

        # Guard clause: a failed extraction is reported as a client error.
        if not outcome.success:
            return jsonify({'success': False, 'error': outcome.error}), 400

        summary = (
            f"Wyekstrahowano: {outcome.chunks_created} chunks, "
            f"{outcome.facts_created} faktów, {outcome.entities_created} encji"
        )
        return jsonify({
            'success': True,
            'message': summary,
            'chunks_created': outcome.chunks_created,
            'facts_created': outcome.facts_created,
            'entities_created': outcome.entities_created,
            'relations_created': outcome.relations_created,
            'processing_time': outcome.processing_time
        })
    except Exception as exc:
        session.rollback()
        logger.error(f"Error extracting from news {news_id}: {exc}")
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/embeddings', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_generate_embeddings():
    """
    Generate embeddings for chunks that don't have them.

    Request JSON (optional):
        - limit: int (default 100, capped at 500) - max chunks to process

    Response JSON:
        - success flag and a summary message
        - generated / total / failed counts

    Returns 400 when ``limit`` is not an integer, and 500 (after rolling
    back the session) when generation raises.
    """
    from zopk_knowledge_service import generate_chunk_embeddings

    # silent=True: an absent or non-JSON body means "use the defaults"
    # instead of raising an error that the broad handler below would
    # report as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        limit = int(data.get('limit', 100))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': 'Parametr limit musi być liczbą całkowitą'
        }), 400
    # Clamp to 0..500 so a negative value can never reach the service.
    limit = max(0, min(limit, 500))

    db = SessionLocal()
    try:
        result = generate_chunk_embeddings(db, limit=limit, user_id=current_user.id)

        return jsonify({
            'success': True,
            'message': f"Wygenerowano embeddings: {result['success']}/{result['total']}",
            'generated': result['success'],
            'total': result['total'],
            'failed': result['failed']
        })

    except Exception as e:
        db.rollback()
        logger.error(f"Error generating embeddings: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/extract/stream', methods=['GET'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_extract_stream():
    """
    SSE endpoint for streaming knowledge extraction progress.

    Query params:
        - limit: int (default 10, capped at 50) - max articles to process.
          A malformed value falls back to the default.

    Every event is one ``data: <json>`` frame. Each article produces a
    'processing' event, optional heartbeat events while the extraction is
    running, and then a 'success' or 'skipped' event. The stream ends with
    a 'complete' event, or an 'error' event if the stream itself fails.
    """
    # type=int returns the default when the value cannot be converted,
    # instead of letting int() raise before the response even starts.
    limit = request.args.get('limit', 10, type=int)
    limit = max(0, min(limit, 50))
    # Read the user id now: the generator body only runs later, while the
    # response is being iterated.
    user_id = current_user.id

    def _sse(payload):
        """Serialize one payload dict as a single SSE data frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    def generate():
        db = SessionLocal()
        try:
            from zopk_knowledge_service import ZOPKKnowledgeService

            service = ZOPKKnowledgeService(db, user_id=user_id)

            # Find articles ready for extraction
            articles = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved']),
                ZOPKNews.scrape_status == 'scraped',
                ZOPKNews.knowledge_extracted == False  # noqa: E712 (SQL expression)
            ).order_by(
                ZOPKNews.created_at.desc()
            ).limit(limit).all()

            total = len(articles)

            if total == 0:
                yield _sse({'status': 'complete', 'message': 'Brak artykułów do ekstrakcji', 'total': 0})
                return

            # Send initial
            yield _sse({'current': 0, 'total': total, 'percent': 0, 'stage': 'extracting', 'status': 'processing', 'message': f'Rozpoczynam ekstrakcję z {total} artykułów...'})

            stats = {'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
            start_time = time.time()

            def _run_extraction(article_id, outcome):
                """Thread target: store the result, or the exception, in outcome."""
                try:
                    outcome['result'] = service.extract_from_news(article_id)
                except Exception as exc:
                    # An exception raised in a thread never reaches the
                    # generator on its own; hand it over explicitly.
                    outcome['error'] = exc

            for idx, article in enumerate(articles, 1):
                title = article.title or ''
                article_id = article.id
                percent_before = round((idx - 1) / total * 100, 1)
                percent_after = round(idx / total * 100, 1)

                # Send processing update
                yield _sse({'current': idx, 'total': total, 'percent': percent_before, 'stage': 'extracting', 'status': 'processing', 'message': f'Analizuję AI: {title[:50]}...', 'article_id': article_id, 'article_title': title[:80], 'details': stats})

                # Run extraction in thread with heartbeats to prevent SSE timeout
                outcome = {}
                worker = threading.Thread(target=_run_extraction, args=(article_id, outcome))
                worker.start()
                while worker.is_alive():
                    worker.join(timeout=10)
                    if worker.is_alive():
                        elapsed = round(time.time() - start_time)
                        yield _sse({'current': idx, 'total': total, 'percent': percent_before, 'stage': 'extracting', 'status': 'processing', 'message': f'Analizuję AI: {title[:50]}... ({elapsed}s)', 'details': stats})

                worker_error = outcome.get('error')
                result = outcome.get('result')

                if worker_error is not None:
                    # One failing article must not abort the whole batch.
                    # Roll back so the session is usable for the next one.
                    logger.error(f"Error extracting from news {article_id}: {worker_error}")
                    db.rollback()

                if worker_error is None and result is not None and result.success:
                    stats['success'] += 1
                    stats['chunks'] += result.chunks_created
                    stats['facts'] += result.facts_created
                    stats['entities'] += result.entities_created
                    yield _sse({'current': idx, 'total': total, 'percent': percent_after, 'stage': 'extracting', 'status': 'success', 'message': f'✓ {result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e', 'article_id': article_id, 'details': {'new_chunks': result.chunks_created, 'new_facts': result.facts_created, 'new_entities': result.entities_created, **stats}})
                else:
                    stats['failed'] += 1
                    if worker_error is not None:
                        reason = str(worker_error)
                    else:
                        reason = getattr(result, 'error', None)
                    error_msg = reason[:60] if reason else 'Nieznany błąd'
                    yield _sse({'current': idx, 'total': total, 'percent': percent_after, 'stage': 'extracting', 'status': 'skipped', 'message': f'⊘ {error_msg}', 'article_id': article_id, 'details': {'error': reason, **stats}})

            processing_time = round(time.time() - start_time, 2)

            # Send completion
            success_count = stats['success']
            chunks_count = stats['chunks']
            facts_count = stats['facts']
            entities_count = stats['entities']
            complete_msg = f'Zakończono: {success_count}/{total}. Utworzono: {chunks_count}ch, {facts_count}f, {entities_count}e'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'extracting', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield _sse(complete_data)

        except Exception as e:
            db.rollback()
            logger.error(f"SSE extraction error: {e}")
            yield _sse({'status': 'error', 'message': str(e)})
        finally:
            db.close()

    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
|
|
|
|
|
|
@bp.route('/zopk/knowledge/embeddings/stream', methods=['GET'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_embeddings_stream():
    """
    SSE endpoint for streaming embeddings generation progress.

    Query params:
        - limit: int (default 50, capped at 200) - max chunks to process.
          A malformed value falls back to the default.

    Every event is one ``data: <json>`` frame. Each chunk produces a
    'processing' event followed by 'success' or 'skipped'. The stream ends
    with a 'complete' event, or an 'error' event if the stream itself fails.

    NOTE(review): embeddings are committed once, after the loop. If the
    stream stops before that point the assigned embeddings appear not to be
    persisted - confirm whether that is intended.
    """
    # type=int returns the default when the value cannot be converted,
    # instead of letting int() raise before the response even starts.
    limit = request.args.get('limit', 50, type=int)
    limit = max(0, min(limit, 200))
    # Read the user id now: the generator body only runs later, while the
    # response is being iterated.
    user_id = current_user.id

    def _sse(payload):
        """Serialize one payload dict as a single SSE data frame."""
        return f"data: {json.dumps(payload, ensure_ascii=False)}\n\n"

    def generate():
        from gemini_service import GeminiService

        db = SessionLocal()
        try:
            gemini = GeminiService()

            # Find chunks without embeddings
            chunks = db.query(ZOPKKnowledgeChunk).filter(
                ZOPKKnowledgeChunk.embedding.is_(None)
            ).limit(limit).all()

            total = len(chunks)

            if total == 0:
                yield _sse({'status': 'complete', 'message': 'Brak chunks bez embeddingów', 'total': 0})
                return

            # Send initial
            yield _sse({'current': 0, 'total': total, 'percent': 0, 'stage': 'embedding', 'status': 'processing', 'message': f'Generuję embeddingi dla {total} chunks...'})

            stats = {'success': 0, 'failed': 0}
            start_time = time.time()

            for idx, chunk in enumerate(chunks, 1):
                summary_short = chunk.summary[:40] if chunk.summary else f'chunk_{chunk.id}'
                percent_before = round((idx - 1) / total * 100, 1)
                percent_after = round(idx / total * 100, 1)

                # Send processing update
                yield _sse({'current': idx, 'total': total, 'percent': percent_before, 'stage': 'embedding', 'status': 'processing', 'message': f'Embedding {idx}/{total}: {summary_short}...', 'details': stats})

                try:
                    embedding = gemini.generate_embedding(
                        text=chunk.content,
                        task_type='retrieval_document',
                        title=chunk.summary,
                        user_id=user_id,
                        feature='zopk_chunk_embedding'
                    )

                    if embedding:
                        # The embedding is stored as a JSON-encoded string.
                        chunk.embedding = json.dumps(embedding)
                        stats['success'] += 1
                        yield _sse({'current': idx, 'total': total, 'percent': percent_after, 'stage': 'embedding', 'status': 'success', 'message': f'✓ 768 dim: {summary_short}', 'details': stats})
                    else:
                        # A falsy result counts as a failure but is shown as "skipped".
                        stats['failed'] += 1
                        yield _sse({'current': idx, 'total': total, 'percent': percent_after, 'stage': 'embedding', 'status': 'skipped', 'message': '⊘ Brak odpowiedzi API', 'details': stats})

                except Exception as e:
                    # A per-chunk failure does not abort the batch.
                    stats['failed'] += 1
                    yield _sse({'current': idx, 'total': total, 'percent': percent_after, 'stage': 'embedding', 'status': 'skipped', 'message': f'⊘ {str(e)[:40]}', 'details': {'error': str(e), **stats}})

            db.commit()
            processing_time = round(time.time() - start_time, 2)

            # Send completion
            success_count = stats['success']
            complete_msg = f'Zakończono: {success_count}/{total} embeddingów'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'embedding', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield _sse(complete_data)

        except Exception as e:
            db.rollback()
            logger.error(f"SSE embedding error: {e}")
            yield _sse({'status': 'error', 'message': str(e)})
        finally:
            db.close()

    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/search', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_search():
    """
    Semantic search in ZOPK knowledge base.

    Request JSON:
        - query: str (required, non-blank) - search query
        - limit: int (default 5, capped at 20) - max results

    Response JSON:
        - query: the query as received
        - chunks: result of ``search_knowledge`` for the query
        - facts: result of ``get_relevant_facts`` for the query

    Returns 400 when the query is missing or blank, or when ``limit`` is
    not an integer; 500 when the search itself raises.
    """
    from zopk_knowledge_service import search_knowledge, get_relevant_facts

    # silent=True: an absent or non-JSON body is treated as an empty request
    # and rejected below as a missing query.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({'success': False, 'error': 'Query wymagane'}), 400

    try:
        limit = int(data.get('limit', 5))
    except (TypeError, ValueError):
        return jsonify({
            'success': False,
            'error': 'Parametr limit musi być liczbą całkowitą'
        }), 400
    # Clamp to 0..20 so a negative value can never reach the service.
    limit = max(0, min(limit, 20))

    db = SessionLocal()
    try:
        # Search chunks
        chunks = search_knowledge(
            db,
            query=query,
            limit=limit,
            user_id=current_user.id
        )

        # Get relevant facts
        facts = get_relevant_facts(db, query=query, limit=limit)

        return jsonify({
            'success': True,
            'query': query,
            'chunks': chunks,
            'facts': facts
        })

    except Exception as e:
        logger.error(f"Error in knowledge search: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_dashboard():
    """
    Render the ZOPK Knowledge Base management dashboard.

    No template variables are passed from here.
    """
    template_name = 'admin/zopk_knowledge_dashboard.html'
    return render_template(template_name)
|
|
|
|
|
|
@bp.route('/zopk/knowledge/chunks')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_chunks():
    """
    Render the paginated, filterable list of knowledge chunks.

    Query params: page (default 1), per_page (default 20), source_news_id,
    has_embedding and is_verified. The two boolean filters stay ``None``
    when absent; otherwise they are ``True`` only for the text "true"
    (case-insensitive).
    """
    from zopk_knowledge_service import list_chunks

    def _tri_state(param):
        """Map a query parameter to None (absent), True or False."""
        raw = request.args.get(param)
        return None if raw is None else raw.lower() == 'true'

    page = request.args.get('page', 1, type=int)
    per_page = request.args.get('per_page', 20, type=int)
    # The same filter values go to the service and back to the template.
    filters = {
        'source_news_id': request.args.get('source_news_id', type=int),
        'has_embedding': _tri_state('has_embedding'),
        'is_verified': _tri_state('is_verified'),
    }

    session = SessionLocal()
    try:
        listing = list_chunks(session, page=page, per_page=per_page, **filters)
        return render_template(
            'admin/zopk_knowledge_chunks.html',
            chunks=listing['chunks'],
            total=listing['total'],
            page=listing['page'],
            per_page=listing['per_page'],
            pages=listing['pages'],
            **filters
        )
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/facts')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_facts():
    """
    Render the paginated, filterable list of knowledge facts.

    Query params: page (default 1), per_page (default 20), fact_type,
    source_news_id and is_verified. ``is_verified`` stays ``None`` when
    absent; otherwise it is ``True`` only for the text "true"
    (case-insensitive).
    """
    from zopk_knowledge_service import list_facts

    args = request.args
    page = args.get('page', 1, type=int)
    per_page = args.get('per_page', 20, type=int)
    fact_type = args.get('fact_type')
    source_news_id = args.get('source_news_id', type=int)

    verified_raw = args.get('is_verified')
    is_verified = None if verified_raw is None else verified_raw.lower() == 'true'

    session = SessionLocal()
    try:
        listing = list_facts(
            session,
            page=page,
            per_page=per_page,
            fact_type=fact_type,
            is_verified=is_verified,
            source_news_id=source_news_id
        )
        template_context = {
            'facts': listing['facts'],
            'total': listing['total'],
            'page': listing['page'],
            'per_page': listing['per_page'],
            'pages': listing['pages'],
            'fact_types': listing['fact_types'],
            'current_fact_type': fact_type,
            'source_news_id': source_news_id,
            'is_verified': is_verified,
        }
        return render_template('admin/zopk_knowledge_facts.html', **template_context)
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/entities')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_entities():
    """
    Render the paginated, filterable list of knowledge entities.

    Query params: page (default 1), per_page (default 20), entity_type,
    min_mentions and is_verified. ``is_verified`` stays ``None`` when
    absent; otherwise it is ``True`` only for the text "true"
    (case-insensitive).
    """
    from zopk_knowledge_service import list_entities

    args = request.args
    page = args.get('page', 1, type=int)
    per_page = args.get('per_page', 20, type=int)
    entity_type = args.get('entity_type')
    min_mentions = args.get('min_mentions', type=int)

    verified_raw = args.get('is_verified')
    is_verified = None if verified_raw is None else verified_raw.lower() == 'true'

    session = SessionLocal()
    try:
        listing = list_entities(
            session,
            page=page,
            per_page=per_page,
            entity_type=entity_type,
            is_verified=is_verified,
            min_mentions=min_mentions
        )
        template_context = {
            'entities': listing['entities'],
            'total': listing['total'],
            'page': listing['page'],
            'per_page': listing['per_page'],
            'pages': listing['pages'],
            'entity_types': listing['entity_types'],
            'current_entity_type': entity_type,
            'min_mentions': min_mentions,
            'is_verified': is_verified,
        }
        return render_template('admin/zopk_knowledge_entities.html', **template_context)
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_detail(chunk_id):
    """Return the detail payload of one chunk as JSON, or 404 if the lookup is empty."""
    from zopk_knowledge_service import get_chunk_detail

    session = SessionLocal()
    try:
        detail = get_chunk_detail(session, chunk_id)
        if detail:
            return jsonify({'success': True, 'chunk': detail})
        # A falsy lookup result is reported as "not found".
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_verify(chunk_id):
    """
    Set the verification flag of a chunk.

    Request JSON: ``is_verified`` (defaults to True). The current user's id
    is passed to the service together with the flag. Responds 404 when the
    service reports no update, 500 (after a rollback) on an exception.
    """
    from zopk_knowledge_service import update_chunk_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        is_verified = body.get('is_verified', True)

        updated = update_chunk_verification(session, chunk_id, is_verified, current_user.id)
        if updated:
            return jsonify({'success': True, 'is_verified': is_verified})
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/facts/<int:fact_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_verify(fact_id):
    """
    Set the verification flag of a fact.

    Request JSON: ``is_verified`` (defaults to True). Responds 404 when the
    service reports no update, 500 (after a rollback) on an exception.
    """
    from zopk_knowledge_service import update_fact_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        is_verified = body.get('is_verified', True)

        updated = update_fact_verification(session, fact_id, is_verified)
        if updated:
            return jsonify({'success': True, 'is_verified': is_verified})
        return jsonify({'success': False, 'error': 'Fakt nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/entities/<int:entity_id>/verify', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_entity_verify(entity_id):
    """
    Set the verification flag of an entity.

    Request JSON: ``is_verified`` (defaults to True). Responds 404 when the
    service reports no update, 500 (after a rollback) on an exception.
    """
    from zopk_knowledge_service import update_entity_verification

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        is_verified = body.get('is_verified', True)

        updated = update_entity_verification(session, entity_id, is_verified)
        if updated:
            return jsonify({'success': True, 'is_verified': is_verified})
        return jsonify({'success': False, 'error': 'Encja nie znaleziona'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/chunks/<int:chunk_id>', methods=['DELETE'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_chunk_delete(chunk_id):
    """
    Delete a chunk through ``delete_chunk``.

    Responds 404 when the service reports nothing was deleted and 500
    (after a rollback) when it raises.
    """
    from zopk_knowledge_service import delete_chunk

    session = SessionLocal()
    try:
        deleted = delete_chunk(session, chunk_id)
        if deleted:
            return jsonify({'success': True, 'message': 'Chunk usunięty'})
        return jsonify({'success': False, 'error': 'Chunk nie znaleziony'}), 404
    except Exception as exc:
        session.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_duplicates():
    """
    Admin page for managing duplicate entities.

    Query params:
        - entity_type: optional filter; empty means all types
        - min_similarity: float (default 0.4). A malformed value falls back
          to the default.

    Renders up to 100 duplicate candidates together with the sorted list
    of distinct entity types for the filter control.
    """
    from zopk_knowledge_service import find_duplicate_entities

    db = SessionLocal()
    try:
        # Get filter parameters. type=float returns the default when the
        # value cannot be converted, instead of raising.
        entity_type = request.args.get('entity_type', '')
        min_similarity = request.args.get('min_similarity', 0.4, type=float)

        # Find duplicates
        duplicates = find_duplicate_entities(
            db,
            entity_type=entity_type if entity_type else None,
            min_similarity=min_similarity,
            limit=100
        )

        # Get unique entity types for filter. NULL types are left out:
        # sorted() cannot order None against strings.
        entity_types = sorted(
            r[0]
            for r in db.query(distinct(ZOPKKnowledgeEntity.entity_type)).all()
            if r[0] is not None
        )

        return render_template(
            'admin/zopk_knowledge_duplicates.html',
            duplicates=duplicates,
            entity_types=entity_types,
            selected_type=entity_type,
            min_similarity=min_similarity
        )
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/duplicates/preview', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_duplicates_preview():
    """
    Preview a merge between two entities.

    Request JSON: ``primary_id`` and ``duplicate_id`` (both required).
    Responds 400 when either id is missing and 404 when the preview
    returned by the service contains an ``error`` key.
    """
    from zopk_knowledge_service import get_entity_merge_preview

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        primary_id = body.get('primary_id')
        duplicate_id = body.get('duplicate_id')

        if not (primary_id and duplicate_id):
            return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

        preview = get_entity_merge_preview(session, primary_id, duplicate_id)
        if 'error' not in preview:
            return jsonify({'success': True, 'preview': preview})
        return jsonify({'success': False, 'error': preview['error']}), 404
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/duplicates/merge', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_duplicates_merge():
    """
    Merge two entities - keep primary, delete duplicate.

    Request JSON: ``primary_id`` and ``duplicate_id`` (both required) and
    an optional ``new_name``. Responds 400 when either id is missing;
    otherwise returns whatever ``merge_entities`` produces, as JSON.
    """
    from zopk_knowledge_service import merge_entities

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        primary_id = body.get('primary_id')
        duplicate_id = body.get('duplicate_id')
        new_name = body.get('new_name')

        if not (primary_id and duplicate_id):
            return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

        merge_outcome = merge_entities(session, primary_id, duplicate_id, new_name)
        return jsonify(merge_outcome)
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/graph')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_knowledge_graph():
    """Render the admin page hosting the entity relations graph visualization."""
    template_name = 'admin/zopk_knowledge_graph.html'
    return render_template(template_name)
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/graph/data')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_knowledge_graph_data():
    """Get graph data for entity co-occurrence visualization.

    Uses in-memory cache with 5 minute TTL to avoid recalculating
    co-occurrences on every request.

    Query params:
        - entity_type: optional entity type filter
        - min_cooccurrence: int (default 3) - minimum number of shared
          chunks for a link to be returned
        - limit: int (default 100, capped at 500) - max links
    Malformed numeric values fall back to their defaults.

    Response JSON: ``nodes`` (only entities that take part in at least one
    link), ``links`` (source/target/value), ``stats`` and a ``cached`` flag.
    When no entity qualifies, only empty ``nodes`` and ``links`` are
    returned and nothing is cached.
    """
    # Build cache key from parameters. type=int returns the default when
    # the value cannot be converted, instead of raising before the try block.
    entity_type = request.args.get('entity_type', '')
    min_cooccurrence = request.args.get('min_cooccurrence', 3, type=int)
    limit = max(0, min(request.args.get('limit', 100, type=int), 500))
    cache_key = f"graph:{entity_type}:{min_cooccurrence}:{limit}"

    # Check cache
    cached_entry = _graph_cache.get(cache_key)
    if cached_entry is not None:
        cached_data, cached_time = cached_entry
        if time.time() - cached_time < _GRAPH_CACHE_TTL:
            # Return a copy flagged as cached; the stored payload is untouched.
            return jsonify({**cached_data, 'cached': True})

    db = SessionLocal()
    try:
        # Get top entities by mentions
        entities_query = db.query(ZOPKKnowledgeEntity).filter(
            ZOPKKnowledgeEntity.mentions_count >= 5
        )
        if entity_type:
            entities_query = entities_query.filter(
                ZOPKKnowledgeEntity.entity_type == entity_type
            )
        entities_query = entities_query.order_by(
            ZOPKKnowledgeEntity.mentions_count.desc()
        ).limit(100)

        entities = entities_query.all()
        entity_ids = [e.id for e in entities]

        if not entity_ids:
            return jsonify({'success': True, 'nodes': [], 'links': []})

        # Get co-occurrences (entities appearing in same chunk).
        # m1.entity_id < m2.entity_id keeps each unordered pair once.
        cooccur_query = text("""
            SELECT
                m1.entity_id as source,
                m2.entity_id as target,
                COUNT(*) as value
            FROM zopk_knowledge_entity_mentions m1
            JOIN zopk_knowledge_entity_mentions m2
                ON m1.chunk_id = m2.chunk_id
                AND m1.entity_id < m2.entity_id
            WHERE m1.entity_id = ANY(:entity_ids)
                AND m2.entity_id = ANY(:entity_ids)
            GROUP BY m1.entity_id, m2.entity_id
            HAVING COUNT(*) >= :min_cooccurrence
            ORDER BY COUNT(*) DESC
            LIMIT :limit
        """)

        result = db.execute(cooccur_query, {
            'entity_ids': entity_ids,
            'min_cooccurrence': min_cooccurrence,
            'limit': limit
        })

        # Build nodes and links
        used_entity_ids = set()
        links = []

        for row in result:
            links.append({
                'source': row.source,
                'target': row.target,
                'value': row.value
            })
            used_entity_ids.add(row.source)
            used_entity_ids.add(row.target)

        # Build nodes only for entities that have links
        entity_map = {e.id: e for e in entities}
        nodes = []

        for eid in used_entity_ids:
            if eid in entity_map:
                e = entity_map[eid]
                nodes.append({
                    'id': e.id,
                    'name': e.name,
                    'type': e.entity_type,
                    'mentions': e.mentions_count,
                    'verified': e.is_verified
                })

        # Build response
        response_data = {
            'success': True,
            'nodes': nodes,
            'links': links,
            'stats': {
                'total_nodes': len(nodes),
                'total_links': len(links)
            },
            'cached': False
        }

        # Save to cache. Expired entries are dropped first: the key space
        # is open-ended, so without eviction the dict would only ever grow.
        now = time.time()
        for stale_key, (_, stored_at) in list(_graph_cache.items()):
            if now - stored_at >= _GRAPH_CACHE_TTL:
                _graph_cache.pop(stale_key, None)
        _graph_cache[cache_key] = (response_data.copy(), now)

        return jsonify(response_data)

    except Exception as e:
        logger.error(f"Error getting graph data: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk/knowledge/fact-duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_fact_duplicates():
    """Render the fact deduplication panel."""
    template_name = 'admin/zopk_fact_duplicates.html'
    return render_template(template_name)
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/fact-duplicates')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_duplicates():
    """
    API: list duplicate facts.

    Query params: min_similarity (default 0.7), fact_type (empty means no
    filter) and limit (default 100, capped at 500). Any exception,
    including a malformed number, is reported as a 500 JSON error.
    """
    from zopk_knowledge_service import find_duplicate_facts

    session = SessionLocal()
    try:
        args = request.args
        min_sim = float(args.get('min_similarity', 0.7))
        fact_type = args.get('fact_type', '')
        limit = min(int(args.get('limit', 100)), 500)

        # An empty fact_type is passed on as None.
        found = find_duplicate_facts(session, min_sim, limit, fact_type or None)
        return jsonify({'success': True, 'duplicates': found, 'count': len(found)})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/fact-duplicates/merge', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_fact_merge():
    """
    API: merge duplicate facts.

    Request JSON: ``primary_id`` and ``duplicate_id`` (both required) and
    an optional ``new_text``. The merge itself is delegated to
    ``merge_facts`` and its result is returned as JSON.

    Responds 400 when either id is missing and 500 (after rolling back the
    session) when the merge raises.
    """
    from zopk_knowledge_service import merge_facts

    db = SessionLocal()
    try:
        # silent=True plus the dict check: a missing or non-object body is
        # handled as a validation error below instead of failing on
        # ``None.get``.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        primary_id = data.get('primary_id')
        duplicate_id = data.get('duplicate_id')
        new_text = data.get('new_text')

        # Same validation as the entity merge endpoint.
        if not primary_id or not duplicate_id:
            return jsonify({'success': False, 'error': 'Brak ID faktu'}), 400

        result = merge_facts(db, primary_id, duplicate_id, new_text)
        return jsonify(result)
    except Exception as e:
        db.rollback()
        logger.error(f"Error merging facts: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/auto-verify/entities', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_entities():
    """
    Auto-verify entities with a high number of mentions.

    Request JSON: ``min_mentions`` (default 5) and ``limit`` (default 100).
    Returns the service result as JSON; any exception becomes a 500 JSON
    error.
    """
    from zopk_knowledge_service import auto_verify_top_entities

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        min_mentions = int(body.get('min_mentions', 5))
        limit = int(body.get('limit', 100))

        outcome = auto_verify_top_entities(session, min_mentions, limit)
        return jsonify(outcome)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/auto-verify/facts', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_facts():
    """
    Auto-verify facts with high importance.

    Request JSON: ``min_importance`` (default 0.7) and ``limit``
    (default 200). Returns the service result as JSON; any exception
    becomes a 500 JSON error.
    """
    from zopk_knowledge_service import auto_verify_top_facts

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        min_importance = float(body.get('min_importance', 0.7))
        limit = int(body.get('limit', 200))

        outcome = auto_verify_top_facts(session, min_importance, limit)
        return jsonify(outcome)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/auto-verify/similar', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_auto_verify_similar():
    """
    Auto-verify facts similar to already verified ones (learning).

    Request JSON: ``min_similarity`` (default 0.8) and ``limit``
    (default 100). Returns the service result as JSON; any exception
    becomes a 500 JSON error.
    """
    from zopk_knowledge_service import auto_verify_similar_to_verified

    session = SessionLocal()
    try:
        body = request.get_json() or {}
        min_similarity = float(body.get('min_similarity', 0.8))
        limit = int(body.get('limit', 100))

        outcome = auto_verify_similar_to_verified(session, min_similarity, limit)
        return jsonify(outcome)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/suggest-similar-facts')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_suggest_similar_facts():
    """
    Fetch suggestions of facts similar to verified ones (no auto-verification).

    Query params: ``min_similarity`` (default 0.8) and ``limit``
    (default 50). Responds with the suggestions and their count; any
    exception becomes a 500 JSON error.
    """
    from zopk_knowledge_service import find_similar_to_verified_facts

    session = SessionLocal()
    try:
        args = request.args
        min_similarity = float(args.get('min_similarity', 0.8))
        limit = int(args.get('limit', 50))

        found = find_similar_to_verified_facts(session, min_similarity, limit)
        payload = {
            'success': True,
            'suggestions': found,
            'count': len(found)
        }
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/zopk-api/knowledge/dashboard-stats')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_dashboard_stats():
    """
    API: dashboard statistics.

    Returns ``{'success': True}`` merged with the mapping produced by
    ``get_knowledge_dashboard_stats``; any exception becomes a 500 JSON
    error.
    """
    from zopk_knowledge_service import get_knowledge_dashboard_stats

    session = SessionLocal()
    try:
        payload = {'success': True}
        # Keys coming from the service are merged after the success flag.
        payload.update(get_knowledge_dashboard_stats(session))
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|