feat(admin): Paski postępu dla operacji AI w panelu ZOPK
Dodano Server-Sent Events (SSE) dla śledzenia postępu w czasie rzeczywistym: - Scraping treści artykułów - Ekstrakcja wiedzy przez Gemini AI - Generowanie embeddingów Funkcje: - Modal z paskiem postępu i statystykami - Live log operacji z kolorowaniem statusów - Podsumowanie na zakończenie (sukces/błędy/czas) - Możliwość zamknięcia modalu po zakończeniu Zmiany techniczne: - 3 nowe SSE endpointy (/stream) - ProgressUpdate dataclass w scraperze - Callback pattern w batch_scrape, batch_extract, generate_chunk_embeddings Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
31d5a112b8
commit
37af8abc73
348
app.py
348
app.py
@ -11131,6 +11131,354 @@ def admin_zopk_generate_embeddings():
|
||||
db.close()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ZOPK SSE ENDPOINTS (Server-Sent Events for Progress Tracking)
|
||||
# ============================================================
|
||||
|
||||
def sse_progress_generator(operation_func, db, **kwargs):
    """
    Generic SSE generator for progress tracking.

    Runs *operation_func* in a background thread and converts every
    progress dataclass it reports via ``progress_callback`` into a
    Server-Sent Events ``data:`` frame, finishing with a single
    ``{'type': 'result', ...}`` frame built from the operation's
    return value.

    NOTE(review): none of the current /stream endpoints use this helper —
    they inline their own generators. Either adopt it there or remove it.

    Args:
        operation_func: Callable that accepts a ``progress_callback``
            keyword argument and reports dataclass progress updates
            through it.
        db: Database session (accepted for interface symmetry with the
            callers; not used directly here).
        **kwargs: Additional keyword arguments forwarded to operation_func.

    Yields:
        str: SSE-formatted progress events, then one final result event.
    """
    import json
    import queue
    import threading
    from dataclasses import asdict

    # Thread-safe handoff from the worker thread to this generator.
    updates = queue.Queue()
    result_container = [None]

    def progress_callback(update):
        # Invoked from the worker thread; Queue.put is thread-safe.
        updates.put(update)

    def worker():
        try:
            result_container[0] = operation_func(progress_callback=progress_callback, **kwargs)
        except Exception as e:
            logger.error(f"SSE operation error: {e}")
            result_container[0] = {'error': str(e)}

    # daemon=True: a disconnected client must not leave a zombie thread
    # blocking interpreter shutdown.
    thread = threading.Thread(target=worker, daemon=True)
    thread.start()

    # Drain updates while the worker runs; the short timeout keeps event
    # latency low without a busy-wait.
    while thread.is_alive() or not updates.empty():
        try:
            update = updates.get(timeout=0.1)
        except queue.Empty:
            continue
        yield f"data: {json.dumps(asdict(update), ensure_ascii=False)}\n\n"

    thread.join()

    # Send final result (falls back to {} if the operation returned None).
    final_result = result_container[0] or {}
    yield f"data: {json.dumps({'type': 'result', **final_result}, ensure_ascii=False)}\n\n"
|
||||
|
||||
|
||||
@app.route('/admin/zopk/news/scrape-content/stream', methods=['GET'])
@login_required
def admin_zopk_scrape_content_stream():
    """
    SSE endpoint for streaming scrape progress.

    Streams one JSON event per article while full article bodies are
    scraped, then a final 'complete' (or 'error') event with aggregate
    statistics.

    Query params:
        - limit: int (default 30, max 100) - max articles to scrape
        - force: bool (default false) - re-scrape already scraped
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from zopk_content_scraper import ZOPKContentScraper, MAX_RETRY_ATTEMPTS

    limit = min(int(request.args.get('limit', 30)), 100)
    force = request.args.get('force', 'false').lower() == 'true'
    # Captured before streaming starts: current_user is request-bound and
    # not safely accessible from inside the generator.
    user_id = current_user.id

    def generate():
        import json
        import time

        from database import ZOPKNews

        db = SessionLocal()
        try:
            scraper = ZOPKContentScraper(db, user_id=user_id)

            # Articles eligible for scraping: approved news only.
            query = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved'])
            )

            if not force:
                # Never scraped yet, or failed but still under the retry cap.
                query = query.filter(
                    (ZOPKNews.scrape_status == 'pending') |
                    ((ZOPKNews.scrape_status == 'failed') & (ZOPKNews.scrape_attempts < MAX_RETRY_ATTEMPTS))
                )

            query = query.order_by(ZOPKNews.created_at.desc())
            articles = query.limit(limit).all()
            total = len(articles)

            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak artykułów do scrapowania', 'total': 0}, ensure_ascii=False)}\n\n"
                return

            # Initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'scraping', 'status': 'processing', 'message': f'Rozpoczynam scraping {total} artykułów...'}, ensure_ascii=False)}\n\n"

            stats = {'scraped': 0, 'failed': 0, 'skipped': 0}
            start_time = time.time()

            for idx, article in enumerate(articles, 1):
                # Processing update (percent uses idx-1: this item is not done yet).
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'scraping', 'status': 'processing', 'message': f'Pobieram: {article.title[:50]}...', 'article_id': article.id, 'article_title': article.title[:80], 'details': {'source': article.source_name or 'nieznane', **stats}}, ensure_ascii=False)}\n\n"

                result = scraper.scrape_article(article.id)

                if result.status == 'scraped':
                    stats['scraped'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'success', 'message': f'✓ {result.word_count} słów: {article.title[:40]}...', 'article_id': article.id, 'details': {'word_count': result.word_count, **stats}}, ensure_ascii=False)}\n\n"
                elif result.status == 'skipped':
                    stats['skipped'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'skipped', 'message': f'⊘ Pominięto: {article.title[:40]}...', 'article_id': article.id, 'details': stats}, ensure_ascii=False)}\n\n"
                else:
                    stats['failed'] += 1
                    error_msg = result.error[:50] if result.error else 'Nieznany błąd'
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'failed', 'message': f'✗ {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"

            processing_time = round(time.time() - start_time, 2)

            # Completion summary event.
            scraped_count = stats['scraped']
            failed_count = stats['failed']
            skipped_count = stats['skipped']
            complete_msg = f'Zakończono: {scraped_count} pobrano, {failed_count} błędów, {skipped_count} pominięto'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'scraping', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            logger.error(f"SSE scraping error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()

    # X-Accel-Buffering: no — stops nginx from buffering the event stream.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
|
||||
|
||||
|
||||
@app.route('/admin/zopk/knowledge/extract/stream', methods=['GET'])
@login_required
def admin_zopk_knowledge_extract_stream():
    """
    SSE endpoint for streaming knowledge extraction progress.

    Streams one JSON event per article while Gemini AI extracts chunks,
    facts and entities from already-scraped news, then a final
    'complete' (or 'error') event with aggregate statistics.

    Query params:
        - limit: int (default 10, max 50) - max articles to process
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    limit = min(int(request.args.get('limit', 10)), 50)
    # Captured before streaming starts: current_user is request-bound and
    # not safely accessible from inside the generator.
    user_id = current_user.id

    def generate():
        import json
        import time

        db = SessionLocal()
        try:
            from zopk_knowledge_service import ZOPKKnowledgeService
            from database import ZOPKNews

            service = ZOPKKnowledgeService(db, user_id=user_id)

            # Find articles ready for extraction: approved, already scraped,
            # and not yet processed by the knowledge extractor.
            articles = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved']),
                ZOPKNews.scrape_status == 'scraped',
                ZOPKNews.knowledge_extracted == False
            ).order_by(
                ZOPKNews.created_at.desc()
            ).limit(limit).all()

            total = len(articles)

            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak artykułów do ekstrakcji', 'total': 0}, ensure_ascii=False)}\n\n"
                return

            # Send initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'extracting', 'status': 'processing', 'message': f'Rozpoczynam ekstrakcję z {total} artykułów...'}, ensure_ascii=False)}\n\n"

            stats = {'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
            start_time = time.time()

            for idx, article in enumerate(articles, 1):
                # Send processing update (percent uses idx-1: item not done yet).
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'extracting', 'status': 'processing', 'message': f'Analizuję AI: {article.title[:50]}...', 'article_id': article.id, 'article_title': article.title[:80], 'details': stats}, ensure_ascii=False)}\n\n"

                result = service.extract_from_news(article.id)

                if result.success:
                    # Accumulate per-type creation counters for the summary.
                    stats['success'] += 1
                    stats['chunks'] += result.chunks_created
                    stats['facts'] += result.facts_created
                    stats['entities'] += result.entities_created
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'success', 'message': f'✓ {result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e', 'article_id': article.id, 'details': {'new_chunks': result.chunks_created, 'new_facts': result.facts_created, 'new_entities': result.entities_created, **stats}}, ensure_ascii=False)}\n\n"
                else:
                    stats['failed'] += 1
                    error_msg = result.error[:50] if result.error else 'Nieznany błąd'
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'failed', 'message': f'✗ {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"

            processing_time = round(time.time() - start_time, 2)

            # Send completion summary event.
            success_count = stats['success']
            chunks_count = stats['chunks']
            facts_count = stats['facts']
            entities_count = stats['entities']
            complete_msg = f'Zakończono: {success_count}/{total}. Utworzono: {chunks_count}ch, {facts_count}f, {entities_count}e'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'extracting', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            logger.error(f"SSE extraction error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()

    # X-Accel-Buffering: no — stops nginx from buffering the event stream.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
|
||||
|
||||
|
||||
@app.route('/admin/zopk/knowledge/embeddings/stream', methods=['GET'])
@login_required
def admin_zopk_embeddings_stream():
    """
    SSE endpoint for streaming embeddings generation progress.

    Streams one JSON event per knowledge chunk while embeddings are
    generated via the Gemini embedding API, then a final 'complete'
    (or 'error') event with aggregate statistics. Each successful
    embedding is committed immediately so paid API work survives a
    client disconnect.

    Query params:
        - limit: int (default 50, max 200) - max chunks to process
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    limit = min(int(request.args.get('limit', 50)), 200)
    # Captured before streaming starts: current_user is request-bound and
    # not safely accessible from inside the generator.
    user_id = current_user.id

    def generate():
        import json
        import time
        from gemini_service import GeminiService

        db = SessionLocal()
        try:
            from database import ZOPKKnowledgeChunk

            gemini = GeminiService()

            # Find chunks that do not have an embedding yet.
            chunks = db.query(ZOPKKnowledgeChunk).filter(
                ZOPKKnowledgeChunk.embedding.is_(None)
            ).limit(limit).all()

            total = len(chunks)

            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak chunks bez embeddingów', 'total': 0}, ensure_ascii=False)}\n\n"
                return

            # Initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'embedding', 'status': 'processing', 'message': f'Generuję embeddingi dla {total} chunks...'}, ensure_ascii=False)}\n\n"

            stats = {'success': 0, 'failed': 0}
            start_time = time.time()

            for idx, chunk in enumerate(chunks, 1):
                summary_short = chunk.summary[:40] if chunk.summary else f'chunk_{chunk.id}'

                # Processing update (percent uses idx-1: this item is not done yet).
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'embedding', 'status': 'processing', 'message': f'Embedding {idx}/{total}: {summary_short}...', 'details': stats}, ensure_ascii=False)}\n\n"

                try:
                    embedding = gemini.generate_embedding(
                        text=chunk.content,
                        task_type='retrieval_document',
                        title=chunk.summary,
                        user_id=user_id,
                        feature='zopk_chunk_embedding'
                    )

                    if embedding:
                        chunk.embedding = json.dumps(embedding)
                        # Commit per chunk: was a single commit after the loop,
                        # which lost every paid API result if the client
                        # disconnected mid-stream (GeneratorExit skips the tail).
                        db.commit()
                        stats['success'] += 1
                        # Report the actual vector length instead of a
                        # hard-coded "768" (wrong if the embedding model changes).
                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'success', 'message': f'✓ {len(embedding)} dim: {summary_short}', 'details': stats}, ensure_ascii=False)}\n\n"
                    else:
                        stats['failed'] += 1
                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"

                except Exception as e:
                    # Discard any half-applied change so the session stays usable.
                    db.rollback()
                    stats['failed'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"

            processing_time = round(time.time() - start_time, 2)

            # Completion summary event.
            success_count = stats['success']
            complete_msg = f'Zakończono: {success_count}/{total} embeddingów'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'embedding', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            logger.error(f"SSE embedding error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()

    # X-Accel-Buffering: no — stops nginx from buffering the event stream.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
|
||||
|
||||
|
||||
@app.route('/api/zopk/knowledge/search', methods=['POST'])
|
||||
@login_required
|
||||
def api_zopk_knowledge_search():
|
||||
|
||||
@ -1445,6 +1445,252 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- AI Operations Progress Modal
     Driven by the JS helpers openAiOpsModal / updateAiOpsProgress /
     completeAiOpsModal / closeAiOpsModal, which write into the
     aiOps* elements below from SSE progress events. -->
<div class="modal-overlay" id="aiOpsModal">
    <div class="modal modal-wide">
        <div class="modal-header">
            <div class="modal-icon" id="aiOpsIcon">⏳</div>
            <h3 id="aiOpsTitle">Operacja w toku...</h3>
            <!-- Hidden while running; revealed by completeAiOpsModal() -->
            <button type="button" class="modal-close-btn" id="aiOpsCloseBtn" style="display: none;" onclick="closeAiOpsModal()">×</button>
        </div>

        <!-- Progress Section -->
        <div class="ai-ops-progress">
            <div class="ai-progress-bar">
                <div class="ai-progress-fill" id="aiOpsProgressFill" style="width: 0%;"></div>
            </div>
            <div class="ai-ops-stats">
                <span id="aiOpsPercent">0%</span>
                <span id="aiOpsCounter">0 / 0</span>
            </div>
        </div>

        <!-- Current Operation -->
        <div class="ai-ops-current" id="aiOpsCurrentOp">
            <span class="spinner-small"></span>
            <span id="aiOpsCurrentText">Inicjalizacja...</span>
        </div>

        <!-- Live Log (scrollable) -->
        <div class="ai-ops-log-container">
            <div class="ai-ops-log-header">
                <span>📋 Log operacji</span>
                <span id="aiOpsLogCount">0 wpisów</span>
            </div>
            <div class="ai-ops-log" id="aiOpsLog">
                <!-- Log entries will be added here by addAiOpsLogEntry() -->
            </div>
        </div>

        <!-- Summary Stats (shown on complete) -->
        <div class="ai-ops-summary" id="aiOpsSummary" style="display: none;">
            <div class="summary-row">
                <span class="summary-label">✓ Sukces:</span>
                <span class="summary-value" id="aiOpsSummarySuccess">0</span>
            </div>
            <div class="summary-row">
                <span class="summary-label">✗ Błędy:</span>
                <span class="summary-value" id="aiOpsSummaryFailed">0</span>
            </div>
            <!-- Only shown for operations that report a 'skipped' counter -->
            <div class="summary-row" id="aiOpsSummarySkippedRow" style="display: none;">
                <span class="summary-label">⊘ Pominięto:</span>
                <span class="summary-value" id="aiOpsSummarySkipped">0</span>
            </div>
            <div class="summary-row">
                <span class="summary-label">⏱️ Czas:</span>
                <span class="summary-value" id="aiOpsSummaryTime">0s</span>
            </div>
        </div>

        <!-- Actions -->
        <div class="modal-actions" id="aiOpsActions" style="display: none;">
            <button type="button" class="btn btn-primary" onclick="closeAiOpsModal(); loadKnowledgeStats();">Zamknij i odśwież</button>
        </div>
    </div>
</div>
|
||||
|
||||
<style>
/* AI Operations Modal Styles */
.modal-wide {
    max-width: 700px;
    width: 95%;
}

.modal-header {
    display: flex;
    align-items: center;
    gap: 12px;
    margin-bottom: 20px;
    /* anchor for the absolutely-positioned close button */
    position: relative;
}

.modal-header h3 {
    margin: 0;
    flex: 1;
}

.modal-close-btn {
    position: absolute;
    right: 0;
    top: 0;
    background: none;
    border: none;
    font-size: 24px;
    cursor: pointer;
    color: var(--text-muted);
    padding: 5px 10px;
}

.modal-close-btn:hover {
    color: var(--text-primary);
}

.ai-ops-progress {
    margin-bottom: 15px;
}

/* percent on the left, "current / total" counter on the right */
.ai-ops-stats {
    display: flex;
    justify-content: space-between;
    font-size: 14px;
    margin-top: 8px;
    color: var(--text-muted);
}

#aiOpsPercent {
    font-weight: 600;
    color: var(--color-primary);
}

.ai-ops-current {
    display: flex;
    align-items: center;
    gap: 10px;
    padding: 12px 15px;
    background: var(--bg-secondary);
    border-radius: 8px;
    margin-bottom: 15px;
    font-size: 14px;
}

.ai-ops-log-container {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    overflow: hidden;
    margin-bottom: 15px;
}

.ai-ops-log-header {
    display: flex;
    justify-content: space-between;
    padding: 8px 12px;
    background: var(--bg-tertiary);
    font-size: 13px;
    font-weight: 500;
    border-bottom: 1px solid var(--border-color);
}

/* fixed-height scrollable console-style log */
.ai-ops-log {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    font-family: 'Monaco', 'Menlo', 'Consolas', monospace;
    font-size: 12px;
    background: var(--bg-primary);
}

.log-entry {
    padding: 4px 8px;
    margin: 2px 0;
    border-radius: 4px;
    display: flex;
    align-items: flex-start;
    gap: 8px;
}

/* status-colored entries; class names match SSE event 'status' values */
.log-entry.processing {
    background: rgba(59, 130, 246, 0.1);
    color: #3b82f6;
}

.log-entry.success {
    background: rgba(34, 197, 94, 0.1);
    color: #22c55e;
}

.log-entry.failed {
    background: rgba(239, 68, 68, 0.1);
    color: #ef4444;
}

.log-entry.skipped {
    background: rgba(234, 179, 8, 0.1);
    color: #eab308;
}

.log-entry.complete {
    background: rgba(168, 85, 247, 0.1);
    color: #a855f7;
    font-weight: 600;
}

.log-time {
    color: var(--text-muted);
    font-size: 11px;
    white-space: nowrap;
}

.log-message {
    flex: 1;
    word-break: break-word;
}

.ai-ops-summary {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 10px;
    padding: 15px;
    background: var(--bg-secondary);
    border-radius: 8px;
    margin-bottom: 15px;
}

.summary-row {
    display: flex;
    flex-direction: column;
    align-items: center;
    gap: 4px;
}

.summary-label {
    font-size: 12px;
    color: var(--text-muted);
}

.summary-value {
    font-size: 24px;
    font-weight: 700;
    color: var(--text-primary);
}

/* Spinning icon animation (applied to .modal-icon while an op runs) */
@keyframes spin {
    from { transform: rotate(0deg); }
    to { transform: rotate(360deg); }
}

.modal-icon.spinning {
    animation: spin 2s linear infinite;
}

/* Responsive log height */
@media (max-height: 800px) {
    .ai-ops-log {
        height: 180px;
    }
}
</style>
|
||||
|
||||
<!-- Add News Modal -->
|
||||
<div class="modal-overlay" id="addNewsModal">
|
||||
<div class="modal">
|
||||
@ -2544,18 +2790,193 @@ async function loadKnowledgeStats() {
|
||||
}
|
||||
}
|
||||
|
||||
async function scrapeContent() {
|
||||
const btn = document.getElementById('scrapeBtn');
|
||||
const originalContent = btn.innerHTML;
|
||||
// ===========================================
|
||||
// AI Operations Modal Functions
|
||||
// ===========================================
|
||||
|
||||
let aiOpsEventSource = null;
|
||||
let aiOpsLogEntries = 0;
|
||||
|
||||
function openAiOpsModal(title, icon) {
    // Reset the progress modal to its initial "running" state and show it.
    const byId = (id) => document.getElementById(id);

    byId('aiOpsTitle').textContent = title;
    const iconEl = byId('aiOpsIcon');
    iconEl.textContent = icon;
    iconEl.classList.add('spinning');

    // Clear everything left over from a previous run.
    byId('aiOpsProgressFill').style.width = '0%';
    byId('aiOpsPercent').textContent = '0%';
    byId('aiOpsCounter').textContent = '0 / 0';
    byId('aiOpsCurrentText').textContent = 'Inicjalizacja...';
    byId('aiOpsLog').innerHTML = '';
    byId('aiOpsSummary').style.display = 'none';
    byId('aiOpsActions').style.display = 'none';
    byId('aiOpsCloseBtn').style.display = 'none';
    byId('aiOpsCurrentOp').style.display = 'flex';
    aiOpsLogEntries = 0;
    byId('aiOpsLogCount').textContent = '0 wpisów';

    byId('aiOpsModal').classList.add('active');
}
|
||||
|
||||
function closeAiOpsModal() {
    // Hide the modal and tear down any live SSE connection.
    document.getElementById('aiOpsModal').classList.remove('active');

    if (aiOpsEventSource !== null) {
        aiOpsEventSource.close();
        aiOpsEventSource = null;
    }
}
|
||||
|
||||
function addAiOpsLogEntry(status, message) {
    // Append one timestamped, status-colored entry to the operation log
    // and keep the log scrolled to the newest entry.
    const log = document.getElementById('aiOpsLog');
    const time = new Date().toLocaleTimeString('pl-PL', { hour: '2-digit', minute: '2-digit', second: '2-digit' });

    const entry = document.createElement('div');
    entry.className = `log-entry ${status}`;

    const timeSpan = document.createElement('span');
    timeSpan.className = 'log-time';
    timeSpan.textContent = time;

    const msgSpan = document.createElement('span');
    msgSpan.className = 'log-message';
    // textContent, NOT innerHTML: messages embed article titles scraped from
    // external sites, so HTML injection must not be possible here.
    msgSpan.textContent = message;

    entry.appendChild(timeSpan);
    entry.appendChild(msgSpan);
    log.appendChild(entry);
    log.scrollTop = log.scrollHeight;

    aiOpsLogEntries++;
    document.getElementById('aiOpsLogCount').textContent = `${aiOpsLogEntries} wpisów`;
}
|
||||
|
||||
function updateAiOpsProgress(data) {
    // Reflect one SSE progress event in the modal UI.
    const { percent, current, total, message, status } = data;

    if (percent !== undefined) {
        document.getElementById('aiOpsProgressFill').style.width = `${percent}%`;
        document.getElementById('aiOpsPercent').textContent = `${Math.round(percent)}%`;
    }

    if (current !== undefined && total !== undefined) {
        document.getElementById('aiOpsCounter').textContent = `${current} / ${total}`;
    }

    if (message) {
        document.getElementById('aiOpsCurrentText').textContent = message;
    }

    // Every event carrying both a status and a message also becomes a log line.
    if (status && message) {
        addAiOpsLogEntry(status, message);
    }
}
|
||||
|
||||
function completeAiOpsModal(data) {
    // Switch the modal from its "running" state to the final
    // success/error state and populate the summary panel.
    const isError = data.status === 'error';
    const icon = document.getElementById('aiOpsIcon');
    icon.classList.remove('spinning');
    icon.textContent = isError ? '❌' : '✅';
    document.getElementById('aiOpsTitle').textContent = isError ? 'Błąd operacji' : 'Operacja zakończona';

    // Hide the live "current operation" row.
    document.getElementById('aiOpsCurrentOp').style.display = 'none';

    // Summary counters. Use ?? / explicit undefined checks (not ||) so a
    // legitimate value of 0 is displayed instead of being treated as missing
    // (scrape events report 'scraped'; extract/embedding events report 'success').
    const details = data.details || {};
    document.getElementById('aiOpsSummarySuccess').textContent = details.success ?? details.scraped ?? 0;
    document.getElementById('aiOpsSummaryFailed').textContent = details.failed ?? 0;

    if (details.skipped !== undefined) {
        document.getElementById('aiOpsSummarySkippedRow').style.display = 'flex';
        document.getElementById('aiOpsSummarySkipped').textContent = details.skipped;
    }

    if (details.processing_time !== undefined) {
        document.getElementById('aiOpsSummaryTime').textContent = `${details.processing_time}s`;
    }

    document.getElementById('aiOpsSummary').style.display = 'grid';
    document.getElementById('aiOpsActions').style.display = 'flex';
    document.getElementById('aiOpsCloseBtn').style.display = 'block';
}
|
||||
|
||||
function startSSEOperation(endpoint, title, icon, limit) {
    // Open the progress modal and stream progress events from an SSE endpoint.
    //
    // Guard against double-start: without this, a second click overwrote the
    // global handle and leaked the previous, still-open EventSource.
    if (aiOpsEventSource) {
        aiOpsEventSource.close();
        aiOpsEventSource = null;
    }

    openAiOpsModal(title, icon);

    const url = `${endpoint}?limit=${encodeURIComponent(limit)}`;
    aiOpsEventSource = new EventSource(url);

    aiOpsEventSource.onmessage = function(event) {
        const data = JSON.parse(event.data);

        if (data.status === 'complete' || data.status === 'error') {
            // Terminal event: close the stream before it auto-reconnects.
            aiOpsEventSource.close();
            aiOpsEventSource = null;
            completeAiOpsModal(data);
        } else {
            updateAiOpsProgress(data);
        }
    };

    aiOpsEventSource.onerror = function(event) {
        // Connection dropped without a terminal event — surface it as an error.
        console.error('SSE error:', event);
        aiOpsEventSource.close();
        aiOpsEventSource = null;
        completeAiOpsModal({ status: 'error', message: 'Błąd połączenia', details: {} });
    };
}
|
||||
|
||||
// ===========================================
|
||||
// AI Knowledge Base Functions (with SSE)
|
||||
// ===========================================
|
||||
|
||||
async function scrapeContent() {
    // Confirm, then stream article-content scraping progress over SSE.
    //
    // Fix: a merge leftover passed TWO message strings to showConfirm as
    // separate arguments (the old short text and the new extended one),
    // which shifted the options object into the wrong parameter — compare
    // extractKnowledge(), which passes exactly (message, options).
    const confirmed = await showConfirm(
        'Czy chcesz rozpocząć scrapowanie treści artykułów?<br><br>' +
        '<small>Proces pobierze pełną treść z zatwierdzonych newsów które jeszcze nie mają treści.<br>' +
        'Postęp będzie wyświetlany na żywo.</small>',
        { icon: '📄', title: 'Scraping treści', okText: 'Rozpocznij', okClass: 'btn-primary' }
    );

    if (!confirmed) return;

    startSSEOperation('/admin/zopk/news/scrape-content/stream', 'Scraping treści artykułów', '📄', 30);
}
|
||||
|
||||
async function extractKnowledge() {
    // Confirm, then stream AI knowledge-extraction progress over SSE.
    const message =
        'Czy chcesz uruchomić ekstrakcję wiedzy przez AI?<br><br>' +
        '<small>Gemini AI przeanalizuje zescrapowane artykuły i wyekstrahuje:<br>' +
        '• Chunks (fragmenty tekstu)<br>' +
        '• Fakty (daty, liczby, decyzje)<br>' +
        '• Encje (firmy, osoby, projekty)<br><br>' +
        'Postęp będzie wyświetlany na żywo.</small>';
    const options = { icon: '🤖', title: 'Ekstrakcja wiedzy', okText: 'Uruchom AI', okClass: 'btn-primary' };

    const confirmed = await showConfirm(message, options);
    if (!confirmed) return;

    startSSEOperation('/admin/zopk/knowledge/extract/stream', 'Ekstrakcja wiedzy (Gemini AI)', '🤖', 10);
}
|
||||
|
||||
async function generateEmbeddings() {
    // Confirm, then stream embedding generation progress over SSE.
    const message =
        'Czy chcesz wygenerować embeddingi dla semantic search?<br><br>' +
        '<small>Google Text Embedding API przekształci tekst w wektory 768-wymiarowe.<br>' +
        'Embeddingi umożliwiają inteligentne wyszukiwanie w bazie wiedzy.<br><br>' +
        'Postęp będzie wyświetlany na żywo.</small>';
    const options = { icon: '🔍', title: 'Generowanie embeddingów', okText: 'Generuj', okClass: 'btn-primary' };

    const confirmed = await showConfirm(message, options);
    if (!confirmed) return;

    startSSEOperation('/admin/zopk/knowledge/embeddings/stream', 'Generowanie embeddingów', '🔍', 50);
}
|
||||
|
||||
// Keep old code for backward compatibility (non-SSE version - can be removed later)
|
||||
async function scrapeContentOld() {
|
||||
const btn = document.getElementById('scrapeBtn');
|
||||
const originalContent = btn.innerHTML;
|
||||
|
||||
btn.disabled = true;
|
||||
btn.innerHTML = '<span class="spinner-small"></span>';
|
||||
|
||||
@ -2581,21 +3002,10 @@ async function scrapeContent() {
|
||||
}
|
||||
}
|
||||
|
||||
async function extractKnowledge() {
|
||||
async function extractKnowledgeOld() {
|
||||
const btn = document.getElementById('extractBtn');
|
||||
const originalContent = btn.innerHTML;
|
||||
|
||||
const confirmed = await showConfirm(
|
||||
'Czy chcesz uruchomić ekstrakcję wiedzy przez AI?<br><br>' +
|
||||
'<small>Gemini AI przeanalizuje zescrapowane artykuły i wyekstrahuje:<br>' +
|
||||
'• Chunks (fragmenty tekstu)<br>' +
|
||||
'• Fakty (daty, liczby, decyzje)<br>' +
|
||||
'• Encje (firmy, osoby, projekty)</small>',
|
||||
{ icon: '🤖', title: 'Ekstrakcja wiedzy', okText: 'Uruchom AI', okClass: 'btn-primary' }
|
||||
);
|
||||
|
||||
if (!confirmed) return;
|
||||
|
||||
btn.disabled = true;
|
||||
btn.innerHTML = '<span class="spinner-small"></span>';
|
||||
|
||||
@ -2621,19 +3031,10 @@ async function extractKnowledge() {
|
||||
}
|
||||
}
|
||||
|
||||
async function generateEmbeddings() {
|
||||
async function generateEmbeddingsOld() {
|
||||
const btn = document.getElementById('embeddingsBtn');
|
||||
const originalContent = btn.innerHTML;
|
||||
|
||||
const confirmed = await showConfirm(
|
||||
'Czy chcesz wygenerować embeddingi dla semantic search?<br><br>' +
|
||||
'<small>Google Text Embedding API przekształci tekst w wektory 768-wymiarowe.<br>' +
|
||||
'Embeddingi umożliwiają inteligentne wyszukiwanie w bazie wiedzy.</small>',
|
||||
{ icon: '🔍', title: 'Generowanie embeddingów', okText: 'Generuj', okClass: 'btn-primary' }
|
||||
);
|
||||
|
||||
if (!confirmed) return;
|
||||
|
||||
btn.disabled = true;
|
||||
btn.innerHTML = '<span class="spinner-small"></span>';
|
||||
|
||||
|
||||
@ -19,9 +19,9 @@ import logging
|
||||
import hashlib
|
||||
import base64
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, List, Optional, Tuple, Callable, Any
|
||||
from urllib.parse import urlparse, parse_qs, unquote
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup, Comment, NavigableString
|
||||
@ -312,6 +312,24 @@ class ScrapeResult:
|
||||
status: str = 'pending' # scraped, failed, skipped
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProgressUpdate:
|
||||
"""Progress update for batch operations."""
|
||||
current: int
|
||||
total: int
|
||||
percent: float
|
||||
stage: str # 'scraping', 'extracting', 'embedding'
|
||||
status: str # 'processing', 'success', 'failed', 'complete'
|
||||
message: str
|
||||
details: Dict[str, Any] = field(default_factory=dict)
|
||||
article_id: Optional[int] = None
|
||||
article_title: Optional[str] = None
|
||||
|
||||
|
||||
# Type alias for progress callback
|
||||
ProgressCallback = Optional[Callable[[ProgressUpdate], None]]
|
||||
|
||||
|
||||
# ============================================================
|
||||
# SCRAPER CLASS
|
||||
# ============================================================
|
||||
@ -704,7 +722,8 @@ class ZOPKContentScraper:
|
||||
self,
|
||||
limit: int = 50,
|
||||
status_filter: Optional[str] = None,
|
||||
force: bool = False
|
||||
force: bool = False,
|
||||
progress_callback: ProgressCallback = None
|
||||
) -> Dict:
|
||||
"""
|
||||
Batch scrape articles.
|
||||
@ -713,6 +732,7 @@ class ZOPKContentScraper:
|
||||
limit: Maximum number of articles to scrape
|
||||
status_filter: Filter by approval status (approved, auto_approved)
|
||||
force: If True, re-scrape even already scraped articles
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
Dict with statistics
|
||||
@ -743,10 +763,11 @@ class ZOPKContentScraper:
|
||||
|
||||
# Limit
|
||||
articles = query.limit(limit).all()
|
||||
total = len(articles)
|
||||
|
||||
# Statistics
|
||||
stats = {
|
||||
'total': len(articles),
|
||||
'total': total,
|
||||
'scraped': 0,
|
||||
'failed': 0,
|
||||
'skipped': 0,
|
||||
@ -755,9 +776,40 @@ class ZOPKContentScraper:
|
||||
'processing_time': 0
|
||||
}
|
||||
|
||||
# Send initial progress
|
||||
if progress_callback and total > 0:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=0,
|
||||
total=total,
|
||||
percent=0.0,
|
||||
stage='scraping',
|
||||
status='processing',
|
||||
message=f'Rozpoczynam scraping {total} artykułów...',
|
||||
details={'scraped': 0, 'failed': 0, 'skipped': 0}
|
||||
))
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
for article in articles:
|
||||
for idx, article in enumerate(articles, 1):
|
||||
# Send progress update before processing
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round((idx - 1) / total * 100, 1),
|
||||
stage='scraping',
|
||||
status='processing',
|
||||
message=f'Pobieram treść: {article.title[:50]}...',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'scraped': stats['scraped'],
|
||||
'failed': stats['failed'],
|
||||
'skipped': stats['skipped'],
|
||||
'source': article.source_name or 'nieznane'
|
||||
}
|
||||
))
|
||||
|
||||
result = self.scrape_article(article.id)
|
||||
|
||||
if result.status == 'scraped':
|
||||
@ -768,8 +820,37 @@ class ZOPKContentScraper:
|
||||
'word_count': result.word_count,
|
||||
'source': article.source_name
|
||||
})
|
||||
# Send success progress
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='scraping',
|
||||
status='success',
|
||||
message=f'✓ Pobrano {result.word_count} słów: {article.title[:40]}...',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'scraped': stats['scraped'],
|
||||
'failed': stats['failed'],
|
||||
'skipped': stats['skipped'],
|
||||
'word_count': result.word_count
|
||||
}
|
||||
))
|
||||
elif result.status == 'skipped':
|
||||
stats['skipped'] += 1
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='scraping',
|
||||
status='skipped',
|
||||
message=f'⊘ Pominięto: {article.title[:40]}...',
|
||||
article_id=article.id,
|
||||
details={'scraped': stats['scraped'], 'failed': stats['failed'], 'skipped': stats['skipped']}
|
||||
))
|
||||
else:
|
||||
stats['failed'] += 1
|
||||
stats['errors'].append({
|
||||
@ -777,9 +858,43 @@ class ZOPKContentScraper:
|
||||
'url': article.url,
|
||||
'error': result.error
|
||||
})
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='scraping',
|
||||
status='failed',
|
||||
message=f'✗ Błąd: {result.error[:50]}...' if result.error else '✗ Błąd',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'scraped': stats['scraped'],
|
||||
'failed': stats['failed'],
|
||||
'skipped': stats['skipped'],
|
||||
'error': result.error
|
||||
}
|
||||
))
|
||||
|
||||
stats['processing_time'] = round(time.time() - start_time, 2)
|
||||
|
||||
# Send completion progress
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=total,
|
||||
total=total,
|
||||
percent=100.0,
|
||||
stage='scraping',
|
||||
status='complete',
|
||||
message=f'Zakończono: {stats["scraped"]} pobrano, {stats["failed"]} błędów, {stats["skipped"]} pominięto',
|
||||
details={
|
||||
'scraped': stats['scraped'],
|
||||
'failed': stats['failed'],
|
||||
'skipped': stats['skipped'],
|
||||
'processing_time': stats['processing_time']
|
||||
}
|
||||
))
|
||||
|
||||
logger.info(
|
||||
f"Batch scrape complete: {stats['scraped']} scraped, "
|
||||
f"{stats['failed']} failed, {stats['skipped']} skipped "
|
||||
|
||||
@ -22,9 +22,12 @@ import json
|
||||
import logging
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from typing import Dict, List, Optional, Tuple, Any, Callable
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
# Import progress tracking from scraper
|
||||
from zopk_content_scraper import ProgressUpdate, ProgressCallback
|
||||
|
||||
from database import (
|
||||
ZOPKNews,
|
||||
ZOPKKnowledgeChunk,
|
||||
@ -663,12 +666,13 @@ class ZOPKKnowledgeService:
|
||||
processing_time=processing_time
|
||||
)
|
||||
|
||||
def batch_extract(self, limit: int = 50) -> Dict:
|
||||
def batch_extract(self, limit: int = 50, progress_callback: ProgressCallback = None) -> Dict:
|
||||
"""
|
||||
Batch extract knowledge from scraped articles.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of articles to process
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
Dict with statistics
|
||||
@ -686,8 +690,9 @@ class ZOPKKnowledgeService:
|
||||
ZOPKNews.created_at.desc()
|
||||
).limit(limit).all()
|
||||
|
||||
total = len(articles)
|
||||
stats = {
|
||||
'total': len(articles),
|
||||
'total': total,
|
||||
'success': 0,
|
||||
'failed': 0,
|
||||
'chunks_created': 0,
|
||||
@ -698,9 +703,41 @@ class ZOPKKnowledgeService:
|
||||
'processing_time': 0
|
||||
}
|
||||
|
||||
# Send initial progress
|
||||
if progress_callback and total > 0:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=0,
|
||||
total=total,
|
||||
percent=0.0,
|
||||
stage='extracting',
|
||||
status='processing',
|
||||
message=f'Rozpoczynam ekstrakcję wiedzy z {total} artykułów...',
|
||||
details={'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
|
||||
))
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
for article in articles:
|
||||
for idx, article in enumerate(articles, 1):
|
||||
# Send progress update before processing
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round((idx - 1) / total * 100, 1),
|
||||
stage='extracting',
|
||||
status='processing',
|
||||
message=f'Analizuję przez AI: {article.title[:50]}...',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'chunks': stats['chunks_created'],
|
||||
'facts': stats['facts_created'],
|
||||
'entities': stats['entities_created']
|
||||
}
|
||||
))
|
||||
|
||||
result = self.extract_from_news(article.id)
|
||||
|
||||
if result.success:
|
||||
@ -709,6 +746,28 @@ class ZOPKKnowledgeService:
|
||||
stats['facts_created'] += result.facts_created
|
||||
stats['entities_created'] += result.entities_created
|
||||
stats['relations_created'] += result.relations_created
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='extracting',
|
||||
status='success',
|
||||
message=f'✓ Wyekstrahowano: {result.chunks_created} chunks, {result.facts_created} faktów, {result.entities_created} encji',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'chunks': stats['chunks_created'],
|
||||
'facts': stats['facts_created'],
|
||||
'entities': stats['entities_created'],
|
||||
'new_chunks': result.chunks_created,
|
||||
'new_facts': result.facts_created,
|
||||
'new_entities': result.entities_created
|
||||
}
|
||||
))
|
||||
else:
|
||||
stats['failed'] += 1
|
||||
if result.error:
|
||||
@ -718,8 +777,47 @@ class ZOPKKnowledgeService:
|
||||
'error': result.error
|
||||
})
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='extracting',
|
||||
status='failed',
|
||||
message=f'✗ Błąd ekstrakcji: {result.error[:50]}...' if result.error else '✗ Błąd',
|
||||
article_id=article.id,
|
||||
article_title=article.title[:80],
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'error': result.error
|
||||
}
|
||||
))
|
||||
|
||||
stats['processing_time'] = round(time.time() - start_time, 2)
|
||||
|
||||
# Send completion progress
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=total,
|
||||
total=total,
|
||||
percent=100.0,
|
||||
stage='extracting',
|
||||
status='complete',
|
||||
message=f'Zakończono: {stats["success"]}/{total} artykułów. '
|
||||
f'Utworzono: {stats["chunks_created"]} chunks, {stats["facts_created"]} faktów, '
|
||||
f'{stats["entities_created"]} encji',
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'chunks': stats['chunks_created'],
|
||||
'facts': stats['facts_created'],
|
||||
'entities': stats['entities_created'],
|
||||
'relations': stats['relations_created'],
|
||||
'processing_time': stats['processing_time']
|
||||
}
|
||||
))
|
||||
|
||||
logger.info(
|
||||
f"Batch extraction complete: {stats['success']}/{stats['total']} success "
|
||||
f"in {stats['processing_time']}s"
|
||||
@ -950,7 +1048,12 @@ def get_relevant_facts(
|
||||
return results[:limit]
|
||||
|
||||
|
||||
def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[int] = None) -> Dict:
|
||||
def generate_chunk_embeddings(
|
||||
db_session,
|
||||
limit: int = 100,
|
||||
user_id: Optional[int] = None,
|
||||
progress_callback: ProgressCallback = None
|
||||
) -> Dict:
|
||||
"""
|
||||
Generate embeddings for chunks that don't have them.
|
||||
|
||||
@ -958,11 +1061,13 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
|
||||
db_session: SQLAlchemy session
|
||||
limit: Max chunks to process
|
||||
user_id: User ID for cost tracking
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
Dict with statistics
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
from gemini_service import GeminiService
|
||||
|
||||
gemini = GeminiService()
|
||||
@ -972,13 +1077,52 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
|
||||
ZOPKKnowledgeChunk.embedding.is_(None)
|
||||
).limit(limit).all()
|
||||
|
||||
total = len(chunks)
|
||||
stats = {
|
||||
'total': len(chunks),
|
||||
'total': total,
|
||||
'success': 0,
|
||||
'failed': 0
|
||||
'failed': 0,
|
||||
'processing_time': 0
|
||||
}
|
||||
|
||||
for chunk in chunks:
|
||||
# Send initial progress
|
||||
if progress_callback and total > 0:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=0,
|
||||
total=total,
|
||||
percent=0.0,
|
||||
stage='embedding',
|
||||
status='processing',
|
||||
message=f'Rozpoczynam generowanie embeddingów dla {total} chunks...',
|
||||
details={'success': 0, 'failed': 0}
|
||||
))
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
for idx, chunk in enumerate(chunks, 1):
|
||||
# Send progress update before processing
|
||||
if progress_callback:
|
||||
# Get article title from chunk's source news
|
||||
article_title = None
|
||||
if chunk.source_news:
|
||||
article_title = chunk.source_news.title[:80]
|
||||
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round((idx - 1) / total * 100, 1),
|
||||
stage='embedding',
|
||||
status='processing',
|
||||
message=f'Generuję embedding {idx}/{total}: {chunk.summary[:40] if chunk.summary else "chunk"}...',
|
||||
article_id=chunk.source_news_id,
|
||||
article_title=article_title,
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'chunk_id': chunk.id
|
||||
}
|
||||
))
|
||||
|
||||
try:
|
||||
embedding = gemini.generate_embedding(
|
||||
text=chunk.content,
|
||||
@ -992,14 +1136,70 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
|
||||
# Store as JSON string
|
||||
chunk.embedding = json.dumps(embedding)
|
||||
stats['success'] += 1
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='embedding',
|
||||
status='success',
|
||||
message=f'✓ Wygenerowano embedding (768 dim)',
|
||||
article_id=chunk.source_news_id,
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'chunk_id': chunk.id
|
||||
}
|
||||
))
|
||||
else:
|
||||
stats['failed'] += 1
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='embedding',
|
||||
status='failed',
|
||||
message='✗ Nie udało się wygenerować embeddingu',
|
||||
article_id=chunk.source_news_id,
|
||||
details={'success': stats['success'], 'failed': stats['failed']}
|
||||
))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating embedding for chunk {chunk.id}: {e}")
|
||||
stats['failed'] += 1
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=idx,
|
||||
total=total,
|
||||
percent=round(idx / total * 100, 1),
|
||||
stage='embedding',
|
||||
status='failed',
|
||||
message=f'✗ Błąd: {str(e)[:50]}...',
|
||||
article_id=chunk.source_news_id,
|
||||
details={'success': stats['success'], 'failed': stats['failed'], 'error': str(e)}
|
||||
))
|
||||
|
||||
db_session.commit()
|
||||
stats['processing_time'] = round(time.time() - start_time, 2)
|
||||
|
||||
# Send completion progress
|
||||
if progress_callback:
|
||||
progress_callback(ProgressUpdate(
|
||||
current=total,
|
||||
total=total,
|
||||
percent=100.0,
|
||||
stage='embedding',
|
||||
status='complete',
|
||||
message=f'Zakończono: {stats["success"]}/{total} embeddingów wygenerowanych',
|
||||
details={
|
||||
'success': stats['success'],
|
||||
'failed': stats['failed'],
|
||||
'processing_time': stats['processing_time']
|
||||
}
|
||||
))
|
||||
|
||||
logger.info(f"Generated embeddings: {stats['success']}/{stats['total']} success")
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user