feat(admin): Paski postępu dla operacji AI w panelu ZOPK

Dodano Server-Sent Events (SSE) dla śledzenia postępu w czasie rzeczywistym:
- Scraping treści artykułów
- Ekstrakcja wiedzy przez Gemini AI
- Generowanie embeddingów

Funkcje:
- Modal z paskiem postępu i statystykami
- Live log operacji z kolorowaniem statusów
- Podsumowanie na zakończenie (sukces/błędy/czas)
- Możliwość zamknięcia modalu po zakończeniu

Zmiany techniczne:
- 3 nowe SSE endpointy (/stream)
- ProgressUpdate dataclass w scraperze
- Callback pattern w batch_scrape, batch_extract, generate_chunk_embeddings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-16 23:23:05 +01:00
parent 31d5a112b8
commit 37af8abc73
4 changed files with 1103 additions and 39 deletions

348
app.py
View File

@ -11131,6 +11131,354 @@ def admin_zopk_generate_embeddings():
db.close()
# ============================================================
# ZOPK SSE ENDPOINTS (Server-Sent Events for Progress Tracking)
# ============================================================
def sse_progress_generator(operation_func, db, **kwargs):
"""
Generic SSE generator for progress tracking.
Args:
operation_func: Function to call (must accept progress_callback)
db: Database session
**kwargs: Additional arguments for operation_func
Yields:
SSE formatted progress events
"""
import json
from dataclasses import asdict
progress_queue = []
def progress_callback(update):
progress_queue.append(update)
def run_operation():
try:
result = operation_func(progress_callback=progress_callback, **kwargs)
return result
except Exception as e:
logger.error(f"SSE operation error: {e}")
return {'error': str(e)}
# Start operation in separate thread
import threading
result_container = [None]
def thread_target():
result_container[0] = run_operation()
thread = threading.Thread(target=thread_target)
thread.start()
# Yield progress updates while thread is running
while thread.is_alive() or progress_queue:
while progress_queue:
update = progress_queue.pop(0)
data = asdict(update)
yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"
if thread.is_alive():
import time
time.sleep(0.1)
thread.join()
# Send final result
final_result = result_container[0] or {}
yield f"data: {json.dumps({'type': 'result', **final_result}, ensure_ascii=False)}\n\n"
@app.route('/admin/zopk/news/scrape-content/stream', methods=['GET'])
@login_required
def admin_zopk_scrape_content_stream():
    """
    SSE endpoint for streaming scrape progress.

    Emits one ``data:`` JSON event per article (processing / success /
    skipped / failed), then a final ``status: 'complete'`` summary event.

    Query params:
        - limit: int (default 30, capped at 100) - max articles to scrape
        - force: bool (default false) - re-scrape already scraped
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403
    limit = min(int(request.args.get('limit', 30)), 100)
    force = request.args.get('force', 'false').lower() == 'true'
    # Captured here: the generator below runs while the response streams,
    # after the request context backing current_user may be gone.
    user_id = current_user.id

    def generate():
        import json
        import time
        from zopk_content_scraper import ZOPKContentScraper, MAX_RETRY_ATTEMPTS
        from database import ZOPKNews
        db = SessionLocal()
        try:
            scraper = ZOPKContentScraper(db, user_id=user_id)
            # Approved articles only.
            query = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved'])
            )
            if not force:
                # Pending articles, plus failed ones still under the retry cap.
                # (A separate `scrape_status.in_(['pending', 'failed'])` filter
                # was removed — it was fully subsumed by this condition.)
                query = query.filter(
                    (ZOPKNews.scrape_status == 'pending') |
                    ((ZOPKNews.scrape_status == 'failed') & (ZOPKNews.scrape_attempts < MAX_RETRY_ATTEMPTS))
                )
            query = query.order_by(ZOPKNews.created_at.desc())
            articles = query.limit(limit).all()
            total = len(articles)
            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak artykułów do scrapowania', 'total': 0}, ensure_ascii=False)}\n\n"
                return
            # Initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'scraping', 'status': 'processing', 'message': f'Rozpoczynam scraping {total} artykułów...'}, ensure_ascii=False)}\n\n"
            stats = {'scraped': 0, 'failed': 0, 'skipped': 0}
            start_time = time.time()
            for idx, article in enumerate(articles, 1):
                # Processing event (percent reflects work completed so far).
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'scraping', 'status': 'processing', 'message': f'Pobieram: {article.title[:50]}...', 'article_id': article.id, 'article_title': article.title[:80], 'details': {'source': article.source_name or 'nieznane', **stats}}, ensure_ascii=False)}\n\n"
                result = scraper.scrape_article(article.id)
                if result.status == 'scraped':
                    stats['scraped'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'success', 'message': f'{result.word_count} słów: {article.title[:40]}...', 'article_id': article.id, 'details': {'word_count': result.word_count, **stats}}, ensure_ascii=False)}\n\n"
                elif result.status == 'skipped':
                    stats['skipped'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'skipped', 'message': f'⊘ Pominięto: {article.title[:40]}...', 'article_id': article.id, 'details': stats}, ensure_ascii=False)}\n\n"
                else:
                    stats['failed'] += 1
                    error_msg = result.error[:50] if result.error else 'Nieznany błąd'
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'scraping', 'status': 'failed', 'message': f'{error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
            processing_time = round(time.time() - start_time, 2)
            # Completion summary (drives the modal's final stats panel).
            scraped_count = stats['scraped']
            failed_count = stats['failed']
            skipped_count = stats['skipped']
            complete_msg = f'Zakończono: {scraped_count} pobrano, {failed_count} błędów, {skipped_count} pominięto'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'scraping', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"
        except Exception as e:
            logger.error(f"SSE scraping error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()

    # Headers disable proxy/nginx buffering so events reach the client live.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
@app.route('/admin/zopk/knowledge/extract/stream', methods=['GET'])
@login_required
def admin_zopk_knowledge_extract_stream():
    """
    SSE endpoint for streaming knowledge extraction progress.

    Emits one ``data:`` JSON event per processed article (processing /
    success / failed), then a final ``status: 'complete'`` summary event.

    Query params:
        - limit: int (default 10, capped at 50) - max articles to process
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403
    limit = min(int(request.args.get('limit', 10)), 50)
    # Captured here: the generator below runs while the response streams,
    # after the request context backing current_user may be gone.
    user_id = current_user.id
    def generate():
        import json
        import time
        db = SessionLocal()
        try:
            from zopk_knowledge_service import ZOPKKnowledgeService
            from database import ZOPKNews
            service = ZOPKKnowledgeService(db, user_id=user_id)
            # Find articles ready for extraction: approved, already scraped,
            # and not yet processed by the knowledge extractor.
            articles = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved']),
                ZOPKNews.scrape_status == 'scraped',
                ZOPKNews.knowledge_extracted == False
            ).order_by(
                ZOPKNews.created_at.desc()
            ).limit(limit).all()
            total = len(articles)
            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak artykułów do ekstrakcji', 'total': 0}, ensure_ascii=False)}\n\n"
                return
            # Send initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'extracting', 'status': 'processing', 'message': f'Rozpoczynam ekstrakcję z {total} artykułów...'}, ensure_ascii=False)}\n\n"
            stats = {'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
            start_time = time.time()
            for idx, article in enumerate(articles, 1):
                # Send processing update (percent reflects work done so far).
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'extracting', 'status': 'processing', 'message': f'Analizuję AI: {article.title[:50]}...', 'article_id': article.id, 'article_title': article.title[:80], 'details': stats}, ensure_ascii=False)}\n\n"
                result = service.extract_from_news(article.id)
                if result.success:
                    stats['success'] += 1
                    stats['chunks'] += result.chunks_created
                    stats['facts'] += result.facts_created
                    stats['entities'] += result.entities_created
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'success', 'message': f'{result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e', 'article_id': article.id, 'details': {'new_chunks': result.chunks_created, 'new_facts': result.facts_created, 'new_entities': result.entities_created, **stats}}, ensure_ascii=False)}\n\n"
                else:
                    stats['failed'] += 1
                    error_msg = result.error[:50] if result.error else 'Nieznany błąd'
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'failed', 'message': f'{error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
            processing_time = round(time.time() - start_time, 2)
            # Send completion summary (drives the modal's final stats panel).
            success_count = stats['success']
            chunks_count = stats['chunks']
            facts_count = stats['facts']
            entities_count = stats['entities']
            complete_msg = f'Zakończono: {success_count}/{total}. Utworzono: {chunks_count}ch, {facts_count}f, {entities_count}e'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'extracting', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"
        except Exception as e:
            logger.error(f"SSE extraction error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()
    # Headers disable proxy/nginx buffering so events reach the client live.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
@app.route('/admin/zopk/knowledge/embeddings/stream', methods=['GET'])
@login_required
def admin_zopk_embeddings_stream():
    """
    SSE endpoint for streaming embeddings generation progress.

    Emits one ``data:`` JSON event per knowledge chunk (processing /
    success / failed), then a final ``status: 'complete'`` summary event.

    Query params:
        - limit: int (default 50, capped at 200) - max chunks to process
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403
    limit = min(int(request.args.get('limit', 50)), 200)
    # Captured here: the generator below runs while the response streams,
    # after the request context backing current_user may be gone.
    user_id = current_user.id
    def generate():
        import json
        import time
        from gemini_service import GeminiService
        db = SessionLocal()
        try:
            from database import ZOPKKnowledgeChunk
            gemini = GeminiService()
            # Find chunks without embeddings
            chunks = db.query(ZOPKKnowledgeChunk).filter(
                ZOPKKnowledgeChunk.embedding.is_(None)
            ).limit(limit).all()
            total = len(chunks)
            if total == 0:
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak chunks bez embeddingów', 'total': 0}, ensure_ascii=False)}\n\n"
                return
            # Send initial event so the UI can size its progress bar.
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'stage': 'embedding', 'status': 'processing', 'message': f'Generuję embeddingi dla {total} chunks...'}, ensure_ascii=False)}\n\n"
            stats = {'success': 0, 'failed': 0}
            start_time = time.time()
            for idx, chunk in enumerate(chunks, 1):
                # Label for log messages; falls back to the chunk id.
                summary_short = chunk.summary[:40] if chunk.summary else f'chunk_{chunk.id}'
                # Send processing update
                yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round((idx-1)/total*100, 1), 'stage': 'embedding', 'status': 'processing', 'message': f'Embedding {idx}/{total}: {summary_short}...', 'details': stats}, ensure_ascii=False)}\n\n"
                try:
                    embedding = gemini.generate_embedding(
                        text=chunk.content,
                        task_type='retrieval_document',
                        title=chunk.summary,
                        user_id=user_id,
                        feature='zopk_chunk_embedding'
                    )
                    if embedding:
                        # The vector is stored as a JSON string on the row.
                        chunk.embedding = json.dumps(embedding)
                        stats['success'] += 1
                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'success', 'message': f'✓ 768 dim: {summary_short}', 'details': stats}, ensure_ascii=False)}\n\n"
                    else:
                        stats['failed'] += 1
                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
                except Exception as e:
                    # Per-chunk failure: count it and keep processing the rest.
                    stats['failed'] += 1
                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'{str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
            # Single commit persists all successfully embedded chunks at once.
            db.commit()
            processing_time = round(time.time() - start_time, 2)
            # Send completion summary (drives the modal's final stats panel).
            success_count = stats['success']
            complete_msg = f'Zakończono: {success_count}/{total} embeddingów'
            complete_data = {'current': total, 'total': total, 'percent': 100, 'stage': 'embedding', 'status': 'complete', 'message': complete_msg, 'details': {'processing_time': processing_time, **stats}}
            yield f"data: {json.dumps(complete_data, ensure_ascii=False)}\n\n"
        except Exception as e:
            logger.error(f"SSE embedding error: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()
    # Headers disable proxy/nginx buffering so events reach the client live.
    return Response(generate(), mimetype='text/event-stream', headers={
        'Cache-Control': 'no-cache',
        'X-Accel-Buffering': 'no'
    })
@app.route('/api/zopk/knowledge/search', methods=['POST'])
@login_required
def api_zopk_knowledge_search():

View File

@ -1445,6 +1445,252 @@
</div>
</div>
<!-- AI Operations Progress Modal: shared by scraping / extraction / embeddings
     SSE operations; populated and controlled by the aiOps* JS functions. -->
<div class="modal-overlay" id="aiOpsModal">
    <div class="modal modal-wide">
        <div class="modal-header">
            <div class="modal-icon" id="aiOpsIcon"></div>
            <h3 id="aiOpsTitle">Operacja w toku...</h3>
            <!-- Hidden while the operation runs; revealed on completion. -->
            <button type="button" class="modal-close-btn" id="aiOpsCloseBtn" style="display: none;" onclick="closeAiOpsModal()">&times;</button>
        </div>
        <!-- Progress Section -->
        <div class="ai-ops-progress">
            <div class="ai-progress-bar">
                <div class="ai-progress-fill" id="aiOpsProgressFill" style="width: 0%;"></div>
            </div>
            <div class="ai-ops-stats">
                <span id="aiOpsPercent">0%</span>
                <span id="aiOpsCounter">0 / 0</span>
            </div>
        </div>
        <!-- Current Operation -->
        <div class="ai-ops-current" id="aiOpsCurrentOp">
            <span class="spinner-small"></span>
            <span id="aiOpsCurrentText">Inicjalizacja...</span>
        </div>
        <!-- Live Log (scrollable) -->
        <div class="ai-ops-log-container">
            <div class="ai-ops-log-header">
                <span>📋 Log operacji</span>
                <span id="aiOpsLogCount">0 wpisów</span>
            </div>
            <div class="ai-ops-log" id="aiOpsLog">
                <!-- Log entries will be added here -->
            </div>
        </div>
        <!-- Summary Stats (shown on complete) -->
        <div class="ai-ops-summary" id="aiOpsSummary" style="display: none;">
            <div class="summary-row">
                <span class="summary-label">✓ Sukces:</span>
                <span class="summary-value" id="aiOpsSummarySuccess">0</span>
            </div>
            <div class="summary-row">
                <span class="summary-label">✗ Błędy:</span>
                <span class="summary-value" id="aiOpsSummaryFailed">0</span>
            </div>
            <!-- Only shown when the operation reports a skipped count. -->
            <div class="summary-row" id="aiOpsSummarySkippedRow" style="display: none;">
                <span class="summary-label">⊘ Pominięto:</span>
                <span class="summary-value" id="aiOpsSummarySkipped">0</span>
            </div>
            <div class="summary-row">
                <span class="summary-label">⏱️ Czas:</span>
                <span class="summary-value" id="aiOpsSummaryTime">0s</span>
            </div>
        </div>
        <!-- Actions -->
        <div class="modal-actions" id="aiOpsActions" style="display: none;">
            <button type="button" class="btn btn-primary" onclick="closeAiOpsModal(); loadKnowledgeStats();">Zamknij i odśwież</button>
        </div>
    </div>
</div>
<style>
/* AI Operations Modal Styles */
.modal-wide {
    max-width: 700px;
    width: 95%;
}
.modal-header {
    display: flex;
    align-items: center;
    gap: 12px;
    margin-bottom: 20px;
    position: relative;
}
.modal-header h3 {
    margin: 0;
    flex: 1;
}
.modal-close-btn {
    position: absolute;
    right: 0;
    top: 0;
    background: none;
    border: none;
    font-size: 24px;
    cursor: pointer;
    color: var(--text-muted);
    padding: 5px 10px;
}
.modal-close-btn:hover {
    color: var(--text-primary);
}
.ai-ops-progress {
    margin-bottom: 15px;
}
.ai-ops-stats {
    display: flex;
    justify-content: space-between;
    font-size: 14px;
    margin-top: 8px;
    color: var(--text-muted);
}
#aiOpsPercent {
    font-weight: 600;
    color: var(--color-primary);
}
.ai-ops-current {
    display: flex;
    align-items: center;
    gap: 10px;
    padding: 12px 15px;
    background: var(--bg-secondary);
    border-radius: 8px;
    margin-bottom: 15px;
    font-size: 14px;
}
.ai-ops-log-container {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    overflow: hidden;
    margin-bottom: 15px;
}
.ai-ops-log-header {
    display: flex;
    justify-content: space-between;
    padding: 8px 12px;
    background: var(--bg-tertiary);
    font-size: 13px;
    font-weight: 500;
    border-bottom: 1px solid var(--border-color);
}
.ai-ops-log {
    height: 250px;
    overflow-y: auto;
    padding: 10px;
    font-family: 'Monaco', 'Menlo', 'Consolas', monospace;
    font-size: 12px;
    background: var(--bg-primary);
}
/* One row per SSE event; color-coded by status class below. */
.log-entry {
    padding: 4px 8px;
    margin: 2px 0;
    border-radius: 4px;
    display: flex;
    align-items: flex-start;
    gap: 8px;
}
.log-entry.processing {
    background: rgba(59, 130, 246, 0.1);
    color: #3b82f6;
}
.log-entry.success {
    background: rgba(34, 197, 94, 0.1);
    color: #22c55e;
}
.log-entry.failed {
    background: rgba(239, 68, 68, 0.1);
    color: #ef4444;
}
.log-entry.skipped {
    background: rgba(234, 179, 8, 0.1);
    color: #eab308;
}
.log-entry.complete {
    background: rgba(168, 85, 247, 0.1);
    color: #a855f7;
    font-weight: 600;
}
.log-time {
    color: var(--text-muted);
    font-size: 11px;
    white-space: nowrap;
}
.log-message {
    flex: 1;
    word-break: break-word;
}
.ai-ops-summary {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
    gap: 10px;
    padding: 15px;
    background: var(--bg-secondary);
    border-radius: 8px;
    margin-bottom: 15px;
}
.summary-row {
    display: flex;
    flex-direction: column;
    align-items: center;
    gap: 4px;
}
.summary-label {
    font-size: 12px;
    color: var(--text-muted);
}
.summary-value {
    font-size: 24px;
    font-weight: 700;
    color: var(--text-primary);
}
/* Spinning icon animation (applied while an operation is running). */
@keyframes spin {
    from { transform: rotate(0deg); }
    to { transform: rotate(360deg); }
}
.modal-icon.spinning {
    animation: spin 2s linear infinite;
}
/* Responsive log height */
@media (max-height: 800px) {
    .ai-ops-log {
        height: 180px;
    }
}
</style>
<!-- Add News Modal -->
<div class="modal-overlay" id="addNewsModal">
<div class="modal">
@ -2544,18 +2790,193 @@ async function loadKnowledgeStats() {
}
}
async function scrapeContent() {
const btn = document.getElementById('scrapeBtn');
const originalContent = btn.innerHTML;
// ===========================================
// AI Operations Modal Functions
// ===========================================
// Active EventSource for the currently running SSE operation (null when idle).
let aiOpsEventSource = null;
// Number of entries currently shown in the modal's live log.
let aiOpsLogEntries = 0;
// Reset the AI-operations modal to its initial "in progress" state and show it.
function openAiOpsModal(title, icon) {
    const byId = (id) => document.getElementById(id);
    byId('aiOpsTitle').textContent = title;
    const iconEl = byId('aiOpsIcon');
    iconEl.textContent = icon;
    iconEl.classList.add('spinning');
    // Clear progress, counters, log and any summary left from a previous run.
    byId('aiOpsProgressFill').style.width = '0%';
    byId('aiOpsPercent').textContent = '0%';
    byId('aiOpsCounter').textContent = '0 / 0';
    byId('aiOpsCurrentText').textContent = 'Inicjalizacja...';
    byId('aiOpsLog').innerHTML = '';
    byId('aiOpsSummary').style.display = 'none';
    byId('aiOpsActions').style.display = 'none';
    byId('aiOpsCloseBtn').style.display = 'none';
    byId('aiOpsCurrentOp').style.display = 'flex';
    aiOpsLogEntries = 0;
    byId('aiOpsLogCount').textContent = '0 wpisów';
    byId('aiOpsModal').classList.add('active');
}
// Hide the AI-operations modal and tear down any still-open SSE connection.
function closeAiOpsModal() {
    document.getElementById('aiOpsModal').classList.remove('active');
    if (aiOpsEventSource !== null) {
        aiOpsEventSource.close();
        aiOpsEventSource = null;
    }
}
// Append one timestamped entry to the live log and keep it scrolled to the
// bottom. `message` may contain scraped article titles and server error text,
// so it is inserted via textContent (not innerHTML) to prevent HTML/script
// injection from untrusted scraped content.
function addAiOpsLogEntry(status, message) {
    const log = document.getElementById('aiOpsLog');
    const time = new Date().toLocaleTimeString('pl-PL', { hour: '2-digit', minute: '2-digit', second: '2-digit' });
    const entry = document.createElement('div');
    entry.className = `log-entry ${status}`;
    const timeSpan = document.createElement('span');
    timeSpan.className = 'log-time';
    timeSpan.textContent = time;
    const msgSpan = document.createElement('span');
    msgSpan.className = 'log-message';
    msgSpan.textContent = message;
    entry.appendChild(timeSpan);
    entry.appendChild(msgSpan);
    log.appendChild(entry);
    log.scrollTop = log.scrollHeight;
    aiOpsLogEntries++;
    document.getElementById('aiOpsLogCount').textContent = `${aiOpsLogEntries} wpisów`;
}
// Apply one SSE progress event to the modal widgets (bar, counter, log).
function updateAiOpsProgress(data) {
    const percent = data.percent;
    if (percent !== undefined) {
        document.getElementById('aiOpsProgressFill').style.width = `${percent}%`;
        document.getElementById('aiOpsPercent').textContent = `${Math.round(percent)}%`;
    }
    const hasCounter = data.current !== undefined && data.total !== undefined;
    if (hasCounter) {
        document.getElementById('aiOpsCounter').textContent = `${data.current} / ${data.total}`;
    }
    if (data.message) {
        document.getElementById('aiOpsCurrentText').textContent = data.message;
    }
    // A log line needs both a status (for color coding) and a message.
    if (data.status && data.message) {
        addAiOpsLogEntry(data.status, data.message);
    }
}
// Switch the modal from "in progress" to its final success/error state and
// fill in the summary panel from the event's `details`.
function completeAiOpsModal(data) {
    const isError = data.status === 'error';
    const icon = document.getElementById('aiOpsIcon');
    icon.classList.remove('spinning');
    icon.textContent = isError ? '❌' : '✅';
    document.getElementById('aiOpsTitle').textContent = isError ? 'Błąd operacji' : 'Operacja zakończona';
    // Hide the "current operation" spinner row.
    document.getElementById('aiOpsCurrentOp').style.display = 'none';
    // Summary numbers: use `??` (not `||`) so a legitimate count of 0 is
    // displayed instead of falling through to the next field; likewise a
    // processing_time of exactly 0 is still shown.
    const details = data.details || {};
    document.getElementById('aiOpsSummarySuccess').textContent = details.success ?? details.scraped ?? 0;
    document.getElementById('aiOpsSummaryFailed').textContent = details.failed ?? 0;
    if (details.skipped !== undefined) {
        document.getElementById('aiOpsSummarySkippedRow').style.display = 'flex';
        document.getElementById('aiOpsSummarySkipped').textContent = details.skipped;
    }
    if (details.processing_time !== undefined) {
        document.getElementById('aiOpsSummaryTime').textContent = `${details.processing_time}s`;
    }
    document.getElementById('aiOpsSummary').style.display = 'grid';
    document.getElementById('aiOpsActions').style.display = 'flex';
    document.getElementById('aiOpsCloseBtn').style.display = 'block';
}
// Open the progress modal and subscribe to the server's SSE stream.
// endpoint: stream URL; title/icon: modal header; limit: max items to process.
function startSSEOperation(endpoint, title, icon, limit) {
    // Close any stream left over from a previous run so two operations never
    // feed the same modal and the old connection does not leak.
    if (aiOpsEventSource) {
        aiOpsEventSource.close();
        aiOpsEventSource = null;
    }
    openAiOpsModal(title, icon);
    const url = `${endpoint}?limit=${limit}`;
    aiOpsEventSource = new EventSource(url);
    aiOpsEventSource.onmessage = function(event) {
        const data = JSON.parse(event.data);
        if (data.status === 'complete' || data.status === 'error') {
            // Terminal event: stop listening and show the summary.
            aiOpsEventSource.close();
            aiOpsEventSource = null;
            completeAiOpsModal(data);
        } else {
            updateAiOpsProgress(data);
        }
    };
    aiOpsEventSource.onerror = function(event) {
        console.error('SSE error:', event);
        aiOpsEventSource.close();
        aiOpsEventSource = null;
        completeAiOpsModal({ status: 'error', message: 'Błąd połączenia', details: {} });
    };
}
// ===========================================
// AI Knowledge Base Functions (with SSE)
// ===========================================
// Confirm with the user, then start the SSE-backed scraping run.
async function scrapeContent() {
    const body =
        'Czy chcesz rozpocząć scrapowanie treści artykułów?<br><br>' +
        '<small>Proces pobierze pełną treść z zatwierdzonych newsów które jeszcze nie mają treści.<br>' +
        'Postęp będzie wyświetlany na żywo.</small>';
    const opts = { icon: '📄', title: 'Scraping treści', okText: 'Rozpocznij', okClass: 'btn-primary' };
    const confirmed = await showConfirm(body, opts);
    if (confirmed) {
        startSSEOperation('/admin/zopk/news/scrape-content/stream', 'Scraping treści artykułów', '📄', 30);
    }
}
// Confirm with the user, then start the SSE-backed knowledge extraction run.
async function extractKnowledge() {
    const body =
        'Czy chcesz uruchomić ekstrakcję wiedzy przez AI?<br><br>' +
        '<small>Gemini AI przeanalizuje zescrapowane artykuły i wyekstrahuje:<br>' +
        '• Chunks (fragmenty tekstu)<br>' +
        '• Fakty (daty, liczby, decyzje)<br>' +
        '• Encje (firmy, osoby, projekty)<br><br>' +
        'Postęp będzie wyświetlany na żywo.</small>';
    const opts = { icon: '🤖', title: 'Ekstrakcja wiedzy', okText: 'Uruchom AI', okClass: 'btn-primary' };
    const confirmed = await showConfirm(body, opts);
    if (confirmed) {
        startSSEOperation('/admin/zopk/knowledge/extract/stream', 'Ekstrakcja wiedzy (Gemini AI)', '🤖', 10);
    }
}
// Confirm with the user, then start the SSE-backed embedding generation run.
async function generateEmbeddings() {
    const body =
        'Czy chcesz wygenerować embeddingi dla semantic search?<br><br>' +
        '<small>Google Text Embedding API przekształci tekst w wektory 768-wymiarowe.<br>' +
        'Embeddingi umożliwiają inteligentne wyszukiwanie w bazie wiedzy.<br><br>' +
        'Postęp będzie wyświetlany na żywo.</small>';
    const opts = { icon: '🔍', title: 'Generowanie embeddingów', okText: 'Generuj', okClass: 'btn-primary' };
    const confirmed = await showConfirm(body, opts);
    if (confirmed) {
        startSSEOperation('/admin/zopk/knowledge/embeddings/stream', 'Generowanie embeddingów', '🔍', 50);
    }
}
// Keep old code for backward compatibility (non-SSE version - can be removed later)
async function scrapeContentOld() {
const btn = document.getElementById('scrapeBtn');
const originalContent = btn.innerHTML;
btn.disabled = true;
btn.innerHTML = '<span class="spinner-small"></span>';
@ -2581,21 +3002,10 @@ async function scrapeContent() {
}
}
async function extractKnowledge() {
async function extractKnowledgeOld() {
const btn = document.getElementById('extractBtn');
const originalContent = btn.innerHTML;
const confirmed = await showConfirm(
'Czy chcesz uruchomić ekstrakcję wiedzy przez AI?<br><br>' +
'<small>Gemini AI przeanalizuje zescrapowane artykuły i wyekstrahuje:<br>' +
'• Chunks (fragmenty tekstu)<br>' +
'• Fakty (daty, liczby, decyzje)<br>' +
'• Encje (firmy, osoby, projekty)</small>',
{ icon: '🤖', title: 'Ekstrakcja wiedzy', okText: 'Uruchom AI', okClass: 'btn-primary' }
);
if (!confirmed) return;
btn.disabled = true;
btn.innerHTML = '<span class="spinner-small"></span>';
@ -2621,19 +3031,10 @@ async function extractKnowledge() {
}
}
async function generateEmbeddings() {
async function generateEmbeddingsOld() {
const btn = document.getElementById('embeddingsBtn');
const originalContent = btn.innerHTML;
const confirmed = await showConfirm(
'Czy chcesz wygenerować embeddingi dla semantic search?<br><br>' +
'<small>Google Text Embedding API przekształci tekst w wektory 768-wymiarowe.<br>' +
'Embeddingi umożliwiają inteligentne wyszukiwanie w bazie wiedzy.</small>',
{ icon: '🔍', title: 'Generowanie embeddingów', okText: 'Generuj', okClass: 'btn-primary' }
);
if (!confirmed) return;
btn.disabled = true;
btn.innerHTML = '<span class="spinner-small"></span>';

View File

@ -19,9 +19,9 @@ import logging
import hashlib
import base64
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Callable, Any
from urllib.parse import urlparse, parse_qs, unquote
from dataclasses import dataclass
from dataclasses import dataclass, field
import requests
from bs4 import BeautifulSoup, Comment, NavigableString
@ -312,6 +312,24 @@ class ScrapeResult:
status: str = 'pending' # scraped, failed, skipped
@dataclass
class ProgressUpdate:
    """Progress update for batch operations.

    Passed to a ProgressCallback during batch scraping, knowledge extraction
    and embedding generation; converted with ``dataclasses.asdict`` and sent
    to the admin UI as a JSON Server-Sent Event.
    """
    current: int    # 1-based index of the item being processed
    total: int      # total number of items in the batch
    percent: float  # completion percentage, 0.0-100.0
    stage: str  # 'scraping', 'extracting', 'embedding'
    status: str  # 'processing', 'success', 'failed', 'complete'
    message: str  # human-readable (Polish) progress message for the UI log
    details: Dict[str, Any] = field(default_factory=dict)  # per-stage counters/extras
    article_id: Optional[int] = None     # set for per-article events
    article_title: Optional[str] = None  # truncated title, for display only

# Type alias for progress callback: a callable taking one ProgressUpdate,
# or None when the caller does not want progress reporting.
ProgressCallback = Optional[Callable[[ProgressUpdate], None]]
# ============================================================
# SCRAPER CLASS
# ============================================================
@ -704,7 +722,8 @@ class ZOPKContentScraper:
self,
limit: int = 50,
status_filter: Optional[str] = None,
force: bool = False
force: bool = False,
progress_callback: ProgressCallback = None
) -> Dict:
"""
Batch scrape articles.
@ -713,6 +732,7 @@ class ZOPKContentScraper:
limit: Maximum number of articles to scrape
status_filter: Filter by approval status (approved, auto_approved)
force: If True, re-scrape even already scraped articles
progress_callback: Optional callback for progress updates
Returns:
Dict with statistics
@ -743,10 +763,11 @@ class ZOPKContentScraper:
# Limit
articles = query.limit(limit).all()
total = len(articles)
# Statistics
stats = {
'total': len(articles),
'total': total,
'scraped': 0,
'failed': 0,
'skipped': 0,
@ -755,9 +776,40 @@ class ZOPKContentScraper:
'processing_time': 0
}
# Send initial progress
if progress_callback and total > 0:
progress_callback(ProgressUpdate(
current=0,
total=total,
percent=0.0,
stage='scraping',
status='processing',
message=f'Rozpoczynam scraping {total} artykułów...',
details={'scraped': 0, 'failed': 0, 'skipped': 0}
))
start_time = time.time()
for article in articles:
for idx, article in enumerate(articles, 1):
# Send progress update before processing
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round((idx - 1) / total * 100, 1),
stage='scraping',
status='processing',
message=f'Pobieram treść: {article.title[:50]}...',
article_id=article.id,
article_title=article.title[:80],
details={
'scraped': stats['scraped'],
'failed': stats['failed'],
'skipped': stats['skipped'],
'source': article.source_name or 'nieznane'
}
))
result = self.scrape_article(article.id)
if result.status == 'scraped':
@ -768,8 +820,37 @@ class ZOPKContentScraper:
'word_count': result.word_count,
'source': article.source_name
})
# Send success progress
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='scraping',
status='success',
message=f'✓ Pobrano {result.word_count} słów: {article.title[:40]}...',
article_id=article.id,
article_title=article.title[:80],
details={
'scraped': stats['scraped'],
'failed': stats['failed'],
'skipped': stats['skipped'],
'word_count': result.word_count
}
))
elif result.status == 'skipped':
stats['skipped'] += 1
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='scraping',
status='skipped',
message=f'⊘ Pominięto: {article.title[:40]}...',
article_id=article.id,
details={'scraped': stats['scraped'], 'failed': stats['failed'], 'skipped': stats['skipped']}
))
else:
stats['failed'] += 1
stats['errors'].append({
@ -777,9 +858,43 @@ class ZOPKContentScraper:
'url': article.url,
'error': result.error
})
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='scraping',
status='failed',
message=f'✗ Błąd: {result.error[:50]}...' if result.error else '✗ Błąd',
article_id=article.id,
article_title=article.title[:80],
details={
'scraped': stats['scraped'],
'failed': stats['failed'],
'skipped': stats['skipped'],
'error': result.error
}
))
stats['processing_time'] = round(time.time() - start_time, 2)
# Send completion progress
if progress_callback:
progress_callback(ProgressUpdate(
current=total,
total=total,
percent=100.0,
stage='scraping',
status='complete',
message=f'Zakończono: {stats["scraped"]} pobrano, {stats["failed"]} błędów, {stats["skipped"]} pominięto',
details={
'scraped': stats['scraped'],
'failed': stats['failed'],
'skipped': stats['skipped'],
'processing_time': stats['processing_time']
}
))
logger.info(
f"Batch scrape complete: {stats['scraped']} scraped, "
f"{stats['failed']} failed, {stats['skipped']} skipped "

View File

@ -22,9 +22,12 @@ import json
import logging
import hashlib
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Any
from typing import Dict, List, Optional, Tuple, Any, Callable
from dataclasses import dataclass, field
# Import progress tracking from scraper
from zopk_content_scraper import ProgressUpdate, ProgressCallback
from database import (
ZOPKNews,
ZOPKKnowledgeChunk,
@@ -663,12 +666,13 @@ class ZOPKKnowledgeService:
processing_time=processing_time
)
def batch_extract(self, limit: int = 50) -> Dict:
def batch_extract(self, limit: int = 50, progress_callback: ProgressCallback = None) -> Dict:
"""
Batch extract knowledge from scraped articles.
Args:
limit: Maximum number of articles to process
progress_callback: Optional callback for progress updates
Returns:
Dict with statistics
@@ -686,8 +690,9 @@ class ZOPKKnowledgeService:
ZOPKNews.created_at.desc()
).limit(limit).all()
total = len(articles)
stats = {
'total': len(articles),
'total': total,
'success': 0,
'failed': 0,
'chunks_created': 0,
@@ -698,9 +703,41 @@ class ZOPKKnowledgeService:
'processing_time': 0
}
# Send initial progress
if progress_callback and total > 0:
progress_callback(ProgressUpdate(
current=0,
total=total,
percent=0.0,
stage='extracting',
status='processing',
message=f'Rozpoczynam ekstrakcję wiedzy z {total} artykułów...',
details={'success': 0, 'failed': 0, 'chunks': 0, 'facts': 0, 'entities': 0}
))
start_time = time.time()
for article in articles:
for idx, article in enumerate(articles, 1):
# Send progress update before processing
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round((idx - 1) / total * 100, 1),
stage='extracting',
status='processing',
message=f'Analizuję przez AI: {article.title[:50]}...',
article_id=article.id,
article_title=article.title[:80],
details={
'success': stats['success'],
'failed': stats['failed'],
'chunks': stats['chunks_created'],
'facts': stats['facts_created'],
'entities': stats['entities_created']
}
))
result = self.extract_from_news(article.id)
if result.success:
@@ -709,6 +746,28 @@ class ZOPKKnowledgeService:
stats['facts_created'] += result.facts_created
stats['entities_created'] += result.entities_created
stats['relations_created'] += result.relations_created
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='extracting',
status='success',
message=f'✓ Wyekstrahowano: {result.chunks_created} chunks, {result.facts_created} faktów, {result.entities_created} encji',
article_id=article.id,
article_title=article.title[:80],
details={
'success': stats['success'],
'failed': stats['failed'],
'chunks': stats['chunks_created'],
'facts': stats['facts_created'],
'entities': stats['entities_created'],
'new_chunks': result.chunks_created,
'new_facts': result.facts_created,
'new_entities': result.entities_created
}
))
else:
stats['failed'] += 1
if result.error:
@@ -718,8 +777,47 @@ class ZOPKKnowledgeService:
'error': result.error
})
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='extracting',
status='failed',
message=f'✗ Błąd ekstrakcji: {result.error[:50]}...' if result.error else '✗ Błąd',
article_id=article.id,
article_title=article.title[:80],
details={
'success': stats['success'],
'failed': stats['failed'],
'error': result.error
}
))
stats['processing_time'] = round(time.time() - start_time, 2)
# Send completion progress
if progress_callback:
progress_callback(ProgressUpdate(
current=total,
total=total,
percent=100.0,
stage='extracting',
status='complete',
message=f'Zakończono: {stats["success"]}/{total} artykułów. '
f'Utworzono: {stats["chunks_created"]} chunks, {stats["facts_created"]} faktów, '
f'{stats["entities_created"]} encji',
details={
'success': stats['success'],
'failed': stats['failed'],
'chunks': stats['chunks_created'],
'facts': stats['facts_created'],
'entities': stats['entities_created'],
'relations': stats['relations_created'],
'processing_time': stats['processing_time']
}
))
logger.info(
f"Batch extraction complete: {stats['success']}/{stats['total']} success "
f"in {stats['processing_time']}s"
@@ -950,7 +1048,12 @@ def get_relevant_facts(
return results[:limit]
def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[int] = None) -> Dict:
def generate_chunk_embeddings(
db_session,
limit: int = 100,
user_id: Optional[int] = None,
progress_callback: ProgressCallback = None
) -> Dict:
"""
Generate embeddings for chunks that don't have them.
@@ -958,11 +1061,13 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
db_session: SQLAlchemy session
limit: Max chunks to process
user_id: User ID for cost tracking
progress_callback: Optional callback for progress updates
Returns:
Dict with statistics
"""
import json
import time
from gemini_service import GeminiService
gemini = GeminiService()
@@ -972,13 +1077,52 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
ZOPKKnowledgeChunk.embedding.is_(None)
).limit(limit).all()
total = len(chunks)
stats = {
'total': len(chunks),
'total': total,
'success': 0,
'failed': 0
'failed': 0,
'processing_time': 0
}
for chunk in chunks:
# Send initial progress
if progress_callback and total > 0:
progress_callback(ProgressUpdate(
current=0,
total=total,
percent=0.0,
stage='embedding',
status='processing',
message=f'Rozpoczynam generowanie embeddingów dla {total} chunks...',
details={'success': 0, 'failed': 0}
))
start_time = time.time()
for idx, chunk in enumerate(chunks, 1):
# Send progress update before processing
if progress_callback:
# Get article title from chunk's source news
article_title = None
if chunk.source_news:
article_title = chunk.source_news.title[:80]
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round((idx - 1) / total * 100, 1),
stage='embedding',
status='processing',
message=f'Generuję embedding {idx}/{total}: {chunk.summary[:40] if chunk.summary else "chunk"}...',
article_id=chunk.source_news_id,
article_title=article_title,
details={
'success': stats['success'],
'failed': stats['failed'],
'chunk_id': chunk.id
}
))
try:
embedding = gemini.generate_embedding(
text=chunk.content,
@@ -992,14 +1136,70 @@ def generate_chunk_embeddings(db_session, limit: int = 100, user_id: Optional[in
# Store as JSON string
chunk.embedding = json.dumps(embedding)
stats['success'] += 1
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='embedding',
status='success',
message=f'✓ Wygenerowano embedding (768 dim)',
article_id=chunk.source_news_id,
details={
'success': stats['success'],
'failed': stats['failed'],
'chunk_id': chunk.id
}
))
else:
stats['failed'] += 1
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='embedding',
status='failed',
message='✗ Nie udało się wygenerować embeddingu',
article_id=chunk.source_news_id,
details={'success': stats['success'], 'failed': stats['failed']}
))
except Exception as e:
logger.error(f"Error generating embedding for chunk {chunk.id}: {e}")
stats['failed'] += 1
if progress_callback:
progress_callback(ProgressUpdate(
current=idx,
total=total,
percent=round(idx / total * 100, 1),
stage='embedding',
status='failed',
message=f'✗ Błąd: {str(e)[:50]}...',
article_id=chunk.source_news_id,
details={'success': stats['success'], 'failed': stats['failed'], 'error': str(e)}
))
db_session.commit()
stats['processing_time'] = round(time.time() - start_time, 2)
# Send completion progress
if progress_callback:
progress_callback(ProgressUpdate(
current=total,
total=total,
percent=100.0,
stage='embedding',
status='complete',
message=f'Zakończono: {stats["success"]}/{total} embeddingów wygenerowanych',
details={
'success': stats['success'],
'failed': stats['failed'],
'processing_time': stats['processing_time']
}
))
logger.info(f"Generated embeddings: {stats['success']}/{stats['total']} success")