fix: improve bulk discovery progress tracking with log-based polling
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

Replace single latest_result field with cumulative log array and
offset-based polling to prevent missed entries and race conditions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-21 08:33:14 +01:00
parent 126eff8af6
commit 51a0301e9b
2 changed files with 28 additions and 10 deletions

View File

@@ -58,7 +58,7 @@ def discover_websites_bulk():
job_id = str(uuid.uuid4())[:8]
def run_bulk(job_id):
_bulk_jobs[job_id] = {'status': 'running', 'processed': 0, 'total': 0, 'latest_result': ''}
_bulk_jobs[job_id] = {'status': 'running', 'processed': 0, 'total': 0, 'log': []}
db = SessionLocal()
try:
companies = db.query(Company).filter(
@@ -72,7 +72,6 @@ def discover_websites_bulk():
import time
for company in companies:
result = service.discover_for_company(company)
_bulk_jobs[job_id]['processed'] += 1
status_text = f"{company.name}: "
if result.get('status') == 'found':
@@ -81,7 +80,10 @@ def discover_websites_bulk():
status_text += "kandydat już istnieje"
else:
status_text += result.get('error', 'błąd')
_bulk_jobs[job_id]['latest_result'] = status_text
# Update processed AFTER building status text — atomic from poll perspective
_bulk_jobs[job_id]['log'].append(status_text)
_bulk_jobs[job_id]['processed'] += 1
if _bulk_jobs[job_id]['processed'] < _bulk_jobs[job_id]['total']:
time.sleep(2)
@@ -90,7 +92,7 @@ def discover_websites_bulk():
except Exception as e:
logger.error(f"Bulk discovery error: {e}")
_bulk_jobs[job_id]['status'] = 'error'
_bulk_jobs[job_id]['latest_result'] = str(e)
_bulk_jobs[job_id]['log'].append(f"Błąd: {e}")
finally:
db.close()
@@ -108,7 +110,18 @@ def discover_websites_status():
job_id = request.args.get('job_id')
if not job_id or job_id not in _bulk_jobs:
return jsonify({'error': 'Job not found'}), 404
return jsonify(_bulk_jobs[job_id])
job = _bulk_jobs[job_id]
log_offset = request.args.get('log_offset', 0, type=int)
new_entries = job['log'][log_offset:]
return jsonify({
'status': job['status'],
'processed': job['processed'],
'total': job['total'],
'log_entries': new_entries,
'log_offset': len(job['log']),
})
@bp.route('/discovery/<int:candidate_id>/accept', methods=['POST'])

View File

@@ -1220,24 +1220,29 @@
});
}
var _discLogOffset = 0;
function pollDiscoveryProgress(jobId) {
fetch('/admin/discover-websites-status?job_id=' + jobId)
fetch('/admin/discover-websites-status?job_id=' + jobId + '&log_offset=' + _discLogOffset)
.then(function(r) { return r.json(); })
.then(function(data) {
var total = data.total || 1;
var processed = data.processed || 0;
var pct = Math.round(processed / total * 100);
var pct = total > 0 ? Math.round(processed / total * 100) : 0;
document.getElementById('discProgressBar').style.width = pct + '%';
document.getElementById('discProgressText').textContent = processed + '/' + total;
if (data.latest_result) {
if (data.log_entries && data.log_entries.length > 0) {
var log = document.getElementById('discProgressLog');
log.innerHTML += '<div>' + data.latest_result + '</div>';
data.log_entries.forEach(function(entry) {
log.innerHTML += '<div>' + entry + '</div>';
});
log.scrollTop = log.scrollHeight;
_discLogOffset = data.log_offset;
}
if (data.status === 'running') {
setTimeout(function() { pollDiscoveryProgress(jobId); }, 3000);
setTimeout(function() { pollDiscoveryProgress(jobId); }, 2000);
} else {
document.getElementById('discProgressLog').innerHTML += '<div style="color: #22c55e; font-weight: 600;">Zakończono! Odśwież stronę aby zobaczyć wyniki.</div>';
document.getElementById('discCloseBtn').style.display = 'inline-block';