fix: skip companies with existing candidates in bulk discovery
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

The bulk query now excludes companies that already have a pending or accepted
candidate, so only companies without one are sent to the Brave API.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-21 08:40:18 +01:00
parent 70a0e1c557
commit 13cf5ebccf

View File

@@ -61,9 +61,17 @@ def discover_websites_bulk():
_bulk_jobs[job_id] = {'status': 'running', 'processed': 0, 'total': 0, 'log': []}
db = SessionLocal()
try:
# Skip companies that already have a pending/accepted candidate
already_have = set(
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted'])
).distinct().all()
)
companies = db.query(Company).filter(
Company.status.in_(['active', 'pending']),
(Company.website == None) | (Company.website == ''),
~Company.id.in_(already_have) if already_have else True,
).order_by(Company.name).limit(50).all()
_bulk_jobs[job_id]['total'] = len(companies)
@@ -79,7 +87,7 @@ def discover_websites_bulk():
elif result.get('status') == 'exists':
status_text += "kandydat już istnieje"
else:
-status_text += result.get('error', 'błąd')
+status_text += result.get('error', 'brak wyników')
# Update processed AFTER building status text — atomic from poll perspective
_bulk_jobs[job_id]['log'].append(status_text)