feat: remember rejected candidates, skip in future bulk discovery
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Bulk discovery skips companies with any candidate (including rejected)
- Single discovery skips URLs from previously rejected domains
- Dashboard shows a list of companies rejected by admin, with a note that they won't be re-searched in bulk mode

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d8a0485986
commit
601bd99559
@ -260,6 +260,29 @@ def admin_data_quality():
|
||||
),
|
||||
})
|
||||
|
||||
# Companies with rejected candidates (already reviewed)
|
||||
rejected_company_ids = set(
|
||||
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||
WebsiteDiscoveryCandidate.status == 'rejected'
|
||||
).distinct().all()
|
||||
)
|
||||
# Exclude companies that also have pending/accepted candidates
|
||||
active_candidate_ids = set(
|
||||
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted'])
|
||||
).distinct().all()
|
||||
)
|
||||
only_rejected_ids = rejected_company_ids - active_candidate_ids
|
||||
rejected_companies = []
|
||||
for cid in only_rejected_ids:
|
||||
comp = company_map.get(cid)
|
||||
if comp and not comp.website:
|
||||
rejected_companies.append({
|
||||
'company_name': comp.name,
|
||||
'company_id': cid,
|
||||
})
|
||||
rejected_companies.sort(key=lambda x: x['company_name'])
|
||||
|
||||
# Count companies without website
|
||||
companies_without_website = sum(1 for c in companies_table if not c['website'])
|
||||
|
||||
@ -273,6 +296,7 @@ def admin_data_quality():
|
||||
companies_table=companies_table,
|
||||
available_data=available_data,
|
||||
discovery_data=discovery_data,
|
||||
rejected_companies=rejected_companies,
|
||||
companies_without_website=companies_without_website,
|
||||
now=now,
|
||||
)
|
||||
|
||||
@ -80,10 +80,10 @@ def discover_websites_bulk():
|
||||
_save_job(job_id, job)
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# Skip companies that already have a pending/accepted candidate
|
||||
# Skip companies that already have any candidate (pending/accepted/rejected)
|
||||
already_have = set(
|
||||
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted'])
|
||||
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted', 'rejected'])
|
||||
).distinct().all()
|
||||
)
|
||||
|
||||
|
||||
@ -288,12 +288,19 @@ class WebsiteDiscoveryService:
|
||||
if domain.startswith('www.'):
|
||||
domain = domain[4:]
|
||||
|
||||
# Check for existing candidate with this URL
|
||||
# Check for existing candidate (exact URL or same domain rejected)
|
||||
existing = db.query(WebsiteDiscoveryCandidate).filter_by(
|
||||
company_id=company.id, candidate_url=url
|
||||
).first()
|
||||
if existing:
|
||||
continue
|
||||
rejected_domain = db.query(WebsiteDiscoveryCandidate).filter(
|
||||
WebsiteDiscoveryCandidate.company_id == company.id,
|
||||
WebsiteDiscoveryCandidate.candidate_domain == domain,
|
||||
WebsiteDiscoveryCandidate.status == 'rejected',
|
||||
).first()
|
||||
if rejected_domain:
|
||||
continue
|
||||
|
||||
# Fetch root + common subpages for verification data
|
||||
all_text = ''
|
||||
|
||||
@ -750,6 +750,20 @@
|
||||
{% else %}
|
||||
<p style="color: var(--text-secondary); font-size: var(--font-size-sm);">Brak kandydatów. Kliknij "Szukaj WWW" aby uruchomić wyszukiwanie.</p>
|
||||
{% endif %}
|
||||
|
||||
{% if rejected_companies %}
|
||||
<div style="margin-top: var(--spacing-lg); padding: var(--spacing-md); background: #fef2f2; border: 1px solid #fecaca; border-radius: var(--radius);">
|
||||
<span style="font-size: var(--font-size-sm); color: #991b1b; font-weight: 500;">
|
||||
Odrzucone przez admina ({{ rejected_companies|length }}):
|
||||
</span>
|
||||
<span style="font-size: var(--font-size-xs); color: #b91c1c;">
|
||||
{% for rc in rejected_companies %}{{ rc.company_name }}{% if not loop.last %}, {% endif %}{% endfor %}
|
||||
</span>
|
||||
<span style="font-size: var(--font-size-xs); color: #991b1b; display: block; margin-top: 4px;">
|
||||
Te firmy nie będą ponownie wyszukiwane w trybie zbiorczym.
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- Bulk Discovery Modal -->
|
||||
|
||||
Loading…
Reference in New Issue
Block a user