feat: remember rejected candidates, skip in future bulk discovery
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Bulk discovery skips companies that already have any candidate (pending, accepted, or rejected)
- Single discovery skips URLs whose domain was previously rejected for the same company
- Dashboard lists companies without a website whose only candidates were rejected by an admin, with a note that they won't be re-searched in bulk mode

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d8a0485986
commit
601bd99559
@ -260,6 +260,29 @@ def admin_data_quality():
|
|||||||
),
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Companies with rejected candidates (already reviewed)
|
||||||
|
rejected_company_ids = set(
|
||||||
|
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||||
|
WebsiteDiscoveryCandidate.status == 'rejected'
|
||||||
|
).distinct().all()
|
||||||
|
)
|
||||||
|
# Exclude companies that also have pending/accepted candidates
|
||||||
|
active_candidate_ids = set(
|
||||||
|
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||||
|
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted'])
|
||||||
|
).distinct().all()
|
||||||
|
)
|
||||||
|
only_rejected_ids = rejected_company_ids - active_candidate_ids
|
||||||
|
rejected_companies = []
|
||||||
|
for cid in only_rejected_ids:
|
||||||
|
comp = company_map.get(cid)
|
||||||
|
if comp and not comp.website:
|
||||||
|
rejected_companies.append({
|
||||||
|
'company_name': comp.name,
|
||||||
|
'company_id': cid,
|
||||||
|
})
|
||||||
|
rejected_companies.sort(key=lambda x: x['company_name'])
|
||||||
|
|
||||||
# Count companies without website
|
# Count companies without website
|
||||||
companies_without_website = sum(1 for c in companies_table if not c['website'])
|
companies_without_website = sum(1 for c in companies_table if not c['website'])
|
||||||
|
|
||||||
@ -273,6 +296,7 @@ def admin_data_quality():
|
|||||||
companies_table=companies_table,
|
companies_table=companies_table,
|
||||||
available_data=available_data,
|
available_data=available_data,
|
||||||
discovery_data=discovery_data,
|
discovery_data=discovery_data,
|
||||||
|
rejected_companies=rejected_companies,
|
||||||
companies_without_website=companies_without_website,
|
companies_without_website=companies_without_website,
|
||||||
now=now,
|
now=now,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -80,10 +80,10 @@ def discover_websites_bulk():
|
|||||||
_save_job(job_id, job)
|
_save_job(job_id, job)
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
# Skip companies that already have a pending/accepted candidate
|
# Skip companies that already have any candidate (pending/accepted/rejected)
|
||||||
already_have = set(
|
already_have = set(
|
||||||
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
r[0] for r in db.query(WebsiteDiscoveryCandidate.company_id).filter(
|
||||||
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted'])
|
WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted', 'rejected'])
|
||||||
).distinct().all()
|
).distinct().all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -288,12 +288,19 @@ class WebsiteDiscoveryService:
|
|||||||
if domain.startswith('www.'):
|
if domain.startswith('www.'):
|
||||||
domain = domain[4:]
|
domain = domain[4:]
|
||||||
|
|
||||||
# Check for existing candidate with this URL
|
# Check for existing candidate (exact URL or same domain rejected)
|
||||||
existing = db.query(WebsiteDiscoveryCandidate).filter_by(
|
existing = db.query(WebsiteDiscoveryCandidate).filter_by(
|
||||||
company_id=company.id, candidate_url=url
|
company_id=company.id, candidate_url=url
|
||||||
).first()
|
).first()
|
||||||
if existing:
|
if existing:
|
||||||
continue
|
continue
|
||||||
|
rejected_domain = db.query(WebsiteDiscoveryCandidate).filter(
|
||||||
|
WebsiteDiscoveryCandidate.company_id == company.id,
|
||||||
|
WebsiteDiscoveryCandidate.candidate_domain == domain,
|
||||||
|
WebsiteDiscoveryCandidate.status == 'rejected',
|
||||||
|
).first()
|
||||||
|
if rejected_domain:
|
||||||
|
continue
|
||||||
|
|
||||||
# Fetch root + common subpages for verification data
|
# Fetch root + common subpages for verification data
|
||||||
all_text = ''
|
all_text = ''
|
||||||
|
|||||||
@ -750,6 +750,20 @@
|
|||||||
{% else %}
|
{% else %}
|
||||||
<p style="color: var(--text-secondary); font-size: var(--font-size-sm);">Brak kandydatów. Kliknij "Szukaj WWW" aby uruchomić wyszukiwanie.</p>
|
<p style="color: var(--text-secondary); font-size: var(--font-size-sm);">Brak kandydatów. Kliknij "Szukaj WWW" aby uruchomić wyszukiwanie.</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if rejected_companies %}
|
||||||
|
<div style="margin-top: var(--spacing-lg); padding: var(--spacing-md); background: #fef2f2; border: 1px solid #fecaca; border-radius: var(--radius);">
|
||||||
|
<span style="font-size: var(--font-size-sm); color: #991b1b; font-weight: 500;">
|
||||||
|
Odrzucone przez admina ({{ rejected_companies|length }}):
|
||||||
|
</span>
|
||||||
|
<span style="font-size: var(--font-size-xs); color: #b91c1c;">
|
||||||
|
{% for rc in rejected_companies %}{{ rc.company_name }}{% if not loop.last %}, {% endif %}{% endfor %}
|
||||||
|
</span>
|
||||||
|
<span style="font-size: var(--font-size-xs); color: #991b1b; display: block; margin-top: 4px;">
|
||||||
|
Te firmy nie będą ponownie wyszukiwane w trybie zbiorczym.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Bulk Discovery Modal -->
|
<!-- Bulk Discovery Modal -->
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user