Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Bulk discovery skips companies with any candidate (including rejected)
- Single discovery skips URLs from previously rejected domains
- Dashboard shows list of companies rejected by admin with note that they won't be re-searched in bulk mode

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
215 lines
6.7 KiB
Python
215 lines
6.7 KiB
Python
"""
|
|
Admin Website Discovery Routes
|
|
================================
|
|
|
|
Endpoints for discovering and managing website candidates for companies.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import threading
|
|
from datetime import datetime
|
|
|
|
from flask import request, jsonify
|
|
from flask_login import login_required
|
|
|
|
from . import bp
|
|
from database import SessionLocal, Company, WebsiteDiscoveryCandidate, SystemRole
|
|
from utils.decorators import role_required
|
|
from utils.data_quality import update_company_data_quality
|
|
from services.website_discovery_service import WebsiteDiscoveryService
|
|
|
|
# Module-level logger named after this module (``__name__``); the route
# handlers in this file report discovery/accept errors through it.
logger = logging.getLogger(__name__)
|
|
|
|
# File-based job state (shared across gunicorn workers)
|
|
# Directory holding one ``<job_id>.json`` state file per bulk-discovery job
# (written by _save_job, read by _load_job).
# NOTE(review): fixed path under /tmp — presumably all workers run on the same
# host and share this directory; confirm for the deployment.
JOB_DIR = '/tmp/nordabiz_discovery_jobs'
|
|
|
|
|
|
def _save_job(job_id, data):
    """Persist the state of a bulk-discovery job as JSON.

    The state is stored in ``<JOB_DIR>/<job_id>.json``; the directory is
    created on first use.

    Args:
        job_id: Identifier of the job; used as the file name stem.
        data: JSON-serialisable job state.
    """
    os.makedirs(JOB_DIR, exist_ok=True)
    path = os.path.join(JOB_DIR, f'{job_id}.json')

    # Write to a sibling temp file and rename it over the target. Opening the
    # target with 'w' truncates it first, so a status poll running in another
    # worker could read an empty/partial file and report the job as missing.
    # os.replace() swaps the complete file in atomically.
    tmp_path = f'{path}.{os.getpid()}.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(data, f)
    os.replace(tmp_path, path)
|
|
|
|
|
|
def _load_job(job_id):
|
|
path = os.path.join(JOB_DIR, f'{job_id}.json')
|
|
try:
|
|
with open(path) as f:
|
|
return json.load(f)
|
|
except (FileNotFoundError, json.JSONDecodeError):
|
|
return None
|
|
|
|
|
|
@bp.route('/discover-website/<int:company_id>', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def discover_website(company_id):
    """Run website discovery for one company and return the outcome as JSON.

    Responses:
        * 404 with ``error`` when no company has the given id.
        * ``{'success': False, 'error': ...}`` when the discovery service
          reports an error in its result.
        * ``{'success': True, ...result}`` otherwise.
        * 500 with ``success: False`` when an exception is raised.
    """
    session = SessionLocal()
    try:
        target = session.query(Company).get(company_id)
        if not target:
            return jsonify({'error': 'Firma nie znaleziona'}), 404

        outcome = WebsiteDiscoveryService(db=session).discover_for_company(target)

        failure = outcome.get('error')
        if failure:
            return jsonify({'success': False, 'error': failure})

        # Keys from the service result are merged after 'success', so they
        # take precedence on a name clash, as with dict unpacking.
        payload = {'success': True}
        payload.update(outcome)
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Discovery error: {exc}")
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
def _describe_discovery_result(company_name, result):
    """Build the one-line progress log entry for a single company."""
    status = result.get('status')
    if status == 'found':
        detail = f"znaleziono {result.get('url', '?')} ({result.get('confidence', '?')})"
    elif status == 'exists':
        detail = "kandydat już istnieje"
    else:
        # `or` also covers a result that carries an explicit ``'error': None``;
        # concatenating that value to a str would raise TypeError and abort
        # the whole job.
        detail = result.get('error') or 'brak wyników'
    return f"{company_name}: {detail}"


def _run_bulk_discovery(job_id, job):
    """Background worker: run discovery for up to 50 companies without a website.

    Progress is written to the job file after every company so the status
    endpoint can report it. Any exception marks the job as ``error``.

    Args:
        job_id: Identifier of the job whose state file is updated.
        job: Initial job state dict (``status``, ``processed``, ``total``, ``log``).
    """
    import time

    db = None
    try:
        # Created inside the try so that a failure to open a session is
        # recorded in the job file instead of leaving the job 'running' forever.
        db = SessionLocal()

        # Skip companies that already have any candidate (pending/accepted/rejected)
        already_have = {
            row[0]
            for row in db.query(WebsiteDiscoveryCandidate.company_id).filter(
                WebsiteDiscoveryCandidate.status.in_(['pending', 'accepted', 'rejected'])
            ).distinct().all()
        }

        filters = [
            Company.status.in_(['active', 'pending']),
            # `== None` is intentional: SQLAlchemy renders it as IS NULL.
            (Company.website == None) | (Company.website == ''),  # noqa: E711
        ]
        if already_have:
            filters.append(~Company.id.in_(already_have))

        companies = (
            db.query(Company)
            .filter(*filters)
            .order_by(Company.name)
            .limit(50)
            .all()
        )

        job['total'] = len(companies)
        _save_job(job_id, job)

        service = WebsiteDiscoveryService(db=db)
        for company in companies:
            result = service.discover_for_company(company)

            job['log'].append(_describe_discovery_result(company.name, result))
            job['processed'] += 1
            _save_job(job_id, job)

            # Pause between companies, but not after the last one.
            if job['processed'] < job['total']:
                time.sleep(5)

        job['status'] = 'completed'
        _save_job(job_id, job)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Bulk discovery error: %s", e)
        job['status'] = 'error'
        job['log'].append(f"Błąd: {e}")
        _save_job(job_id, job)
    finally:
        if db is not None:
            db.close()


@bp.route('/discover-websites-bulk', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def discover_websites_bulk():
    """Start bulk website discovery in a background thread.

    Returns:
        JSON ``{'success': True, 'job_id': <id>}``; the id is used to poll
        progress from the saved job state.
    """
    import uuid

    job_id = str(uuid.uuid4())[:8]
    job = {'status': 'running', 'processed': 0, 'total': 0, 'log': []}

    # Persist the initial state before responding: if it were only written by
    # the worker thread, a client polling right after receiving the job_id
    # could find no job file yet.
    _save_job(job_id, job)

    thread = threading.Thread(target=_run_bulk_discovery, args=(job_id, job), daemon=True)
    thread.start()

    return jsonify({'success': True, 'job_id': job_id})
|
|
|
|
|
|
@bp.route('/discover-websites-status')
@login_required
@role_required(SystemRole.ADMIN)
def discover_websites_status():
    """Poll bulk discovery progress.

    Query parameters:
        job_id: Identifier returned when the bulk job was started.
        log_offset: Number of log entries the client already has (default 0).

    Returns:
        JSON with ``status``, ``processed``, ``total``, the log entries from
        ``log_offset`` onwards and the new ``log_offset`` to send next time;
        404 when ``job_id`` is missing or no saved state is found for it.
    """
    job_id = request.args.get('job_id')
    if not job_id:
        return jsonify({'error': 'Job not found'}), 404

    job = _load_job(job_id)
    if not job:
        return jsonify({'error': 'Job not found'}), 404

    # Clamp to 0: a negative offset would be taken as a slice start counted
    # from the end of the list and re-send old entries as if they were new.
    log_offset = max(request.args.get('log_offset', 0, type=int), 0)
    new_entries = job['log'][log_offset:]

    return jsonify({
        'status': job['status'],
        'processed': job['processed'],
        'total': job['total'],
        'log_entries': new_entries,
        'log_offset': len(job['log']),
    })
|
|
|
|
|
|
@bp.route('/discovery/<int:candidate_id>/accept', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def accept_discovery(candidate_id):
    """Approve a discovery candidate and store its URL as the company website.

    Marks the candidate as ``accepted`` with the current time as review time,
    refreshes the company's data-quality information and commits.

    Responses:
        * 404 with ``error`` when the candidate or its company is not found.
        * ``{'success': True, 'url': <accepted url>}`` on success.
        * 500 with ``error`` (after rollback) when an exception is raised.
    """
    session = SessionLocal()
    try:
        chosen = session.query(WebsiteDiscoveryCandidate).get(candidate_id)
        if not chosen:
            return jsonify({'error': 'Kandydat nie znaleziony'}), 404

        owner = session.query(Company).get(chosen.company_id)
        if not owner:
            return jsonify({'error': 'Firma nie znaleziona'}), 404

        # Apply the accepted URL and record the review.
        owner.website = chosen.candidate_url
        chosen.status = 'accepted'
        chosen.reviewed_at = datetime.now()

        # Recompute data quality now that the website is set, then persist.
        update_company_data_quality(owner, session)
        session.commit()

        accepted_url = chosen.candidate_url
        logger.info(f"Accepted website {accepted_url} for company {owner.name}")
        return jsonify({'success': True, 'url': accepted_url})
    except Exception as exc:
        session.rollback()
        logger.error(f"Accept error: {exc}")
        return jsonify({'error': str(exc)}), 500
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/discovery/<int:candidate_id>/reject', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def reject_discovery(candidate_id):
    """Reject a discovery candidate.

    Marks the candidate as ``rejected`` with the current time as review time
    and commits.

    Responses:
        * 404 with ``error`` when the candidate is not found.
        * ``{'success': True}`` on success.
        * 500 with ``error`` (after rollback) when an exception is raised.
    """
    db = SessionLocal()
    try:
        candidate = db.query(WebsiteDiscoveryCandidate).get(candidate_id)
        if not candidate:
            return jsonify({'error': 'Kandydat nie znaleziony'}), 404

        candidate.status = 'rejected'
        candidate.reviewed_at = datetime.now()
        db.commit()
        return jsonify({'success': True})
    except Exception as e:
        db.rollback()
        # Log the failure (with traceback) like the accept handler does;
        # previously the error reached only the HTTP response.
        logger.exception("Reject error: %s", e)
        return jsonify({'error': str(e)}), 500
    finally:
        db.close()
|