Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Automated discovery using Brave Search API to find company websites, scrape verification data (NIP/REGON/KRS/email/phone), and present candidates with match badges in the data quality dashboard. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
39 lines
1.4 KiB
SQL
39 lines
1.4 KiB
SQL
-- Website Discovery Candidates
-- Stores website candidates found via Brave Search for companies that are missing the website field.

-- Candidate websites discovered for a company, one row per
-- (company_id, candidate_url) pair. Each row carries the search result it came
-- from, the data scraped from the page, per-field match flags, and the review
-- state of the candidate.
CREATE TABLE IF NOT EXISTS website_discovery_candidates (
    id SERIAL PRIMARY KEY,

    -- Owning company; candidates are deleted together with the company row.
    company_id INTEGER NOT NULL REFERENCES companies(id) ON DELETE CASCADE,

    -- NOTE(review): TIMESTAMP is "without time zone" in PostgreSQL; confirm
    -- that all writers agree on the zone these instants are expressed in.
    discovered_at TIMESTAMP DEFAULT NOW(),

    -- Search provenance. The brave_* columns presumably hold the title and
    -- description returned by the search result -- verify against the writer.
    search_query TEXT,
    candidate_url VARCHAR(500) NOT NULL,
    candidate_domain VARCHAR(255),
    brave_title TEXT,
    brave_description TEXT,

    -- Verification data extracted from the candidate page, stored as text
    -- arrays because a page may yield several values of each kind.
    extracted_nips TEXT[],
    extracted_regons TEXT[],
    extracted_krs TEXT[],
    extracted_phones TEXT[],
    extracted_emails TEXT[],
    page_text_snippet TEXT,

    -- Per-field match flags, FALSE unless the writer sets them.
    -- NOTE(review): these columns are nullable, so an explicit NULL is still
    -- accepted; the matching rules themselves live in the application.
    match_nip BOOLEAN DEFAULT FALSE,
    match_regon BOOLEAN DEFAULT FALSE,
    match_krs BOOLEAN DEFAULT FALSE,
    match_phone BOOLEAN DEFAULT FALSE,
    match_email BOOLEAN DEFAULT FALSE,
    match_city BOOLEAN DEFAULT FALSE,
    match_owner BOOLEAN DEFAULT FALSE,

    -- Scoring and review state. Only the defaults ('low', 0, 'pending') are
    -- defined here; the other permitted values are not constrained by this
    -- table -- TODO confirm the value sets with the application code.
    confidence VARCHAR(10) DEFAULT 'low',
    match_score INTEGER DEFAULT 0,
    status VARCHAR(20) DEFAULT 'pending',
    reviewed_at TIMESTAMP,
    error_message TEXT,

    -- The same URL may be recorded only once per company.
    UNIQUE(company_id, candidate_url)
);

-- Secondary indexes for website_discovery_candidates. IF NOT EXISTS keeps the
-- script safe to re-run.

-- Supports filtering candidates by their status value.
CREATE INDEX IF NOT EXISTS idx_wdc_status ON website_discovery_candidates(status);

-- Supports fetching all candidates of one company.
-- NOTE(review): the table's UNIQUE(company_id, candidate_url) constraint is
-- already backed by an index whose leading column is company_id, so this index
-- may be redundant -- confirm with EXPLAIN before dropping it.
CREATE INDEX IF NOT EXISTS idx_wdc_company ON website_discovery_candidates(company_id);

-- Privileges for the nordabiz_app role.

-- Every table-level privilege on the candidates table.
GRANT ALL ON TABLE website_discovery_candidates TO nordabiz_app;

-- Access to the sequence behind the SERIAL id column, so the role can obtain
-- new id values when inserting rows.
GRANT USAGE, SELECT ON SEQUENCE website_discovery_candidates_id_seq TO nordabiz_app;