nordabiz/database/migrations/076_website_discovery_candidates.sql
Maciej Pienczyn 126eff8af6
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat: add website discovery service for companies without websites
Automated discovery using Brave Search API to find company websites,
scrape verification data (NIP/REGON/KRS/email/phone), and present
candidates with match badges in the data quality dashboard.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 08:27:13 +01:00

39 lines
1.4 KiB
SQL

-- Migration 076: website_discovery_candidates
--
-- Holds website candidates returned by Brave Search for companies whose
-- website field is empty. Each row is one (company, candidate URL) pair and
-- keeps the raw search hit, the identifiers extracted from the candidate,
-- the per-field match flags and the overall score/status.
CREATE TABLE IF NOT EXISTS website_discovery_candidates (
    id                  SERIAL PRIMARY KEY,

    -- Owning company; candidates are deleted together with their company.
    company_id          INT NOT NULL
                            REFERENCES companies (id) ON DELETE CASCADE,

    -- NOTE(review): TIMESTAMP carries no time zone, so the stored default
    -- follows the session TimeZone setting -- confirm this is intended.
    discovered_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Search hit as returned by the search step.
    search_query        TEXT,
    candidate_url       VARCHAR(500) NOT NULL,
    candidate_domain    VARCHAR(255),
    brave_title         TEXT,
    brave_description   TEXT,

    -- Identifier values extracted for this candidate (multiple per kind).
    -- Presumably scraped from the candidate page -- confirm against the
    -- discovery service.
    extracted_nips      TEXT[],
    extracted_regons    TEXT[],
    extracted_krs       TEXT[],
    extracted_phones    TEXT[],
    extracted_emails    TEXT[],
    page_text_snippet   TEXT,

    -- Per-field match flags; every flag starts out FALSE.
    -- Presumably each one records whether the extracted value agrees with
    -- the company record -- verify against the writer.
    match_nip           BOOLEAN DEFAULT FALSE,
    match_regon         BOOLEAN DEFAULT FALSE,
    match_krs           BOOLEAN DEFAULT FALSE,
    match_phone         BOOLEAN DEFAULT FALSE,
    match_email         BOOLEAN DEFAULT FALSE,
    match_city          BOOLEAN DEFAULT FALSE,
    match_owner         BOOLEAN DEFAULT FALSE,

    -- Overall assessment; new rows start as 'low' confidence, score 0.
    confidence          VARCHAR(10) DEFAULT 'low',
    match_score         INT DEFAULT 0,

    -- Review state; new rows start as 'pending'.
    -- NOTE(review): the full set of allowed status/confidence values is not
    -- enforced here (no CHECK constraint) -- it lives in application code.
    status              VARCHAR(20) DEFAULT 'pending',
    reviewed_at         TIMESTAMP,
    error_message       TEXT,

    -- A given URL may be recorded at most once per company.
    UNIQUE (company_id, candidate_url)
);
-- Secondary indexes for lookups by review status and by owning company.
CREATE INDEX IF NOT EXISTS idx_wdc_status
    ON website_discovery_candidates USING btree (status);

-- NOTE(review): the UNIQUE (company_id, candidate_url) constraint is already
-- backed by an index whose leading column is company_id, so this index may be
-- redundant -- confirm with EXPLAIN before dropping it.
CREATE INDEX IF NOT EXISTS idx_wdc_company
    ON website_discovery_candidates USING btree (company_id);
-- Privileges for the nordabiz_app role: every privilege on the table itself...
GRANT ALL PRIVILEGES
    ON TABLE website_discovery_candidates
    TO nordabiz_app;

-- ...plus access to the sequence behind the SERIAL id column, which the role
-- needs in order to draw new id values on INSERT.
GRANT SELECT, USAGE
    ON SEQUENCE website_discovery_candidates_id_seq
    TO nordabiz_app;