-- Norda Biznes - PostgreSQL Database Schema -- Version: 1.0 -- Created: 2025-11-23 -- Description: Complete database structure for company directory -- Enable UUID extension CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; -- ============================================================ -- LOOKUP TABLES (Reference Data) -- ============================================================ -- Categories for companies CREATE TABLE categories ( id SERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL UNIQUE, slug VARCHAR(100) NOT NULL UNIQUE, description TEXT, icon VARCHAR(50), sort_order INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); -- Data quality levels CREATE TYPE data_quality_level AS ENUM ('basic', 'partial', 'complete', 'verified'); -- Company status CREATE TYPE company_status AS ENUM ('active', 'inactive', 'pending', 'archived'); -- ============================================================ -- MAIN COMPANIES TABLE -- ============================================================ CREATE TABLE companies ( id SERIAL PRIMARY KEY, -- Basic Info name VARCHAR(255) NOT NULL, legal_name VARCHAR(255), slug VARCHAR(255) NOT NULL UNIQUE, -- Category category_id INTEGER REFERENCES categories(id), -- Descriptions description_short TEXT, description_full TEXT, -- Legal Identifiers nip VARCHAR(10) UNIQUE, regon VARCHAR(14), krs VARCHAR(10), -- Contact Info website VARCHAR(500), email VARCHAR(255), phone VARCHAR(50), -- Address (denormalized for simplicity) address_street VARCHAR(255), address_city VARCHAR(100), address_postal VARCHAR(10), address_full TEXT, -- Business Data year_established INTEGER, employees_count INTEGER, capital_amount DECIMAL(15, 2), -- Status & Quality status company_status DEFAULT 'active', data_quality data_quality_level DEFAULT 'basic', -- Metadata last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, researched_at TIMESTAMP, verified_at TIMESTAMP, -- Search optimization search_vector tsvector, -- Constraints CONSTRAINT valid_nip CHECK (nip ~ '^\d{10}$' OR nip IS NULL), CONSTRAINT valid_email CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' OR email IS NULL) ); -- Indexes for performance CREATE INDEX idx_companies_category ON companies(category_id); CREATE INDEX idx_companies_nip ON companies(nip); CREATE INDEX idx_companies_status ON companies(status); CREATE INDEX idx_companies_quality ON companies(data_quality); CREATE INDEX idx_companies_search ON companies USING gin(search_vector); CREATE INDEX idx_companies_slug ON companies(slug); -- Full-text search trigger CREATE OR REPLACE FUNCTION companies_search_trigger() RETURNS trigger AS $$ BEGIN NEW.search_vector := setweight(to_tsvector('polish', COALESCE(NEW.name, '')), 'A') || setweight(to_tsvector('polish', COALESCE(NEW.description_short, '')), 'B') || setweight(to_tsvector('polish', COALESCE(NEW.description_full, '')), 'C'); RETURN NEW; END; $$ LANGUAGE plpgsql; CREATE TRIGGER tsvector_update BEFORE INSERT OR UPDATE ON companies FOR EACH ROW EXECUTE FUNCTION companies_search_trigger(); -- ============================================================ -- SERVICES (Many-to-Many) -- ============================================================ CREATE TABLE services ( id SERIAL PRIMARY KEY, name VARCHAR(255) NOT NULL UNIQUE, slug VARCHAR(255) NOT NULL UNIQUE, description TEXT, icon VARCHAR(50), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE company_services ( company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, service_id INTEGER REFERENCES services(id) ON DELETE CASCADE, is_primary BOOLEAN DEFAULT FALSE, added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (company_id, service_id) ); CREATE INDEX idx_company_services_company ON company_services(company_id); CREATE INDEX idx_company_services_service ON company_services(service_id); -- ============================================================ -- COMPETENCIES (Many-to-Many) -- ============================================================ CREATE TABLE competencies ( id SERIAL PRIMARY KEY, name VARCHAR(255) NOT NULL UNIQUE, slug VARCHAR(255) NOT NULL UNIQUE, category VARCHAR(100), -- e.g., "Technology", "Certification", "Skill" description TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE company_competencies ( company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, competency_id INTEGER REFERENCES competencies(id) ON DELETE CASCADE, level VARCHAR(50), -- e.g., "Expert", "Advanced", "Certified" added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (company_id, competency_id) ); CREATE INDEX idx_company_competencies_company ON company_competencies(company_id); CREATE INDEX idx_company_competencies_competency ON company_competencies(competency_id); -- ============================================================ -- CERTIFICATIONS -- ============================================================ CREATE TABLE certifications ( id SERIAL PRIMARY KEY, company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, issuer VARCHAR(255), certificate_number VARCHAR(100), issue_date DATE, expiry_date DATE, is_active BOOLEAN DEFAULT TRUE, document_url VARCHAR(500), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX idx_certifications_company ON certifications(company_id); CREATE INDEX idx_certifications_active ON certifications(is_active); -- ============================================================ -- AWARDS -- ============================================================ CREATE TABLE awards ( id SERIAL PRIMARY KEY, company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, name VARCHAR(255) NOT NULL, issuer VARCHAR(255), year INTEGER, description TEXT, rank VARCHAR(50), -- e.g., "Gold", "Diamond", "1st Place" url VARCHAR(500), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX idx_awards_company ON awards(company_id); CREATE INDEX idx_awards_year ON awards(year); -- ============================================================ -- FINANCIAL DATA (Historical) -- ============================================================ CREATE TABLE financial_data ( id SERIAL PRIMARY KEY, company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, year INTEGER NOT NULL, revenue DECIMAL(15, 2), profit DECIMAL(15, 2), assets DECIMAL(15, 2), liabilities DECIMAL(15, 2), equity DECIMAL(15, 2), employees INTEGER, currency VARCHAR(3) DEFAULT 'PLN', source VARCHAR(255), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(company_id, year) ); CREATE INDEX idx_financial_company ON financial_data(company_id); CREATE INDEX idx_financial_year ON financial_data(year); -- ============================================================ -- SOCIAL MEDIA -- ============================================================ CREATE TABLE social_media ( id SERIAL PRIMARY KEY, company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, platform VARCHAR(50) NOT NULL, -- 'facebook', 'linkedin', 'instagram', 'twitter', 'youtube' url VARCHAR(500) NOT NULL, follower_count INTEGER, is_verified BOOLEAN DEFAULT FALSE, last_checked TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, UNIQUE(company_id, platform) ); CREATE INDEX idx_social_company ON social_media(company_id); CREATE INDEX idx_social_platform ON social_media(platform); -- ============================================================ -- DATA SOURCES (Tracking) -- ============================================================ CREATE TABLE data_sources ( id SERIAL PRIMARY KEY, name VARCHAR(100) NOT NULL UNIQUE, url VARCHAR(500), type VARCHAR(50), -- 'website', 'registry', 'api', 'manual' reliability_score INTEGER DEFAULT 5, -- 1-5 stars is_active BOOLEAN DEFAULT TRUE, description TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE company_data_sources ( company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, source_id INTEGER REFERENCES data_sources(id) ON DELETE CASCADE, collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, fields_collected TEXT[], -- Array of field names collected from this source PRIMARY KEY (company_id, source_id) ); CREATE INDEX idx_company_sources_company ON company_data_sources(company_id); CREATE INDEX idx_company_sources_source ON company_data_sources(source_id); -- ============================================================ -- COLLECTION LOG (Agent Activity) -- ============================================================ CREATE TABLE collection_log ( id SERIAL PRIMARY KEY, company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE, action VARCHAR(50) NOT NULL, -- 'created', 'updated', 'verified', 'enriched' status VARCHAR(50) NOT NULL, -- 'success', 'partial', 'failed' source VARCHAR(100), fields_updated TEXT[], error_message TEXT, duration_ms INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX idx_collection_log_company ON collection_log(company_id); CREATE INDEX idx_collection_log_created ON collection_log(created_at); -- ============================================================ -- INITIAL REFERENCE DATA -- ============================================================ -- Insert categories INSERT INTO categories (name, slug, description, sort_order) VALUES ('IT', 'it', 'Technologie informacyjne, oprogramowanie, usługi IT', 1), ('Produkcja', 'produkcja', 'Firmy produkcyjne, wytwórcze', 2), ('Handel', 'handel', 'Firmy handlowe, dystrybucja', 3), ('Usługi', 'uslugi', 'Usługi dla biznesu i konsumentów', 4), ('Budownictwo', 'budownictwo', 'Firmy budowlane, deweloperzy', 5), ('Doradztwo', 'doradztwo', 'Doradztwo biznesowe, prawne, finansowe', 6), ('Media', 'media', 'Media, marketing, reklama', 7), ('Finanse', 'finanse', 'Usługi finansowe, bankowość', 8), ('Inne', 'inne', 'Pozostałe branże', 99); -- Insert common data sources INSERT INTO data_sources (name, url, type, reliability_score, description) VALUES ('ALEO.com', 'https://aleo.com', 'registry', 5, 'Oficjalny rejestr firm Polski - dane urzędowe'), ('Google Search', 'https://google.com', 'search', 4, 'Wyszukiwanie stron WWW firm'), ('Company Website', NULL, 'website', 5, 'Oficjalna strona internetowa firmy'), ('LinkedIn', 'https://linkedin.com', 'social', 3, 'Profile firmowe LinkedIn'), ('Facebook', 'https://facebook.com', 'social', 3, 'Strony firmowe Facebook'), ('norda-biznes.info', 'https://norda-biznes.info', 'website', 2, 'Portal Norda Biznes (tymczasowo nieaktywny)'), ('Manual Research', NULL, 'manual', 4, 'Ręcznie zebrane dane podczas researchu'); -- ============================================================ -- USEFUL VIEWS -- ============================================================ -- Complete company view with all relations CREATE VIEW v_companies_full AS SELECT c.*, cat.name as category_name, cat.slug as category_slug, -- Count relations (SELECT COUNT(*) FROM company_services WHERE company_id = c.id) as services_count, (SELECT COUNT(*) FROM company_competencies WHERE company_id = c.id) as competencies_count, (SELECT COUNT(*) FROM certifications WHERE company_id = c.id) as certifications_count, (SELECT COUNT(*) FROM awards WHERE company_id = c.id) as awards_count, -- Latest financial data (SELECT revenue FROM financial_data WHERE company_id = c.id ORDER BY year DESC LIMIT 1) as latest_revenue, (SELECT year FROM financial_data WHERE company_id = c.id ORDER BY year DESC LIMIT 1) as latest_revenue_year FROM companies c LEFT JOIN categories cat ON c.category_id = cat.id; -- Data quality statistics CREATE VIEW v_data_quality_stats AS SELECT status, data_quality, COUNT(*) as count, ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as percentage FROM companies GROUP BY status, data_quality ORDER BY status, data_quality; -- Companies missing critical data CREATE VIEW v_companies_incomplete AS SELECT id, name, CASE WHEN nip IS NULL THEN 'missing_nip' END as missing_nip, CASE WHEN website IS NULL THEN 'missing_website' END as missing_website, CASE WHEN email IS NULL THEN 'missing_email' END as missing_email, CASE WHEN phone IS NULL THEN 'missing_phone' END as missing_phone, data_quality, last_updated FROM companies WHERE nip IS NULL OR website IS NULL OR email IS NULL OR phone IS NULL ORDER BY data_quality, last_updated; -- ============================================================ -- HELPER FUNCTIONS -- ============================================================ -- Function to calculate data quality score CREATE OR REPLACE FUNCTION calculate_data_quality(company_id_param INTEGER) RETURNS data_quality_level AS $$ DECLARE score INTEGER := 0; c RECORD; BEGIN SELECT * INTO c FROM companies WHERE id = company_id_param; -- Required fields (5 points each) IF c.nip IS NOT NULL THEN score := score + 5; END IF; IF c.website IS NOT NULL THEN score := score + 5; END IF; IF c.email IS NOT NULL THEN score := score + 5; END IF; IF c.phone IS NOT NULL THEN score := score + 5; END IF; -- High priority fields (3 points each) IF c.legal_name IS NOT NULL THEN score := score + 3; END IF; IF c.regon IS NOT NULL THEN score := score + 3; END IF; IF c.krs IS NOT NULL THEN score := score + 3; END IF; IF c.address_full IS NOT NULL THEN score := score + 3; END IF; IF c.description_full IS NOT NULL THEN score := score + 3; END IF; -- Services and competencies IF (SELECT COUNT(*) FROM company_services WHERE company_id = company_id_param) >= 3 THEN score := score + 3; END IF; IF (SELECT COUNT(*) FROM company_competencies WHERE company_id = company_id_param) >= 3 THEN score := score + 3; END IF; -- Return quality level based on score IF score >= 35 THEN RETURN 'complete'::data_quality_level; ELSIF score >= 20 THEN RETURN 'partial'::data_quality_level; ELSIF score >= 10 THEN RETURN 'basic'::data_quality_level; ELSE RETURN 'basic'::data_quality_level; END IF; END; $$ LANGUAGE plpgsql; -- Function to update data quality for all companies CREATE OR REPLACE FUNCTION update_all_data_quality() RETURNS void AS $$ BEGIN UPDATE companies SET data_quality = calculate_data_quality(id); END; $$ LANGUAGE plpgsql; -- ============================================================ -- GRANTS (adjust for your user) -- ============================================================ -- Create application user -- CREATE USER nordabiz_app WITH PASSWORD 'your_secure_password'; -- Grant permissions -- GRANT CONNECT ON DATABASE nordabiz TO nordabiz_app; -- GRANT USAGE ON SCHEMA public TO nordabiz_app; -- GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO nordabiz_app; -- GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO nordabiz_app; -- ============================================================ -- END OF SCHEMA -- ============================================================