425 lines
15 KiB
PL/PgSQL
425 lines
15 KiB
PL/PgSQL
-- Norda Biznes - PostgreSQL Database Schema
|
|
-- Version: 1.0
|
|
-- Created: 2025-11-23
|
|
-- Description: Complete database structure for company directory
|
|
|
|
-- Enable UUID extension
|
|
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
|
|
|
|
-- ============================================================
|
|
-- LOOKUP TABLES (Reference Data)
|
|
-- ============================================================
|
|
|
|
-- Categories for companies
|
|
CREATE TABLE categories (
|
|
id SERIAL PRIMARY KEY,
|
|
name VARCHAR(100) NOT NULL UNIQUE,
|
|
slug VARCHAR(100) NOT NULL UNIQUE,
|
|
description TEXT,
|
|
icon VARCHAR(50),
|
|
sort_order INTEGER DEFAULT 0,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Data quality levels
|
|
CREATE TYPE data_quality_level AS ENUM ('basic', 'partial', 'complete', 'verified');
|
|
|
|
-- Company status
|
|
CREATE TYPE company_status AS ENUM ('active', 'inactive', 'pending', 'archived');
|
|
|
|
-- ============================================================
|
|
-- MAIN COMPANIES TABLE
|
|
-- ============================================================
|
|
|
|
CREATE TABLE companies (
|
|
id SERIAL PRIMARY KEY,
|
|
|
|
-- Basic Info
|
|
name VARCHAR(255) NOT NULL,
|
|
legal_name VARCHAR(255),
|
|
slug VARCHAR(255) NOT NULL UNIQUE,
|
|
|
|
-- Category
|
|
category_id INTEGER REFERENCES categories(id),
|
|
|
|
-- Descriptions
|
|
description_short TEXT,
|
|
description_full TEXT,
|
|
|
|
-- Legal Identifiers
|
|
nip VARCHAR(10) UNIQUE,
|
|
regon VARCHAR(14),
|
|
krs VARCHAR(10),
|
|
|
|
-- Contact Info
|
|
website VARCHAR(500),
|
|
email VARCHAR(255),
|
|
phone VARCHAR(50),
|
|
|
|
-- Address (denormalized for simplicity)
|
|
address_street VARCHAR(255),
|
|
address_city VARCHAR(100),
|
|
address_postal VARCHAR(10),
|
|
address_full TEXT,
|
|
|
|
-- Business Data
|
|
year_established INTEGER,
|
|
employees_count INTEGER,
|
|
capital_amount DECIMAL(15, 2),
|
|
|
|
-- Status & Quality
|
|
status company_status DEFAULT 'active',
|
|
data_quality data_quality_level DEFAULT 'basic',
|
|
|
|
-- Metadata
|
|
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
researched_at TIMESTAMP,
|
|
verified_at TIMESTAMP,
|
|
|
|
-- Search optimization
|
|
search_vector tsvector,
|
|
|
|
-- Constraints
|
|
CONSTRAINT valid_nip CHECK (nip ~ '^\d{10}$' OR nip IS NULL),
|
|
CONSTRAINT valid_email CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' OR email IS NULL)
|
|
);
|
|
|
|
-- Indexes for performance
|
|
CREATE INDEX idx_companies_category ON companies(category_id);
|
|
CREATE INDEX idx_companies_nip ON companies(nip);
|
|
CREATE INDEX idx_companies_status ON companies(status);
|
|
CREATE INDEX idx_companies_quality ON companies(data_quality);
|
|
CREATE INDEX idx_companies_search ON companies USING gin(search_vector);
|
|
CREATE INDEX idx_companies_slug ON companies(slug);
|
|
|
|
-- Full-text search trigger
|
|
CREATE OR REPLACE FUNCTION companies_search_trigger() RETURNS trigger AS $$
|
|
BEGIN
|
|
NEW.search_vector :=
|
|
setweight(to_tsvector('polish', COALESCE(NEW.name, '')), 'A') ||
|
|
setweight(to_tsvector('polish', COALESCE(NEW.description_short, '')), 'B') ||
|
|
setweight(to_tsvector('polish', COALESCE(NEW.description_full, '')), 'C');
|
|
RETURN NEW;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
CREATE TRIGGER tsvector_update BEFORE INSERT OR UPDATE
|
|
ON companies FOR EACH ROW EXECUTE FUNCTION companies_search_trigger();
|
|
|
|
-- ============================================================
|
|
-- SERVICES (Many-to-Many)
|
|
-- ============================================================
|
|
|
|
CREATE TABLE services (
|
|
id SERIAL PRIMARY KEY,
|
|
name VARCHAR(255) NOT NULL UNIQUE,
|
|
slug VARCHAR(255) NOT NULL UNIQUE,
|
|
description TEXT,
|
|
icon VARCHAR(50),
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE company_services (
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
service_id INTEGER REFERENCES services(id) ON DELETE CASCADE,
|
|
is_primary BOOLEAN DEFAULT FALSE,
|
|
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (company_id, service_id)
|
|
);
|
|
|
|
CREATE INDEX idx_company_services_company ON company_services(company_id);
|
|
CREATE INDEX idx_company_services_service ON company_services(service_id);
|
|
|
|
-- ============================================================
|
|
-- COMPETENCIES (Many-to-Many)
|
|
-- ============================================================
|
|
|
|
CREATE TABLE competencies (
|
|
id SERIAL PRIMARY KEY,
|
|
name VARCHAR(255) NOT NULL UNIQUE,
|
|
slug VARCHAR(255) NOT NULL UNIQUE,
|
|
category VARCHAR(100), -- e.g., "Technology", "Certification", "Skill"
|
|
description TEXT,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE company_competencies (
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
competency_id INTEGER REFERENCES competencies(id) ON DELETE CASCADE,
|
|
level VARCHAR(50), -- e.g., "Expert", "Advanced", "Certified"
|
|
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (company_id, competency_id)
|
|
);
|
|
|
|
CREATE INDEX idx_company_competencies_company ON company_competencies(company_id);
|
|
CREATE INDEX idx_company_competencies_competency ON company_competencies(competency_id);
|
|
|
|
-- ============================================================
|
|
-- CERTIFICATIONS
|
|
-- ============================================================
|
|
|
|
CREATE TABLE certifications (
|
|
id SERIAL PRIMARY KEY,
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
name VARCHAR(255) NOT NULL,
|
|
issuer VARCHAR(255),
|
|
certificate_number VARCHAR(100),
|
|
issue_date DATE,
|
|
expiry_date DATE,
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
document_url VARCHAR(500),
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX idx_certifications_company ON certifications(company_id);
|
|
CREATE INDEX idx_certifications_active ON certifications(is_active);
|
|
|
|
-- ============================================================
|
|
-- AWARDS
|
|
-- ============================================================
|
|
|
|
CREATE TABLE awards (
|
|
id SERIAL PRIMARY KEY,
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
name VARCHAR(255) NOT NULL,
|
|
issuer VARCHAR(255),
|
|
year INTEGER,
|
|
description TEXT,
|
|
rank VARCHAR(50), -- e.g., "Gold", "Diamond", "1st Place"
|
|
url VARCHAR(500),
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX idx_awards_company ON awards(company_id);
|
|
CREATE INDEX idx_awards_year ON awards(year);
|
|
|
|
-- ============================================================
|
|
-- FINANCIAL DATA (Historical)
|
|
-- ============================================================
|
|
|
|
CREATE TABLE financial_data (
|
|
id SERIAL PRIMARY KEY,
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
year INTEGER NOT NULL,
|
|
revenue DECIMAL(15, 2),
|
|
profit DECIMAL(15, 2),
|
|
assets DECIMAL(15, 2),
|
|
liabilities DECIMAL(15, 2),
|
|
equity DECIMAL(15, 2),
|
|
employees INTEGER,
|
|
currency VARCHAR(3) DEFAULT 'PLN',
|
|
source VARCHAR(255),
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(company_id, year)
|
|
);
|
|
|
|
CREATE INDEX idx_financial_company ON financial_data(company_id);
|
|
CREATE INDEX idx_financial_year ON financial_data(year);
|
|
|
|
-- ============================================================
|
|
-- SOCIAL MEDIA
|
|
-- ============================================================
|
|
|
|
CREATE TABLE social_media (
|
|
id SERIAL PRIMARY KEY,
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
platform VARCHAR(50) NOT NULL, -- 'facebook', 'linkedin', 'instagram', 'twitter', 'youtube'
|
|
url VARCHAR(500) NOT NULL,
|
|
follower_count INTEGER,
|
|
is_verified BOOLEAN DEFAULT FALSE,
|
|
last_checked TIMESTAMP,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
UNIQUE(company_id, platform)
|
|
);
|
|
|
|
CREATE INDEX idx_social_company ON social_media(company_id);
|
|
CREATE INDEX idx_social_platform ON social_media(platform);
|
|
|
|
-- ============================================================
|
|
-- DATA SOURCES (Tracking)
|
|
-- ============================================================
|
|
|
|
CREATE TABLE data_sources (
|
|
id SERIAL PRIMARY KEY,
|
|
name VARCHAR(100) NOT NULL UNIQUE,
|
|
url VARCHAR(500),
|
|
type VARCHAR(50), -- 'website', 'registry', 'api', 'manual'
|
|
reliability_score INTEGER DEFAULT 5, -- 1-5 stars
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
description TEXT,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE company_data_sources (
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
source_id INTEGER REFERENCES data_sources(id) ON DELETE CASCADE,
|
|
collected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
fields_collected TEXT[], -- Array of field names collected from this source
|
|
PRIMARY KEY (company_id, source_id)
|
|
);
|
|
|
|
CREATE INDEX idx_company_sources_company ON company_data_sources(company_id);
|
|
CREATE INDEX idx_company_sources_source ON company_data_sources(source_id);
|
|
|
|
-- ============================================================
|
|
-- COLLECTION LOG (Agent Activity)
|
|
-- ============================================================
|
|
|
|
CREATE TABLE collection_log (
|
|
id SERIAL PRIMARY KEY,
|
|
company_id INTEGER REFERENCES companies(id) ON DELETE CASCADE,
|
|
action VARCHAR(50) NOT NULL, -- 'created', 'updated', 'verified', 'enriched'
|
|
status VARCHAR(50) NOT NULL, -- 'success', 'partial', 'failed'
|
|
source VARCHAR(100),
|
|
fields_updated TEXT[],
|
|
error_message TEXT,
|
|
duration_ms INTEGER,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE INDEX idx_collection_log_company ON collection_log(company_id);
|
|
CREATE INDEX idx_collection_log_created ON collection_log(created_at);
|
|
|
|
-- ============================================================
|
|
-- INITIAL REFERENCE DATA
|
|
-- ============================================================
|
|
|
|
-- Insert categories
|
|
INSERT INTO categories (name, slug, description, sort_order) VALUES
|
|
('IT', 'it', 'Technologie informacyjne, oprogramowanie, usługi IT', 1),
|
|
('Produkcja', 'produkcja', 'Firmy produkcyjne, wytwórcze', 2),
|
|
('Handel', 'handel', 'Firmy handlowe, dystrybucja', 3),
|
|
('Usługi', 'uslugi', 'Usługi dla biznesu i konsumentów', 4),
|
|
('Budownictwo', 'budownictwo', 'Firmy budowlane, deweloperzy', 5),
|
|
('Doradztwo', 'doradztwo', 'Doradztwo biznesowe, prawne, finansowe', 6),
|
|
('Media', 'media', 'Media, marketing, reklama', 7),
|
|
('Finanse', 'finanse', 'Usługi finansowe, bankowość', 8),
|
|
('Inne', 'inne', 'Pozostałe branże', 99);
|
|
|
|
-- Insert common data sources
|
|
INSERT INTO data_sources (name, url, type, reliability_score, description) VALUES
|
|
('ALEO.com', 'https://aleo.com', 'registry', 5, 'Oficjalny rejestr firm Polski - dane urzędowe'),
|
|
('Google Search', 'https://google.com', 'search', 4, 'Wyszukiwanie stron WWW firm'),
|
|
('Company Website', NULL, 'website', 5, 'Oficjalna strona internetowa firmy'),
|
|
('LinkedIn', 'https://linkedin.com', 'social', 3, 'Profile firmowe LinkedIn'),
|
|
('Facebook', 'https://facebook.com', 'social', 3, 'Strony firmowe Facebook'),
|
|
('norda-biznes.info', 'https://norda-biznes.info', 'website', 2, 'Portal Norda Biznes (tymczasowo nieaktywny)'),
|
|
('Manual Research', NULL, 'manual', 4, 'Ręcznie zebrane dane podczas researchu');
|
|
|
|
-- ============================================================
|
|
-- USEFUL VIEWS
|
|
-- ============================================================
|
|
|
|
-- Complete company view with all relations
|
|
CREATE VIEW v_companies_full AS
|
|
SELECT
|
|
c.*,
|
|
cat.name as category_name,
|
|
cat.slug as category_slug,
|
|
-- Count relations
|
|
(SELECT COUNT(*) FROM company_services WHERE company_id = c.id) as services_count,
|
|
(SELECT COUNT(*) FROM company_competencies WHERE company_id = c.id) as competencies_count,
|
|
(SELECT COUNT(*) FROM certifications WHERE company_id = c.id) as certifications_count,
|
|
(SELECT COUNT(*) FROM awards WHERE company_id = c.id) as awards_count,
|
|
-- Latest financial data
|
|
(SELECT revenue FROM financial_data WHERE company_id = c.id ORDER BY year DESC LIMIT 1) as latest_revenue,
|
|
(SELECT year FROM financial_data WHERE company_id = c.id ORDER BY year DESC LIMIT 1) as latest_revenue_year
|
|
FROM companies c
|
|
LEFT JOIN categories cat ON c.category_id = cat.id;
|
|
|
|
-- Data quality statistics
|
|
CREATE VIEW v_data_quality_stats AS
|
|
SELECT
|
|
status,
|
|
data_quality,
|
|
COUNT(*) as count,
|
|
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) as percentage
|
|
FROM companies
|
|
GROUP BY status, data_quality
|
|
ORDER BY status, data_quality;
|
|
|
|
-- Companies missing critical data
|
|
CREATE VIEW v_companies_incomplete AS
|
|
SELECT
|
|
id,
|
|
name,
|
|
CASE WHEN nip IS NULL THEN 'missing_nip' END as missing_nip,
|
|
CASE WHEN website IS NULL THEN 'missing_website' END as missing_website,
|
|
CASE WHEN email IS NULL THEN 'missing_email' END as missing_email,
|
|
CASE WHEN phone IS NULL THEN 'missing_phone' END as missing_phone,
|
|
data_quality,
|
|
last_updated
|
|
FROM companies
|
|
WHERE nip IS NULL OR website IS NULL OR email IS NULL OR phone IS NULL
|
|
ORDER BY data_quality, last_updated;
|
|
|
|
-- ============================================================
|
|
-- HELPER FUNCTIONS
|
|
-- ============================================================
|
|
|
|
-- Function to calculate data quality score
|
|
CREATE OR REPLACE FUNCTION calculate_data_quality(company_id_param INTEGER)
|
|
RETURNS data_quality_level AS $$
|
|
DECLARE
|
|
score INTEGER := 0;
|
|
c RECORD;
|
|
BEGIN
|
|
SELECT * INTO c FROM companies WHERE id = company_id_param;
|
|
|
|
-- Required fields (5 points each)
|
|
IF c.nip IS NOT NULL THEN score := score + 5; END IF;
|
|
IF c.website IS NOT NULL THEN score := score + 5; END IF;
|
|
IF c.email IS NOT NULL THEN score := score + 5; END IF;
|
|
IF c.phone IS NOT NULL THEN score := score + 5; END IF;
|
|
|
|
-- High priority fields (3 points each)
|
|
IF c.legal_name IS NOT NULL THEN score := score + 3; END IF;
|
|
IF c.regon IS NOT NULL THEN score := score + 3; END IF;
|
|
IF c.krs IS NOT NULL THEN score := score + 3; END IF;
|
|
IF c.address_full IS NOT NULL THEN score := score + 3; END IF;
|
|
IF c.description_full IS NOT NULL THEN score := score + 3; END IF;
|
|
|
|
-- Services and competencies
|
|
IF (SELECT COUNT(*) FROM company_services WHERE company_id = company_id_param) >= 3 THEN
|
|
score := score + 3;
|
|
END IF;
|
|
IF (SELECT COUNT(*) FROM company_competencies WHERE company_id = company_id_param) >= 3 THEN
|
|
score := score + 3;
|
|
END IF;
|
|
|
|
-- Return quality level based on score
|
|
IF score >= 35 THEN RETURN 'complete'::data_quality_level;
|
|
ELSIF score >= 20 THEN RETURN 'partial'::data_quality_level;
|
|
ELSIF score >= 10 THEN RETURN 'basic'::data_quality_level;
|
|
ELSE RETURN 'basic'::data_quality_level;
|
|
END IF;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- Function to update data quality for all companies
|
|
CREATE OR REPLACE FUNCTION update_all_data_quality()
|
|
RETURNS void AS $$
|
|
BEGIN
|
|
UPDATE companies
|
|
SET data_quality = calculate_data_quality(id);
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
-- ============================================================
|
|
-- GRANTS (adjust for your user)
|
|
-- ============================================================
|
|
|
|
-- Create application user
|
|
-- CREATE USER nordabiz_app WITH PASSWORD 'your_secure_password';
|
|
|
|
-- Grant permissions
|
|
-- GRANT CONNECT ON DATABASE nordabiz TO nordabiz_app;
|
|
-- GRANT USAGE ON SCHEMA public TO nordabiz_app;
|
|
-- GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO nordabiz_app;
|
|
-- GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO nordabiz_app;
|
|
|
|
-- ============================================================
|
|
-- END OF SCHEMA
|
|
-- ============================================================
|