-- nordabiz/database/migrations/005_zopk_knowledge_base.sql
-- Maciej Pienczyn e399022223 feat: Add 1-5 star rating to ZOPK news AI evaluation
-- - Add ai_relevance_score column (1-5) to zopk_news table
-- - Update AI prompt to return score with detailed criteria:
--   * 1 star = very weak (loose connection to region/industry)
--   * 2 stars = weak (general industry news)
--   * 3 stars = medium (relates to ZOPK industry but not directly)
--   * 4 stars = strong (directly about ZOPK investments/companies)
--   * 5 stars = perfect (main topic is ZOPK, Kongsberg, offshore Baltic)
-- - Display star ratings in admin dashboard with color-coded badges
-- - Score >= 3 marks news as relevant, < 3 as not relevant

-- Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
-- 2026-01-11 07:34:36 +01:00

-- 465 lines
-- 17 KiB
-- SQL

-- ============================================================
-- NordaBiz - Migration 005: ZOPK Knowledge Base
-- ============================================================
-- Created: 2026-01-11
-- Description:
-- Zielony Okręg Przemysłowy Kaszubia (ZOPK) - Knowledge Base
-- Tables for projects, stakeholders, news, resources, and company links
-- ============================================================
-- ============================================================
-- 1. ZOPK PROJECTS (sub-initiatives)
-- ============================================================
-- One row per sub-initiative. The classification columns (project_type,
-- status, region) are free text with no CHECK constraint; the values listed
-- next to them are the expected vocabulary only.
CREATE TABLE IF NOT EXISTS zopk_projects (
    id                   SERIAL PRIMARY KEY,
    slug                 VARCHAR(100) NOT NULL UNIQUE,
    name                 VARCHAR(255) NOT NULL,
    description          TEXT,

    -- Classification and schedule
    project_type         VARCHAR(50),                    -- energy | infrastructure | technology | defense
    status               VARCHAR(50) DEFAULT 'planned',  -- planned | in_progress | completed
    start_date           DATE,
    end_date             DATE,

    -- Where the project takes place
    location             VARCHAR(255),
    region               VARCHAR(100),                   -- e.g. Wejherowo, Rumia, Gdynia

    -- Headline figures
    estimated_investment NUMERIC(15, 2),                 -- amount in PLN
    estimated_jobs       INTEGER,

    -- Presentation
    icon                 VARCHAR(50),                    -- CSS icon name or emoji
    color                VARCHAR(20),                    -- HEX colour code
    sort_order           INTEGER DEFAULT 0,
    is_featured          BOOLEAN DEFAULT FALSE,
    is_active            BOOLEAN DEFAULT TRUE,

    -- Row timestamps; both default to NOW() on insert
    created_at           TIMESTAMP DEFAULT NOW(),
    updated_at           TIMESTAMP DEFAULT NOW()
);

COMMENT ON TABLE zopk_projects IS 'Sub-projects within ZOPK initiative (offshore, nuclear, data centers, etc.)';

-- Lookup indexes.
-- NOTE(review): the UNIQUE constraint on slug already gets its own unique
-- index in PostgreSQL, so idx_zopk_projects_slug looks redundant. It is kept
-- here so the resulting schema is unchanged.
CREATE INDEX IF NOT EXISTS idx_zopk_projects_slug   ON zopk_projects (slug);
CREATE INDEX IF NOT EXISTS idx_zopk_projects_status ON zopk_projects (status);
CREATE INDEX IF NOT EXISTS idx_zopk_projects_type   ON zopk_projects (project_type);
-- ============================================================
-- 2. ZOPK STAKEHOLDERS (people and organizations)
-- ============================================================
-- Holds both individuals and organizations; stakeholder_type tells them apart.
-- Apart from the primary key the table has no unique constraint, so nothing
-- at the database level prevents two rows with the same name.
CREATE TABLE IF NOT EXISTS zopk_stakeholders (
    id               SERIAL PRIMARY KEY,
    stakeholder_type VARCHAR(20)  NOT NULL,  -- person | organization
    name             VARCHAR(255) NOT NULL,

    -- Role and affiliation
    role             VARCHAR(255),           -- e.g. Koordynator, Minister, Starosta
    organization     VARCHAR(255),           -- e.g. MON, Starostwo Wejherowskie

    -- Publicly available contact details
    email            VARCHAR(255),
    phone            VARCHAR(50),
    website          VARCHAR(500),

    -- Social media profiles
    linkedin_url     VARCHAR(500),
    twitter_url      VARCHAR(500),

    -- Portrait / logo and free-text biography
    photo_url        VARCHAR(500),
    bio              TEXT,

    -- Categorization
    category         VARCHAR(50),            -- government | local_authority | business | academic
    importance       INTEGER DEFAULT 0,      -- seed rows in this file use values 75-100
    is_active        BOOLEAN DEFAULT TRUE,

    -- Row timestamps; both default to NOW() on insert
    created_at       TIMESTAMP DEFAULT NOW(),
    updated_at       TIMESTAMP DEFAULT NOW()
);

COMMENT ON TABLE zopk_stakeholders IS 'Key people and organizations involved in ZOPK';

-- Indexes for filtering by type and by category
CREATE INDEX IF NOT EXISTS idx_zopk_stakeholders_type     ON zopk_stakeholders (stakeholder_type);
CREATE INDEX IF NOT EXISTS idx_zopk_stakeholders_category ON zopk_stakeholders (category);
-- ============================================================
-- 3. STAKEHOLDER-PROJECT LINKS
-- ============================================================
-- Many-to-many junction between stakeholders and projects. A given pair may
-- appear only once (uq_stakeholder_project), and link rows are removed
-- automatically when either side is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS zopk_stakeholder_projects (
    id              SERIAL PRIMARY KEY,
    stakeholder_id  INTEGER NOT NULL
                    REFERENCES zopk_stakeholders (id) ON DELETE CASCADE,
    project_id      INTEGER NOT NULL
                    REFERENCES zopk_projects (id) ON DELETE CASCADE,
    role_in_project VARCHAR(255),
    created_at      TIMESTAMP DEFAULT NOW(),

    CONSTRAINT uq_stakeholder_project UNIQUE (stakeholder_id, project_id)
);

COMMENT ON TABLE zopk_stakeholder_projects IS 'Link table: stakeholders to projects';

-- One index per foreign key, for lookups from either side of the link
CREATE INDEX IF NOT EXISTS idx_zopk_sp_stakeholder ON zopk_stakeholder_projects (stakeholder_id);
CREATE INDEX IF NOT EXISTS idx_zopk_sp_project     ON zopk_stakeholder_projects (project_id);
-- ============================================================
-- 4. ZOPK NEWS (with approval workflow)
-- ============================================================
-- Articles collected about ZOPK. New rows default to status 'pending' and are
-- expected to be moved to approved/rejected by a moderator (moderated_by,
-- moderated_at, rejection_reason record that decision).
-- Depends on the pre-existing users table and on zopk_projects.
CREATE TABLE IF NOT EXISTS zopk_news (
    id               SERIAL PRIMARY KEY,

    -- Source information
    title            VARCHAR(500)  NOT NULL,
    description      TEXT,
    url              VARCHAR(1000) NOT NULL,
    source_name      VARCHAR(200),                    -- e.g. trojmiasto.pl
    source_domain    VARCHAR(200),

    -- Article details
    published_at     TIMESTAMP,
    author           VARCHAR(255),
    image_url        VARCHAR(1000),

    -- Categorization (no ON DELETE action on project_id: default NO ACTION)
    news_type        VARCHAR(50),                     -- news | announcement | interview | press_release
    project_id       INTEGER REFERENCES zopk_projects (id),

    -- AI analysis
    relevance_score  NUMERIC(3, 2),                   -- intended range 0.00-1.00 (not enforced by a CHECK)
    sentiment        VARCHAR(20),                     -- positive | neutral | negative
    ai_summary       TEXT,
    keywords         TEXT[],                          -- array of extracted keywords

    -- Moderation workflow
    status           VARCHAR(20) DEFAULT 'pending',   -- pending | approved | rejected
    moderated_by     INTEGER REFERENCES users (id),
    moderated_at     TIMESTAMP,
    rejection_reason TEXT,

    -- Source tracking (fetch_job_id is plain text, not declared as a foreign key)
    source_type      VARCHAR(50) DEFAULT 'manual',    -- manual | brave_search | rss
    fetch_job_id     VARCHAR(100),

    -- Deduplication key; UNIQUE, but nullable
    url_hash         VARCHAR(64) UNIQUE,

    is_featured      BOOLEAN DEFAULT FALSE,
    views_count      INTEGER DEFAULT 0,

    -- Row timestamps; both default to NOW() on insert
    created_at       TIMESTAMP DEFAULT NOW(),
    updated_at       TIMESTAMP DEFAULT NOW()
);

COMMENT ON TABLE zopk_news IS 'News articles about ZOPK with approval workflow';

-- Filtering / ordering indexes.
-- NOTE(review): url_hash is already covered by the index backing its UNIQUE
-- constraint, so idx_zopk_news_url_hash looks redundant. It is kept here so
-- the resulting schema is unchanged.
CREATE INDEX IF NOT EXISTS idx_zopk_news_status    ON zopk_news (status);
CREATE INDEX IF NOT EXISTS idx_zopk_news_project   ON zopk_news (project_id);
CREATE INDEX IF NOT EXISTS idx_zopk_news_published ON zopk_news (published_at DESC);
CREATE INDEX IF NOT EXISTS idx_zopk_news_type      ON zopk_news (news_type);
CREATE INDEX IF NOT EXISTS idx_zopk_news_url_hash  ON zopk_news (url_hash);
-- ============================================================
-- 5. ZOPK RESOURCES (documents, links, media)
-- ============================================================
-- Knowledge-base items. A resource can point at an external url, at a stored
-- file (file_path, file_size, mime_type), or both; neither is mandatory at the
-- schema level. Unlike news, resources default to status 'approved'.
-- Depends on the pre-existing users table and on zopk_projects.
CREATE TABLE IF NOT EXISTS zopk_resources (
    id            SERIAL PRIMARY KEY,
    title         VARCHAR(255) NOT NULL,
    description   TEXT,

    -- Kind of resource
    resource_type VARCHAR(50) NOT NULL,            -- link | document | image | video | map

    -- Location: external URL and/or stored file
    url           VARCHAR(1000),
    file_path     VARCHAR(500),
    file_size     INTEGER,
    mime_type     VARCHAR(100),

    -- Preview image
    thumbnail_url VARCHAR(1000),

    -- Categorization
    category      VARCHAR(50),                     -- official | media | research | presentation
    project_id    INTEGER REFERENCES zopk_projects (id),
    tags          TEXT[],

    -- Origin of the material
    source_name   VARCHAR(255),
    source_date   DATE,

    -- Moderation
    status        VARCHAR(20) DEFAULT 'approved',
    uploaded_by   INTEGER REFERENCES users (id),

    -- Presentation
    is_featured   BOOLEAN DEFAULT FALSE,
    sort_order    INTEGER DEFAULT 0,

    -- Row timestamps; both default to NOW() on insert
    created_at    TIMESTAMP DEFAULT NOW(),
    updated_at    TIMESTAMP DEFAULT NOW()
);

COMMENT ON TABLE zopk_resources IS 'Resources: documents, links, images, videos for ZOPK knowledge base';

-- Indexes for the columns used as filters
CREATE INDEX IF NOT EXISTS idx_zopk_resources_type     ON zopk_resources (resource_type);
CREATE INDEX IF NOT EXISTS idx_zopk_resources_project  ON zopk_resources (project_id);
CREATE INDEX IF NOT EXISTS idx_zopk_resources_category ON zopk_resources (category);
CREATE INDEX IF NOT EXISTS idx_zopk_resources_status   ON zopk_resources (status);
-- ============================================================
-- 6. ZOPK COMPANY LINKS (Norda members)
-- ============================================================
-- Associates a member company with a ZOPK project. The same company/project
-- pair may appear several times as long as link_type differs
-- (uq_company_project_link). Rows disappear with their company or project
-- (ON DELETE CASCADE). Depends on the pre-existing companies and users tables.
CREATE TABLE IF NOT EXISTS zopk_company_links (
    id                        SERIAL PRIMARY KEY,
    company_id                INTEGER NOT NULL
                              REFERENCES companies (id) ON DELETE CASCADE,
    project_id                INTEGER NOT NULL
                              REFERENCES zopk_projects (id) ON DELETE CASCADE,

    -- Nature of the involvement
    link_type                 VARCHAR(50) NOT NULL,           -- potential_supplier | partner | investor | beneficiary
    collaboration_description TEXT,

    -- Scoring
    relevance_score           INTEGER,                        -- intended range 1-100 (not enforced by a CHECK)

    -- Lifecycle of the link
    status                    VARCHAR(20) DEFAULT 'suggested', -- suggested | confirmed | active | completed

    -- Administrative metadata
    admin_notes               TEXT,
    created_by                INTEGER REFERENCES users (id),
    created_at                TIMESTAMP DEFAULT NOW(),
    updated_at                TIMESTAMP DEFAULT NOW(),

    CONSTRAINT uq_company_project_link UNIQUE (company_id, project_id, link_type)
);

COMMENT ON TABLE zopk_company_links IS 'Links between ZOPK projects and Norda Biznes member companies';

-- Indexes for lookups by company, by project and by status
CREATE INDEX IF NOT EXISTS idx_zopk_cl_company ON zopk_company_links (company_id);
CREATE INDEX IF NOT EXISTS idx_zopk_cl_project ON zopk_company_links (project_id);
CREATE INDEX IF NOT EXISTS idx_zopk_cl_status  ON zopk_company_links (status);
-- ============================================================
-- 7. ZOPK NEWS FETCH JOBS (automation tracking)
-- ============================================================
-- One row per news-fetch run: what was searched, how many results came back,
-- how the run ended and what triggered it. job_id is the unique external
-- identifier of a run. Depends on the pre-existing users table.
CREATE TABLE IF NOT EXISTS zopk_news_fetch_jobs (
    id                SERIAL PRIMARY KEY,
    job_id            VARCHAR(100) NOT NULL UNIQUE,

    -- Search configuration
    search_query      VARCHAR(500),
    search_api        VARCHAR(50),                   -- brave | google | bing
    date_range_start  DATE,
    date_range_end    DATE,

    -- Result counters
    results_found     INTEGER DEFAULT 0,
    results_new       INTEGER DEFAULT 0,
    results_approved  INTEGER DEFAULT 0,

    -- Outcome
    status            VARCHAR(20) DEFAULT 'pending', -- pending | running | completed | failed
    error_message     TEXT,

    -- Timing
    started_at        TIMESTAMP,
    completed_at      TIMESTAMP,

    -- What started the run
    triggered_by      VARCHAR(50),                   -- cron | manual | admin
    triggered_by_user INTEGER REFERENCES users (id),

    created_at        TIMESTAMP DEFAULT NOW()
);

COMMENT ON TABLE zopk_news_fetch_jobs IS 'Tracking for automated ZOPK news fetch jobs';

-- Indexes for filtering by status and listing newest runs first
CREATE INDEX IF NOT EXISTS idx_zopk_fetch_status  ON zopk_news_fetch_jobs (status);
CREATE INDEX IF NOT EXISTS idx_zopk_fetch_created ON zopk_news_fetch_jobs (created_at DESC);
-- ============================================================
-- 8. INITIAL DATA - PROJECTS
-- ============================================================
-- Seeds the five starting projects. slug is UNIQUE, so ON CONFLICT (slug)
-- DO NOTHING skips rows that are already present and makes the step safe to
-- re-run; existing rows are never overwritten.
INSERT INTO zopk_projects (
    slug, name, description,
    project_type, status, region,
    icon, color, sort_order, is_featured
)
VALUES
    (
        'offshore-wind', 'Morska Energetyka Wiatrowa',
        'Farmy wiatrowe na Baltyku - kluczowy element transformacji energetycznej regionu',
        'energy', 'in_progress', 'Baltyk / Trojmiasto',
        'wind', '#0891b2', 1, TRUE
    ),
    (
        'nuclear-plant', 'Elektrownia Jadrowa',
        'Budowa pierwszej polskiej elektrowni jadrowej w lokalizacji Lubiatowo-Kopalino',
        'energy', 'planned', 'Choczewo',
        'atom', '#7c3aed', 2, TRUE
    ),
    (
        'data-centers', 'Centra Danych',
        'Nowoczesne centra przetwarzania danych wykorzystujace czysta energie',
        'technology', 'planned', 'Trojmiasto',
        'server', '#2563eb', 3, FALSE
    ),
    (
        'hydrogen-labs', 'Laboratoria Wodorowe',
        'Badania i rozwoj technologii wodorowych dla przemyslu',
        'technology', 'planned', 'Gdynia / Rumia',
        'flask', '#059669', 4, FALSE
    ),
    (
        'kongsberg', 'Inwestycja Kongsberg',
        'Zaklad produkcyjny norweskiego koncernu zbrojeniowego Kongsberg',
        'defense', 'in_progress', 'Rumia',
        'shield', '#dc2626', 5, TRUE
    )
ON CONFLICT (slug) DO NOTHING;
-- ============================================================
-- 9. INITIAL DATA - STAKEHOLDERS
-- ============================================================
-- Seeds the initial people and organizations.
--
-- zopk_stakeholders has no unique constraint besides its SERIAL primary key,
-- so ON CONFLICT DO NOTHING on its own never detects an existing seed row and
-- every re-run of this migration would insert the same seven rows again.
-- The WHERE NOT EXISTS guard makes the step idempotent by skipping any seed
-- whose (stakeholder_type, name) pair is already in the table.
-- ON CONFLICT DO NOTHING is kept as a backstop in case a unique constraint is
-- added to the table later.
INSERT INTO zopk_stakeholders (stakeholder_type, name, role, organization, category, importance, is_active)
SELECT
    seed.stakeholder_type,
    seed.name,
    seed.role,
    seed.organization,
    seed.category,
    seed.importance,
    seed.is_active
FROM (
    VALUES
        ('person', 'Maciej Samsonowicz', 'Koordynator projektu, Doradca MON', 'Ministerstwo Obrony Narodowej', 'government', 100, TRUE),
        ('person', 'Wladyslaw Kosiniak-Kamysz', 'Minister Obrony Narodowej', 'Ministerstwo Obrony Narodowej', 'government', 95, TRUE),
        ('person', 'Marcin Kaczmarek', 'Starosta Wejherowski', 'Starostwo Powiatowe w Wejherowie', 'local_authority', 80, TRUE),
        ('person', 'Michal Pasieczny', 'Burmistrz Rumi', 'Urzad Miasta Rumi', 'local_authority', 75, TRUE),
        ('organization', 'Ministerstwo Obrony Narodowej', 'Ministerstwo odpowiedzialne za obronnosc', NULL, 'government', 90, TRUE),
        ('organization', 'Kongsberg Defence & Aerospace', 'Norweski koncern zbrojeniowy', NULL, 'business', 85, TRUE),
        ('organization', 'Polskie Elektrownie Jadrowe', 'Spolka realizujaca program jadrowy', NULL, 'business', 85, TRUE)
) AS seed (stakeholder_type, name, role, organization, category, importance, is_active)
WHERE NOT EXISTS (
    SELECT 1
    FROM zopk_stakeholders AS existing
    WHERE existing.stakeholder_type = seed.stakeholder_type
      AND existing.name = seed.name
)
ON CONFLICT DO NOTHING;
-- ============================================================
-- 10. PERMISSIONS
-- ============================================================
-- Gives the nordabiz_app role full table privileges on every table created by
-- this migration. The role must already exist.
GRANT ALL ON TABLE
    zopk_projects,
    zopk_stakeholders,
    zopk_stakeholder_projects,
    zopk_news,
    zopk_resources,
    zopk_company_links,
    zopk_news_fetch_jobs
TO nordabiz_app;

-- The SERIAL id columns are backed by these sequences; USAGE and SELECT on
-- them are granted alongside the table privileges.
GRANT USAGE, SELECT ON SEQUENCE
    zopk_projects_id_seq,
    zopk_stakeholders_id_seq,
    zopk_stakeholder_projects_id_seq,
    zopk_news_id_seq,
    zopk_resources_id_seq,
    zopk_company_links_id_seq,
    zopk_news_fetch_jobs_id_seq
TO nordabiz_app;
-- ============================================================
-- 11. ALTER TABLE - Multi-source cross-verification columns
-- ============================================================
-- These columns support automatic cross-verification from multiple sources.
--
-- ADD COLUMN IF NOT EXISTS keeps the step re-runnable: a column that is
-- already present is skipped with a notice. It replaces a hand-written guard
-- that looked the column up in information_schema.columns by table_name only;
-- that lookup also matches a same-named table in any other schema and could
-- then wrongly skip adding the column here.
ALTER TABLE zopk_news
    -- Confidence score (1-5 based on source count)
    ADD COLUMN IF NOT EXISTS confidence_score INTEGER DEFAULT 1,
    -- Number of sources that found this story
    ADD COLUMN IF NOT EXISTS source_count INTEGER DEFAULT 1,
    -- List of sources (e.g., ['brave', 'google_news', 'rss_trojmiasto'])
    ADD COLUMN IF NOT EXISTS sources_list TEXT[],
    -- Title hash for fuzzy deduplication (normalized title)
    ADD COLUMN IF NOT EXISTS title_hash VARCHAR(64),
    -- Auto-verified flag (True if 3+ sources confirmed)
    ADD COLUMN IF NOT EXISTS is_auto_verified BOOLEAN DEFAULT FALSE;

-- Update status to include 'auto_approved' option
-- (no alter needed, just documentation that status can be: pending, approved, rejected, auto_approved)

-- Index for title_hash (fuzzy matching)
CREATE INDEX IF NOT EXISTS idx_zopk_news_title_hash ON zopk_news (title_hash);
-- Index for confidence score (filtering high-confidence news)
CREATE INDEX IF NOT EXISTS idx_zopk_news_confidence ON zopk_news (confidence_score);
-- ============================================================
-- 12. ALTER TABLE - AI Relevance Evaluation columns
-- ============================================================
-- These columns support AI-based relevance evaluation using Google Gemini.
--
-- ADD COLUMN IF NOT EXISTS keeps the step re-runnable: a column that is
-- already present is skipped with a notice (together with its inline CHECK).
-- It replaces a hand-written guard that looked the column up in
-- information_schema.columns by table_name only; that lookup also matches a
-- same-named table in any other schema and could then wrongly skip adding the
-- column here.
ALTER TABLE zopk_news
    -- AI relevance flag (True = relevant to ZOPK, False = not relevant, NULL = not evaluated)
    ADD COLUMN IF NOT EXISTS ai_relevant BOOLEAN,
    -- AI evaluation reason/explanation
    ADD COLUMN IF NOT EXISTS ai_evaluation_reason TEXT,
    -- When AI evaluation was performed
    ADD COLUMN IF NOT EXISTS ai_evaluated_at TIMESTAMP,
    -- Which AI model was used for evaluation
    ADD COLUMN IF NOT EXISTS ai_model VARCHAR(100),
    -- AI relevance score (1-5 stars: 1=weak, 5=perfect match); NULL is allowed
    -- and passes the CHECK, so unevaluated rows need no score
    ADD COLUMN IF NOT EXISTS ai_relevance_score INTEGER
        CHECK (ai_relevance_score >= 1 AND ai_relevance_score <= 5);

-- Indexes for AI relevance filtering
CREATE INDEX IF NOT EXISTS idx_zopk_news_ai_relevant ON zopk_news (ai_relevant);
CREATE INDEX IF NOT EXISTS idx_zopk_news_ai_score ON zopk_news (ai_relevance_score);
-- ============================================================
-- MIGRATION COMPLETE
-- ============================================================