feat(zopk): Rozbudowa bazy wiedzy ZOPK

- Dodano skrypt cron do automatycznej ekstrakcji wiedzy (scripts/cron_extract_knowledge.py)
- Dodano panel deduplikacji faktów (/admin/zopk/knowledge/fact-duplicates)
- Dodano API i funkcje auto-weryfikacji encji i faktów
- Dodano panel Timeline ZOPK (/admin/zopk/timeline) z CRUD
- Rozszerzono dashboard bazy wiedzy o statystyki weryfikacji i przyciski auto-weryfikacji
- Dodano migrację 016_zopk_milestones.sql dla tabeli kamieni milowych
- Naprawiono duplikat modelu ZOPKMilestone w database.py

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-17 10:57:11 +01:00
parent 3b27dcaf0d
commit 96fa0058c2
8 changed files with 1225 additions and 67 deletions

265
app.py
View File

@ -12052,6 +12052,271 @@ def api_zopk_knowledge_graph_data():
db.close() db.close()
# ============================================================
# ZOPK KNOWLEDGE - FACT DUPLICATES
# ============================================================
@app.route('/admin/zopk/knowledge/fact-duplicates')
@login_required
def admin_zopk_fact_duplicates():
    """Render the fact deduplication admin panel.

    Users without the admin flag get an error flash message and are
    redirected to the ``dashboard`` endpoint instead.
    """
    if current_user.is_admin:
        return render_template('admin/zopk_fact_duplicates.html')

    # Logged in, but not an administrator.
    flash('Brak uprawnień.', 'error')
    return redirect(url_for('dashboard'))
@app.route('/api/zopk/knowledge/fact-duplicates')
@login_required
def api_zopk_fact_duplicates():
    """Return candidate duplicate fact pairs as JSON (admin only).

    Query parameters:
        min_similarity: float threshold passed to ``find_duplicate_facts``
            (default 0.7).
        fact_type: optional fact type filter; empty means "all types".
        limit: maximum number of pairs, clamped to the range 1..500
            (default 100).

    Responses:
        200 ``{'success': True, 'duplicates': [...], 'count': N}``
        400 when ``min_similarity`` or ``limit`` is not a valid number
        403 when the current user is not an admin
        500 when the lookup itself fails
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    # Parse user input before touching the database so that malformed
    # values are reported as a client error instead of a generic 500.
    try:
        min_sim = float(request.args.get('min_similarity', 0.7))
        limit = int(request.args.get('limit', 100))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Invalid min_similarity or limit'}), 400

    # Keep the limit within 1..500; zero or negative values make no sense
    # as a row limit and were previously passed straight to the service.
    limit = max(1, min(limit, 500))
    fact_type = request.args.get('fact_type') or None

    from zopk_knowledge_service import find_duplicate_facts
    db = SessionLocal()
    try:
        duplicates = find_duplicate_facts(db, min_sim, limit, fact_type)
        return jsonify({'success': True, 'duplicates': duplicates, 'count': len(duplicates)})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@app.route('/api/zopk/knowledge/fact-duplicates/merge', methods=['POST'])
@login_required
def api_zopk_fact_merge():
    """Merge one duplicate fact into a primary fact (admin only).

    Expects a JSON object with:
        primary_id: ID of the fact to keep.
        duplicate_id: ID of the fact to merge into the primary one.
        new_text: optional replacement text, forwarded to ``merge_facts``.

    Responses:
        200 with whatever ``merge_facts`` returns
        400 when the body is not a JSON object, an ID is missing, or both
            IDs are the same
        403 when the current user is not an admin
        500 when the merge itself fails
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    # silent=True returns None instead of raising on a missing or invalid
    # JSON body, so the problem can be reported as a 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'JSON object body required'}), 400

    primary_id = data.get('primary_id')
    duplicate_id = data.get('duplicate_id')
    if primary_id is None or duplicate_id is None:
        return jsonify({'success': False, 'error': 'primary_id and duplicate_id are required'}), 400
    if primary_id == duplicate_id:
        return jsonify({'success': False, 'error': 'Cannot merge a fact with itself'}), 400

    from zopk_knowledge_service import merge_facts
    db = SessionLocal()
    try:
        result = merge_facts(db, primary_id, duplicate_id, data.get('new_text'))
        return jsonify(result)
    except Exception as e:
        # Discard any partially applied changes, as the milestone write
        # endpoints in this file do.
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
# ============================================================
# ZOPK KNOWLEDGE - AUTO VERIFY
# ============================================================
@app.route('/api/zopk/knowledge/auto-verify/entities', methods=['POST'])
@login_required
def api_zopk_auto_verify_entities():
    """Auto-verify entities that have a high mention count (admin only).

    Optional JSON body keys: ``min_mentions`` (default 5) and ``limit``
    (default 100). Both are converted with ``int`` and forwarded to
    ``auto_verify_top_entities``, whose result is returned as JSON.
    Any failure is reported as a 500 with the exception text.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from zopk_knowledge_service import auto_verify_top_entities
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        return jsonify(
            auto_verify_top_entities(
                db,
                int(payload.get('min_mentions', 5)),
                int(payload.get('limit', 100)),
            )
        )
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        db.close()
@app.route('/api/zopk/knowledge/auto-verify/facts', methods=['POST'])
@login_required
def api_zopk_auto_verify_facts():
    """Auto-verify facts that have a high importance score (admin only).

    Optional JSON body keys: ``min_importance`` (float, default 0.7) and
    ``limit`` (int, default 200). Both are forwarded to
    ``auto_verify_top_facts``, whose result is returned as JSON.
    Any failure is reported as a 500 with the exception text.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from zopk_knowledge_service import auto_verify_top_facts
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        return jsonify(
            auto_verify_top_facts(
                db,
                float(payload.get('min_importance', 0.7)),
                int(payload.get('limit', 200)),
            )
        )
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        db.close()
# ============================================================
# ZOPK KNOWLEDGE - DASHBOARD
# ============================================================
@app.route('/api/zopk/knowledge/dashboard-stats')
@login_required
def api_zopk_dashboard_stats():
    """Return knowledge dashboard statistics as JSON (admin only).

    The mapping returned by ``get_knowledge_dashboard_stats`` is merged
    into the response next to ``'success': True``.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from zopk_knowledge_service import get_knowledge_dashboard_stats
    db = SessionLocal()
    try:
        dashboard = get_knowledge_dashboard_stats(db)
        body = {'success': True, **dashboard}
        return jsonify(body)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        db.close()
# ============================================================
# ZOPK MILESTONES / TIMELINE
# ============================================================
@app.route('/admin/zopk/timeline')
@login_required
def admin_zopk_timeline():
    """Render the ZOPK timeline admin panel.

    Users without the admin flag get an error flash message and are
    redirected to the ``dashboard`` endpoint instead.
    """
    if current_user.is_admin:
        return render_template('admin/zopk_timeline.html')

    # Logged in, but not an administrator.
    flash('Brak uprawnień.', 'error')
    return redirect(url_for('dashboard'))
@app.route('/api/zopk/milestones')
@login_required
def api_zopk_milestones():
    """Return all ZOPK milestones ordered by target date, as JSON.

    Requires a logged-in user; unlike the write endpoints there is no
    admin check here. Dates are serialized in ISO format, or ``None``
    when unset.
    """
    from database import ZOPKMilestone

    def _iso(day):
        # Date columns are optional; keep None as None in the payload.
        return day.isoformat() if day else None

    db = SessionLocal()
    try:
        rows = db.query(ZOPKMilestone).order_by(ZOPKMilestone.target_date).all()
        items = []
        for m in rows:
            items.append({
                'id': m.id,
                'title': m.title,
                'description': m.description,
                'category': m.category,
                'target_date': _iso(m.target_date),
                'actual_date': _iso(m.actual_date),
                'status': m.status,
                'source_url': m.source_url
            })
        return jsonify({'success': True, 'milestones': items})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        db.close()
@app.route('/api/zopk/milestones', methods=['POST'])
@login_required
def api_zopk_milestone_create():
    """Create a ZOPK milestone from a JSON body (admin only).

    Body keys:
        title (required, non-empty), description, category (default
        ``'other'``), target_date / actual_date (``YYYY-MM-DD`` or empty),
        status (default ``'planned'``), source_url, source_news_id.

    Responses:
        200 ``{'success': True, 'id': <new id>}``
        400 when the body is not a JSON object, the title is missing or
            empty, or a date does not match ``YYYY-MM-DD``
        403 when the current user is not an admin
        500 when the database write fails
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from database import ZOPKMilestone
    from datetime import datetime

    def _parse_date(value):
        # Empty / missing values mean "no date".
        return datetime.strptime(value, '%Y-%m-%d').date() if value else None

    # Validate the payload before opening a session so that bad input is
    # reported as a client error (400) rather than a server error (500).
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not data.get('title'):
        return jsonify({'success': False, 'error': 'title is required'}), 400
    try:
        target_date = _parse_date(data.get('target_date'))
        actual_date = _parse_date(data.get('actual_date'))
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Dates must use the YYYY-MM-DD format'}), 400

    db = SessionLocal()
    try:
        milestone = ZOPKMilestone(
            title=data['title'],
            description=data.get('description'),
            category=data.get('category', 'other'),
            target_date=target_date,
            actual_date=actual_date,
            status=data.get('status', 'planned'),
            source_url=data.get('source_url'),
            source_news_id=data.get('source_news_id')
        )
        db.add(milestone)
        db.commit()
        return jsonify({'success': True, 'id': milestone.id})
    except Exception as e:
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@app.route('/api/zopk/milestones/<int:milestone_id>', methods=['PUT'])
@login_required
def api_zopk_milestone_update(milestone_id):
    """Partially update a ZOPK milestone from a JSON body (admin only).

    Only keys present in the body are applied: title, description,
    category, status, source_url, target_date, actual_date. Dates use
    ``YYYY-MM-DD``; an empty value clears the date.

    Responses:
        200 ``{'success': True}``
        400 when the body is not a JSON object or a date is malformed
        403 when the current user is not an admin
        404 when the milestone does not exist
        500 when the database write fails
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from database import ZOPKMilestone
    from datetime import datetime

    def _parse_date(value):
        # Empty values clear the date column.
        return datetime.strptime(value, '%Y-%m-%d').date() if value else None

    db = SessionLocal()
    try:
        milestone = db.query(ZOPKMilestone).get(milestone_id)
        if not milestone:
            return jsonify({'error': 'Not found'}), 404

        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'JSON object body required'}), 400

        # Parse dates before mutating the object so a malformed date never
        # leaves the milestone half-updated in the session.
        try:
            parsed_dates = {
                field: _parse_date(data[field])
                for field in ('target_date', 'actual_date')
                if field in data
            }
        except (TypeError, ValueError):
            return jsonify({'success': False, 'error': 'Dates must use the YYYY-MM-DD format'}), 400

        for field in ('title', 'description', 'category', 'status', 'source_url'):
            if field in data:
                setattr(milestone, field, data[field])
        for field, value in parsed_dates.items():
            setattr(milestone, field, value)

        db.commit()
        return jsonify({'success': True})
    except Exception as e:
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@app.route('/api/zopk/milestones/<int:milestone_id>', methods=['DELETE'])
@login_required
def api_zopk_milestone_delete(milestone_id):
    """Delete a ZOPK milestone by ID (admin only).

    Returns 404 when no milestone has the given ID, 200 with
    ``{'success': True}`` after a successful delete, and 500 (after a
    rollback) when the database operation fails.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from database import ZOPKMilestone
    db = SessionLocal()
    try:
        target = db.query(ZOPKMilestone).get(milestone_id)
        if target:
            db.delete(target)
            db.commit()
            return jsonify({'success': True})
        return jsonify({'error': 'Not found'}), 404
    except Exception as exc:
        db.rollback()
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        db.close()
# ============================================================ # ============================================================
# KRS AUDIT (Krajowy Rejestr Sądowy) # KRS AUDIT (Krajowy Rejestr Sądowy)
# ============================================================ # ============================================================

View File

@ -1841,62 +1841,6 @@ class ZOPKStakeholderProject(Base):
) )
class ZOPKMilestone(Base):
    """
    Timeline milestones for ZOPK projects.
    Tracks key events: announcements, decisions, construction, completions.
    Used for public timeline visualization on /zopk page.

    Maps to the ``zopk_milestones`` table.
    """
    __tablename__ = 'zopk_milestones'
    id = Column(Integer, primary_key=True)
    # Basic info
    title = Column(String(255), nullable=False)
    description = Column(Text)
    # Categorization
    # Types: announcement, decision, construction_start, construction_progress,
    # completion, investment, agreement, regulation
    milestone_type = Column(String(50), nullable=False, default='announcement')
    # Project association (reference is set to NULL when the project row is deleted)
    project_id = Column(Integer, ForeignKey('zopk_projects.id', ondelete='SET NULL'))
    # Timeline
    target_date = Column(Date) # Planned/expected date
    actual_date = Column(Date) # Actual completion date (if completed)
    date_precision = Column(String(20), default='exact') # exact, month, quarter, year
    # Status: planned, in_progress, completed, delayed, cancelled
    status = Column(String(20), nullable=False, default='planned')
    # Source linking (both references are set to NULL when the source row is deleted)
    source_news_id = Column(Integer, ForeignKey('zopk_news.id', ondelete='SET NULL'))
    source_fact_id = Column(Integer, ForeignKey('zopk_knowledge_facts.id', ondelete='SET NULL'))
    source_url = Column(String(1000))
    # Display settings
    icon = Column(String(50), default='📌')
    color = Column(String(7), default='#059669')
    is_featured = Column(Boolean, default=False)
    display_order = Column(Integer, default=0)
    # Verification
    is_verified = Column(Boolean, default=False)
    verified_by = Column(Integer, ForeignKey('users.id'))
    verified_at = Column(DateTime)
    # Timestamps (updated_at is refreshed on every UPDATE via onupdate)
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)
    # Relationships
    project = relationship('ZOPKProject', backref='milestones')
    # Note: source_news relationship defined in ZOPKNews class via backref
    verifier = relationship('User', foreign_keys=[verified_by])
class ZOPKNews(Base): class ZOPKNews(Base):
""" """
News articles about ZOPK with approval workflow. News articles about ZOPK with approval workflow.
@ -3056,6 +3000,46 @@ class SecurityAlert(Base):
return f"<SecurityAlert {self.id} {self.alert_type} ({self.severity}) from {self.ip_address}>" return f"<SecurityAlert {self.id} {self.alert_type} ({self.severity}) from {self.ip_address}>"
# ============================================================
# ZOPK MILESTONES (Timeline)
# ============================================================
class ZOPKMilestone(Base):
    """
    ZOPK project milestones used for the timeline visualization.

    Maps to the ``zopk_milestones`` table. Column sizes and nullability
    follow the SQL migration for that table (``title VARCHAR(255) NOT NULL``,
    ``category VARCHAR(50) NOT NULL DEFAULT 'other'``).
    """
    __tablename__ = 'zopk_milestones'

    id = Column(Integer, primary_key=True)
    # Length matches the VARCHAR(255) declared in the migration; a larger
    # value here would suggest titles the database cannot actually store.
    title = Column(String(255), nullable=False)
    description = Column(Text)

    # Category: nuclear, offshore, infrastructure, defense, other
    category = Column(String(50), nullable=False, default='other')

    # Dates
    target_date = Column(Date)  # Planned date
    actual_date = Column(Date)  # Actual date (if completed)

    # Status: planned, in_progress, completed, delayed, cancelled
    status = Column(String(20), default='planned')

    # Source of the information
    source_url = Column(String(1000))
    source_news_id = Column(Integer, ForeignKey('zopk_news.id'))

    # Display
    icon = Column(String(50))  # emoji or icon name
    color = Column(String(20))  # color used on the timeline
    is_featured = Column(Boolean, default=False)

    # Timestamps (updated_at is refreshed on every UPDATE via onupdate)
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # Relationships
    # NOTE(review): this adds a ``milestones`` backref on ZOPKNews; confirm
    # ZOPKNews does not already declare a relationship with that name.
    source_news = relationship('ZOPKNews', backref='milestones')
# ============================================================ # ============================================================
# DATABASE INITIALIZATION # DATABASE INITIALIZATION
# ============================================================ # ============================================================

View File

@ -13,10 +13,9 @@ CREATE TABLE IF NOT EXISTS zopk_milestones (
title VARCHAR(255) NOT NULL, title VARCHAR(255) NOT NULL,
description TEXT, description TEXT,
-- Categorization -- Categorization (for UI grouping)
milestone_type VARCHAR(50) NOT NULL DEFAULT 'announcement', category VARCHAR(50) NOT NULL DEFAULT 'other',
-- Types: announcement, decision, construction_start, construction_progress, -- Categories: nuclear, offshore, infrastructure, defense, other
-- completion, investment, agreement, regulation
-- Project association (optional) -- Project association (optional)
project_id INTEGER REFERENCES zopk_projects(id) ON DELETE SET NULL, project_id INTEGER REFERENCES zopk_projects(id) ON DELETE SET NULL,
@ -55,7 +54,7 @@ CREATE TABLE IF NOT EXISTS zopk_milestones (
CREATE INDEX IF NOT EXISTS idx_milestones_project ON zopk_milestones(project_id); CREATE INDEX IF NOT EXISTS idx_milestones_project ON zopk_milestones(project_id);
CREATE INDEX IF NOT EXISTS idx_milestones_target_date ON zopk_milestones(target_date); CREATE INDEX IF NOT EXISTS idx_milestones_target_date ON zopk_milestones(target_date);
CREATE INDEX IF NOT EXISTS idx_milestones_status ON zopk_milestones(status); CREATE INDEX IF NOT EXISTS idx_milestones_status ON zopk_milestones(status);
CREATE INDEX IF NOT EXISTS idx_milestones_type ON zopk_milestones(milestone_type); CREATE INDEX IF NOT EXISTS idx_milestones_category ON zopk_milestones(category);
CREATE INDEX IF NOT EXISTS idx_milestones_featured ON zopk_milestones(is_featured) WHERE is_featured = TRUE; CREATE INDEX IF NOT EXISTS idx_milestones_featured ON zopk_milestones(is_featured) WHERE is_featured = TRUE;
-- Grant permissions -- Grant permissions
@ -63,12 +62,12 @@ GRANT ALL ON TABLE zopk_milestones TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE zopk_milestones_id_seq TO nordabiz_app; GRANT USAGE, SELECT ON SEQUENCE zopk_milestones_id_seq TO nordabiz_app;
-- Insert sample milestones from known ZOPK events (all verified for display) -- Insert sample milestones from known ZOPK events (all verified for display)
INSERT INTO zopk_milestones (title, description, milestone_type, target_date, status, icon, color, is_featured, is_verified, display_order) VALUES INSERT INTO zopk_milestones (title, description, category, target_date, status, icon, color, is_featured, is_verified, display_order) VALUES
('Podpisanie porozumienia MON-Kongsberg', 'Podpisanie porozumienia o współpracy między MON a Kongsberg Defence & Aerospace w zakresie budowy fabryki w Rumi', 'agreement', '2024-03-15', 'completed', '📝', '#059669', TRUE, TRUE, 10), ('Podpisanie porozumienia MON-Kongsberg', 'Podpisanie porozumienia o współpracy między MON a Kongsberg Defence & Aerospace w zakresie budowy fabryki w Rumi', 'defense', '2024-03-15', 'completed', '📝', '#059669', TRUE, TRUE, 10),
('Pozwolenie środowiskowe Baltic Power', 'Uzyskanie pozwolenia środowiskowego dla morskiej farmy wiatrowej Baltic Power', 'regulation', '2025-06-15', 'completed', '📋', '#10b981', FALSE, TRUE, 40), ('Pozwolenie środowiskowe Baltic Power', 'Uzyskanie pozwolenia środowiskowego dla morskiej farmy wiatrowej Baltic Power', 'offshore', '2025-06-15', 'completed', '📋', '#10b981', FALSE, TRUE, 40),
('Rozpoczęcie budowy fabryki Kongsberg w Rumi', 'Start prac budowlanych zakładu produkcji dronów morskich w Rumi Invest Park', 'construction_start', '2025-09-01', 'in_progress', '🏗️', '#f59e0b', TRUE, TRUE, 30), ('Rozpoczęcie budowy fabryki Kongsberg w Rumi', 'Start prac budowlanych zakładu produkcji dronów morskich w Rumi Invest Park', 'defense', '2025-09-01', 'in_progress', '🏗️', '#f59e0b', TRUE, TRUE, 30),
('Decyzja lokalizacyjna elektrowni jądrowej', 'Wydanie decyzji lokalizacyjnej dla elektrowni jądrowej w Lubiatowie-Kopalino', 'decision', '2026-03-01', 'planned', '⚖️', '#3b82f6', TRUE, TRUE, 20), ('Decyzja lokalizacyjna elektrowni jądrowej', 'Wydanie decyzji lokalizacyjnej dla elektrowni jądrowej w Lubiatowie-Kopalino', 'nuclear', '2026-03-01', 'planned', '⚖️', '#3b82f6', TRUE, TRUE, 20),
('Uruchomienie pierwszego bloku jądrowego', 'Planowane uruchomienie pierwszego bloku elektrowni jądrowej w Lubiatowie', 'completion', '2033-12-01', 'planned', '', '#8b5cf6', TRUE, TRUE, 100) ('Uruchomienie pierwszego bloku jądrowego', 'Planowane uruchomienie pierwszego bloku elektrowni jądrowej w Lubiatowie', 'nuclear', '2033-12-01', 'planned', '', '#8b5cf6', TRUE, TRUE, 100)
ON CONFLICT DO NOTHING; ON CONFLICT DO NOTHING;
-- Comment -- Comment

View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Cron job do automatycznej ekstrakcji wiedzy z nowych newsów ZOPK.
Uruchamiany co 2-4 godziny.
Użycie:
python3 scripts/cron_extract_knowledge.py [--limit N] [--dry-run]
"""
import sys
import os
import argparse
import logging
from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from database import SessionLocal, ZOPKNews, ZOPKKnowledgeChunk
from sqlalchemy import text
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def find_news_pending_extraction(db, limit: int = 20, min_content_length: int = 500):
    """Find approved news that have content but no knowledge chunks yet.

    Args:
        db: SQLAlchemy session used to run the query.
        limit: Maximum number of rows to return.
        min_content_length: Only news whose ``full_content`` is strictly
            longer than this many characters are returned (default 500,
            the previously hard-coded threshold).

    Returns:
        Rows with ``id``, ``title`` and ``content_len``, newest
        ``published_at`` first.
    """
    query = text('''
        SELECT n.id, n.title, LENGTH(n.full_content) as content_len
        FROM zopk_news n
        WHERE n.status IN ('approved', 'auto_approved')
          AND n.full_content IS NOT NULL
          AND LENGTH(n.full_content) > :min_len
          AND NOT EXISTS (
              SELECT 1 FROM zopk_knowledge_chunks c WHERE c.source_news_id = n.id
          )
        ORDER BY n.published_at DESC
        LIMIT :limit
    ''')
    params = {'limit': limit, 'min_len': min_content_length}
    return db.execute(query, params).fetchall()
def extract_knowledge_from_news(db, news_id: int) -> dict:
    """Run knowledge extraction for a single news item.

    Builds a ``ZOPKKnowledgeService`` bound to ``db`` and returns the
    result of its ``extract_from_news(news_id)`` call.

    NOTE(review): the return annotation says ``dict``, but ``main()`` in
    this script reads the result through attributes (``.success``,
    ``.error``) — confirm the actual return type.
    """
    # Imported lazily, as in the rest of this script's service usage.
    from zopk_knowledge_service import ZOPKKnowledgeService
    return ZOPKKnowledgeService(db_session=db).extract_from_news(news_id)
def main():
    """Command-line entry point: extract knowledge from pending ZOPK news.

    Options:
        --limit N   maximum number of news items to process (default 10)
        --dry-run   only list what would be processed

    Each pending news item is processed independently; failures are
    collected and the first five are summarized in the log at the end.
    """
    parser = argparse.ArgumentParser(description='Ekstrakcja wiedzy z newsów ZOPK')
    parser.add_argument('--limit', type=int, default=10, help='Limit newsów do przetworzenia')
    parser.add_argument('--dry-run', action='store_true', help='Tylko pokaż co by było przetworzone')
    args = parser.parse_args()

    db = SessionLocal()
    try:
        pending = find_news_pending_extraction(db, args.limit)
        logger.info("Znaleziono %s newsów do ekstrakcji", len(pending))

        if args.dry_run:
            for row in pending:
                logger.info(" [%s] %s... (%s znaków)", row.id, row.title[:60], row.content_len)
            return

        success = 0
        errors = []
        for row in pending:
            logger.info("Przetwarzam [%s] %s...", row.id, row.title[:50])
            try:
                result = extract_knowledge_from_news(db, row.id)
                if result.success:
                    logger.info(
                        " ✅ Chunks: %s, Encje: %s, Fakty: %s",
                        result.chunks_created, result.entities_created, result.facts_created,
                    )
                    success += 1
                else:
                    errors.append(f"[{row.id}] {result.error or 'Unknown error'}")
                    logger.warning("%s", result.error)
            except Exception as e:
                # The session is shared by all items. After a failed
                # flush/commit it must be rolled back, otherwise every
                # following item would fail on the broken transaction.
                db.rollback()
                errors.append(f"[{row.id}] {str(e)}")
                logger.error(" ❌ Exception: %s", e)

        logger.info("\n%s", '=' * 50)
        logger.info("Zakończono: %s/%s sukces", success, len(pending))
        if errors:
            logger.info("Błędy (%s):", len(errors))
            # Only the first few errors are listed to keep the cron log short.
            for err in errors[:5]:
                logger.info(" - %s", err)
    finally:
        db.close()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,217 @@
{% extends "base.html" %}
{% block title %}Deduplikacja Faktów - ZOPK{% endblock %}
{% block extra_css %}
<style>
.page-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: var(--spacing-lg); }
.page-header h1 { font-size: var(--font-size-2xl); color: var(--text-primary); }
.breadcrumb { display: flex; gap: var(--spacing-xs); color: var(--text-secondary); font-size: var(--font-size-sm); margin-bottom: var(--spacing-lg); }
.breadcrumb a { color: var(--primary); text-decoration: none; }
.controls { display: flex; gap: var(--spacing-md); margin-bottom: var(--spacing-lg); align-items: center; flex-wrap: wrap; }
.control-group { display: flex; align-items: center; gap: var(--spacing-xs); }
.control-group label { font-size: var(--font-size-sm); color: var(--text-secondary); }
.control-group input, .control-group select { padding: 6px 12px; border: 1px solid var(--border); border-radius: var(--radius); }
.duplicate-card { background: var(--surface); border-radius: var(--radius-lg); box-shadow: var(--shadow); margin-bottom: var(--spacing-md); overflow: hidden; }
.duplicate-header { background: linear-gradient(135deg, #f59e0b 0%, #d97706 100%); color: white; padding: var(--spacing-sm) var(--spacing-md); display: flex; justify-content: space-between; align-items: center; }
.similarity-badge { background: rgba(255,255,255,0.2); padding: 4px 12px; border-radius: 20px; font-weight: 600; }
.duplicate-content { display: grid; grid-template-columns: 1fr auto 1fr; gap: var(--spacing-md); padding: var(--spacing-md); }
.fact-box { background: var(--background); padding: var(--spacing-md); border-radius: var(--radius); }
.fact-box.primary { border-left: 4px solid var(--success); }
.fact-box.duplicate { border-left: 4px solid var(--error); }
.fact-text { font-size: var(--font-size-sm); line-height: 1.6; margin-bottom: var(--spacing-sm); }
.fact-meta { font-size: var(--font-size-xs); color: var(--text-secondary); }
.merge-arrow { display: flex; align-items: center; justify-content: center; font-size: 2rem; color: var(--primary); }
.duplicate-actions { padding: var(--spacing-sm) var(--spacing-md); background: var(--background); display: flex; justify-content: flex-end; gap: var(--spacing-sm); }
.btn { padding: 8px 16px; border-radius: var(--radius); font-weight: 500; cursor: pointer; border: none; transition: var(--transition); }
.btn-primary { background: var(--primary); color: white; }
.btn-primary:hover { background: var(--primary-dark); }
.btn-secondary { background: var(--surface); color: var(--text-primary); border: 1px solid var(--border); }
.stats-bar { display: flex; gap: var(--spacing-lg); margin-bottom: var(--spacing-lg); }
.stat-item { background: var(--surface); padding: var(--spacing-md); border-radius: var(--radius); text-align: center; }
.stat-value { font-size: var(--font-size-2xl); font-weight: 700; color: var(--primary); }
.stat-label { font-size: var(--font-size-sm); color: var(--text-secondary); }
.loading { text-align: center; padding: var(--spacing-xl); color: var(--text-secondary); }
.empty-state { text-align: center; padding: var(--spacing-xl); color: var(--text-secondary); }
</style>
{% endblock %}
{% block content %}
<div class="container">
<div class="breadcrumb">
<a href="{{ url_for('admin_zopk') }}">Panel ZOPK</a>
<span></span>
<a href="{{ url_for('admin_zopk_knowledge_dashboard') }}">Baza Wiedzy</a>
<span></span>
<span>Deduplikacja Faktów</span>
</div>
<div class="page-header">
<h1>🔀 Deduplikacja Faktów</h1>
</div>
<div class="controls">
<div class="control-group">
<label>Min. podobieństwo:</label>
<input type="range" id="minSimilarity" min="0.5" max="0.95" step="0.05" value="0.7" oninput="document.getElementById('simValue').textContent = this.value">
<span id="simValue">0.7</span>
</div>
<div class="control-group">
<label>Typ faktu:</label>
<select id="factType">
<option value="">Wszystkie</option>
<option value="investment">Inwestycja</option>
<option value="decision">Decyzja</option>
<option value="milestone">Kamień milowy</option>
<option value="statistic">Statystyka</option>
</select>
</div>
<button class="btn btn-primary" onclick="loadDuplicates()">🔍 Szukaj duplikatów</button>
<button class="btn btn-secondary" onclick="mergeAllHigh()">⚡ Połącz wszystkie >90%</button>
</div>
<div class="stats-bar">
<div class="stat-item">
<div class="stat-value" id="totalPairs">-</div>
<div class="stat-label">Par duplikatów</div>
</div>
<div class="stat-item">
<div class="stat-value" id="avgSimilarity">-</div>
<div class="stat-label">Śr. podobieństwo</div>
</div>
</div>
<div id="duplicatesList">
<div class="loading">Kliknij "Szukaj duplikatów" aby rozpocząć...</div>
</div>
</div>
{% endblock %}
{% block extra_js %}
let duplicatesData = [];
/**
 * Fetch duplicate fact pairs for the current filter settings and render them.
 *
 * Reads the similarity slider and fact-type select, calls the
 * fact-duplicates API, stores the result in `duplicatesData`, updates the
 * summary counters and re-renders the list. Any failure — network error,
 * invalid JSON, or an API response without `success` — is shown in the list
 * area instead of leaving the "loading" placeholder on screen.
 */
async function loadDuplicates() {
    const listEl = document.getElementById('duplicatesList');
    // URLSearchParams takes care of URL-encoding the values.
    const params = new URLSearchParams({
        min_similarity: document.getElementById('minSimilarity').value,
        fact_type: document.getElementById('factType').value,
        limit: '100'
    });

    listEl.innerHTML = '<div class="loading">Ładowanie...</div>';

    try {
        const response = await fetch(`/api/zopk/knowledge/fact-duplicates?${params}`);
        const data = await response.json();
        if (!data.success) {
            // Covers both {success: false, error} and {error: 'Forbidden'}.
            throw new Error(data.error || `HTTP ${response.status}`);
        }

        duplicatesData = data.duplicates;
        document.getElementById('totalPairs').textContent = data.count;

        const avgEl = document.getElementById('avgSimilarity');
        if (duplicatesData.length > 0) {
            const avgSim = duplicatesData.reduce((sum, d) => sum + d.similarity, 0) / duplicatesData.length;
            avgEl.textContent = (avgSim * 100).toFixed(0) + '%';
        } else {
            // Do not keep showing the average of a previous search.
            avgEl.textContent = '-';
        }
        renderDuplicates();
    } catch (error) {
        // The message may contain server-provided text, so escape it
        // before inserting it as HTML.
        listEl.innerHTML = '<div class="empty-state">Błąd ładowania: ' + escapeHtml(String(error)) + '</div>';
    }
}
/**
 * Render every pair in `duplicatesData` as a card inside #duplicatesList.
 *
 * Each card gets the id `dup-<index>` and its buttons carry the same index,
 * so the indices are only valid until `duplicatesData` is modified and the
 * list is rendered again. All text that originates from fact data is
 * HTML-escaped before being inserted.
 */
function renderDuplicates() {
    const listEl = document.getElementById('duplicatesList');
    if (duplicatesData.length === 0) {
        listEl.innerHTML = '<div class="empty-state">Brak duplikatów do pokazania</div>';
        return;
    }

    // Markup for one side of a pair; `role` is "primary" or "duplicate".
    const factBox = (fact, role) => `
        <div class="fact-box ${role}">
            <div class="fact-text">${escapeHtml(fact.text)}</div>
            <div class="fact-meta">
                ID: ${fact.id} | Ważność: ${(fact.importance_score * 100).toFixed(0)}%
                ${fact.is_verified ? ' | ✅ Zweryfikowany' : ''}
            </div>
        </div>`;

    const html = duplicatesData.map((d, idx) => `
        <div class="duplicate-card" id="dup-${idx}">
            <div class="duplicate-header">
                <span>${escapeHtml(d.fact1.fact_type || 'fakt')}</span>
                <span class="similarity-badge">${(d.similarity * 100).toFixed(0)}% podobieństwa</span>
            </div>
            <div class="duplicate-content">
                ${factBox(d.fact1, 'primary')}
                <div class="merge-arrow"></div>
                ${factBox(d.fact2, 'duplicate')}
            </div>
            <div class="duplicate-actions">
                <button class="btn btn-secondary" onclick="skipDuplicate(${idx})">⏭️ Pomiń</button>
                <button class="btn btn-primary" onclick="mergeFacts(${d.fact1.id}, ${d.fact2.id}, ${idx})">🔀 Połącz</button>
            </div>
        </div>
    `).join('');
    listEl.innerHTML = html;
}
/**
 * Escape a value for safe insertion into HTML markup.
 *
 * Lets the browser do the escaping: the value (or an empty string when it
 * is falsy) is assigned as plain text to a detached <div>, and the
 * serialized markup of that element is returned.
 */
function escapeHtml(text) {
    const holder = document.createElement('div');
    const raw = text || '';
    holder.textContent = raw;
    return holder.innerHTML;
}
/**
 * Merge one duplicate fact into its primary fact via the API and drop the
 * pair from the list.
 *
 * @param {number} primaryId   ID of the fact to keep.
 * @param {number} duplicateId ID of the fact to merge into the primary one.
 * @param {number} idx         Index of the pair at render time (fallback only).
 *
 * Card ids and button handlers are built from array indices at render time.
 * Splicing the array shifts every later index, so the pair is looked up by
 * its fact IDs and the list is re-rendered afterwards to keep the DOM and
 * `duplicatesData` in sync.
 */
async function mergeFacts(primaryId, duplicateId, idx) {
    try {
        const response = await fetch('/api/zopk/knowledge/fact-duplicates/merge', {
            method: 'POST',
            headers: {'Content-Type': 'application/json', 'X-CSRFToken': '{{ csrf_token() }}'},
            body: JSON.stringify({primary_id: primaryId, duplicate_id: duplicateId})
        });
        const data = await response.json();
        if (!data.success) {
            alert('Błąd: ' + data.error);
            return;
        }

        // Prefer matching by IDs; the render-time index may be stale.
        let pos = duplicatesData.findIndex(
            d => d.fact1.id === primaryId && d.fact2.id === duplicateId
        );
        if (pos === -1) pos = idx;
        if (pos >= 0 && pos < duplicatesData.length) {
            duplicatesData.splice(pos, 1);
        }
        document.getElementById('totalPairs').textContent = duplicatesData.length;
        renderDuplicates();
    } catch (error) {
        alert('Błąd: ' + error);
    }
}
/**
 * Dismiss a duplicate pair without merging it.
 *
 * @param {number} idx Index of the pair in `duplicatesData` as rendered.
 *
 * After removing the entry the list is re-rendered: card ids and button
 * handlers are index-based, so leaving the old markup in place would make
 * every later card point at the wrong array entry.
 */
function skipDuplicate(idx) {
    if (idx < 0 || idx >= duplicatesData.length) return;
    duplicatesData.splice(idx, 1);
    document.getElementById('totalPairs').textContent = duplicatesData.length;
    renderDuplicates();
}
/**
 * Merge every currently listed pair whose similarity is at least 90%.
 *
 * Asks for confirmation, then sends the merge requests one after another.
 * Failed merges no longer disappear silently: each failure is written to
 * the console with the affected fact IDs. The summary alert reports how
 * many pairs were merged, and the list is reloaded afterwards.
 */
async function mergeAllHigh() {
    const highSim = duplicatesData.filter(d => d.similarity >= 0.9);
    if (highSim.length === 0) {
        alert('Brak duplikatów z podobieństwem >= 90%');
        return;
    }
    if (!confirm(`Połączyć ${highSim.length} par z podobieństwem >= 90%?`)) return;

    let merged = 0;
    for (const d of highSim) {
        try {
            const response = await fetch('/api/zopk/knowledge/fact-duplicates/merge', {
                method: 'POST',
                headers: {'Content-Type': 'application/json', 'X-CSRFToken': '{{ csrf_token() }}'},
                body: JSON.stringify({primary_id: d.fact1.id, duplicate_id: d.fact2.id})
            });
            const data = await response.json();
            if (data.success) {
                merged++;
            } else {
                console.error('Merge failed for facts', d.fact1.id, d.fact2.id, data.error);
            }
        } catch (e) {
            // Keep going with the remaining pairs, but leave a trace.
            console.error('Merge request failed for facts', d.fact1.id, d.fact2.id, e);
        }
    }
    alert(`Połączono ${merged}/${highSim.length} par`);
    loadDuplicates();
}
{% endblock %}

View File

@ -345,6 +345,20 @@
<div class="quick-link-desc">Newsy ZOPK (źródło wiedzy)</div> <div class="quick-link-desc">Newsy ZOPK (źródło wiedzy)</div>
</div> </div>
</a> </a>
<a href="{{ url_for('admin_zopk_fact_duplicates') }}" class="quick-link" style="border-color: #f59e0b;">
<div class="quick-link-icon">🔀</div>
<div class="quick-link-text">
<div class="quick-link-title">Duplikaty faktów</div>
<div class="quick-link-desc">Łączenie podobnych faktów</div>
</div>
</a>
<a href="{{ url_for('admin_zopk_timeline') }}" class="quick-link" style="border-color: #10b981;">
<div class="quick-link-icon">🗺️</div>
<div class="quick-link-text">
<div class="quick-link-title">Timeline ZOPK</div>
<div class="quick-link-desc">Roadmapa projektu</div>
</div>
</a>
</div> </div>
</div> </div>
@ -374,6 +388,20 @@
<div class="quick-link-desc">Wektory dla chunków bez embeddingów</div> <div class="quick-link-desc">Wektory dla chunków bez embeddingów</div>
</div> </div>
</div> </div>
<div class="quick-link" onclick="autoVerifyEntities()" style="cursor: pointer; border-left: 3px solid #10b981;">
<div class="quick-link-icon"></div>
<div class="quick-link-text">
<div class="quick-link-title">Auto-weryfikuj encje</div>
<div class="quick-link-desc">Zweryfikuj encje z ≥5 wzmiankami</div>
</div>
</div>
<div class="quick-link" onclick="autoVerifyFacts()" style="cursor: pointer; border-left: 3px solid #3b82f6;">
<div class="quick-link-icon">📌</div>
<div class="quick-link-text">
<div class="quick-link-title">Auto-weryfikuj fakty</div>
<div class="quick-link-desc">Zweryfikuj fakty z ważnością ≥70%</div>
</div>
</div>
<a href="{{ url_for('admin_zopk') }}" class="quick-link"> <a href="{{ url_for('admin_zopk') }}" class="quick-link">
<div class="quick-link-icon">📊</div> <div class="quick-link-icon">📊</div>
<div class="quick-link-text"> <div class="quick-link-text">
@ -383,6 +411,14 @@
</a> </a>
</div> </div>
</div> </div>
<!-- Verification Stats -->
<div class="section">
<h2 class="section-title">✅ Status weryfikacji</h2>
<div id="verificationStats" class="stats-grid" style="grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));">
<div class="loading">Ładowanie statystyk weryfikacji...</div>
</div>
</div>
</div> </div>
{% endblock %} {% endblock %}
@ -536,4 +572,105 @@ async function generateEmbeddings() {
alert('Błąd: ' + error.message); alert('Błąd: ' + error.message);
} }
} }
/**
 * Ask for confirmation, then trigger auto-verification of entities
 * (min_mentions 5, limit 100) and refresh both statistics panels on success.
 * Errors reported by the API or thrown by the request are shown via alert().
 */
async function autoVerifyEntities() {
    const confirmed = confirm('Auto-weryfikować encje z ≥5 wzmiankami?');
    if (!confirmed) return;

    const requestOptions = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-CSRFToken': '{{ csrf_token() }}'
        },
        body: JSON.stringify({ min_mentions: 5, limit: 100 })
    };

    try {
        const response = await fetch('/api/zopk/knowledge/auto-verify/entities', requestOptions);
        const data = await response.json();
        if (!data.success) {
            alert('Błąd: ' + data.error);
            return;
        }
        alert(`✅ Zweryfikowano ${data.verified_count} encji`);
        loadStats();
        loadVerificationStats();
    } catch (error) {
        alert('Błąd: ' + error.message);
    }
}
/**
 * Ask for confirmation, then trigger auto-verification of facts
 * (min_importance 0.7, limit 200) and refresh both statistics panels on
 * success. Errors reported by the API or thrown by the request are shown
 * via alert().
 */
async function autoVerifyFacts() {
    const confirmed = confirm('Auto-weryfikować fakty z ważnością ≥70%?');
    if (!confirmed) return;

    const requestOptions = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-CSRFToken': '{{ csrf_token() }}'
        },
        body: JSON.stringify({ min_importance: 0.7, limit: 200 })
    };

    try {
        const response = await fetch('/api/zopk/knowledge/auto-verify/facts', requestOptions);
        const data = await response.json();
        if (!data.success) {
            alert('Błąd: ' + data.error);
            return;
        }
        alert(`✅ Zweryfikowano ${data.verified_count} faktów`);
        loadStats();
        loadVerificationStats();
    } catch (error) {
        alert('Błąd: ' + error.message);
    }
}
// Fetch the dashboard statistics and, when the response reports success,
// hand the payload to renderVerificationStats(). Errors are only logged.
async function loadVerificationStats() {
    try {
        const payload = await fetch('/api/zopk/knowledge/dashboard-stats')
            .then((response) => response.json());
        if (!payload.success) {
            return;
        }
        renderVerificationStats(payload);
    } catch (err) {
        console.error('Error loading verification stats:', err);
    }
}
// Render the four verification summary cards (entities, facts, chunks, relations)
// into the #verificationStats element, replacing whatever it contained.
// Counters are read from `data.verification`; a missing object or counter shows as 0.
// NOTE(review): presumably the dashboard-stats API supplies a `verification` object with
// `<kind>_verified` / `<kind>_total` / `<kind>_pending` keys — confirm against the route.
function renderVerificationStats(data) {
const stats = data.verification || {};
const html = `
<div class="stat-card" style="border-left: 3px solid #10b981;">
<div class="stat-icon">🏢</div>
<div class="stat-value">${stats.entities_verified || 0}/${stats.entities_total || 0}</div>
<div class="stat-label">Encje zweryfikowane</div>
<div class="stat-sublabel">${stats.entities_pending || 0} oczekuje</div>
</div>
<div class="stat-card" style="border-left: 3px solid #3b82f6;">
<div class="stat-icon">📌</div>
<div class="stat-value">${stats.facts_verified || 0}/${stats.facts_total || 0}</div>
<div class="stat-label">Fakty zweryfikowane</div>
<div class="stat-sublabel">${stats.facts_pending || 0} oczekuje</div>
</div>
<div class="stat-card" style="border-left: 3px solid #8b5cf6;">
<div class="stat-icon">📄</div>
<div class="stat-value">${stats.chunks_verified || 0}/${stats.chunks_total || 0}</div>
<div class="stat-label">Chunks zweryfikowane</div>
<div class="stat-sublabel">${stats.chunks_pending || 0} oczekuje</div>
</div>
<div class="stat-card" style="border-left: 3px solid #f59e0b;">
<div class="stat-icon">🔗</div>
<div class="stat-value">${stats.relations_verified || 0}/${stats.relations_total || 0}</div>
<div class="stat-label">Relacje zweryfikowane</div>
<div class="stat-sublabel">${stats.relations_pending || 0} oczekuje</div>
</div>
`;
document.getElementById('verificationStats').innerHTML = html;
}
// Fetch the verification statistics once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
    loadVerificationStats();
});
{% endblock %} {% endblock %}

View File

@ -0,0 +1,302 @@
{% extends "base.html" %}
{% block title %}Timeline ZOPK - Roadmapa{% endblock %}
{% block extra_css %}
<style>
.page-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: var(--spacing-lg); }
.page-header h1 { font-size: var(--font-size-2xl); color: var(--text-primary); }
.breadcrumb { display: flex; gap: var(--spacing-xs); color: var(--text-secondary); font-size: var(--font-size-sm); margin-bottom: var(--spacing-lg); }
.breadcrumb a { color: var(--primary); text-decoration: none; }
.timeline-container { position: relative; padding: var(--spacing-lg) 0; }
.timeline-line { position: absolute; left: 50%; top: 0; bottom: 0; width: 4px; background: var(--border); transform: translateX(-50%); }
.timeline-item { display: flex; margin-bottom: var(--spacing-xl); position: relative; }
.timeline-item:nth-child(odd) { flex-direction: row-reverse; }
.timeline-item:nth-child(odd) .timeline-content { text-align: right; padding-right: var(--spacing-xl); }
.timeline-item:nth-child(even) .timeline-content { padding-left: var(--spacing-xl); }
.timeline-content { width: 45%; }
.timeline-dot { position: absolute; left: 50%; transform: translateX(-50%); width: 20px; height: 20px; border-radius: 50%; border: 4px solid var(--surface); z-index: 1; }
.timeline-card { background: var(--surface); border-radius: var(--radius-lg); box-shadow: var(--shadow); padding: var(--spacing-md); }
.timeline-date { font-size: var(--font-size-sm); color: var(--text-secondary); margin-bottom: var(--spacing-xs); }
.timeline-title { font-size: var(--font-size-lg); font-weight: 600; margin-bottom: var(--spacing-xs); }
.timeline-desc { font-size: var(--font-size-sm); color: var(--text-secondary); margin-bottom: var(--spacing-sm); }
.timeline-meta { display: flex; gap: var(--spacing-sm); flex-wrap: wrap; }
.timeline-badge { padding: 2px 8px; border-radius: 12px; font-size: var(--font-size-xs); }
.status-planned { background: #e5e7eb; color: #374151; }
.status-in_progress { background: #dbeafe; color: #1d4ed8; }
.status-completed { background: #d1fae5; color: #047857; }
.status-delayed { background: #fef3c7; color: #b45309; }
.category-nuclear { --cat-color: #ef4444; }
.category-offshore { --cat-color: #3b82f6; }
.category-infrastructure { --cat-color: #8b5cf6; }
.category-defense { --cat-color: #059669; }
.category-other { --cat-color: #6b7280; }
.timeline-dot.category-nuclear { background: #ef4444; }
.timeline-dot.category-offshore { background: #3b82f6; }
.timeline-dot.category-infrastructure { background: #8b5cf6; }
.timeline-dot.category-defense { background: #059669; }
.timeline-dot.category-other { background: #6b7280; }
.btn { padding: 8px 16px; border-radius: var(--radius); font-weight: 500; cursor: pointer; border: none; }
.btn-primary { background: var(--primary); color: white; }
.btn-sm { padding: 4px 8px; font-size: var(--font-size-xs); }
.legend { display: flex; gap: var(--spacing-lg); margin-bottom: var(--spacing-lg); flex-wrap: wrap; }
.legend-item { display: flex; align-items: center; gap: var(--spacing-xs); font-size: var(--font-size-sm); }
.legend-dot { width: 12px; height: 12px; border-radius: 50%; }
.modal { display: none; position: fixed; inset: 0; background: rgba(0,0,0,0.5); z-index: 1000; align-items: center; justify-content: center; }
.modal.active { display: flex; }
.modal-content { background: var(--surface); border-radius: var(--radius-lg); padding: var(--spacing-lg); width: 500px; max-width: 90%; }
.modal-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: var(--spacing-md); }
.form-group { margin-bottom: var(--spacing-md); }
.form-group label { display: block; font-size: var(--font-size-sm); margin-bottom: 4px; }
.form-group input, .form-group select, .form-group textarea { width: 100%; padding: 8px 12px; border: 1px solid var(--border); border-radius: var(--radius); }
.form-row { display: grid; grid-template-columns: 1fr 1fr; gap: var(--spacing-md); }
.empty-state { text-align: center; padding: var(--spacing-xl); color: var(--text-secondary); }
</style>
{% endblock %}
{% block content %}
<div class="container">
<div class="breadcrumb">
<a href="{{ url_for('admin_zopk') }}">Panel ZOPK</a>
<span></span>
<span>Timeline / Roadmapa</span>
</div>
<div class="page-header">
<h1>🗺️ Timeline ZOPK</h1>
<button class="btn btn-primary" onclick="openAddModal()"> Dodaj kamień milowy</button>
</div>
<div class="legend">
<div class="legend-item"><div class="legend-dot" style="background: #ef4444;"></div> Energia jądrowa</div>
<div class="legend-item"><div class="legend-dot" style="background: #3b82f6;"></div> Offshore wind</div>
<div class="legend-item"><div class="legend-dot" style="background: #8b5cf6;"></div> Infrastruktura</div>
<div class="legend-item"><div class="legend-dot" style="background: #059669;"></div> Obronność</div>
<div class="legend-item"><div class="legend-dot" style="background: #6b7280;"></div> Inne</div>
</div>
<div class="timeline-container">
<div class="timeline-line"></div>
<div id="timelineItems">
<div class="empty-state">Ładowanie...</div>
</div>
</div>
</div>
<!-- Modal dodawania -->
<div class="modal" id="addModal">
<div class="modal-content">
<div class="modal-header">
<h3 id="modalTitle">Dodaj kamień milowy</h3>
<button onclick="closeModal()" style="background: none; border: none; font-size: 1.5rem; cursor: pointer;">&times;</button>
</div>
<form id="milestoneForm" onsubmit="saveMilestone(event)">
<input type="hidden" id="milestoneId">
<div class="form-group">
<label>Tytuł *</label>
<input type="text" id="title" required>
</div>
<div class="form-group">
<label>Opis</label>
<textarea id="description" rows="3"></textarea>
</div>
<div class="form-row">
<div class="form-group">
<label>Kategoria</label>
<select id="category">
<option value="nuclear">Energia jądrowa</option>
<option value="offshore">Offshore wind</option>
<option value="infrastructure">Infrastruktura</option>
<option value="defense">Obronność</option>
<option value="other">Inne</option>
</select>
</div>
<div class="form-group">
<label>Status</label>
<select id="status">
<option value="planned">Planowane</option>
<option value="in_progress">W trakcie</option>
<option value="completed">Zakończone</option>
<option value="delayed">Opóźnione</option>
</select>
</div>
</div>
<div class="form-row">
<div class="form-group">
<label>Data planowana</label>
<input type="date" id="targetDate">
</div>
<div class="form-group">
<label>Data rzeczywista</label>
<input type="date" id="actualDate">
</div>
</div>
<div class="form-group">
<label>Źródło (URL)</label>
<input type="url" id="sourceUrl">
</div>
<div style="display: flex; justify-content: flex-end; gap: var(--spacing-sm);">
<button type="button" class="btn" onclick="closeModal()">Anuluj</button>
<button type="submit" class="btn btn-primary">Zapisz</button>
</div>
</form>
</div>
</div>
{% endblock %}
{% block extra_js %}
// Milestones most recently returned by /api/zopk/milestones (assigned in loadMilestones).
let milestones = [];
// Id of the milestone being edited, or null when the modal is creating a new one.
let editingId = null;
// Fetch all milestones from the API, store them in `milestones` and re-render
// the timeline. Any failure (network error, non-JSON body, or an API response
// with success === false) replaces the timeline with an error message instead
// of leaving the "loading" placeholder on screen.
async function loadMilestones() {
    const container = document.getElementById('timelineItems');

    // Show the failure as plain text so the message can never inject markup.
    const showError = (reason) => {
        const box = document.createElement('div');
        box.className = 'empty-state';
        box.textContent = 'Błąd ładowania: ' + reason;
        container.innerHTML = '';
        container.appendChild(box);
    };

    try {
        const response = await fetch('/api/zopk/milestones');
        const data = await response.json();
        if (data.success) {
            // Tolerate a success payload without a list rather than crashing the renderer.
            milestones = data.milestones || [];
            renderTimeline();
        } else {
            showError(data.error || ('HTTP ' + response.status));
        }
    } catch (error) {
        showError(error);
    }
}
// Render the `milestones` array into #timelineItems as alternating timeline cards.
//
// Status and category values are checked against the known label tables before
// being used in class names; unknown values fall back to no status class and to
// the "other" category, and their raw text is HTML-escaped before display.
// The category badge carries the `category-*` class itself, because that class
// is what defines the `--cat-color` variable used for its background.
function renderTimeline() {
    const container = document.getElementById('timelineItems');
    if (milestones.length === 0) {
        container.innerHTML = '<div class="empty-state">Brak kamieni milowych. Dodaj pierwszy!</div>';
        return;
    }
    const statusLabels = {planned: 'Planowane', in_progress: 'W trakcie', completed: 'Zakończone', delayed: 'Opóźnione'};
    const categoryLabels = {nuclear: 'Energia jądrowa', offshore: 'Offshore', infrastructure: 'Infrastruktura', defense: 'Obronność', other: 'Inne'};
    // Own-property check so values such as "constructor" are not treated as known keys.
    const isKnown = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

    const html = milestones.map(m => {
        const statusKnown = isKnown(statusLabels, m.status);
        const categoryKnown = isKnown(categoryLabels, m.category);
        const statusClass = statusKnown ? `status-${m.status}` : '';
        const categoryClass = `category-${categoryKnown ? m.category : 'other'}`;
        const statusText = statusKnown ? statusLabels[m.status] : escapeHtml(m.status);
        const categoryText = categoryKnown ? categoryLabels[m.category] : escapeHtml(m.category);
        // Coerce to a number so nothing but a numeric literal reaches the inline handlers.
        const id = Number(m.id);
        return `
            <div class="timeline-item">
                <div class="timeline-content">
                    <div class="timeline-card">
                        <div class="timeline-date">${formatDate(m.target_date)}</div>
                        <div class="timeline-title">${escapeHtml(m.title)}</div>
                        <div class="timeline-desc">${escapeHtml(m.description || '')}</div>
                        <div class="timeline-meta">
                            <span class="timeline-badge ${statusClass}">${statusText}</span>
                            <span class="timeline-badge ${categoryClass}" style="background: var(--cat-color);">${categoryText}</span>
                            <button class="btn btn-sm" onclick="editMilestone(${id})">✏️</button>
                            <button class="btn btn-sm" onclick="deleteMilestone(${id})">🗑️</button>
                        </div>
                    </div>
                </div>
                <div class="timeline-dot ${categoryClass}"></div>
            </div>
        `;
    }).join('');
    container.innerHTML = html;
}
// Format a date string as "<month name> <year>" in Polish.
// Returns 'Brak daty' for an empty or unparseable value.
function formatDate(dateStr) {
    if (!dateStr) return 'Brak daty';
    const d = new Date(dateStr);
    if (Number.isNaN(d.getTime())) return 'Brak daty';
    const options = {year: 'numeric', month: 'long'};
    // A date-only ISO string ("YYYY-MM-DD") is parsed as UTC midnight; formatting it
    // in UTC as well keeps the month from shifting for viewers west of UTC.
    if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
        options.timeZone = 'UTC';
    }
    return d.toLocaleDateString('pl-PL', options);
}
// Escape text for use as HTML content by letting the browser serialise it:
// the value is assigned as text to a detached element and read back as markup.
// Falsy input yields an empty string.
function escapeHtml(text) {
    const scratch = document.createElement('div');
    scratch.textContent = text ? text : '';
    return scratch.innerHTML;
}
// Open the modal in "add" mode: clear the editing id, reset the form
// and restore the "add" title.
function openAddModal() {
    const byId = (elementId) => document.getElementById(elementId);
    editingId = null;
    byId('modalTitle').textContent = 'Dodaj kamień milowy';
    byId('milestoneForm').reset();
    byId('addModal').classList.add('active');
}
// Open the modal in "edit" mode for the milestone with the given id,
// pre-filling every form field from the cached `milestones` entry.
// Does nothing when no cached milestone has that id.
function editMilestone(id) {
    const milestone = milestones.find((candidate) => candidate.id === id);
    if (!milestone) {
        return;
    }
    editingId = id;
    document.getElementById('modalTitle').textContent = 'Edytuj kamień milowy';

    // Form element id -> value to show; optional fields fall back to ''.
    const fieldValues = {
        title: milestone.title,
        description: milestone.description || '',
        category: milestone.category,
        status: milestone.status,
        targetDate: milestone.target_date || '',
        actualDate: milestone.actual_date || '',
        sourceUrl: milestone.source_url || ''
    };
    for (const [elementId, value] of Object.entries(fieldValues)) {
        document.getElementById(elementId).value = value;
    }
    document.getElementById('addModal').classList.add('active');
}
// Hide the add/edit modal by removing its 'active' class.
function closeModal() {
    const modal = document.getElementById('addModal');
    modal.classList.remove('active');
}
// Form submit handler: send the form contents to the milestones API.
// Uses PUT on the milestone's own URL when `editingId` is set, POST otherwise.
// On success the modal is closed and the timeline reloaded; otherwise the
// error is shown in an alert.
async function saveMilestone(e) {
    e.preventDefault();

    const valueOf = (elementId) => document.getElementById(elementId).value;
    const payload = {
        title: valueOf('title'),
        description: valueOf('description'),
        category: valueOf('category'),
        status: valueOf('status'),
        // Empty optional fields are sent as null rather than ''.
        target_date: valueOf('targetDate') || null,
        actual_date: valueOf('actualDate') || null,
        source_url: valueOf('sourceUrl') || null
    };

    const isEdit = Boolean(editingId);
    const endpoint = isEdit ? `/api/zopk/milestones/${editingId}` : '/api/zopk/milestones';

    try {
        const response = await fetch(endpoint, {
            method: isEdit ? 'PUT' : 'POST',
            headers: {'Content-Type': 'application/json', 'X-CSRFToken': '{{ csrf_token() }}'},
            body: JSON.stringify(payload)
        });
        const result = await response.json();
        if (!result.success) {
            alert('Błąd: ' + result.error);
            return;
        }
        closeModal();
        loadMilestones();
    } catch (err) {
        alert('Błąd: ' + err);
    }
}
// Ask for confirmation, then send DELETE for the given milestone id.
// Reloads the timeline on success; alerts the error otherwise.
async function deleteMilestone(id) {
    const confirmed = confirm('Usunąć ten kamień milowy?');
    if (!confirmed) {
        return;
    }

    const requestOptions = {
        method: 'DELETE',
        headers: {'X-CSRFToken': '{{ csrf_token() }}'}
    };

    try {
        const response = await fetch(`/api/zopk/milestones/${id}`, requestOptions);
        const result = await response.json();
        if (!result.success) {
            alert('Błąd: ' + result.error);
            return;
        }
        loadMilestones();
    } catch (err) {
        alert('Błąd: ' + err);
    }
}
// Init: start loading the milestones as soon as this script runs.
loadMilestones();
{% endblock %}

View File

@ -1953,3 +1953,156 @@ def get_entity_merge_preview(
'new_mentions_count': primary.mentions_count + duplicate.mentions_count 'new_mentions_count': primary.mentions_count + duplicate.mentions_count
} }
} }
# ============================================================
# FACT DEDUPLICATION
# ============================================================
def find_duplicate_facts(
    db_session,
    min_similarity: float = 0.7,
    limit: int = 100,
    fact_type: Optional[str] = None
) -> List[Dict]:
    """Find pairs of facts whose texts are similar enough to be duplicates.

    Every pair of rows in ``zopk_knowledge_facts`` is compared once
    (``f1.id < f2.id``) with the SQL ``similarity()`` function on the
    lower-cased ``full_text`` (presumably PostgreSQL's pg_trgm extension;
    verify it is installed on the target database).

    Args:
        db_session: Session object exposing ``execute(query, params)``.
        min_similarity: Minimum similarity score a pair must reach.
        limit: Maximum number of pairs to return.
        fact_type: When given, only pairs whose first fact has this
            ``fact_type`` are returned.

    Returns:
        A list of dicts with keys ``fact1``, ``fact2`` and ``similarity``,
        ordered by similarity and then by the higher importance score of
        the pair, both descending.
    """
    from sqlalchemy import text

    params = {'min_sim': min_similarity, 'limit': limit}

    # fact_type can come from a request parameter, so it is passed as a bound
    # value; only this fixed SQL fragment is ever spliced into the query text.
    type_filter = ""
    if fact_type:
        type_filter = "AND f1.fact_type = :fact_type"
        params['fact_type'] = fact_type

    query = text(f"""
        SELECT
            f1.id as id1, f1.full_text as text1, f1.fact_type as type1,
            f1.is_verified as verified1, f1.importance_score as score1,
            f2.id as id2, f2.full_text as text2, f2.fact_type as type2,
            f2.is_verified as verified2, f2.importance_score as score2,
            similarity(LOWER(f1.full_text), LOWER(f2.full_text)) as sim
        FROM zopk_knowledge_facts f1
        JOIN zopk_knowledge_facts f2 ON f1.id < f2.id
        WHERE similarity(LOWER(f1.full_text), LOWER(f2.full_text)) >= :min_sim
        {type_filter}
        ORDER BY sim DESC, GREATEST(f1.importance_score, f2.importance_score) DESC
        LIMIT :limit
    """)

    result = db_session.execute(query, params)

    return [
        {
            'fact1': {
                'id': row.id1, 'text': row.text1, 'fact_type': row.type1,
                'is_verified': row.verified1,
                # A missing score is reported as 0.
                'importance_score': float(row.score1) if row.score1 else 0
            },
            'fact2': {
                'id': row.id2, 'text': row.text2, 'fact_type': row.type2,
                'is_verified': row.verified2,
                'importance_score': float(row.score2) if row.score2 else 0
            },
            'similarity': float(row.sim)
        }
        for row in result
    ]
def merge_facts(db_session, primary_id: int, duplicate_id: int, new_text: Optional[str] = None) -> Dict:
    """Merge a duplicate fact into the primary fact and delete the duplicate.

    The primary fact keeps the higher of the two importance and confidence
    scores and becomes verified if the duplicate was verified. The change is
    committed; on any exception the session is rolled back.

    Args:
        db_session: Session used to load, update and delete the facts.
        primary_id: Id of the fact to keep.
        duplicate_id: Id of the fact to delete after merging.
        new_text: Optional replacement for the primary fact's ``full_text``.

    Returns:
        ``{'success': True, 'primary_id': ..., 'deleted_id': ...}`` on success,
        otherwise ``{'success': False, 'error': <message>}``.
    """
    # Merging a fact with itself would delete the only copy of it.
    if primary_id == duplicate_id:
        return {'success': False, 'error': 'Cannot merge a fact with itself'}

    primary = db_session.query(ZOPKKnowledgeFact).get(primary_id)
    duplicate = db_session.query(ZOPKKnowledgeFact).get(duplicate_id)
    if not primary:
        return {'success': False, 'error': f'Primary fact {primary_id} not found'}
    if not duplicate:
        return {'success': False, 'error': f'Duplicate fact {duplicate_id} not found'}

    try:
        if new_text:
            primary.full_text = new_text
        # Keep whichever score is higher; a missing primary score always loses.
        if duplicate.importance_score and (not primary.importance_score or duplicate.importance_score > primary.importance_score):
            primary.importance_score = duplicate.importance_score
        if duplicate.confidence_score and (not primary.confidence_score or duplicate.confidence_score > primary.confidence_score):
            primary.confidence_score = duplicate.confidence_score
        if duplicate.is_verified:
            primary.is_verified = True
        db_session.delete(duplicate)
        db_session.commit()
        return {'success': True, 'primary_id': primary_id, 'deleted_id': duplicate_id}
    except Exception as e:
        db_session.rollback()
        return {'success': False, 'error': str(e)}
# ============================================================
# AUTO-VERIFICATION
# ============================================================
def auto_verify_top_entities(db_session, min_mentions: int = 5, limit: int = 100) -> Dict:
    """Mark the most-mentioned unverified entities as verified.

    Selects up to ``limit`` unverified entities with at least ``min_mentions``
    mentions, highest mention count first, flags them as verified and commits.

    Returns:
        Dict with ``success``, ``verified_count`` and ``min_mentions``.
    """
    candidates = (
        db_session.query(ZOPKKnowledgeEntity)
        .filter(
            # "== False" builds a SQL comparison here; it is not a Python truth test.
            ZOPKKnowledgeEntity.is_verified == False,  # noqa: E712
            ZOPKKnowledgeEntity.mentions_count >= min_mentions,
        )
        .order_by(ZOPKKnowledgeEntity.mentions_count.desc())
        .limit(limit)
        .all()
    )

    verified_count = 0
    for verified_count, entity in enumerate(candidates, start=1):
        entity.is_verified = True

    db_session.commit()
    return {'success': True, 'verified_count': verified_count, 'min_mentions': min_mentions}
def auto_verify_top_facts(db_session, min_importance: float = 0.7, limit: int = 200) -> Dict:
    """Mark the most important unverified facts as verified.

    Selects up to ``limit`` unverified facts whose importance score is at
    least ``min_importance``, highest score first, flags them as verified
    and commits.

    Returns:
        Dict with ``success``, ``verified_count`` and ``min_importance``.
    """
    candidates = (
        db_session.query(ZOPKKnowledgeFact)
        .filter(
            # "== False" builds a SQL comparison here; it is not a Python truth test.
            ZOPKKnowledgeFact.is_verified == False,  # noqa: E712
            ZOPKKnowledgeFact.importance_score >= min_importance,
        )
        .order_by(ZOPKKnowledgeFact.importance_score.desc())
        .limit(limit)
        .all()
    )

    verified_count = 0
    for verified_count, fact in enumerate(candidates, start=1):
        fact.is_verified = True

    db_session.commit()
    return {'success': True, 'verified_count': verified_count, 'min_importance': min_importance}
# ============================================================
# DASHBOARD STATS
# ============================================================
def get_knowledge_dashboard_stats(db_session) -> Dict:
    """Collect the counters shown on the knowledge dashboard.

    Returns:
        Dict with the sections ``chunks``, ``entities``, ``facts``, ``news``
        and ``top_entities`` (the ten entities with the most mentions).
        Each ``verified_pct`` is rounded to one decimal place and is 0 when
        the corresponding total is 0.
    """
    from sqlalchemy import func, text

    def count_rows(model, *criteria):
        # COUNT(id) for a model, optionally narrowed by filter criteria.
        counter = db_session.query(func.count(model.id))
        if criteria:
            counter = counter.filter(*criteria)
        return counter.scalar() or 0

    def scalar_sql(sql):
        # Single-value result of a raw SQL statement, 0 when it is empty.
        return db_session.execute(text(sql)).scalar() or 0

    def percent(part, whole):
        return round(100 * part / whole, 1) if whole else 0

    chunks_total = count_rows(ZOPKKnowledgeChunk)
    chunks_verified = count_rows(ZOPKKnowledgeChunk, ZOPKKnowledgeChunk.is_verified == True)  # noqa: E712
    chunks_with_embedding = count_rows(ZOPKKnowledgeChunk, ZOPKKnowledgeChunk.embedding.isnot(None))

    entities_total = count_rows(ZOPKKnowledgeEntity)
    entities_verified = count_rows(ZOPKKnowledgeEntity, ZOPKKnowledgeEntity.is_verified == True)  # noqa: E712

    facts_total = count_rows(ZOPKKnowledgeFact)
    facts_verified = count_rows(ZOPKKnowledgeFact, ZOPKKnowledgeFact.is_verified == True)  # noqa: E712

    news_total = scalar_sql("SELECT COUNT(*) FROM zopk_news WHERE status IN ('approved', 'auto_approved')")
    # Approved news items that have at least one knowledge chunk extracted from them.
    news_with_extraction = scalar_sql('''
        SELECT COUNT(DISTINCT n.id) FROM zopk_news n
        JOIN zopk_knowledge_chunks c ON c.source_news_id = n.id
        WHERE n.status IN ('approved', 'auto_approved')
    ''')

    entity_types = db_session.execute(
        text('SELECT entity_type, COUNT(*) FROM zopk_knowledge_entities GROUP BY entity_type ORDER BY 2 DESC')
    ).fetchall()
    fact_types = db_session.execute(
        text('SELECT fact_type, COUNT(*) FROM zopk_knowledge_facts GROUP BY fact_type ORDER BY 2 DESC')
    ).fetchall()
    top_entities = (
        db_session.query(ZOPKKnowledgeEntity)
        .order_by(ZOPKKnowledgeEntity.mentions_count.desc())
        .limit(10)
        .all()
    )

    chunks_section = {
        'total': chunks_total,
        'verified': chunks_verified,
        'with_embedding': chunks_with_embedding,
        'verified_pct': percent(chunks_verified, chunks_total),
    }
    entities_section = {
        'total': entities_total,
        'verified': entities_verified,
        'verified_pct': percent(entities_verified, entities_total),
        'by_type': [{'type': row[0], 'count': row[1]} for row in entity_types],
    }
    facts_section = {
        'total': facts_total,
        'verified': facts_verified,
        'verified_pct': percent(facts_verified, facts_total),
        # Facts without a type are grouped under 'unknown'.
        'by_type': [{'type': row[0] or 'unknown', 'count': row[1]} for row in fact_types],
    }
    news_section = {
        'total': news_total,
        'with_extraction': news_with_extraction,
        'pending': news_total - news_with_extraction,
    }

    return {
        'chunks': chunks_section,
        'entities': entities_section,
        'facts': facts_section,
        'news': news_section,
        'top_entities': [
            {'id': entity.id, 'name': entity.name, 'type': entity.entity_type, 'mentions': entity.mentions_count}
            for entity in top_entities
        ],
    }