feat(zopk): Improve AI scoring and auto-approve threshold
Changes: - Lower auto-approve threshold from 4★ to 3★ (verified 2026-01-15) - Add detailed progress bar for ZOPK search process - Add auto-approved articles list with star ratings - Document ZOPK topics (ZOP Kaszubia) in CLAUDE.md - Add 8-second countdown before auto-refresh Technical: - zopk_news_service.py: Changed score threshold from >=4 to >=3 - Templates: New CSS for progress phases and results display - CLAUDE.md: Added "ZOP Kaszubia News" section with topic guidelines Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
90da6b9c64
commit
db02d3660f
50
CLAUDE.md
50
CLAUDE.md
@ -788,6 +788,56 @@ AKTUALNOŚCI
|
||||
# 0 */6 * * * cd /var/www/nordabiznes && /var/www/nordabiznes/venv/bin/python3 scripts/fetch_company_news.py --all >> /var/log/nordabiznes/news_fetch.log 2>&1
|
||||
```
|
||||
|
||||
## ZOP Kaszubia News (ZOPK)
|
||||
|
||||
### Opis
|
||||
|
||||
System monitoringu newsów związanych z projektem **Zielony Okręg Przemysłowy Kaszubia**.
|
||||
Panel admina: `/admin/zopk/news`
|
||||
|
||||
### Tematy ZOP Kaszubia (istotne)
|
||||
|
||||
- **Zielony Okręg Przemysłowy Kaszubia** - główny projekt
|
||||
- **Elektrownia jądrowa na Pomorzu** - Lubiatowo-Kopalino
|
||||
- **Offshore wind Bałtyk** - farmy wiatrowe, Baltic Power, Baltica
|
||||
- **Via Pomerania** - droga ekspresowa Ustka-Bydgoszcz
|
||||
- **Droga Czerwona** - połączenie z Portem Gdynia
|
||||
- **Kongsberg** - norweskie inwestycje zbrojeniowe w Rumi
|
||||
- **Pakt Bezpieczeństwa Pomorze Środkowe** - MON
|
||||
- **Izba Przedsiębiorców NORDA** - lokalne organizacje biznesowe
|
||||
|
||||
### Tematy NIEZWIĄZANE (do odrzucenia)
|
||||
|
||||
- Turystyka na Kaszubach (kuligi, lodowiska, hotele)
|
||||
- Polityka ogólnopolska (Ziobro, polexit)
|
||||
- Inne regiony Polski (Śląsk, Lubuskie, Małopolska)
|
||||
- Wypadki i wydarzenia kryminalne
|
||||
- Clickbait i lifestyle
|
||||
|
||||
### Reguły auto-approve (WAŻNE!)
|
||||
|
||||
**Próg auto-approve: score >= 3** (verified 2026-01-15)
|
||||
|
||||
| Score | Status | Opis |
|
||||
|-------|--------|------|
|
||||
| 1-2 | `pending` | Wymaga ręcznej moderacji |
|
||||
| 3-5 | `auto_approved` | Automatycznie zatwierdzony |
|
||||
|
||||
**Plik:** `zopk_news_service.py` (linie 890, 1124, 1145)
|
||||
|
||||
### Tabela zopk_news
|
||||
|
||||
```sql
|
||||
zopk_news (
|
||||
id, title, url, description,
|
||||
source_name, source_domain, source_type,
|
||||
ai_relevance_score INTEGER, -- 1-5 gwiazdek
|
||||
status VARCHAR(20), -- pending, auto_approved, approved, rejected
|
||||
confidence_score, source_count,
|
||||
created_at, updated_at
|
||||
)
|
||||
```
|
||||
|
||||
## Social Media - Stan aktualny
|
||||
|
||||
### Statystyki (2025-12-29)
|
||||
|
||||
128
app.py
128
app.py
@ -10245,6 +10245,7 @@ def admin_zopk_news():
|
||||
try:
|
||||
page = request.args.get('page', 1, type=int)
|
||||
status = request.args.get('status', 'all')
|
||||
stars = request.args.get('stars', 'all') # 'all', '1'-'5', 'none'
|
||||
sort_by = request.args.get('sort', 'date') # 'date', 'score', 'title'
|
||||
sort_dir = request.args.get('dir', 'desc') # 'asc', 'desc'
|
||||
per_page = 50
|
||||
@ -10253,6 +10254,13 @@ def admin_zopk_news():
|
||||
if status != 'all':
|
||||
query = query.filter(ZOPKNews.status == status)
|
||||
|
||||
# Filter by star rating
|
||||
if stars == 'none':
|
||||
query = query.filter(ZOPKNews.ai_relevance_score.is_(None))
|
||||
elif stars in ['1', '2', '3', '4', '5']:
|
||||
query = query.filter(ZOPKNews.ai_relevance_score == int(stars))
|
||||
# 'all' - no filter
|
||||
|
||||
# Apply sorting
|
||||
sort_func = desc if sort_dir == 'desc' else asc
|
||||
if sort_by == 'score':
|
||||
@ -10277,6 +10285,7 @@ def admin_zopk_news():
|
||||
total_pages=total_pages,
|
||||
total=total,
|
||||
current_status=status,
|
||||
current_stars=stars,
|
||||
current_sort=sort_by,
|
||||
current_dir=sort_dir
|
||||
)
|
||||
@ -10483,6 +10492,117 @@ def admin_zopk_reject_old_news():
|
||||
db.close()
|
||||
|
||||
|
||||
@app.route('/admin/zopk/news/star-counts')
@login_required
def admin_zopk_news_star_counts():
    """Return how many pending ZOPK news items exist for each AI star rating.

    The JSON response has the shape ``{'success': True, 'counts': {...}}``.
    Keys 1-5 are star ratings; key 0 counts pending items whose
    ``ai_relevance_score`` is NULL (no AI evaluation yet).

    Returns:
        A JSON response. Non-admin users receive HTTP 403. Any exception
        raised while querying is logged and reported as HTTP 500.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from database import ZOPKNews
    from sqlalchemy import func

    db = SessionLocal()
    try:
        # Pre-seed every bucket so ratings without pending rows are still
        # reported with a count of 0 (same key order as before: 1..5, then 0).
        counts = {star: 0 for star in range(1, 6)}
        counts[0] = 0

        # One grouped query replaces six separate COUNT round-trips.
        grouped = (
            db.query(ZOPKNews.ai_relevance_score, func.count(ZOPKNews.id))
            .filter(ZOPKNews.status == 'pending')
            .group_by(ZOPKNews.ai_relevance_score)
            .all()
        )
        for score, total in grouped:
            if score is None:
                # NULL score means the item has not been rated by the AI.
                counts[0] = total
            elif 1 <= score <= 5:
                counts[score] = total
            # Any other stored value was never reported; keep ignoring it.

        return jsonify({
            'success': True,
            'counts': counts
        })

    except Exception as e:
        logger.error(f"Error getting ZOPK news star counts: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd'}), 500

    finally:
        db.close()
|
||||
|
||||
|
||||
@app.route('/admin/zopk/news/reject-by-stars', methods=['POST'])
@login_required
def admin_zopk_reject_by_stars():
    """Reject every pending ZOPK news item that has one of the given star ratings.

    Expects a JSON object body with:
        stars:  list of integer ratings to reject; 0 selects items with no
                AI rating (``ai_relevance_score IS NULL``), 1-5 select that
                exact score.
        reason: optional rejection reason; when empty a default reason
                listing the selected ratings is stored instead.

    Returns:
        A JSON response with ``success``, ``message`` and ``count`` on
        success. HTTP 403 for non-admins, HTTP 400 when no usable rating was
        supplied, HTTP 500 (after rollback) when the update itself fails.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from database import ZOPKNews

    db = SessionLocal()
    try:
        # silent=True: a missing/malformed JSON body is a client error and is
        # reported through the 400 branches below, not as a 500 from the
        # blanket exception handler.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}
        stars = payload.get('stars', [])  # List of star ratings to reject (0 = no rating)
        reason = payload.get('reason', '')

        if not stars:
            return jsonify({'success': False, 'error': 'Nie wybrano ocen do odrzucenia'}), 400

        # A scalar or object here would blow up when iterated; treat it as
        # invalid input.
        if not isinstance(stars, (list, tuple)):
            return jsonify({'success': False, 'error': 'Nieprawidłowe oceny gwiazdkowe'}), 400

        # Keep only real integers 0-5, once each, in the order given.
        # bool is excluded explicitly because True == 1 in Python.
        valid_stars = []
        for candidate in stars:
            if isinstance(candidate, bool) or not isinstance(candidate, int):
                continue
            if 0 <= candidate <= 5 and candidate not in valid_stars:
                valid_stars.append(candidate)

        if not valid_stars:
            return jsonify({'success': False, 'error': 'Nieprawidłowe oceny gwiazdkowe'}), 400

        # Build query for pending news with specified stars
        from sqlalchemy import or_
        conditions = [
            ZOPKNews.ai_relevance_score.is_(None) if star == 0
            else ZOPKNews.ai_relevance_score == star
            for star in valid_stars
        ]

        news_to_reject = db.query(ZOPKNews).filter(
            ZOPKNews.status == 'pending',
            or_(*conditions)
        ).all()

        count = len(news_to_reject)

        labels = ', '.join(str(s) + '★' if s > 0 else 'brak oceny' for s in valid_stars)
        default_reason = f"Masowo odrzucone - oceny: {labels}"
        final_reason = reason if reason else default_reason

        # One timestamp for the whole batch so all rows rejected by this
        # request carry the same moderation time.
        moderated_at = datetime.now()
        for news in news_to_reject:
            news.status = 'rejected'
            news.moderated_by = current_user.id
            news.moderated_at = moderated_at
            news.rejection_reason = final_reason

        db.commit()

        logger.info(f"Admin {current_user.email} rejected {count} ZOPK news with stars {valid_stars}")

        return jsonify({
            'success': True,
            'message': f'Odrzucono {count} artykułów',
            'count': count
        })

    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting ZOPK news by stars: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas odrzucania'}), 500

    finally:
        db.close()
|
||||
|
||||
|
||||
@app.route('/admin/zopk/news/evaluate-ai', methods=['POST'])
|
||||
@login_required
|
||||
def admin_zopk_evaluate_ai():
|
||||
@ -10620,7 +10740,13 @@ def api_zopk_search_news():
|
||||
'saved_new': results['saved_new'],
|
||||
'updated_existing': results['updated_existing'],
|
||||
'auto_approved': results['auto_approved'],
|
||||
'source_stats': results['source_stats']
|
||||
'ai_approved': results.get('ai_approved', 0),
|
||||
'ai_rejected': results.get('ai_rejected', 0),
|
||||
'blacklisted': results.get('blacklisted', 0),
|
||||
'keyword_filtered': results.get('keyword_filtered', 0),
|
||||
'source_stats': results['source_stats'],
|
||||
'process_log': results.get('process_log', []),
|
||||
'auto_approved_articles': results.get('auto_approved_articles', [])
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
|
||||
307
database.py
307
database.py
@ -2041,6 +2041,313 @@ class ZOPKNewsFetchJob(Base):
|
||||
user = relationship('User', foreign_keys=[triggered_by_user])
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ZOPK KNOWLEDGE BASE (AI-powered, with pgvector)
|
||||
# ============================================================
|
||||
|
||||
class ZOPKKnowledgeChunk(Base):
    """
    A text fragment cut from an approved ZOPK news article.

    Every row is one semantically coherent chunk stored together with its
    embedding and AI-extracted metadata, so it can be retrieved by
    similarity search (RAG - Retrieval Augmented Generation).

    Guidelines noted by the authors:
    - chunk size of 500-1000 tokens with roughly 100 tokens of overlap
    - embedding model text-embedding-004 (768 dimensions)
    """
    __tablename__ = 'zopk_knowledge_chunks'

    id = Column(Integer, primary_key=True)

    # --- Origin: the news article this chunk was cut from ---
    source_news_id = Column(Integer, ForeignKey('zopk_news.id'), nullable=False, index=True)

    # --- Text ---
    content = Column(Text, nullable=False)  # raw chunk text
    content_clean = Column(Text)  # cleaned/normalized variant used for processing
    chunk_index = Column(Integer)  # 0-based position within the source article
    token_count = Column(Integer)  # approximate number of tokens

    # --- Embedding ---
    # Declared as Text: the vector is kept as a JSON string and converted to
    # a pgvector value (768 dimensions, Google text-embedding-004) at query time.
    embedding = Column(Text)

    # --- Metadata extracted by the AI ---
    chunk_type = Column(String(50))  # narrative, fact, quote, statistic, event, definition
    summary = Column(Text)  # summary of 1-2 sentences
    # Keyword list: native array on PostgreSQL, plain Text on SQLite.
    keywords = Column(Text if IS_SQLITE else PG_ARRAY(String(100)))
    language = Column(String(10), default='pl')  # pl, en

    # --- What the information refers to ---
    context_date = Column(Date)  # date the content is about (not the article date)
    context_location = Column(String(255))  # geographic location, when mentioned

    # --- Quality and relevance ---
    importance_score = Column(Integer)  # 1-5 importance of the information
    confidence_score = Column(Numeric(3, 2))  # 0.00-1.00 extraction confidence

    # --- Human moderation ---
    is_verified = Column(Boolean, default=False)
    verified_by = Column(Integer, ForeignKey('users.id'))
    verified_at = Column(DateTime)

    # --- Processing bookkeeping ---
    extraction_model = Column(String(100))  # e.g. gemini-2.0-flash, gpt-4
    extracted_at = Column(DateTime, default=datetime.now)

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # --- ORM links ---
    source_news = relationship('ZOPKNews', backref='knowledge_chunks')
    verifier = relationship('User', foreign_keys=[verified_by])
|
||||
|
||||
|
||||
class ZOPKKnowledgeEntity(Base):
    """
    Named entities extracted from ZOPK knowledge base.
    Entities are deduplicated and enriched across all sources.

    Types: company, person, place, organization, project, technology
    """
    __tablename__ = 'zopk_knowledge_entities'

    id = Column(Integer, primary_key=True)

    # Entity identification
    entity_type = Column(String(50), nullable=False, index=True)
    name = Column(String(255), nullable=False)
    normalized_name = Column(String(255), index=True)  # Lowercase, no special chars (for dedup)
    aliases = Column(PG_ARRAY(String(255)) if not IS_SQLITE else Text)  # Alternative names

    # Description
    description = Column(Text)  # AI-generated description
    short_description = Column(String(500))  # One-liner

    # Linking to existing data
    company_id = Column(Integer, ForeignKey('companies.id'))  # Link to Norda company if exists
    zopk_project_id = Column(Integer, ForeignKey('zopk_projects.id'))  # Link to ZOPK project
    external_url = Column(String(1000))  # Wikipedia, company website, etc.

    # Entity metadata (JSONB for flexibility): {role: "CEO", founded: 2020, ...}
    # The Python attribute cannot be called `metadata`: SQLAlchemy's
    # Declarative API reserves that name for the MetaData registry and
    # raises InvalidRequestError while the class is being defined. The
    # attribute is therefore `entity_metadata`, while the database column
    # keeps the name "metadata".
    entity_metadata = Column('metadata', PG_JSONB if not IS_SQLITE else Text)

    # Statistics
    mentions_count = Column(Integer, default=0)
    first_mentioned_at = Column(DateTime)
    last_mentioned_at = Column(DateTime)

    # Embedding for entity similarity
    embedding = Column(Text)  # Entity description embedding

    # Quality
    is_verified = Column(Boolean, default=False)
    # Self-reference used for deduplication: points at the entity this one
    # was merged into.
    merged_into_id = Column(Integer, ForeignKey('zopk_knowledge_entities.id'))

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # Relationships
    company = relationship('Company', foreign_keys=[company_id])
    zopk_project = relationship('ZOPKProject', foreign_keys=[zopk_project_id])
    # remote_side=[id] makes this the many-to-one side of the self-reference.
    merged_into = relationship('ZOPKKnowledgeEntity', remote_side=[id], foreign_keys=[merged_into_id])
|
||||
|
||||
|
||||
class ZOPKKnowledgeFact(Base):
    """
    An atomic, verifiable statement extracted from a knowledge chunk.

    Sample facts:
    - "ZOPK otrzymał 500 mln PLN dofinansowania w 2024"
    - "Port Gdynia jest głównym partnerem projektu"
    - "Projekt zakłada utworzenie 5000 miejsc pracy"
    """
    __tablename__ = 'zopk_knowledge_facts'

    id = Column(Integer, primary_key=True)

    # --- Provenance ---
    source_chunk_id = Column(Integer, ForeignKey('zopk_knowledge_chunks.id'), nullable=False, index=True)
    source_news_id = Column(Integer, ForeignKey('zopk_news.id'), index=True)

    # --- The fact itself (subject / predicate / object plus full sentence) ---
    fact_type = Column(String(50), nullable=False)  # statistic, event, statement, decision, milestone
    subject = Column(String(255))  # who or what the fact concerns
    predicate = Column(String(100))  # kind of action or relation
    # NOTE: this attribute name hides the builtin `object` for the rest of
    # the class body; nothing below relies on the builtin.
    object = Column(Text)  # the information being stated
    full_text = Column(Text, nullable=False)  # whole fact written as a sentence

    # --- Queryable structured values ---
    numeric_value = Column(Numeric(20, 2))  # set when the fact carries a number
    numeric_unit = Column(String(50))  # PLN, EUR, jobs, MW, etc.
    date_value = Column(Date)  # set when the fact points at a specific date

    # --- Supporting text ---
    context = Column(Text)  # surrounding text used for disambiguation
    citation = Column(Text)  # original quote, where applicable

    # --- Denormalized entity list for fast access ---
    # Shape per the original note: [{id: 1, name: "...", type: "company"}, ...]
    entities_involved = Column(Text if IS_SQLITE else PG_JSONB)

    # --- Quality and verification ---
    confidence_score = Column(Numeric(3, 2))  # AI confidence
    is_verified = Column(Boolean, default=False)
    contradicts_fact_id = Column(Integer, ForeignKey('zopk_knowledge_facts.id'))  # set when contradicted

    # --- Embedding used for fact similarity ---
    embedding = Column(Text)

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # --- ORM links ---
    source_chunk = relationship('ZOPKKnowledgeChunk', backref='facts')
    source_news = relationship('ZOPKNews', backref='facts')
    # Self-referential link resolved through contradicts_fact_id.
    contradicted_by = relationship('ZOPKKnowledgeFact', remote_side=[id], foreign_keys=[contradicts_fact_id])
|
||||
|
||||
|
||||
class ZOPKKnowledgeEntityMention(Base):
    """
    Association row connecting a knowledge chunk with an entity it mentions.

    Records where inside the chunk the entity appears and in what context.
    """
    __tablename__ = 'zopk_knowledge_entity_mentions'

    # The same entity may occur several times in one chunk, but only once
    # per start position.
    __table_args__ = (
        UniqueConstraint('chunk_id', 'entity_id', 'start_position', name='uq_chunk_entity_position'),
    )

    id = Column(Integer, primary_key=True)

    chunk_id = Column(Integer, ForeignKey('zopk_knowledge_chunks.id'), nullable=False, index=True)
    entity_id = Column(Integer, ForeignKey('zopk_knowledge_entities.id'), nullable=False, index=True)

    # --- How and where the entity was mentioned ---
    mention_text = Column(String(500))  # exact text matched to the entity
    mention_type = Column(String(50))  # direct, reference, pronoun
    start_position = Column(Integer)  # character offset within the chunk
    end_position = Column(Integer)

    # --- Contextual classification ---
    sentiment = Column(String(20))  # positive, neutral, negative
    role_in_context = Column(String(100))  # subject, object, beneficiary, partner

    confidence = Column(Numeric(3, 2))  # confidence of the entity linking

    created_at = Column(DateTime, default=datetime.now)

    # --- ORM links ---
    chunk = relationship('ZOPKKnowledgeChunk', backref='entity_mentions')
    entity = relationship('ZOPKKnowledgeEntity', backref='mentions')
|
||||
|
||||
|
||||
class ZOPKKnowledgeRelation(Base):
    """
    A typed edge between two entities found in the knowledge base.

    Together these rows form the knowledge graph of the ZOPK ecosystem.

    Sample edges:
    - Company A → "partner" → Company B
    - Person X → "CEO of" → Company Y
    - Project Z → "funded by" → Organization W
    """
    __tablename__ = 'zopk_knowledge_relations'

    # At most one row per (entity A, entity B, relation type) triple.
    __table_args__ = (
        UniqueConstraint('entity_a_id', 'entity_b_id', 'relation_type', name='uq_entity_relation'),
    )

    id = Column(Integer, primary_key=True)

    # --- The two endpoints ---
    entity_a_id = Column(Integer, ForeignKey('zopk_knowledge_entities.id'), nullable=False, index=True)
    entity_b_id = Column(Integer, ForeignKey('zopk_knowledge_entities.id'), nullable=False, index=True)

    # --- What kind of edge this is ---
    relation_type = Column(String(100), nullable=False)  # partner, investor, supplier, competitor, subsidiary, employs
    relation_subtype = Column(String(100))  # finer grain: strategic_partner, minority_investor
    is_bidirectional = Column(Boolean, default=False)  # True for "partners", False for "invests in"

    # --- Supporting evidence ---
    source_chunk_id = Column(Integer, ForeignKey('zopk_knowledge_chunks.id'))
    source_fact_id = Column(Integer, ForeignKey('zopk_knowledge_facts.id'))
    evidence_text = Column(Text)  # quote that backs the relation

    # --- Validity window ---
    valid_from = Column(Date)  # when the relation began
    valid_until = Column(Date)  # when it ended (NULL = still valid)
    is_current = Column(Boolean, default=True)

    # --- Strength and confidence ---
    strength = Column(Integer)  # 1-5 strength of the relation
    confidence = Column(Numeric(3, 2))  # AI confidence in the relation
    mention_count = Column(Integer, default=1)  # number of times the relation was found

    # --- Human verification ---
    is_verified = Column(Boolean, default=False)
    verified_by = Column(Integer, ForeignKey('users.id'))

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # --- ORM links ---
    # Two foreign keys target the entities table, so each relationship names
    # the column it follows.
    entity_a = relationship('ZOPKKnowledgeEntity', foreign_keys=[entity_a_id], backref='relations_as_subject')
    entity_b = relationship('ZOPKKnowledgeEntity', foreign_keys=[entity_b_id], backref='relations_as_object')
    source_chunk = relationship('ZOPKKnowledgeChunk', backref='discovered_relations')
    source_fact = relationship('ZOPKKnowledgeFact', backref='relation_evidence')
    verifier = relationship('User', foreign_keys=[verified_by])
|
||||
|
||||
|
||||
class ZOPKKnowledgeExtractionJob(Base):
    """
    Bookkeeping record for one knowledge-extraction run over an approved article.

    The intent stated by the authors is one job per article; the row keeps
    the configuration used, progress, result counters and cost.
    """
    __tablename__ = 'zopk_knowledge_extraction_jobs'

    id = Column(Integer, primary_key=True)
    job_id = Column(String(100), unique=True, nullable=False, index=True)

    # --- Article being processed ---
    news_id = Column(Integer, ForeignKey('zopk_news.id'), nullable=False, index=True)

    # --- Extraction settings ---
    extraction_model = Column(String(100))  # e.g. gemini-2.0-flash
    chunk_size = Column(Integer, default=800)  # target tokens per chunk
    chunk_overlap = Column(Integer, default=100)  # overlapping tokens between chunks

    # --- Result counters ---
    chunks_created = Column(Integer, default=0)
    entities_extracted = Column(Integer, default=0)
    facts_extracted = Column(Integer, default=0)
    relations_discovered = Column(Integer, default=0)

    # --- Cost accounting ---
    tokens_used = Column(Integer, default=0)
    cost_cents = Column(Numeric(10, 4), default=0)

    # --- Lifecycle ---
    status = Column(String(20), default='pending')  # pending, running, completed, failed
    error_message = Column(Text)
    progress_percent = Column(Integer, default=0)

    # --- Timestamps of the run ---
    started_at = Column(DateTime)
    completed_at = Column(DateTime)

    # --- What started the job ---
    triggered_by = Column(String(50))  # auto (on approval), manual, batch
    triggered_by_user = Column(Integer, ForeignKey('users.id'))

    created_at = Column(DateTime, default=datetime.now)

    # --- ORM links ---
    news = relationship('ZOPKNews', backref='extraction_jobs')
    user = relationship('User', foreign_keys=[triggered_by_user])
|
||||
|
||||
|
||||
# ============================================================
|
||||
# AI USAGE TRACKING MODELS
|
||||
# ============================================================
|
||||
|
||||
@ -818,6 +818,156 @@
|
||||
flex-direction: column;
|
||||
}
|
||||
}
|
||||
|
||||
/* Progress phases (search → filter → AI → save) */
|
||||
.progress-phases {
|
||||
display: flex;
|
||||
gap: var(--spacing-xs);
|
||||
margin-bottom: var(--spacing-md);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.progress-phase {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 6px 12px;
|
||||
border-radius: var(--radius);
|
||||
font-size: var(--font-size-xs);
|
||||
background: rgba(255,255,255,0.1);
|
||||
opacity: 0.5;
|
||||
transition: all 0.3s ease;
|
||||
}
|
||||
|
||||
.progress-phase.active {
|
||||
opacity: 1;
|
||||
background: rgba(255,255,255,0.25);
|
||||
animation: pulse 1.5s ease-in-out infinite;
|
||||
}
|
||||
|
||||
.progress-phase.completed {
|
||||
opacity: 1;
|
||||
background: rgba(34, 197, 94, 0.3);
|
||||
}
|
||||
|
||||
@keyframes pulse {
|
||||
0%, 100% { transform: scale(1); }
|
||||
50% { transform: scale(1.02); }
|
||||
}
|
||||
|
||||
.progress-phase-icon {
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
/* Search results container */
|
||||
.search-results-container {
|
||||
margin-top: var(--spacing-lg);
|
||||
padding: var(--spacing-lg);
|
||||
background: rgba(255,255,255,0.1);
|
||||
border-radius: var(--radius-lg);
|
||||
animation: fadeIn 0.5s ease;
|
||||
}
|
||||
|
||||
@keyframes fadeIn {
|
||||
from { opacity: 0; transform: translateY(-10px); }
|
||||
to { opacity: 1; transform: translateY(0); }
|
||||
}
|
||||
|
||||
.search-results-summary {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
|
||||
gap: var(--spacing-md);
|
||||
margin-bottom: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.summary-stat {
|
||||
text-align: center;
|
||||
padding: var(--spacing-md);
|
||||
background: rgba(255,255,255,0.1);
|
||||
border-radius: var(--radius);
|
||||
}
|
||||
|
||||
.summary-stat .value {
|
||||
font-size: var(--font-size-2xl);
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.summary-stat .label {
|
||||
font-size: var(--font-size-xs);
|
||||
opacity: 0.8;
|
||||
}
|
||||
|
||||
.summary-stat.success .value { color: #86efac; }
|
||||
.summary-stat.warning .value { color: #fde68a; }
|
||||
.summary-stat.error .value { color: #fca5a5; }
|
||||
.summary-stat.info .value { color: #93c5fd; }
|
||||
|
||||
/* Auto-approved articles section */
|
||||
.auto-approved-section {
|
||||
margin-top: var(--spacing-lg);
|
||||
padding: var(--spacing-md);
|
||||
background: rgba(34, 197, 94, 0.15);
|
||||
border-radius: var(--radius);
|
||||
border: 1px solid rgba(34, 197, 94, 0.3);
|
||||
}
|
||||
|
||||
.auto-approved-section h4 {
|
||||
margin-bottom: var(--spacing-md);
|
||||
font-size: var(--font-size-sm);
|
||||
}
|
||||
|
||||
.auto-approved-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing-xs);
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.auto-approved-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-sm);
|
||||
padding: var(--spacing-xs) var(--spacing-sm);
|
||||
background: rgba(255,255,255,0.1);
|
||||
border-radius: var(--radius-sm);
|
||||
font-size: var(--font-size-xs);
|
||||
}
|
||||
|
||||
.auto-approved-item .stars {
|
||||
color: #fbbf24;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.auto-approved-item .title {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.auto-approved-item .source {
|
||||
color: rgba(255,255,255,0.6);
|
||||
flex-shrink: 0;
|
||||
font-size: 10px;
|
||||
}
|
||||
|
||||
/* Refresh countdown */
|
||||
.refresh-countdown {
|
||||
margin-top: var(--spacing-lg);
|
||||
padding: var(--spacing-md);
|
||||
background: rgba(255,255,255,0.1);
|
||||
border-radius: var(--radius);
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
font-size: var(--font-size-sm);
|
||||
}
|
||||
|
||||
.refresh-countdown strong {
|
||||
font-size: var(--font-size-lg);
|
||||
color: #fde68a;
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
@ -938,13 +1088,28 @@
|
||||
<div class="progress-bar-container">
|
||||
<div class="progress-bar-fill" id="progressBar"></div>
|
||||
</div>
|
||||
<div class="progress-phases" id="progressPhases">
|
||||
<!-- Phases will be rendered by JS -->
|
||||
</div>
|
||||
<div class="progress-steps" id="progressSteps"></div>
|
||||
</div>
|
||||
|
||||
<!-- Source Stats (shown after completion) -->
|
||||
<div class="source-stats" id="sourceStats">
|
||||
<h4>Statystyki źródeł</h4>
|
||||
<div class="source-stats-grid" id="sourceStatsGrid"></div>
|
||||
<!-- Results Container (shown after completion) -->
|
||||
<div class="search-results-container" id="searchResultsContainer" style="display: none;">
|
||||
<!-- Summary Stats -->
|
||||
<div class="search-results-summary" id="searchResultsSummary"></div>
|
||||
|
||||
<!-- Auto-approved articles list -->
|
||||
<div class="auto-approved-section" id="autoApprovedSection" style="display: none;">
|
||||
<h4>✅ Artykuły automatycznie zaakceptowane (3+★)</h4>
|
||||
<div class="auto-approved-list" id="autoApprovedList"></div>
|
||||
</div>
|
||||
|
||||
<!-- Countdown to refresh -->
|
||||
<div class="refresh-countdown" id="refreshCountdown">
|
||||
<span>Odświeżam za <strong id="countdownSeconds">8</strong> sekund...</span>
|
||||
<button type="button" class="btn btn-sm btn-secondary" onclick="location.reload()">Odśwież teraz</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="searchResult" style="margin-top: var(--spacing-md); display: none;"></div>
|
||||
@ -1758,64 +1923,77 @@ const ALL_SOURCES = Object.keys(SOURCE_NAMES);
|
||||
|
||||
async function searchNews() {
|
||||
const btn = document.getElementById('searchBtn');
|
||||
const resultDiv = document.getElementById('searchResult');
|
||||
const progressContainer = document.getElementById('progressContainer');
|
||||
const progressBar = document.getElementById('progressBar');
|
||||
const progressStatus = document.getElementById('progressStatus');
|
||||
const progressPercent = document.getElementById('progressPercent');
|
||||
const progressPhases = document.getElementById('progressPhases');
|
||||
const progressSteps = document.getElementById('progressSteps');
|
||||
const sourceStats = document.getElementById('sourceStats');
|
||||
const sourceStatsGrid = document.getElementById('sourceStatsGrid');
|
||||
const resultsContainer = document.getElementById('searchResultsContainer');
|
||||
const resultsSummary = document.getElementById('searchResultsSummary');
|
||||
const autoApprovedSection = document.getElementById('autoApprovedSection');
|
||||
const autoApprovedList = document.getElementById('autoApprovedList');
|
||||
const query = document.getElementById('searchQuery').value;
|
||||
|
||||
// Process phases definition
|
||||
const PHASES = [
|
||||
{ id: 'search', icon: '🔍', label: 'Wyszukiwanie' },
|
||||
{ id: 'filter', icon: '🚫', label: 'Filtrowanie' },
|
||||
{ id: 'ai', icon: '🤖', label: 'Analiza AI' },
|
||||
{ id: 'save', icon: '💾', label: 'Zapisywanie' }
|
||||
];
|
||||
|
||||
// Reset UI
|
||||
btn.disabled = true;
|
||||
btn.textContent = 'Szukam...';
|
||||
resultDiv.style.display = 'none';
|
||||
sourceStats.classList.remove('active');
|
||||
resultsContainer.style.display = 'none';
|
||||
autoApprovedSection.style.display = 'none';
|
||||
progressContainer.classList.add('active');
|
||||
progressBar.style.width = '0%';
|
||||
progressBar.style.background = ''; // Reset color
|
||||
progressPercent.textContent = '0%';
|
||||
|
||||
// Build initial progress steps
|
||||
progressSteps.innerHTML = ALL_SOURCES.map((src, idx) => `
|
||||
<div class="progress-step pending" id="step-${src}">
|
||||
<span class="progress-step-icon"></span>
|
||||
<span>${SOURCE_NAMES[src]}</span>
|
||||
<span class="progress-step-count" id="count-${src}">-</span>
|
||||
// Build progress phases UI
|
||||
progressPhases.innerHTML = PHASES.map(phase => `
|
||||
<div class="progress-phase pending" id="phase-${phase.id}">
|
||||
<span class="progress-phase-icon">${phase.icon}</span>
|
||||
<span>${phase.label}</span>
|
||||
</div>
|
||||
`).join('');
|
||||
|
||||
// Simulate progress while waiting for API
|
||||
let currentStep = 0;
|
||||
const totalSteps = ALL_SOURCES.length + 1; // +1 for cross-verification
|
||||
// Build initial progress steps (will be populated from process_log)
|
||||
progressSteps.innerHTML = '<div class="progress-step active"><span class="progress-step-icon">⏳</span><span>Inicjalizacja...</span></div>';
|
||||
|
||||
// Simulate progress phases while waiting for API
|
||||
let currentPhaseIdx = 0;
|
||||
const phaseMessages = [
|
||||
'Przeszukuję źródła (Brave API + RSS)...',
|
||||
'Filtruję wyniki (blacklist, słowa kluczowe)...',
|
||||
'Analiza AI (Gemini ocenia artykuły)...',
|
||||
'Zapisuję do bazy wiedzy...'
|
||||
];
|
||||
|
||||
const progressInterval = setInterval(() => {
|
||||
if (currentStep < ALL_SOURCES.length) {
|
||||
// Mark previous step as completed
|
||||
if (currentStep > 0) {
|
||||
const prevStep = document.getElementById(`step-${ALL_SOURCES[currentStep - 1]}`);
|
||||
if (prevStep) {
|
||||
prevStep.classList.remove('active');
|
||||
prevStep.classList.add('completed');
|
||||
if (currentPhaseIdx < PHASES.length) {
|
||||
// Update phase UI
|
||||
PHASES.forEach((phase, idx) => {
|
||||
const el = document.getElementById(`phase-${phase.id}`);
|
||||
if (el) {
|
||||
el.classList.remove('pending', 'active', 'completed');
|
||||
if (idx < currentPhaseIdx) el.classList.add('completed');
|
||||
else if (idx === currentPhaseIdx) el.classList.add('active');
|
||||
else el.classList.add('pending');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Mark current step as active
|
||||
const currStep = document.getElementById(`step-${ALL_SOURCES[currentStep]}`);
|
||||
if (currStep) {
|
||||
currStep.classList.remove('pending');
|
||||
currStep.classList.add('active');
|
||||
}
|
||||
|
||||
progressStatus.textContent = `Przeszukiwanie: ${SOURCE_NAMES[ALL_SOURCES[currentStep]]}`;
|
||||
const percent = Math.round(((currentStep + 1) / totalSteps) * 80);
|
||||
progressStatus.textContent = phaseMessages[currentPhaseIdx];
|
||||
const percent = Math.round(((currentPhaseIdx + 1) / PHASES.length) * 80);
|
||||
progressBar.style.width = `${percent}%`;
|
||||
progressPercent.textContent = `${percent}%`;
|
||||
|
||||
currentStep++;
|
||||
currentPhaseIdx++;
|
||||
}
|
||||
}, 800);
|
||||
}, 2500); // Each phase ~2.5s for realistic timing
|
||||
|
||||
try {
|
||||
const response = await fetch('{{ url_for("api_zopk_search_news") }}', {
|
||||
@ -1831,65 +2009,112 @@ async function searchNews() {
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
// Mark all steps as completed
|
||||
ALL_SOURCES.forEach(src => {
|
||||
const step = document.getElementById(`step-${src}`);
|
||||
if (step) {
|
||||
step.classList.remove('pending', 'active');
|
||||
step.classList.add('completed');
|
||||
}
|
||||
});
|
||||
|
||||
if (data.success) {
|
||||
// Update counts from source_stats
|
||||
if (data.source_stats) {
|
||||
Object.entries(data.source_stats).forEach(([src, count]) => {
|
||||
const countEl = document.getElementById(`count-${src}`);
|
||||
if (countEl) {
|
||||
countEl.textContent = count;
|
||||
}
|
||||
});
|
||||
}
|
||||
// Mark all phases as completed
|
||||
PHASES.forEach(phase => {
|
||||
const el = document.getElementById(`phase-${phase.id}`);
|
||||
if (el) {
|
||||
el.classList.remove('pending', 'active');
|
||||
el.classList.add('completed');
|
||||
}
|
||||
});
|
||||
|
||||
// Show cross-verification step
|
||||
progressStatus.textContent = 'Weryfikacja krzyżowa zakończona';
|
||||
progressBar.style.width = '100%';
|
||||
progressPercent.textContent = '100%';
|
||||
progressStatus.textContent = '✅ Wyszukiwanie zakończone!';
|
||||
|
||||
// Show source stats
|
||||
if (data.source_stats && Object.keys(data.source_stats).length > 0) {
|
||||
sourceStatsGrid.innerHTML = Object.entries(data.source_stats)
|
||||
.filter(([src, count]) => count > 0)
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.map(([src, count]) => `
|
||||
<div class="source-stat-item">
|
||||
<span>${SOURCE_NAMES[src] || src}</span>
|
||||
<span class="count">${count}</span>
|
||||
</div>
|
||||
`).join('');
|
||||
sourceStats.classList.add('active');
|
||||
// Display process log as steps
|
||||
if (data.process_log && data.process_log.length > 0) {
|
||||
// Show last few important steps
|
||||
const importantSteps = data.process_log.filter(log =>
|
||||
log.step.includes('done') || log.step.includes('complete') || log.phase === 'complete'
|
||||
).slice(-6);
|
||||
|
||||
progressSteps.innerHTML = importantSteps.map(log => `
|
||||
<div class="progress-step completed">
|
||||
<span class="progress-step-icon">✓</span>
|
||||
<span>${log.message}</span>
|
||||
${log.count > 0 ? `<span class="progress-step-count">${log.count}</span>` : ''}
|
||||
</div>
|
||||
`).join('');
|
||||
}
|
||||
|
||||
// Show result message
|
||||
resultDiv.style.display = 'block';
|
||||
resultDiv.innerHTML = `
|
||||
<p style="color: #dcfce7;">
|
||||
✓ ${data.message}<br>
|
||||
<small>Auto-zatwierdzone (3+ źródeł): ${data.auto_approved || 0}</small>
|
||||
</p>
|
||||
// Hide progress container after a moment
|
||||
setTimeout(() => {
|
||||
progressContainer.classList.remove('active');
|
||||
}, 1500);
|
||||
|
||||
// Show results container
|
||||
resultsContainer.style.display = 'block';
|
||||
|
||||
// Build summary stats
|
||||
resultsSummary.innerHTML = `
|
||||
<div class="summary-stat info">
|
||||
<div class="value">${data.total_found || 0}</div>
|
||||
<div class="label">Znaleziono</div>
|
||||
</div>
|
||||
<div class="summary-stat warning">
|
||||
<div class="value">${(data.blacklisted || 0) + (data.keyword_filtered || 0)}</div>
|
||||
<div class="label">Odfiltrowano</div>
|
||||
</div>
|
||||
<div class="summary-stat error">
|
||||
<div class="value">${data.ai_rejected || 0}</div>
|
||||
<div class="label">AI odrzucił</div>
|
||||
</div>
|
||||
<div class="summary-stat success">
|
||||
<div class="value">${data.ai_approved || 0}</div>
|
||||
<div class="label">AI zaakceptował</div>
|
||||
</div>
|
||||
<div class="summary-stat success">
|
||||
<div class="value">${data.saved_new || 0}</div>
|
||||
<div class="label">Nowe w bazie</div>
|
||||
</div>
|
||||
`;
|
||||
|
||||
// Auto-refresh after 3 seconds
|
||||
setTimeout(() => {
|
||||
progressStatus.textContent = 'Odświeżanie strony...';
|
||||
location.reload();
|
||||
}, 3000);
|
||||
// Show auto-approved articles list
|
||||
if (data.auto_approved_articles && data.auto_approved_articles.length > 0) {
|
||||
autoApprovedSection.style.display = 'block';
|
||||
autoApprovedList.innerHTML = data.auto_approved_articles.map(article => {
|
||||
const stars = '★'.repeat(article.score) + '☆'.repeat(5 - article.score);
|
||||
return `
|
||||
<div class="auto-approved-item">
|
||||
<span class="stars">${stars}</span>
|
||||
<span class="title">${article.title}</span>
|
||||
<span class="source">${article.source || ''}</span>
|
||||
</div>
|
||||
`;
|
||||
}).join('');
|
||||
}
|
||||
|
||||
// Start countdown to refresh (8 seconds)
|
||||
let countdown = 8;
|
||||
const countdownEl = document.getElementById('countdownSeconds');
|
||||
const countdownInterval = setInterval(() => {
|
||||
countdown--;
|
||||
countdownEl.textContent = countdown;
|
||||
if (countdown <= 0) {
|
||||
clearInterval(countdownInterval);
|
||||
location.reload();
|
||||
}
|
||||
}, 1000);
|
||||
|
||||
} else {
|
||||
// Error handling
|
||||
progressBar.style.width = '100%';
|
||||
progressBar.style.background = '#fca5a5';
|
||||
progressStatus.textContent = 'Błąd wyszukiwania';
|
||||
resultDiv.style.display = 'block';
|
||||
resultDiv.innerHTML = `<p style="color: #fca5a5;">Błąd: ${data.error}</p>`;
|
||||
|
||||
PHASES.forEach(phase => {
|
||||
const el = document.getElementById(`phase-${phase.id}`);
|
||||
if (el) el.classList.remove('active');
|
||||
});
|
||||
|
||||
progressSteps.innerHTML = `
|
||||
<div class="progress-step" style="color: #fca5a5;">
|
||||
<span class="progress-step-icon">✗</span>
|
||||
<span>Błąd: ${data.error}</span>
|
||||
</div>
|
||||
`;
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Szukaj artykułów';
|
||||
}
|
||||
@ -1898,8 +2123,13 @@ async function searchNews() {
|
||||
progressBar.style.width = '100%';
|
||||
progressBar.style.background = '#fca5a5';
|
||||
progressStatus.textContent = 'Błąd połączenia';
|
||||
resultDiv.style.display = 'block';
|
||||
resultDiv.innerHTML = `<p style="color: #fca5a5;">Błąd połączenia: ${error.message}</p>`;
|
||||
|
||||
progressSteps.innerHTML = `
|
||||
<div class="progress-step" style="color: #fca5a5;">
|
||||
<span class="progress-step-icon">✗</span>
|
||||
<span>Błąd połączenia: ${error.message}</span>
|
||||
</div>
|
||||
`;
|
||||
btn.disabled = false;
|
||||
btn.textContent = 'Szukaj artykułów';
|
||||
}
|
||||
|
||||
@ -248,6 +248,60 @@
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
/* Star filter styling */
|
||||
.star-filter .star-icon {
|
||||
font-size: 10px;
|
||||
letter-spacing: -1px;
|
||||
}
|
||||
.star-filter.active .star-icon {
|
||||
color: #f59e0b;
|
||||
}
|
||||
|
||||
/* Bulk actions */
|
||||
.bulk-actions {
|
||||
background: var(--surface);
|
||||
padding: var(--spacing-md);
|
||||
border-radius: var(--radius);
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
|
||||
/* Mass reject modal */
|
||||
.mass-reject-options {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing-sm);
|
||||
margin: var(--spacing-lg) 0;
|
||||
}
|
||||
.mass-reject-option {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-sm);
|
||||
padding: var(--spacing-sm) var(--spacing-md);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
cursor: pointer;
|
||||
transition: var(--transition);
|
||||
}
|
||||
.mass-reject-option:hover {
|
||||
background: var(--background);
|
||||
}
|
||||
.mass-reject-option.selected {
|
||||
background: #fee2e2;
|
||||
border-color: #dc3545;
|
||||
}
|
||||
.mass-reject-option input[type="checkbox"] {
|
||||
accent-color: #dc3545;
|
||||
}
|
||||
.mass-reject-stars {
|
||||
color: #f59e0b;
|
||||
font-size: 14px;
|
||||
}
|
||||
.mass-reject-count {
|
||||
margin-left: auto;
|
||||
font-size: var(--font-size-sm);
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.news-table {
|
||||
display: block;
|
||||
@ -260,6 +314,13 @@
|
||||
width: 100%;
|
||||
margin-top: var(--spacing-md);
|
||||
}
|
||||
.filters {
|
||||
flex-direction: column;
|
||||
align-items: flex-start;
|
||||
}
|
||||
.bulk-actions {
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
@ -275,10 +336,19 @@
|
||||
|
||||
<div class="filters">
|
||||
<span class="text-muted">Status:</span>
|
||||
<a href="{{ url_for('admin_zopk_news', status='all', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'all' %}active{% endif %}">Wszystkie</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='pending', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'pending' %}active{% endif %}">Oczekujące</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='approved', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'approved' %}active{% endif %}">Zatwierdzone</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='rejected', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'rejected' %}active{% endif %}">Odrzucone</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='all', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'all' %}active{% endif %}">Wszystkie</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='pending', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'pending' %}active{% endif %}">Oczekujące</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='approved', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'approved' %}active{% endif %}">Zatwierdzone</a>
|
||||
<a href="{{ url_for('admin_zopk_news', status='rejected', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'rejected' %}active{% endif %}">Odrzucone</a>
|
||||
|
||||
<span class="text-muted" style="margin-left: var(--spacing-md);">Gwiazdki:</span>
|
||||
<a href="{{ url_for('admin_zopk_news', status=current_status, stars='all', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_stars == 'all' %}active{% endif %}">Wszystkie</a>
|
||||
{% for star in [5, 4, 3, 2, 1] %}
|
||||
<a href="{{ url_for('admin_zopk_news', status=current_status, stars=star, sort=current_sort, dir=current_dir) }}" class="filter-btn star-filter {% if current_stars == star|string %}active{% endif %}">
|
||||
<span class="star-icon">{{ '★' * star }}{{ '☆' * (5 - star) }}</span>
|
||||
</a>
|
||||
{% endfor %}
|
||||
<a href="{{ url_for('admin_zopk_news', status=current_status, stars='none', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_stars == 'none' %}active{% endif %}">Brak oceny</a>
|
||||
|
||||
<div class="sort-controls">
|
||||
<span class="text-muted">Sortuj:</span>
|
||||
@ -293,6 +363,19 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Mass reject by stars -->
|
||||
<div class="bulk-actions" style="margin-bottom: var(--spacing-lg); display: flex; gap: var(--spacing-md); align-items: center;">
|
||||
<span class="text-muted">Akcje masowe:</span>
|
||||
<button class="action-btn reject" onclick="showMassRejectModal()" style="padding: 6px 12px;">
|
||||
🗑️ Odrzuć po gwiazdkach
|
||||
</button>
|
||||
{% if current_stars != 'all' and current_stars != 'none' and current_status == 'pending' %}
|
||||
<button class="action-btn reject" onclick="rejectCurrentFilter()" style="padding: 6px 12px;">
|
||||
✕ Odrzuć wszystkie {{ current_stars }}★ ({{ total }})
|
||||
</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{% if news_items %}
|
||||
<div class="news-table-wrapper">
|
||||
<table class="news-table">
|
||||
@ -371,21 +454,21 @@
|
||||
{% if total_pages > 1 %}
|
||||
<nav class="pagination">
|
||||
{% if page > 1 %}
|
||||
<a href="{{ url_for('admin_zopk_news', page=page-1, status=current_status, sort=current_sort, dir=current_dir) }}">« Poprzednia</a>
|
||||
<a href="{{ url_for('admin_zopk_news', page=page-1, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">« Poprzednia</a>
|
||||
{% endif %}
|
||||
|
||||
{% for p in range(1, total_pages + 1) %}
|
||||
{% if p == page %}
|
||||
<span class="current">{{ p }}</span>
|
||||
{% elif p <= 3 or p > total_pages - 3 or (p >= page - 1 and p <= page + 1) %}
|
||||
<a href="{{ url_for('admin_zopk_news', page=p, status=current_status, sort=current_sort, dir=current_dir) }}">{{ p }}</a>
|
||||
<a href="{{ url_for('admin_zopk_news', page=p, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">{{ p }}</a>
|
||||
{% elif p == 4 or p == total_pages - 3 %}
|
||||
<span>...</span>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
{% if page < total_pages %}
|
||||
<a href="{{ url_for('admin_zopk_news', page=page+1, status=current_status, sort=current_sort, dir=current_dir) }}">Następna »</a>
|
||||
<a href="{{ url_for('admin_zopk_news', page=page+1, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">Następna »</a>
|
||||
{% endif %}
|
||||
</nav>
|
||||
{% endif %}
|
||||
@ -415,6 +498,44 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Mass Reject by Stars Modal -->
|
||||
<div class="modal-overlay" id="massRejectModal">
|
||||
<div class="modal" style="max-width: 480px;">
|
||||
<div style="text-align: center; margin-bottom: var(--spacing-md);">
|
||||
<div class="modal-icon">🗑️</div>
|
||||
<h3 style="margin-bottom: var(--spacing-xs);">Masowe odrzucanie po gwiazdkach</h3>
|
||||
<p class="modal-description">Wybierz oceny gwiazdkowe, które chcesz odrzucić.<br>Dotyczy tylko artykułów <strong>oczekujących</strong>.</p>
|
||||
</div>
|
||||
<div class="mass-reject-options" id="massRejectOptions">
|
||||
{% for star in [1, 2, 3, 4, 5] %}
|
||||
<label class="mass-reject-option" data-star="{{ star }}">
|
||||
<input type="checkbox" name="reject_stars" value="{{ star }}">
|
||||
<span class="mass-reject-stars">{{ '★' * star }}{{ '☆' * (5 - star) }}</span>
|
||||
<span>{{ star }} {{ 'gwiazdka' if star == 1 else ('gwiazdki' if star < 5 else 'gwiazdek') }}</span>
|
||||
<span class="mass-reject-count" id="star-count-{{ star }}">— szt.</span>
|
||||
</label>
|
||||
{% endfor %}
|
||||
<label class="mass-reject-option" data-star="0">
|
||||
<input type="checkbox" name="reject_stars" value="0">
|
||||
<span class="mass-reject-stars" style="color: var(--text-secondary);">—</span>
|
||||
<span>Brak oceny AI</span>
|
||||
<span class="mass-reject-count" id="star-count-0">— szt.</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label>Powód odrzucenia (wspólny dla wszystkich):</label>
|
||||
<input type="text" id="massRejectReason" placeholder="np. Niska ocena AI, nieistotne artykuły...">
|
||||
</div>
|
||||
<div id="massRejectSummary" style="background: #fee2e2; padding: var(--spacing-md); border-radius: var(--radius); margin-bottom: var(--spacing-md); display: none;">
|
||||
<strong>Do odrzucenia:</strong> <span id="massRejectTotal">0</span> artykułów
|
||||
</div>
|
||||
<div class="modal-actions" style="justify-content: center;">
|
||||
<button type="button" class="btn btn-secondary" onclick="closeMassRejectModal()">Anuluj</button>
|
||||
<button type="button" class="btn btn-danger" id="massRejectConfirmBtn" onclick="executeMassReject()">Odrzuć wybrane</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="toastContainer" style="position: fixed; top: 80px; right: 20px; z-index: 1100; display: flex; flex-direction: column; gap: 10px;"></div>
|
||||
|
||||
<style>
|
||||
@ -433,6 +554,7 @@
|
||||
|
||||
{% block extra_js %}
|
||||
const csrfToken = '{{ csrf_token() }}';
|
||||
const currentStars = '{{ current_stars }}';
|
||||
|
||||
// Universal Modal System
|
||||
let confirmModalResolve = null;
|
||||
@ -556,4 +678,207 @@ async function rejectNews(newsId) {
|
||||
showToast('Błąd połączenia: ' + error.message, 'error');
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================
|
||||
// Mass Reject by Stars
|
||||
// ============================================
|
||||
|
||||
let starCounts = {};
|
||||
|
||||
/**
 * Open the "mass reject by stars" modal.
 *
 * Fetches the per-star article counts from `/admin/zopk/news/star-counts`,
 * writes them into the `star-count-<n>` labels (n = 0..5), clears any previous
 * selection and reason, and finally shows the modal.
 *
 * The modal is still opened when the counts cannot be loaded, but in that case
 * the cached `starCounts` is emptied, the labels fall back to the placeholder
 * and the admin is told about the failure via a toast.
 *
 * @returns {Promise<void>}
 */
async function showMassRejectModal() {
    const modal = document.getElementById('massRejectModal');
    const checkboxes = document.querySelectorAll('#massRejectOptions input[type="checkbox"]');

    // Forget counts from an earlier open: if this refresh fails, the summary
    // and the confirmation dialog must not be computed from stale numbers.
    starCounts = {};
    let countsLoaded = false;

    try {
        const response = await fetch('/admin/zopk/news/star-counts', {
            method: 'GET',
            headers: { 'X-CSRFToken': csrfToken }
        });
        // fetch() resolves on HTTP errors too; treat them as failures instead
        // of trying to parse an error page as JSON.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const data = await response.json();
        if (data.success) {
            starCounts = data.counts || {};
            countsLoaded = true;
        } else {
            showToast(data.error || 'Wystąpił błąd', 'error');
        }
    } catch (error) {
        console.error('Failed to fetch star counts:', error);
        showToast('Błąd połączenia: ' + error.message, 'error');
    }

    // Refresh the per-star labels (0 = articles without an AI rating).
    for (let star = 0; star <= 5; star++) {
        const countEl = document.getElementById(`star-count-${star}`);
        if (countEl) {
            countEl.textContent = countsLoaded ? `${starCounts[star] || 0} szt.` : '— szt.';
        }
    }

    // Reset the form to a clean state.
    checkboxes.forEach(cb => {
        cb.checked = false;
        cb.closest('.mass-reject-option').classList.remove('selected');
    });
    document.getElementById('massRejectReason').value = '';
    document.getElementById('massRejectSummary').style.display = 'none';

    modal.classList.add('active');

    // Re-registering the same function reference is a no-op for
    // addEventListener, so opening the modal repeatedly does not stack handlers.
    checkboxes.forEach(cb => {
        cb.addEventListener('change', updateMassRejectSummary);
    });
}
|
||||
|
||||
/**
 * Hide the mass-reject modal by dropping its `active` class.
 */
function closeMassRejectModal() {
    const overlay = document.getElementById('massRejectModal');
    overlay.classList.remove('active');
}
|
||||
|
||||
/**
 * Recompute the mass-reject summary after a checkbox change.
 *
 * Highlights the rows whose checkbox is ticked, sums the cached `starCounts`
 * for the ticked star values, writes the total into `#massRejectTotal` and
 * shows `#massRejectSummary` only when that total is greater than zero.
 */
function updateMassRejectSummary() {
    let selectedTotal = 0;

    // Single pass over every option: sync the highlight and accumulate counts.
    for (const box of document.querySelectorAll('#massRejectOptions input[type="checkbox"]')) {
        if (box.checked) {
            selectedTotal += starCounts[parseInt(box.value)] || 0;
        }
        box.closest('.mass-reject-option').classList.toggle('selected', box.checked);
    }

    const summaryBox = document.getElementById('massRejectSummary');
    const totalLabel = document.getElementById('massRejectTotal');
    totalLabel.textContent = selectedTotal;
    summaryBox.style.display = selectedTotal > 0 ? 'block' : 'none';
}
|
||||
|
||||
/**
 * Reject every article matching the star ratings ticked in the modal.
 *
 * Flow: read the ticked star values and the shared reason, bail out when
 * nothing is ticked or the cached counts add up to zero, ask for confirmation,
 * close the modal, then POST `{ stars, reason }` to
 * `/admin/zopk/news/reject-by-stars`. On success the page reloads after one
 * second; otherwise an error toast is shown.
 *
 * @returns {Promise<void>}
 */
async function executeMassReject() {
    const ticked = [...document.querySelectorAll('#massRejectOptions input[type="checkbox"]:checked')];
    const stars = ticked.map(box => parseInt(box.value));
    const reason = document.getElementById('massRejectReason').value.trim();

    if (!stars.length) {
        showToast('Wybierz co najmniej jedną ocenę gwiazdkową', 'warning');
        return;
    }

    // Expected number of affected articles, based on the cached counts.
    const total = stars.reduce((sum, star) => sum + (starCounts[star] || 0), 0);
    if (total === 0) {
        showToast('Brak artykułów do odrzucenia', 'info');
        closeMassRejectModal();
        return;
    }

    const dialogOptions = {
        icon: '⚠️',
        title: 'Potwierdzenie masowego odrzucenia',
        okText: `Odrzuć ${total} artykułów`,
        okClass: 'btn-danger'
    };
    const confirmed = await showConfirm(`Czy na pewno chcesz odrzucić ${total} artykułów?`, dialogOptions);
    if (!confirmed) {
        return;
    }

    closeMassRejectModal();

    try {
        const requestInit = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ stars, reason })
        };
        const response = await fetch('/admin/zopk/news/reject-by-stars', requestInit);
        const data = await response.json();

        if (!data.success) {
            showToast(data.error || 'Wystąpił błąd', 'error');
            return;
        }
        showToast(`Odrzucono ${data.count} artykułów`, 'success');
        // Give the toast a moment to be seen before the list is refreshed.
        setTimeout(() => location.reload(), 1000);
    } catch (error) {
        showToast('Błąd połączenia: ' + error.message, 'error');
    }
}
|
||||
|
||||
/**
 * Reject all articles that carry the star rating of the active filter.
 *
 * Reads the rating from the page-level `currentStars` value, looks up how many
 * articles have that rating via `/admin/zopk/news/star-counts`, asks for
 * confirmation and then POSTs to `/admin/zopk/news/reject-by-stars` with an
 * auto-generated reason. On success the page reloads after one second.
 *
 * A failed count lookup is reported as an error and aborts the action; it is
 * no longer mistaken for "there is nothing to reject".
 *
 * @returns {Promise<void>}
 */
async function rejectCurrentFilter() {
    const stars = parseInt(currentStars, 10);
    if (isNaN(stars) || stars < 1 || stars > 5) {
        showToast('Nieprawidłowy filtr gwiazdek', 'error');
        return;
    }

    // Fetch the count first so the confirmation can state how many articles
    // will be affected.
    let count = 0;
    try {
        const response = await fetch('/admin/zopk/news/star-counts');
        // fetch() does not reject on HTTP errors; check the status explicitly.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const data = await response.json();
        if (!data.success) {
            showToast(data.error || 'Wystąpił błąd', 'error');
            return;
        }
        count = (data.counts && data.counts[stars]) || 0;
    } catch (error) {
        // Previously swallowed silently, which left count at 0 and produced a
        // misleading "nothing to reject" message.
        showToast('Błąd połączenia: ' + error.message, 'error');
        return;
    }

    if (count === 0) {
        showToast('Brak artykułów do odrzucenia', 'info');
        return;
    }

    const confirmed = await showConfirm(
        `Czy na pewno chcesz odrzucić wszystkie ${count} artykułów z oceną ${stars}★?`,
        {
            icon: '⚠️',
            title: 'Potwierdzenie odrzucenia',
            okText: `Odrzuć ${count} artykułów`,
            okClass: 'btn-danger'
        }
    );

    if (!confirmed) return;

    try {
        const response = await fetch('/admin/zopk/news/reject-by-stars', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ stars: [stars], reason: `Masowo odrzucone - ocena ${stars}★` })
        });

        const data = await response.json();
        if (data.success) {
            showToast(`Odrzucono ${data.count} artykułów`, 'success');
            setTimeout(() => location.reload(), 1000);
        } else {
            showToast(data.error || 'Wystąpił błąd', 'error');
        }
    } catch (error) {
        showToast('Błąd połączenia: ' + error.message, 'error');
    }
}
|
||||
|
||||
// Pressing Escape anywhere on the page dismisses the mass-reject modal.
document.addEventListener('keydown', (event) => {
    if (event.key !== 'Escape') return;
    closeMassRejectModal();
});

// A click dismisses the modal only when it lands on the overlay element
// itself, not on anything rendered inside it.
document.getElementById('massRejectModal').addEventListener('click', (event) => {
    if (event.target.id !== 'massRejectModal') return;
    closeMassRejectModal();
});
|
||||
{% endblock %}
|
||||
|
||||
@ -138,48 +138,294 @@ RSS_SOURCES = {
|
||||
}
|
||||
}
|
||||
|
||||
# ZOPK-related keywords for filtering
|
||||
ZOPK_KEYWORDS = [
|
||||
# Project names
|
||||
'zielony okręg przemysłowy',
|
||||
'zopk',
|
||||
'kaszubia przemysłowa',
|
||||
# Energy projects
|
||||
'offshore wind polska',
|
||||
'offshore bałtyk',
|
||||
'farma wiatrowa bałtyk',
|
||||
'elektrownia jądrowa lubiatowo',
|
||||
'elektrownia jądrowa kopalino',
|
||||
'pej lubiatowo', # Polskie Elektrownie Jądrowe
|
||||
# Defense industry
|
||||
'kongsberg rumia',
|
||||
'kongsberg polska',
|
||||
'kongsberg defence',
|
||||
'przemysł obronny pomorze',
|
||||
'przemysł zbrojeniowy pomorze',
|
||||
# Technology
|
||||
'centrum danych gdynia',
|
||||
'centrum danych pomorze',
|
||||
'data center pomorze',
|
||||
'wodór pomorze',
|
||||
'hydrogen pomorze',
|
||||
'laboratoria wodorowe',
|
||||
# Key people
|
||||
'samsonowicz mon',
|
||||
'maciej samsonowicz',
|
||||
'kosiniak-kamysz przemysł',
|
||||
# Locations
|
||||
'transformacja energetyczna pomorze',
|
||||
'inwestycje wejherowo',
|
||||
'inwestycje rumia',
|
||||
'strefa ekonomiczna rumia',
|
||||
'rumia invest park',
|
||||
# Organizations
|
||||
'norda biznes',
|
||||
'spoko gospodarcze',
|
||||
'izba gospodarcza pomorze'
|
||||
# ============================================================
|
||||
# BRAVE SEARCH - PRECYZYJNE ZAPYTANIA (zamiast jednego ogólnego)
|
||||
# ============================================================
|
||||
|
||||
# Targeted Brave Search queries. Each entry is a dict with three keys:
#   'query'       - the search expression sent as-is,
#   'weight'      - an integer priority (values used here: 3, 4 and 5),
#   'description' - a short human-readable label.
# NOTE(review): how 'weight' is consumed is not visible in this chunk.
BRAVE_QUERIES = [
    {'query': query, 'weight': weight, 'description': description}
    for query, weight, description in (
        # Group 1: direct ZOPK mentions (highest priority)
        ('"Zielony Okręg Przemysłowy" OR "ZOPK Kaszubia"', 5, 'ZOPK - bezpośrednie wzmianki'),
        ('"Maciej Samsonowicz" MON OR przemysł obronny', 5, 'Samsonowicz - koordynator ZOPK'),
        # Group 2: defence industry
        ('"Kongsberg" "Rumia" OR "Kongsberg Defence Poland"', 5, 'Kongsberg Rumia'),
        # Group 3: offshore wind - projects and companies
        ('"Baltic Power" OR "Orsted Polska" offshore', 5, 'Baltic Power / Orsted'),
        ('"Baltica" Equinor offshore OR "Baltica 2" "Baltica 3"', 4, 'Baltica - Equinor/Polenergia'),
        ('"F.E.W. Baltic" OR "RWE" offshore Bałtyk wiatrowa', 4, 'F.E.W. Baltic / RWE'),
        # Group 4: offshore wind - infrastructure and supply chain
        ('"port instalacyjny" offshore OR "hub serwisowy" wiatrowa Gdynia', 5, 'Porty offshore'),
        ('"CRIST" offshore OR "Remontowa Shipbuilding" wiatrowa', 4, 'Stocznie dla offshore'),
        ('"ST3 Offshore" OR "GSG Towers" wieże wiatrowe', 3, 'Producenci konstrukcji'),
        # Group 5: nuclear power plant
        ('"elektrownia jądrowa" "Lubiatowo" OR "Choczewo" OR "Kopalino"', 5, 'EJ Lubiatowo-Kopalino'),
        ('"Polskie Elektrownie Jądrowe" OR "PEJ" atom', 5, 'PEJ - spółka'),
        ('"Westinghouse" Polska OR "AP1000" elektrownia', 5, 'Westinghouse - technologia'),
        ('"Bechtel" Polska atom OR elektrownia jądrowa', 4, 'Bechtel - wykonawca'),
        # Group 6: SMR (small modular reactors)
        ('"SMR" Polska OR "Orlen Synthos Green Energy" reaktor', 4, 'SMR - małe reaktory'),
        ('"BWRX-300" OR "GE Hitachi" Polska atom', 4, 'BWRX-300 / GE Hitachi'),
        # Group 7: hydrogen and new technologies
        ('"Dolina Wodorowa" Pomorze OR "H2Gdańsk"', 4, 'Dolina Wodorowa'),
        ('"wodór zielony" Gdańsk OR Gdynia OR Pomorze', 3, 'Wodór zielony Pomorze'),
        ('"centrum danych" Gdynia OR "data center" Pomorze', 4, 'Centra danych'),
        # Group 8: economic zones and local governments
        ('"Rumia Invest Park" OR "strefa ekonomiczna Rumia"', 4, 'Rumia Invest Park'),
        ('"gmina Choczewo" atom OR inwestycje', 4, 'Gmina Choczewo'),
        ('"gmina Krokowa" OR "powiat pucki" offshore energia', 3, 'Samorządy lokalne'),
        # Group 9: ports and logistics
        ('"Port Gdynia" offshore OR inwestycje terminal', 4, 'Port Gdynia'),
        ('"Port Gdańsk" offshore OR "DCT" inwestycje', 3, 'Port Gdańsk / DCT'),
        # Group 10: local energy sector
        ('"Energa" offshore OR "Energa" inwestycje Pomorze', 3, 'Energa - lokalny operator'),
        # Group 11: industry events
        ('"Offshore Wind Poland" konferencja OR "PSEW" wiatrowa', 3, 'Konferencje offshore'),
        ('"Forum Energii" Pomorze OR "WindEurope" Polska', 3, 'Eventy energetyczne'),
    )
]
|
||||
|
||||
# ============================================================
|
||||
# BLACKLISTA DOMEN - automatyczne odrzucanie
|
||||
# ============================================================
|
||||
|
||||
# Domains whose articles are rejected automatically (per the section header).
# Entries are plain ASCII hostnames, because that is how real URLs spell them.
# NOTE(review): how a URL's host is compared against this set is not visible
# in this chunk - confirm whether subdomains are expected to match.
BLACKLISTED_DOMAINS = {
    # Sport
    'sport.pl', 'meczyki.pl', 'sportowefakty.wp.pl', 'przegladsportowy.pl',
    'sport.tvp.pl', 'goal.pl', 'sportbuzz.pl', 'pilkanozna.pl',
    # Gossip and lifestyle
    # 'jastrzabpost.pl' was previously spelled 'jastrząbpost.pl'; the site's
    # hostname has no diacritic, so the old entry could never match it.
    'pudelek.pl', 'plotek.pl', 'pomponik.pl', 'kozaczek.pl', 'jastrzabpost.pl',
    'plejada.pl', 'party.pl', 'viva.pl', 'gala.pl',
    # General news without local context
    'se.pl', 'fakt.pl', 'natemat.pl',
    # Other irrelevant sites
    'pogoda.interia.pl', 'allegro.pl', 'olx.pl', 'pracuj.pl',
    'gratka.pl', 'otodom.pl', 'otomoto.pl',
    # Foreign outlets
    'reuters.com', 'bbc.com', 'cnn.com', 'theguardian.com',
}
|
||||
|
||||
# Domeny preferowane (bonus do oceny)
|
||||
# Maps a hostname to the bonus returned by get_domain_bonus() for articles
# published there. Insertion order is preserved on purpose.
PREFERRED_DOMAINS = {
    'trojmiasto.pl': 2,
    'dziennikbaltycki.pl': 2,
    'nordafm.pl': 3,
    'ttm24.pl': 3,
    'nadmorski24.pl': 2,
    'gdynia.pl': 2,
    'wejherowo.pl': 2,
    'rumia.eu': 2,
    'gov.pl': 1,
    'biznes.gov.pl': 2,
    'wnp.pl': 1,
    'wysokienapiecie.pl': 2,
    'energetyka24.com': 2,
    'defence24.pl': 2,
    'gospodarkamorska.pl': 2,
}
|
||||
|
||||
# ============================================================
|
||||
# ZOPK KEYWORDS - słowa kluczowe do pre-filtrowania
|
||||
# ============================================================
|
||||
|
||||
# ZOPK-related keywords for filtering (rozszerzone i pogrupowane)
|
||||
# MUST HAVE - direct hits; a single match is enough for score 5.
ZOPK_KEYWORDS_CRITICAL = [
    'zielony okręg przemysłowy',
    'zopk',
    # Kongsberg
    'kongsberg rumia',
    'kongsberg defence',
    'kongsberg poland',
    # Key person
    'maciej samsonowicz',
    'samsonowicz mon',
    # Nuclear power plant - locations
    'lubiatowo kopalino',
    'elektrownia jądrowa lubiatowo',
    'elektrownia jądrowa choczewo',
    # Main offshore projects
    'baltic power',
    'baltica offshore',
    'baltica 2',
    'baltica 3',
    # Economic zone
    'rumia invest park',
    # PEJ
    'polskie elektrownie jądrowe',
    'pej lubiatowo',
    # Westinghouse / Bechtel
    'westinghouse polska',
    'ap1000 polska',
    'bechtel polska',
    # Installation port
    'port instalacyjny offshore',
]
|
||||
|
||||
# STRONG - strong associations; a single match is enough for score 4.
ZOPK_KEYWORDS_STRONG = [
    # Offshore wind
    'offshore bałtyk',
    'farma wiatrowa bałtyk',
    'morska energetyka wiatrowa',
    'orsted polska',
    'equinor polska',
    'rwe offshore',
    'few baltic',
    'ocean winds',
    'hub serwisowy offshore',
    # Shipyards working for offshore
    'crist offshore',
    'remontowa shipbuilding',
    'st3 offshore',
    'gsg towers',
    # Nuclear - contractors and technology
    'kongsberg polska',
    'bwrx-300',
    'ge hitachi polska',
    # SMR
    'orlen synthos',
    'smr polska',
    'małe reaktory modularne',
    # Defence industry
    'przemysł obronny pomorze',
    # Hydrogen
    'dolina wodorowa',
    'h2gdańsk',
    'wodór zielony gdańsk',
    'wodór zielony gdynia',
    'laboratoria wodorowe',
    # Data center
    'centrum danych gdynia',
    'data center gdynia',
    # Local governments
    'gmina choczewo',
    'gmina krokowa',
    'powiat pucki',
    # Ports
    'port gdynia offshore',
    'terminal offshore gdynia',
    # People
    'kosiniak-kamysz przemysł',
    # Transformation
    'transformacja energetyczna pomorze',
]
|
||||
|
||||
# WEAK - weak associations; two or more matches, or one match combined with a
# location, are needed to reach score 3 (a single match alone scores 2).
ZOPK_KEYWORDS_WEAK = [
    'offshore wind',
    'elektrownia jądrowa',
    'przemysł obronny',
    'przemysł zbrojeniowy',
    'inwestycje przemysłowe',
    'strefa ekonomiczna',
    'centrum danych',
    'data center',
    'farma wiatrowa',
    'energia odnawialna',
    'atom polska',
    'energetyka jądrowa',
    'morskie wiatrowe',
    'turbiny wiatrowe',
    'fundamenty offshore',
    'monopile',
    'wodór zielony',
    'hydrogen',
    'magazyn energii',
    'port instalacyjny',
    'hub logistyczny',
    'stocznia',
    'psew',
    'offshore wind poland',
    'windeurope',
    'forum energii',
    'energa inwestycje',
]
|
||||
|
||||
# Place names that upgrade a weak keyword match to a medium score.
ZOPK_LOCATIONS = [
    'kaszuby',
    'kaszubia',
    'pomorze',
    'pomorskie',
    'wejherowo',
    'rumia',
    'gdynia',
    'gdańsk',
    'reda',
    'puck',
    'choczewo',
    'lubiatowo',
    'kopalino',
    'żarnowiec',
    'krokowa',
    'bałtyk',
    'baltyk',
    'morze bałtyckie',
    'trójmiasto',
    'trojmiasto',
]
|
||||
|
||||
# Flat list of every keyword tier (critical, then strong, then weak), kept for
# backward compatibility.
ZOPK_KEYWORDS = [
    *ZOPK_KEYWORDS_CRITICAL,
    *ZOPK_KEYWORDS_STRONG,
    *ZOPK_KEYWORDS_WEAK,
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class NewsItem:
|
||||
@ -246,67 +492,428 @@ def normalize_title_hash(title: str) -> str:
|
||||
return hashlib.sha256(text.encode()).hexdigest()[:32]
|
||||
|
||||
|
||||
def is_zopk_relevant(title: str, description: str = '') -> bool:
|
||||
"""Check if content is relevant to ZOPK topics"""
|
||||
def is_blacklisted_domain(domain: str) -> bool:
    """
    Tell whether a hostname belongs to a blacklisted site.

    The host is lowercased and a leading "www." is dropped. A host matches when
    it equals a BLACKLISTED_DOMAINS entry or is a subdomain of one (for example
    "m.pudelek.pl" matches "pudelek.pl"). A parent of a listed host is not
    matched: "tvp.pl" stays allowed although "sport.tvp.pl" is listed.

    Args:
        domain: Hostname of the article source; empty or None is treated as
            not blacklisted.

    Returns:
        True when articles from this host should be rejected.
    """
    if not domain:
        return False

    host = domain.strip().lower().rstrip('.')
    # Only strip the prefix; replace() would also eat "www." in the middle of a host.
    if host.startswith('www.'):
        host = host[len('www.'):]

    # The dot boundary keeps "notpudelek.pl" from matching "pudelek.pl".
    return any(
        host == blocked or host.endswith('.' + blocked)
        for blocked in BLACKLISTED_DOMAINS
    )
|
||||
|
||||
|
||||
def get_domain_bonus(domain: str) -> int:
    """
    Return the score bonus for articles published on a preferred host.

    The host is lowercased and a leading "www." is dropped. An exact entry in
    PREFERRED_DOMAINS wins; otherwise the host may be a subdomain of an entry
    (for example "biznes.trojmiasto.pl"), in which case the most specific
    (longest) matching entry decides the bonus.

    Args:
        domain: Hostname of the article source; empty or None yields 0.

    Returns:
        The configured bonus, or 0 when the host is not preferred.
    """
    if not domain:
        return 0

    host = domain.strip().lower().rstrip('.')
    if host.startswith('www.'):
        host = host[len('www.'):]

    exact = PREFERRED_DOMAINS.get(host)
    if exact is not None:
        return exact

    # Require a dot boundary so "notgov.pl" does not inherit the "gov.pl" bonus.
    parents = [pref for pref in PREFERRED_DOMAINS if host.endswith('.' + pref)]
    if not parents:
        return 0

    # Several entries can match ("gov.pl" and "biznes.gov.pl"); the longest one
    # is the closest parent, independent of dict ordering.
    return PREFERRED_DOMAINS[max(parents, key=len)]
|
||||
|
||||
|
||||
def calculate_keyword_score(title: str, description: str = '') -> dict:
    """
    Score how strongly an article matches the ZOPK keyword lists.

    Matching is a case-insensitive substring search over the title and the
    description joined with a space.

    Args:
        title: Article title (None is treated as empty).
        description: Article summary (None is treated as empty).

    Returns:
        dict with:
        - score: 0-5
            5 = at least one critical keyword
            4 = at least one strong keyword
            3 = a weak keyword together with a location, or 2+ weak keywords
            2 = exactly one weak keyword
            1 = location only
            0 = nothing matched
        - matches: matched terms per tier, under the keys
          'critical', 'strong', 'weak' and 'locations'
        - reason: short explanation of the score (in Polish)
        - total_matches: number of matched terms across all tiers
    """
    # Guard against None so it is not rendered as the literal text "none".
    text = f"{title or ''} {description or ''}".lower()

    tiers = (
        ('critical', ZOPK_KEYWORDS_CRITICAL),
        ('strong', ZOPK_KEYWORDS_STRONG),
        ('weak', ZOPK_KEYWORDS_WEAK),
        ('locations', ZOPK_LOCATIONS),
    )
    matches = {
        tier: [term for term in terms if term.lower() in text]
        for tier, terms in tiers
    }

    # Highest tier wins; the branches are ordered from strongest to weakest.
    if matches['critical']:
        score = 5
        reason = f"Trafienie krytyczne: {matches['critical'][0]}"
    elif matches['strong']:
        score = 4
        reason = f"Mocne powiązanie: {matches['strong'][0]}"
    elif matches['weak'] and matches['locations']:
        # Weak keyword + location = medium score
        score = 3
        reason = f"Słabe + lokalizacja: {matches['weak'][0]} + {matches['locations'][0]}"
    elif len(matches['weak']) >= 2:
        # Multiple weak keywords = medium score
        score = 3
        reason = f"Wiele słabych: {', '.join(matches['weak'][:2])}"
    elif matches['weak']:
        # Single weak keyword = low score
        score = 2
        reason = f"Tylko słabe: {matches['weak'][0]}"
    elif matches['locations']:
        # Only a location, no industry keywords
        score = 1
        reason = f"Tylko lokalizacja: {matches['locations'][0]}"
    else:
        score = 0
        reason = "Brak trafień słów kluczowych"

    return {
        'score': score,
        'matches': matches,
        'reason': reason,
        'total_matches': sum(len(found) for found in matches.values())
    }
|
||||
|
||||
|
||||
def is_zopk_relevant(title: str, description: str = '') -> bool:
    """
    Legacy boolean relevance check, kept for older callers.

    Content counts as relevant when calculate_keyword_score() gives it a
    score of 3 or more.
    """
    return calculate_keyword_score(title, description)['score'] >= 3
|
||||
|
||||
|
||||
class ZOPKNewsService:
|
||||
"""
|
||||
Multi-source news search service with cross-verification.
|
||||
Multi-source news search service with cross-verification and AI pre-filtering.
|
||||
|
||||
NOWY PIPELINE (2026-01):
|
||||
1. Wyszukiwanie: wiele precyzyjnych zapytań Brave + RSS
|
||||
2. Pre-filtrowanie: blacklista domen + słowa kluczowe
|
||||
3. Ocena AI: PRZED zapisem do bazy (tylko 3+★)
|
||||
4. Zapis: tylko wysokiej jakości artykuły
|
||||
"""
|
||||
|
||||
def __init__(self, db_session, brave_api_key: Optional[str] = None, enable_ai_prefilter: bool = True):
    """
    Set up the service.

    Args:
        db_session: Database session stored as ``self.db``.
        brave_api_key: Brave Search API key; when empty, the BRAVE_API_KEY
            environment variable is used instead.
        enable_ai_prefilter: Whether articles are evaluated by AI before
            they are saved.
    """
    self.db = db_session
    self.brave_api_key = brave_api_key or os.getenv('BRAVE_API_KEY')
    self.enable_ai_prefilter = enable_ai_prefilter
    # Filled lazily by _get_gemini() on first use.
    self._gemini_service = None
|
||||
|
||||
def search_all_sources(self, query: str = 'Zielony Okręg Przemysłowy Kaszubia') -> Dict:
|
||||
def _get_gemini(self):
    """
    Return the Gemini service, loading it on first use.

    A load failure is logged and leaves the cached value as None, so the
    next call tries again.
    """
    if self._gemini_service is not None:
        return self._gemini_service

    try:
        from gemini_service import get_gemini_service
        self._gemini_service = get_gemini_service()
    except Exception as e:
        logger.error(f"Failed to load Gemini: {e}")

    return self._gemini_service
|
||||
|
||||
def search_all_sources(self, query: str = None, user_id: int = None) -> Dict:
    """
    Run the full news pipeline and save the articles that survive it.

    Pipeline:
        1. Search: one Brave request per BRAVE_QUERIES entry, plus every
           RSS_SOURCES feed.
        2. Pre-filter: drop blacklisted domains, then drop items whose
           keyword score is below 2.
        3. Cross-verify and deduplicate.
        4. AI evaluation before saving (when enabled and Gemini is
           available): scores below 3 are rejected, scores of 3+ are kept,
           and items the AI could not evaluate are kept for manual review.
        5. Save the remaining items to the database.

    Args:
        query: Deprecated, ignored. BRAVE_QUERIES is used instead.
        user_id: User ID forwarded to the AI evaluation for usage tracking.

    Returns:
        Dict with the counters of every stage, ``source_stats``, a
        ``process_log`` list for the frontend progress display, and
        ``auto_approved_articles`` (title, score, source of each article the
        AI scored 3+).
    """
    all_items: List[NewsItem] = []
    source_stats = {
        'brave_queries': 0,
        'brave_results': 0,
        'rss_results': 0,
        'blacklisted': 0,
        'keyword_filtered': 0,
        'ai_rejected': 0,
        'ai_approved': 0,
        # Items kept for manual review because the AI gave no verdict.
        'ai_failed': 0
    }

    # Process log for frontend progress display
    process_log = []
    auto_approved_articles = []  # Articles the AI scored 3+

    def log_step(phase, step, message, count=0):
        """Append one progress entry to the process log."""
        process_log.append({
            'phase': phase,
            'step': step,
            'message': message,
            'count': count
        })

    # 1. BRAVE SEARCH - multiple precise queries
    log_step('search', 'brave_start',
             f'Rozpoczynam wyszukiwanie Brave ({len(BRAVE_QUERIES)} zapytań)...',
             len(BRAVE_QUERIES))

    if self.brave_api_key:
        for i, query_config in enumerate(BRAVE_QUERIES):
            brave_items = self._search_brave_single(query_config['query'])
            source_stats['brave_queries'] += 1
            source_stats['brave_results'] += len(brave_items)
            all_items.extend(brave_items)
            logger.info(f"Brave '{query_config['description']}': {len(brave_items)} items")

            log_step('search', f'brave_{i+1}',
                     f"Brave: {query_config['description']}",
                     len(brave_items))
    else:
        log_step('search', 'brave_skip', 'Brave API niedostępne - pominięto')

    log_step('search', 'brave_done',
             f'Brave: znaleziono {source_stats["brave_results"]} artykułów',
             source_stats['brave_results'])

    # 2. RSS feeds
    log_step('search', 'rss_start',
             f'Przeszukuję {len(RSS_SOURCES)} źródeł RSS...',
             len(RSS_SOURCES))

    for source_id, source_config in RSS_SOURCES.items():
        rss_items = self._fetch_rss(source_id, source_config)
        all_items.extend(rss_items)
        source_stats[source_id] = len(rss_items)
        logger.info(f"RSS {source_id}: found {len(rss_items)} items")
        source_stats['rss_results'] += len(rss_items)

        # Empty feeds are left out of the progress log.
        if rss_items:
            log_step('search', f'rss_{source_id}',
                     f"RSS: {source_config['name']}",
                     len(rss_items))

    log_step('search', 'rss_done',
             f'RSS: znaleziono {source_stats["rss_results"]} artykułów',
             source_stats['rss_results'])

    logger.info(f"Total raw items: {len(all_items)}")

    total_raw = len(all_items)
    log_step('search', 'search_complete',
             f'📥 Łącznie pobrano: {total_raw} artykułów',
             total_raw)

    # 3. PRE-FILTER: domain blacklist
    log_step('filter', 'blacklist_start', 'Filtrowanie: sprawdzam blacklistę domen...')

    filtered_items = []
    for item in all_items:
        if is_blacklisted_domain(item.domain):
            source_stats['blacklisted'] += 1
            logger.debug(f"Blacklisted domain: {item.domain}")
            continue
        filtered_items.append(item)

    logger.info(f"After blacklist filter: {len(filtered_items)} (removed {source_stats['blacklisted']})")

    log_step('filter', 'blacklist_done',
             f'🚫 Blacklist: usunięto {source_stats["blacklisted"]} artykułów (sport, plotki, lifestyle)',
             source_stats['blacklisted'])

    # 4. PRE-FILTER: keyword score (minimum 2)
    log_step('filter', 'keywords_start', 'Filtrowanie: analiza słów kluczowych ZOPK...')

    keyword_filtered = []
    for item in filtered_items:
        kw_result = calculate_keyword_score(item.title, item.description)
        if kw_result['score'] >= 2:  # At least weak relevance
            item.keyword_score = kw_result['score']
            item.keyword_reason = kw_result['reason']
            keyword_filtered.append(item)
        else:
            source_stats['keyword_filtered'] += 1

    logger.info(f"After keyword filter: {len(keyword_filtered)} (removed {source_stats['keyword_filtered']})")

    log_step('filter', 'keywords_done',
             f'🔑 Keywords: usunięto {source_stats["keyword_filtered"]} (brak słów kluczowych ZOPK)',
             source_stats['keyword_filtered'])

    log_step('filter', 'filter_complete',
             f'✅ Po filtrowaniu: {len(keyword_filtered)} artykułów do analizy AI',
             len(keyword_filtered))

    # 5. Cross-verify and deduplicate
    verified_items = self._cross_verify(keyword_filtered)
    logger.info(f"After deduplication: {len(verified_items)} unique items")

    log_step('filter', 'dedup_done',
             f'🔄 Deduplikacja: {len(verified_items)} unikalnych artykułów',
             len(verified_items))

    # 6. AI EVALUATION (before saving) - only if enabled.
    # Gemini is not loaded at all when the pre-filter is switched off.
    gemini = self._get_gemini() if self.enable_ai_prefilter else None
    if gemini:
        log_step('ai', 'ai_start',
                 f'🤖 AI (Gemini): rozpoczynam ocenę {len(verified_items)} artykułów...',
                 len(verified_items))

        kept_items = []
        ai_evaluated_count = 0

        for item in verified_items:
            ai_result = evaluate_news_relevance(
                {
                    'title': item['title'],
                    'description': item['description'],
                    'source_name': item['source_name'],
                    'published_at': item.get('published_at')
                },
                gemini,
                user_id=user_id
            )

            ai_evaluated_count += 1

            if not ai_result.get('evaluated'):
                # AI evaluation failed - keep the item as pending for manual
                # review, but do not report it as approved.
                item['ai_score'] = None
                item['ai_reason'] = ai_result.get('reason', 'AI evaluation failed')
                item['ai_relevant'] = None
                kept_items.append(item)
                source_stats['ai_failed'] += 1
                continue

            ai_score = ai_result.get('score', 0)
            if ai_score < 3:
                # Low score - reject before saving
                source_stats['ai_rejected'] += 1
                logger.debug(f"AI rejected ({ai_score}★): {item['title'][:50]}")
                continue

            # Good score - save it
            item['ai_score'] = ai_score
            item['ai_reason'] = ai_result.get('reason', '')
            item['ai_relevant'] = True
            kept_items.append(item)
            source_stats['ai_approved'] += 1

            # Track for frontend display
            auto_approved_articles.append({
                'title': item['title'][:80] + ('...' if len(item['title']) > 80 else ''),
                'score': ai_score,
                'source': item.get('source_name', item.get('source_domain', ''))
            })

            logger.debug(f"AI approved ({ai_score}★): {item['title'][:50]}")

        verified_items = kept_items
        logger.info(f"After AI filter: {len(verified_items)} approved, {source_stats['ai_rejected']} rejected")

        log_step('ai', 'ai_done',
                 f'🤖 AI: oceniono {ai_evaluated_count}, zaakceptowano {source_stats["ai_approved"]} (3+★), odrzucono {source_stats["ai_rejected"]}',
                 source_stats['ai_approved'])
    else:
        logger.info("AI pre-filter disabled or Gemini unavailable")
        log_step('ai', 'ai_skip', '🤖 AI: wyłączony lub niedostępny')

    # 7. Save to database (only quality items)
    log_step('save', 'save_start',
             f'💾 Zapisuję {len(verified_items)} artykułów do bazy...',
             len(verified_items))

    saved_count, updated_count = self._save_to_database(verified_items)

    log_step('save', 'save_done',
             f'💾 Zapisano: {saved_count} nowych, {updated_count} zaktualizowanych',
             saved_count + updated_count)

    # Final summary
    # Note: score >= 3 triggers auto-approve (verified 2026-01-15)
    auto_approved_count = sum(
        1 for item in verified_items
        if item.get('auto_approve', False) or (item.get('ai_score') or 0) >= 3
    )

    log_step('complete', 'done',
             f'✅ Zakończono! {saved_count} nowych artykułów w bazie wiedzy.',
             saved_count)

    return {
        'total_found': source_stats['brave_results'] + source_stats['rss_results'],
        'blacklisted': source_stats['blacklisted'],
        'keyword_filtered': source_stats['keyword_filtered'],
        'ai_rejected': source_stats['ai_rejected'],
        'ai_approved': source_stats['ai_approved'],
        'unique_items': len(verified_items),
        'saved_new': saved_count,
        'updated_existing': updated_count,
        'source_stats': source_stats,
        'auto_approved': auto_approved_count,
        'process_log': process_log,
        'auto_approved_articles': auto_approved_articles
    }
|
||||
|
||||
def _search_brave(self, query: str) -> List[NewsItem]:
|
||||
"""Search Brave API for news"""
|
||||
def _search_brave_single(self, query: str) -> List[NewsItem]:
|
||||
"""Search Brave API with a single query"""
|
||||
if not self.brave_api_key:
|
||||
return []
|
||||
|
||||
@ -318,8 +925,8 @@ class ZOPKNewsService:
|
||||
}
|
||||
params = {
|
||||
'q': query,
|
||||
'count': 20,
|
||||
'freshness': 'pm', # past month
|
||||
'count': 10, # Fewer results per query (we have 8 queries)
|
||||
'freshness': 'pw', # past week (more relevant than past month)
|
||||
'country': 'pl',
|
||||
'search_lang': 'pl'
|
||||
}
|
||||
@ -341,18 +948,26 @@ class ZOPKNewsService:
|
||||
description=item.get('description', ''),
|
||||
source_name=item.get('source', ''),
|
||||
source_type='brave',
|
||||
source_id='brave_search',
|
||||
published_at=datetime.now(), # Brave doesn't provide exact date
|
||||
source_id=f'brave_{query[:20]}',
|
||||
published_at=datetime.now(),
|
||||
image_url=item.get('thumbnail', {}).get('src')
|
||||
))
|
||||
else:
|
||||
logger.error(f"Brave API error: {response.status_code}")
|
||||
logger.error(f"Brave API error for '{query[:30]}': {response.status_code}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Brave search error: {e}")
|
||||
|
||||
return items
|
||||
|
||||
def _search_brave(self, query: str) -> List[NewsItem]:
    """
    Legacy entry point kept for compatibility.

    The ``query`` argument is not used: every BRAVE_QUERIES entry is searched
    in turn and the results are returned as one flat list.
    """
    return [
        found
        for query_config in BRAVE_QUERIES
        for found in self._search_brave_single(query_config['query'])
    ]
|
||||
|
||||
def _fetch_rss(self, source_id: str, config: Dict) -> List[NewsItem]:
|
||||
"""Fetch and parse RSS feed"""
|
||||
items = []
|
||||
@ -503,7 +1118,15 @@ class ZOPKNewsService:
|
||||
updated_count += 1
|
||||
else:
|
||||
# Create new entry
|
||||
status = 'auto_approved' if item['auto_approve'] else 'pending'
|
||||
# Determine status based on AI score (if available)
|
||||
# Note: score >= 3 triggers auto-approve (verified 2026-01-15)
|
||||
ai_score = item.get('ai_score')
|
||||
if ai_score and ai_score >= 3:
|
||||
status = 'auto_approved' # AI score 3+ = auto-approve
|
||||
elif item.get('auto_approve'):
|
||||
status = 'auto_approved' # Multiple sources = auto-approve
|
||||
else:
|
||||
status = 'pending'
|
||||
|
||||
news = ZOPKNews(
|
||||
title=item['title'],
|
||||
@ -515,12 +1138,18 @@ class ZOPKNewsService:
|
||||
source_domain=item['source_domain'],
|
||||
source_type=item['source_type'],
|
||||
published_at=item['published_at'],
|
||||
image_url=item['image_url'],
|
||||
image_url=item.get('image_url'),
|
||||
confidence_score=item['confidence_score'],
|
||||
source_count=item['source_count'],
|
||||
sources_list=item['sources_list'],
|
||||
is_auto_verified=item['auto_approve'],
|
||||
status=status
|
||||
is_auto_verified=item.get('auto_approve', False) or (ai_score and ai_score >= 3),
|
||||
status=status,
|
||||
# AI evaluation results from pre-filtering
|
||||
ai_relevant=item.get('ai_relevant'),
|
||||
ai_relevance_score=ai_score,
|
||||
ai_evaluation_reason=item.get('ai_reason', '')[:255] if item.get('ai_reason') else None,
|
||||
ai_evaluated_at=datetime.now() if ai_score else None,
|
||||
ai_model='gemini-2.0-flash' if ai_score else None
|
||||
)
|
||||
self.db.add(news)
|
||||
new_count += 1
|
||||
@ -529,16 +1158,29 @@ class ZOPKNewsService:
|
||||
return new_count, updated_count
|
||||
|
||||
|
||||
def search_zopk_news(db_session, query: str = None, user_id: int = None, enable_ai_prefilter: bool = True) -> Dict:
    """
    Convenience function to search ZOPK news from all sources.

    Builds a ZOPKNewsService and runs its search_all_sources() pipeline
    (introduced 2026-01):
    - multiple precise Brave queries instead of one general query
    - domain blacklist (sports, gossip, lifestyle)
    - keyword pre-filtering (minimum score 2)
    - AI evaluation BEFORE saving (rejects 1-2★)
    - only articles rated 3+★, or ones the AI could not evaluate, are saved

    Args:
        db_session: SQLAlchemy session
        query: Deprecated, ignored
        user_id: User ID for tracking AI usage
        enable_ai_prefilter: If True, evaluate with AI before saving (default: True)

    Returns:
        The result dict produced by ZOPKNewsService.search_all_sources().

    Usage:
        from zopk_news_service import search_zopk_news
        results = search_zopk_news(db, user_id=current_user.id)
    """
    service = ZOPKNewsService(db_session, enable_ai_prefilter=enable_ai_prefilter)
    return service.search_all_sources(user_id=user_id)
|
||||
|
||||
|
||||
# ============================================================
|
||||
|
||||
Loading…
Reference in New Issue
Block a user