""" Audit AI Service ================= Centralny serwis AI do analizy wyników audytów i generowania priorytetowanych akcji z treścią gotową do wdrożenia. Obsługiwane typy audytów: - SEO (PageSpeed, on-page, technical, local SEO) - GBP (Google Business Profile completeness) - Social Media (presence across platforms) Używa Gemini API (via gemini_service.py) do generowania analiz. Author: Norda Biznes Development Team Created: 2026-02-07 """ import hashlib import json import logging import re import requests from datetime import datetime, timedelta from html import unescape from database import ( SessionLocal, Company, CompanyWebsiteAnalysis, CompanySocialMedia, CompanyCitation, AuditAction, AuditAICache ) from youtube_service import YouTubeService from crux_service import CrUXService from benchmark_service import BenchmarkService logger = logging.getLogger(__name__) # Cache expiry: 7 days CACHE_EXPIRY_DAYS = 7 # Gemini Structured Output schema for audit analysis responses. # Uses Gemini API schema format (STRING, INTEGER, OBJECT, ARRAY). # This enforces valid JSON output from the model, eliminating manual parsing. 
# Schema of a single recommended action inside the "actions" array.
_ACTION_ITEM_SCHEMA = {
    'type': 'OBJECT',
    'required': [
        'action_type',
        'title',
        'description',
        'priority',
        'impact_score',
        'effort_score',
        'platform',
    ],
    'properties': {
        'action_type': {'type': 'STRING'},
        'title': {'type': 'STRING'},
        'description': {'type': 'STRING'},
        'priority': {
            'type': 'STRING',
            'enum': ['critical', 'high', 'medium', 'low'],
        },
        'impact_score': {'type': 'INTEGER'},
        'effort_score': {'type': 'INTEGER'},
        'platform': {'type': 'STRING'},
    },
}

# Top-level response shape: a text summary plus a list of actions.
AUDIT_ANALYSIS_SCHEMA = {
    'type': 'OBJECT',
    'required': ['summary', 'actions'],
    'properties': {
        'summary': {'type': 'STRING'},
        'actions': {
            'type': 'ARRAY',
            'items': _ACTION_ITEM_SCHEMA,
        },
    },
}


def _get_gemini_service():
    """Return the Gemini service instance.

    The ``gemini_service`` module is imported at call time, not at module
    import time.

    Raises:
        RuntimeError: if ``get_gemini_service()`` returns a falsy value.
    """
    from gemini_service import get_gemini_service

    instance = get_gemini_service()
    if instance:
        return instance
    raise RuntimeError("Gemini service not initialized")


def _hash_data(data: dict) -> str:
    """Return the SHA-256 hex digest of *data* for cache invalidation.

    The dict is serialized as JSON with sorted keys, so key order does not
    affect the digest; values JSON cannot encode are converted with ``str``.
    """
    canonical = json.dumps(data, sort_keys=True, default=str).encode()
    digest = hashlib.sha256()
    digest.update(canonical)
    return digest.hexdigest()
# ============================================================
# SEO AUDIT DATA COLLECTION
# ============================================================

# Output flag -> response header whose presence sets that flag.
_SEO_SECURITY_HEADERS = {
    'has_hsts': 'strict-transport-security',
    'has_csp': 'content-security-policy',
    'has_x_frame_options': 'x-frame-options',
    'has_x_content_type_options': 'x-content-type-options',
}

# Captures the src value of every <img> tag. The previous pattern started
# with a literal "]" and therefore never matched real markup.
_SEO_IMG_SRC_RE = re.compile(
    r'<img\b[^>]*?\ssrc\s*=\s*["\']([^"\']+)["\']',
    re.IGNORECASE,
)


def _seo_security_headers(url: str) -> dict:
    """Probe *url* with a HEAD request and report which security headers it sends.

    Best effort: returns ``{}`` (and logs a warning) when the request fails.
    """
    try:
        resp = requests.head(url, timeout=5, allow_redirects=True)
        flags = {
            flag: header in resp.headers
            for flag, header in _SEO_SECURITY_HEADERS.items()
        }
    except Exception as e:
        logger.warning(f"Security headers check failed for {url}: {e}")
        return {}
    flags['security_headers_count'] = sum(flags.values())
    return flags


def _seo_image_formats(url: str) -> dict:
    """Download *url* and count <img> sources by image format.

    Best effort: returns ``{}`` when the request fails (logged as a warning)
    or when the response status is not 200.
    """
    try:
        resp = requests.get(url, timeout=10, allow_redirects=True)
        if resp.status_code != 200:
            return {}
        img_srcs = [src.lower() for src in _SEO_IMG_SRC_RE.findall(resp.text)]
    except Exception as e:
        logger.warning(f"Image format analysis failed for {url}: {e}")
        return {}

    total = len(img_srcs)
    webp_count = sum('.webp' in src for src in img_srcs)
    avif_count = sum('.avif' in src for src in img_srcs)
    svg_count = sum('.svg' in src for src in img_srcs)
    modern_count = webp_count + avif_count + svg_count
    return {
        'total_images_found': total,
        'webp_count': webp_count,
        'avif_count': avif_count,
        'svg_count': svg_count,
        'modern_format_count': modern_count,
        'legacy_format_count': total - modern_count,
        'modern_format_ratio': round(modern_count / total * 100, 1) if total else 0,
    }


def _collect_seo_data(db, company) -> dict:
    """Collect SEO audit data for AI analysis.

    Combines the company's latest stored website analysis (ordered by
    ``seo_audited_at``) with its citations and, when the company has a
    website, three live best-effort probes: security headers, image formats
    and CrUX field data. A failing probe leaves its fields as ``None``.

    Args:
        db: database session used for the queries.
        company: company record; ``id``, ``name``, ``category``, ``website``
            and ``address_city`` are read.

    Returns:
        A flat dict of audit values, or ``{}`` when no analysis with a
        ``seo_audited_at`` timestamp exists.
    """
    analysis = (
        db.query(CompanyWebsiteAnalysis)
        .filter(CompanyWebsiteAnalysis.company_id == company.id)
        .order_by(CompanyWebsiteAnalysis.seo_audited_at.desc())
        .first()
    )
    if not analysis or not analysis.seo_audited_at:
        return {}

    citations = db.query(CompanyCitation).filter(
        CompanyCitation.company_id == company.id
    ).all()

    security_headers = {}
    image_formats = {}
    crux_data = {}
    if company.website:
        security_headers = _seo_security_headers(company.website)
        image_formats = _seo_image_formats(company.website)
        # CrUX field data (real user metrics from Chrome)
        try:
            crux_data = CrUXService().get_field_data(company.website) or {}
        except Exception as e:
            logger.warning(f"CrUX error for {company.website}: {e}")

    # Lengths are measured on the same unescaped text that is reported, so
    # HTML entities such as "&amp;" do not inflate them.
    meta_title = unescape(analysis.meta_title or '')
    meta_description = unescape(analysis.meta_description or '')

    # A CLS of 0 is a valid (ideal) measurement, so test for None explicitly.
    cls_raw = analysis.cumulative_layout_shift

    return {
        'company_name': company.name,
        'company_category': company.category.name if company.category else None,
        'website': company.website,
        'city': company.address_city,
        # PageSpeed scores
        'seo_score': analysis.pagespeed_seo_score,
        'performance_score': analysis.pagespeed_performance_score,
        'accessibility_score': analysis.pagespeed_accessibility_score,
        'best_practices_score': analysis.pagespeed_best_practices_score,
        # On-page
        'meta_title': meta_title,
        'meta_description': meta_description,
        'h1_count': analysis.h1_count,
        'h1_text': unescape(analysis.h1_text or ''),
        'h2_count': analysis.h2_count,
        'h3_count': analysis.h3_count,
        'total_images': analysis.total_images,
        'images_without_alt': analysis.images_without_alt,
        # Technical
        'has_ssl': analysis.has_ssl,
        'has_sitemap': analysis.has_sitemap,
        'has_robots_txt': analysis.has_robots_txt,
        'has_canonical': analysis.has_canonical,
        'is_indexable': analysis.is_indexable,
        'is_mobile_friendly': getattr(analysis, 'is_mobile_friendly', None),
        'load_time_ms': analysis.load_time_ms,
        # Structured data
        'has_structured_data': analysis.has_structured_data,
        'structured_data_types': analysis.structured_data_types,
        'has_local_business_schema': analysis.has_local_business_schema,
        # Social/analytics
        'has_og_tags': analysis.has_og_tags,
        'has_twitter_cards': analysis.has_twitter_cards,
        'has_google_analytics': analysis.has_google_analytics,
        'has_google_tag_manager': analysis.has_google_tag_manager,
        # Local SEO
        'local_seo_score': analysis.local_seo_score,
        'has_google_maps_embed': analysis.has_google_maps_embed,
        'has_local_keywords': analysis.has_local_keywords,
        'nap_on_website': analysis.nap_on_website,
        # Core Web Vitals
        'lcp_ms': analysis.largest_contentful_paint_ms,
        'inp_ms': getattr(analysis, 'interaction_to_next_paint_ms', None),  # Replaced FID in March 2024
        'cls': float(cls_raw) if cls_raw is not None else None,
        # Additional performance metrics
        'fcp_ms': getattr(analysis, 'first_contentful_paint_ms', None),
        'ttfb_ms': getattr(analysis, 'time_to_first_byte_ms', None),
        'tbt_ms': getattr(analysis, 'total_blocking_time_ms', None),
        'speed_index': getattr(analysis, 'speed_index_ms', None),
        'meta_title_length': len(meta_title),
        'meta_description_length': len(meta_description),
        'html_lang': analysis.html_lang,
        'local_business_schema_fields': analysis.local_business_schema_fields,
        # Content
        'content_freshness_score': analysis.content_freshness_score,
        'word_count_homepage': analysis.word_count_homepage,
        # Links
        'internal_links_count': analysis.internal_links_count,
        'external_links_count': analysis.external_links_count,
        'broken_links_count': analysis.broken_links_count,
        # Citations
        'citations_count': len(citations),
        'citations_found': sum(1 for c in citations if c.status == 'found'),
        # Security headers (None when the probe did not run or failed)
        'has_hsts': security_headers.get('has_hsts'),
        'has_csp': security_headers.get('has_csp'),
        'has_x_frame_options': security_headers.get('has_x_frame_options'),
        'has_x_content_type_options': security_headers.get('has_x_content_type_options'),
        'security_headers_count': security_headers.get('security_headers_count'),
        # Image formats (None when the probe did not run or failed)
        'modern_format_ratio': image_formats.get('modern_format_ratio'),
        'webp_count': image_formats.get('webp_count'),
        'legacy_image_count': image_formats.get('legacy_format_count'),
        # CrUX field data (real user metrics)
        'crux_lcp_ms': crux_data.get('crux_lcp_ms'),
        'crux_inp_ms': crux_data.get('crux_inp_ms'),
        'crux_cls': crux_data.get('crux_cls'),
        'crux_fcp_ms': crux_data.get('crux_fcp_ms'),
        'crux_ttfb_ms': crux_data.get('crux_ttfb_ms'),
        'crux_lcp_good_pct': crux_data.get('crux_lcp_ms_good_pct'),
        'crux_inp_good_pct': crux_data.get('crux_inp_ms_good_pct'),
        'crux_period_end': crux_data.get('crux_period_end'),
    }
def _collect_gbp_data(db, company) -> dict:
    """Collect Google Business Profile (GBP) audit data for AI analysis.

    Reads the audit returned by ``gbp_audit_service.get_company_audit`` and
    enriches it with Places API fields from the latest
    ``CompanyWebsiteAnalysis`` row (ordered by ``analyzed_at``), when present.

    Args:
        db: database session used for the queries.
        company: company record; ``id``, ``name``, ``category`` and
            ``address_city`` are read.

    Returns:
        A flat dict of audit values, or ``{}`` when no audit is available
        (including when ``gbp_audit_service`` cannot be imported).
    """
    try:
        from gbp_audit_service import get_company_audit
        audit = get_company_audit(db, company.id)
    except ImportError as e:
        # The GBP audit module is treated as optional; report its absence
        # instead of hiding it.
        logger.warning(f"GBP audit service unavailable: {e}")
        audit = None

    if not audit:
        return {}

    # Places API (New) enriched data lives on CompanyWebsiteAnalysis.
    analysis = (
        db.query(CompanyWebsiteAnalysis)
        .filter(CompanyWebsiteAnalysis.company_id == company.id)
        .order_by(CompanyWebsiteAnalysis.analyzed_at.desc())
        .first()
    )

    def places_field(name):
        """Return a Places attribute, or None when there is no analysis row."""
        return getattr(analysis, name, None) if analysis else None

    def to_float(value):
        """Convert to float but keep None; 0 is a real value, not missing."""
        return float(value) if value is not None else None

    # Descriptive photo status gives the AI context beyond the raw count.
    photo_count = audit.photo_count or 0
    if photo_count == 0:
        photo_status = "Brak zdjęć w profilu"
    elif photo_count == 1:
        photo_status = "1 zdjęcie (prawdopodobnie logo)"
    elif photo_count == 2:
        photo_status = "2 zdjęcia (prawdopodobnie logo i zdjęcie w tle)"
    elif photo_count < 10:
        photo_status = f"{photo_count} zdjęć (logo i zdjęcie w tle prawdopodobnie ustawione, ale mało zdjęć dodatkowych)"
    else:
        photo_status = f"{photo_count} zdjęć (dobra ilość)"

    return {
        'company_name': company.name,
        'company_category': company.category.name if company.category else None,
        'city': company.address_city,
        'completeness_score': audit.completeness_score,
        # Field presence
        'has_name': audit.has_name,
        'has_address': audit.has_address,
        'has_phone': audit.has_phone,
        'has_website': audit.has_website,
        'has_hours': audit.has_hours,
        'has_categories': audit.has_categories,
        'has_photos': audit.has_photos,
        'has_description': audit.has_description,
        'has_services': audit.has_services,
        'has_reviews': audit.has_reviews,
        # Reviews
        'review_count': audit.review_count,
        'average_rating': to_float(audit.average_rating),
        'reviews_with_response': audit.reviews_with_response,
        'reviews_without_response': audit.reviews_without_response,
        # A 0% response rate must reach the prompt as 0.0, not as "missing".
        'review_response_rate': to_float(audit.review_response_rate),
        'review_keywords': audit.review_keywords,
        'review_sentiment': audit.review_sentiment,
        # Activity
        'has_posts': audit.has_posts,
        'posts_count_30d': audit.posts_count_30d,
        'has_products': audit.has_products,
        'has_qa': audit.has_qa,
        # Photos
        'photo_count': photo_count,
        'logo_present': audit.logo_present,
        'cover_photo_present': audit.cover_photo_present,
        'photo_status': photo_status,
        # NAP
        'nap_consistent': audit.nap_consistent,
        'nap_issues': audit.nap_issues,
        # Keywords
        'description_keywords': audit.description_keywords,
        # Places API (New) enriched data
        'primary_type': places_field('google_primary_type'),
        'editorial_summary': places_field('google_editorial_summary'),
        'price_level': places_field('google_price_level'),
        'attributes': places_field('google_attributes'),
        'photos_metadata': places_field('google_photos_metadata'),
    }
def _collect_social_data(db, company) -> dict:
    """Collect social media audit data for AI analysis.

    Loads the company's ``CompanySocialMedia`` rows, keyed by platform, and
    reports which of the six tracked platforms are present or missing. When a
    YouTube profile with a URL exists, channel statistics are fetched through
    ``YouTubeService``; a failure there is logged and otherwise ignored.

    Args:
        db: database session used for the query.
        company: company record; ``id``, ``name``, ``category`` and
            ``address_city`` are read.

    Returns:
        A dict with the company basics, ``platforms_present``,
        ``platforms_missing``, per-platform ``profiles`` and a presence
        ``score`` (share of tracked platforms found, 0-100).
    """
    all_platforms = ['facebook', 'instagram', 'linkedin', 'youtube', 'twitter', 'tiktok']

    profiles = db.query(CompanySocialMedia).filter(
        CompanySocialMedia.company_id == company.id
    ).all()

    profiles_dict = {}
    for p in profiles:
        profiles_dict[p.platform] = {
            'url': p.url,
            'is_valid': p.is_valid,
            'followers_count': p.followers_count,
            'has_bio': p.has_bio,
            'has_profile_photo': p.has_profile_photo,
            'has_cover_photo': p.has_cover_photo,
            'posts_count_30d': p.posts_count_30d,
            'last_post_date': str(p.last_post_date) if p.last_post_date else None,
            'posting_frequency_score': p.posting_frequency_score,
            # An engagement rate of 0 is a real measurement; only None means
            # "unknown".
            'engagement_rate': float(p.engagement_rate) if p.engagement_rate is not None else None,
            'profile_completeness_score': p.profile_completeness_score,
            'page_name': getattr(p, 'page_name', None),
        }

    present = [name for name in all_platforms if name in profiles_dict]
    missing = [name for name in all_platforms if name not in profiles_dict]

    # Enrich the YouTube profile with API statistics when a URL is stored.
    youtube_profile = profiles_dict.get('youtube')
    if youtube_profile and youtube_profile.get('url'):
        try:
            yt_service = YouTubeService()
            channel_id = yt_service.extract_channel_id_from_url(youtube_profile['url'])
            youtube_data = yt_service.get_channel_stats(channel_id) if channel_id else None
            if youtube_data:
                youtube_profile['subscriber_count'] = youtube_data.get('subscriber_count')
                youtube_profile['view_count'] = youtube_data.get('view_count')
                youtube_profile['video_count'] = youtube_data.get('video_count')
        except Exception as e:
            logger.warning(f"YouTube API error: {e}")

    return {
        'company_name': company.name,
        'company_category': company.category.name if company.category else None,
        'city': company.address_city,
        'platforms_present': present,
        'platforms_missing': missing,
        'profiles': profiles_dict,
        'total_platforms': len(all_platforms),
        'platforms_found': len(present),
        'score': int((len(present) / len(all_platforms)) * 100),
    }
# ============================================================
# GEMINI PROMPTS
# ============================================================

def _build_seo_prompt(data: dict) -> str:
    """Build the Gemini prompt for SEO audit analysis.

    Args:
        data: flat dict of SEO audit values. An optional ``_benchmarks``
            entry adds a category-benchmark section.

    Returns:
        The Polish-language prompt asking for a JSON object with
        ``summary`` and ``actions``.

    A value of ``None`` is rendered with the same placeholder as a missing
    key. ``dict.get(key, default)`` alone only covers missing keys, so
    ``None`` values used to leak into the prompt as the text "None".
    """

    def val(key, default='brak', source=data):
        """Return source[key], or *default* when it is missing or None."""
        value = source.get(key)
        return default if value is None else value

    def yes_no(key, no='NIE'):
        """Render a boolean flag as 'tak' or the given negative label."""
        return 'tak' if data.get(key) else no

    def tri_state(key):
        """Render a flag as 'tak' / 'NIE', or 'brak danych' when unknown."""
        flag = data.get(key)
        if flag is None:
            return 'brak danych'
        return 'tak' if flag else 'NIE'

    # Benchmark comparison
    benchmark_section = ""
    benchmarks = data.get('_benchmarks')
    if benchmarks and benchmarks.get('seo'):
        bm = benchmarks['seo']
        cat = val('category_name', '?', benchmarks)
        benchmark_section = f"""
BENCHMARK (średnia w kategorii "{cat}", {val('category_company_count', '?', benchmarks)} firm):
- Średni wynik SEO: {val('avg_seo_score', '?', bm)}/100
- Średnia wydajność: {val('avg_performance_score', '?', bm)}/100
- Średni czas ładowania: {val('avg_load_time_ms', '?', bm)} ms

Porównaj wyniki tej firmy ze średnią kategorii w analizie."""

    return f"""Jesteś ekspertem SEO analizującym stronę internetową lokalnej firmy w Polsce.

DANE FIRMY:
- Nazwa: {val('company_name', 'N/A')}
- Branża: {val('company_category', 'N/A')}
- Miasto: {val('city', 'N/A')}
- Strona: {val('website', 'N/A')}

WYNIKI AUDYTU SEO:
- Wynik SEO (PageSpeed): {val('seo_score')}/100
- Wydajność: {val('performance_score')}/100
- Dostępność: {val('accessibility_score')}/100
- Best Practices: {val('best_practices_score')}/100

Core Web Vitals (lab data z PageSpeed):
- LCP: {val('lcp_ms')} ms
- INP: {val('inp_ms')} ms (zastąpił FID w marcu 2024)
- CLS: {val('cls')}

CrUX Field Data (dane od realnych użytkowników Chrome):
- LCP (field): {val('crux_lcp_ms', 'brak danych')} ms ({val('crux_lcp_good_pct', '?')}% dobrych)
- INP (field): {val('crux_inp_ms', 'brak danych')} ms ({val('crux_inp_good_pct', '?')}% dobrych)
- CLS (field): {val('crux_cls', 'brak danych')}
- FCP (field): {val('crux_fcp_ms', 'brak danych')} ms
- TTFB (field): {val('crux_ttfb_ms', 'brak danych')} ms
- Okres pomiarowy: do {val('crux_period_end')}
UWAGA: "brak danych" oznacza, że strona nie ma wystarczającego ruchu z Chrome do raportowania CrUX.

Dodatkowe metryki wydajności (lab data):
- FCP: {val('fcp_ms')} ms
- TTFB: {val('ttfb_ms')} ms
- TBT: {val('tbt_ms')} ms
- Speed Index: {val('speed_index')} ms
- Czas ładowania: {val('load_time_ms')} ms

On-Page SEO:
- Meta title: {val('meta_title')} (długość: {val('meta_title_length', '?')} znaków, optymalna: 50-60)
- Meta description: {yes_no('meta_description', 'BRAK')} (długość: {val('meta_description_length', '?')} znaków, optymalna: 150-160)
- H1: {val('h1_count', 0)} (treść: {val('h1_text')})
- H2: {val('h2_count', 0)}, H3: {val('h3_count', 0)}
- Obrazy: {val('total_images', 0)} (bez alt: {val('images_without_alt', 0)})
- Linki wewnętrzne: {val('internal_links_count', 0)}, zewnętrzne: {val('external_links_count', 0)}, uszkodzone: {val('broken_links_count', 0)}

Technical SEO:
- SSL: {yes_no('has_ssl')}
- Sitemap: {yes_no('has_sitemap')}
- Robots.txt: {yes_no('has_robots_txt')}
- Canonical: {yes_no('has_canonical')}
- Indeksowalna: {yes_no('is_indexable')}
- Mobile-friendly: {yes_no('is_mobile_friendly', 'NIE/brak danych')}

Security Headers:
- HSTS: {tri_state('has_hsts')}
- CSP: {tri_state('has_csp')}
- X-Frame-Options: {tri_state('has_x_frame_options')}
- X-Content-Type-Options: {tri_state('has_x_content_type_options')}
- Nagłówki bezpieczeństwa: {val('security_headers_count', '?')}/4

Dane strukturalne:
- Schema.org: {yes_no('has_structured_data')} (typy: {val('structured_data_types', [])})
- LocalBusiness Schema: {yes_no('has_local_business_schema')}
- Pola LocalBusiness Schema: {val('local_business_schema_fields', 'brak danych')}
- Język strony (html lang): {val('html_lang')}

Social & Analytics:
- Open Graph: {yes_no('has_og_tags')}
- Twitter Cards: {yes_no('has_twitter_cards')}
- Google Analytics: {yes_no('has_google_analytics')}
- GTM: {yes_no('has_google_tag_manager')}

Local SEO (wynik: {val('local_seo_score')}/100):
- Mapa Google: {yes_no('has_google_maps_embed')}
- Lokalne słowa kluczowe: {yes_no('has_local_keywords')}
- NAP na stronie: {yes_no('nap_on_website')}
- Cytacje: {val('citations_found', 0)}/{val('citations_count', 0)} znalezionych

Treść:
- Świeżość: {val('content_freshness_score')}/100
- Słów na stronie głównej: {val('word_count_homepage')}

Formaty obrazów:
- Nowoczesne (WebP/AVIF/SVG): {val('modern_format_ratio', '?')}% ({val('webp_count', 0)} WebP)
- Legacy (JPEG/PNG): {val('legacy_image_count', '?')} obrazów
{benchmark_section}

ZADANIE:
Przygotuj analizę w formacie JSON z dwoma kluczami:

1. "summary" - krótki akapit (2-4 zdania) podsumowujący stan SEO strony, co jest dobrze, a co wymaga poprawy. Pisz bezpośrednio do właściciela firmy, po polsku.

2. "actions" - lista od 3 do 8 priorytetowanych akcji do podjęcia. Każda akcja to obiekt:
{{
  "action_type": "typ akcji z listy: generate_schema_org, generate_meta_description, suggest_heading_fix, generate_alt_texts, seo_roadmap, add_analytics, add_sitemap, fix_ssl, add_og_tags, improve_performance, add_local_keywords, add_nap, fix_broken_links, improve_security_headers, optimize_images",
  "title": "krótki tytuł po polsku",
  "description": "opis co trzeba zrobić i dlaczego, 1-2 zdania",
  "priority": "critical/high/medium/low",
  "impact_score": 1-10,
  "effort_score": 1-10,
  "platform": "website"
}}

Priorytetyzuj wg: impact_score / effort_score (wyższy stosunek = wyższy priorytet).
NIE sugeruj akcji dla rzeczy, które firma już ma poprawnie.

Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy."""
def _build_gbp_prompt(data: dict) -> str:
    """Build the Gemini prompt for Google Business Profile audit analysis.

    Args:
        data: flat dict of GBP audit values. An optional ``_benchmarks``
            entry adds a category-benchmark section.

    Returns:
        The Polish-language prompt asking for a JSON object with
        ``summary`` and ``actions``.

    A value of ``None`` is rendered with the same placeholder as a missing
    key, so the text "None" never reaches the prompt.
    """

    def val(key, default='brak', source=data):
        """Return source[key], or *default* when it is missing or None."""
        value = source.get(key)
        return default if value is None else value

    def check(key, yes='✓', no='✗'):
        """Render a boolean flag as a check mark or a cross."""
        return yes if data.get(key) else no

    def joined(items):
        """Comma-join items; a lone string is kept whole, others are str()-ed."""
        if isinstance(items, str):
            return items
        return ', '.join(str(item) for item in items)

    # Benchmark comparison
    gbp_benchmark_section = ""
    benchmarks = data.get('_benchmarks')
    if benchmarks and benchmarks.get('gbp'):
        bm = benchmarks['gbp']
        cat = val('category_name', '?', benchmarks)
        gbp_benchmark_section = f"""
BENCHMARK (średnia w kategorii "{cat}", {val('category_company_count', '?', benchmarks)} firm):
- Średnia kompletność GBP: {val('avg_completeness_score', '?', bm)}/100
- Średnia ocena: {val('avg_rating', '?', bm)}/5
- Średnia liczba opinii: {val('avg_review_count', '?', bm)}
- Średnia liczba zdjęć: {val('avg_photo_count', '?', bm)}

Porównaj wyniki tej firmy ze średnią kategorii w analizie."""

    # Review keywords line (only when keywords are available)
    review_keywords_line = ""
    review_keywords = data.get('review_keywords')
    if review_keywords:
        review_keywords_line = f"\n- Top słowa kluczowe z opinii: {joined(review_keywords)}"

    # AI sentiment line (only when at least one review was classified)
    sentiment_section = ""
    sentiment = data.get('review_sentiment')
    if sentiment and isinstance(sentiment, dict):
        # Treat a stored None count as 0 so the total can be computed.
        pos = sentiment.get('positive') or 0
        neu = sentiment.get('neutral') or 0
        neg = sentiment.get('negative') or 0
        if pos + neu + neg > 0:
            sentiment_section = f"\n- Sentyment opinii: {pos} pozytywnych, {neu} neutralnych, {neg} negatywnych"

    # Description keywords section
    description_keywords_section = "\nSłowa kluczowe w opisie:\n"
    description_keywords = data.get('description_keywords')
    if description_keywords:
        description_keywords_section += f"- Znalezione: {joined(description_keywords)}"
    else:
        description_keywords_section += "- Brak danych"

    # Attributes section: (group key, label, replace underscores with spaces).
    # food_and_drink names are shown verbatim, as before.
    attribute_groups = (
        ('payment', 'Płatności', True),
        ('parking', 'Parking', True),
        ('accessibility', 'Dostępność', True),
        ('service', 'Usługi', True),
        ('amenities', 'Udogodnienia', True),
        ('food_and_drink', 'Jedzenie/napoje', False),
    )
    attributes_section = ""
    attrs = data.get('attributes')
    if attrs and isinstance(attrs, dict):
        parts = []
        for group_key, label, humanize in attribute_groups:
            flags = attrs.get(group_key)
            if not isinstance(flags, dict):
                # Skip absent or malformed groups instead of failing on .items().
                continue
            enabled = [
                name.replace('_', ' ') if humanize else name
                for name, is_on in flags.items()
                if is_on
            ]
            if enabled:
                parts.append(f" {label}: {', '.join(enabled)}")
        if parts:
            attributes_section = "\n\nAtrybuty biznesu (z Google):\n" + "\n".join(parts)

    # Primary type, editorial summary and price level (only when present)
    primary_type_line = ""
    if data.get('primary_type'):
        primary_type_line = f"\n- Typ główny (Google): {data.get('primary_type')}"

    editorial_line = ""
    if data.get('editorial_summary'):
        editorial_line = f"\n- Opis Google: {data.get('editorial_summary')}"

    price_level_line = ""
    if data.get('price_level'):
        price_level_line = f"\n- Poziom cenowy: {data.get('price_level')}"

    heuristic_yes = '✓ (wykryte heurystycznie)'
    unavailable = '[dane niedostępne bez autoryzacji OAuth]'
    if data.get('has_posts'):
        posts_status = '✓ (' + str(val('posts_count_30d', 0)) + ' w 30 dni)'
    else:
        posts_status = unavailable

    return f"""Jesteś ekspertem Google Business Profile analizującym wizytówkę lokalnej firmy w Polsce.

DANE FIRMY:
- Nazwa: {val('company_name', 'N/A')}
- Branża: {val('company_category', 'N/A')}{primary_type_line}{editorial_line}{price_level_line}
- Miasto: {val('city', 'N/A')}

WYNIKI AUDYTU GBP (kompletność: {val('completeness_score')}/100):
- Nazwa: {check('has_name')}
- Adres: {check('has_address')}
- Telefon: {check('has_phone')}
- Strona WWW: {check('has_website')}
- Godziny otwarcia: {check('has_hours')}
- Kategorie: {check('has_categories')}
- Zdjęcia: {check('has_photos')} ({val('photo_count', 0)} zdjęć)
- Opis: {check('has_description')}
- Usługi: {check('has_services')}
- Logo: {check('logo_present', heuristic_yes)}
- Zdjęcie w tle: {check('cover_photo_present', heuristic_yes)}
- Status zdjęć: {val('photo_status', 'brak danych')}

UWAGA: Logo i zdjęcie w tle są wykrywane heurystycznie na podstawie liczby zdjęć (1+ = logo, 2+ = cover). Nie traktuj tego jako pewnik - formułuj rekomendacje dot. zdjęć ostrożnie, np. "sprawdź czy logo jest ustawione" zamiast "BRAKUJE logo".

Opinie:
- Liczba opinii: {val('review_count', 0)}
- Średnia ocena: {val('average_rating')}
- Z odpowiedzią: {val('reviews_with_response', 0)}
- Bez odpowiedzi: {val('reviews_without_response', 0)}
- Wskaźnik odpowiedzi: {val('review_response_rate')}%{review_keywords_line}{sentiment_section}

Aktywność (UWAGA: te pola wymagają autoryzacji OAuth i są obecnie niedostępne):
- Posty: {posts_status}
- Produkty: {check('has_products', '✓', unavailable)}
- Pytania i odpowiedzi: {check('has_qa', '✓', unavailable)}

NAP:
- Spójność NAP: {check('nap_consistent')}
- Problemy NAP: {val('nap_issues')}{attributes_section}
{description_keywords_section}
{gbp_benchmark_section}

ZADANIE:
Przygotuj analizę w formacie JSON z dwoma kluczami:

1. "summary" - krótki akapit (2-4 zdania) podsumowujący stan wizytówki Google, co jest dobrze, a co wymaga poprawy. Pisz bezpośrednio do właściciela firmy, po polsku.

2. "actions" - lista od 3 do 8 priorytetowanych akcji. Każda akcja:
{{
  "action_type": "typ z listy: generate_gbp_description, generate_gbp_post, respond_to_review, suggest_categories, gbp_improvement_plan, add_photos, add_hours, add_services, add_products",
  "title": "krótki tytuł po polsku",
  "description": "opis co trzeba zrobić i dlaczego",
  "priority": "critical/high/medium/low",
  "impact_score": 1-10,
  "effort_score": 1-10,
  "platform": "google"
}}

NIE sugeruj akcji dla pól, które firma już ma poprawnie uzupełnione.

Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy."""
def _build_social_prompt(data: dict) -> str:
    """Build the Gemini prompt for social media audit analysis.

    Args:
        data: dict with company basics, ``platforms_present``,
            ``platforms_missing``, ``profiles`` (platform -> metrics dict)
            and the presence ``score``. An optional ``_benchmarks`` entry
            adds a category-benchmark section.

    Returns:
        The Polish-language prompt asking for a JSON object with
        ``summary`` and ``actions``.

    A value of ``None`` is rendered with the same placeholder as a missing
    key, so the text "None" never reaches the prompt.
    """

    def val(key, default='?', source=data):
        """Return source[key], or *default* when it is missing or None."""
        value = source.get(key)
        return default if value is None else value

    def mark(flag):
        """Render a boolean as a check mark or a cross."""
        return '✓' if flag else '✗'

    # Benchmark comparison
    social_benchmark_section = ""
    benchmarks = data.get('_benchmarks')
    if benchmarks and benchmarks.get('social'):
        bm = benchmarks['social']
        cat = val('category_name', '?', benchmarks)
        social_benchmark_section = f"""
BENCHMARK (średnia w kategorii "{cat}", {val('category_company_count', '?', benchmarks)} firm):
- Średnia liczba platform: {val('avg_platform_count', '?', bm)}
- Średnia liczba obserwujących: {val('avg_followers', '?', bm)}
- Średnia kompletność profili: {val('avg_completeness', '?', bm)}%

Porównaj wyniki tej firmy ze średnią kategorii w analizie."""

    chunks = []            # text fragments of the per-profile details
    engagement_rates = []  # non-zero rates used for the average
    page_names = []        # profile names used for the consistency check
    for platform, info in (data.get('profiles') or {}).items():
        chunks.append(f"\n {platform}: followers={val('followers_count', '?', info)}, ")
        chunks.append(f"bio={mark(info.get('has_bio'))}, ")
        chunks.append(f"photo={mark(info.get('has_profile_photo'))}, ")
        chunks.append(f"posty_30d={val('posts_count_30d', '?', info)}, ")
        chunks.append(f"kompletność={val('profile_completeness_score', '?', info)}%")
        chunks.append(f", freq_score={val('posting_frequency_score', '?', info)}/10")
        chunks.append(f", engagement={val('engagement_rate', '?', info)}%")
        chunks.append(f", nazwa='{val('page_name', '?', info)}'")
        if info.get('last_post_date'):
            chunks.append(f", ost.post={info.get('last_post_date')}")
        # YouTube metrics from the API
        if platform == 'youtube' and info.get('video_count') is not None:
            chunks.append(f", filmy={info.get('video_count')}")
            chunks.append(f", wyświetlenia={val('view_count', '?', info)}")
            chunks.append(f", subskrybenci={val('subscriber_count', '?', info)}")
        if info.get('engagement_rate'):
            engagement_rates.append(info.get('engagement_rate'))
        if info.get('page_name'):
            page_names.append(info.get('page_name'))
    profiles_info = ''.join(chunks)

    # Average engagement over the profiles that report a non-zero rate
    avg_engagement = round(sum(engagement_rates) / len(engagement_rates), 2) if engagement_rates else 0

    # Name consistency: compare names ignoring case and surrounding
    # whitespace, so "Acme" and "acme " are not reported as different.
    normalized_names = {str(name).strip().casefold() for name in page_names}
    consistent = len(normalized_names) <= 1

    return f"""Jesteś ekspertem social media analizującym obecność lokalnej firmy w Polsce w mediach społecznościowych.

DANE FIRMY:
- Nazwa: {val('company_name', 'N/A')}
- Branża: {val('company_category', 'N/A')}
- Miasto: {val('city', 'N/A')}

OBECNOŚĆ W SOCIAL MEDIA (wynik: {val('score', 0)}/100):
- Platformy znalezione ({val('platforms_found', 0)}/{val('total_platforms', 6)}): {', '.join(data.get('platforms_present') or [])}
- Platformy brakujące: {', '.join(data.get('platforms_missing') or [])}

Szczegóły profili:{profiles_info or ' brak profili'}

DODATKOWE METRYKI:
- Średni engagement rate: {avg_engagement}% (szacunkowy, bez API)
- Spójność nazwy: {'TAK' if consistent else 'NIE — różne nazwy na platformach'}
{social_benchmark_section}

ZADANIE:
Przygotuj analizę w formacie JSON z dwoma kluczami:

1. "summary" - krótki akapit (2-4 zdania) podsumowujący obecność firmy w social media. Pisz po polsku, do właściciela firmy.

2. "actions" - lista od 3 do 8 priorytetowanych akcji. Każda akcja:
{{
  "action_type": "typ z listy: generate_social_post, generate_bio, content_calendar, content_strategy, create_profile, improve_profile, increase_engagement",
  "title": "krótki tytuł po polsku",
  "description": "opis co trzeba zrobić i dlaczego",
  "priority": "critical/high/medium/low",
  "impact_score": 1-10,
  "effort_score": 1-10,
  "platform": "facebook/instagram/linkedin/youtube/twitter/tiktok/all"
}}

Dla firm lokalnych priorytetyzuj: Facebook > Instagram > LinkedIn > reszta.
NIE sugeruj tworzenia profili na platformach nieistotnych dla branży.

Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy."""
Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy.""" # ============================================================ # CONTENT GENERATION PROMPTS # ============================================================ CONTENT_PROMPTS = { 'generate_schema_org': """Wygeneruj kompletny JSON-LD Schema.org LocalBusiness dla firmy: - Nazwa: {company_name} - Branża: {category} - Adres: {address} - Miasto: {city} - Telefon: {phone} - Strona: {website} - Email: {email} Wygeneruj WYŁĄCZNIE poprawny tag