nordabiz/benchmark_service.py
Maciej Pienczyn 1f6938eef4
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat(audit): Phase 4 - AI sentiment analysis + competitor benchmarking
Sentiment analysis:
- New analyze_review_sentiment_ai() method in GBPAuditService
- Uses Gemini to analyze review text content (not just ratings)
- Extracts themes, strengths, weaknesses, sentiment score (-1 to 1)
- Review sentiment data passed to GBP AI prompt

Competitor benchmarking:
- New benchmark_service.py with BenchmarkService class
- Calculates category averages across all 150 firms (GBP, SEO, Social)
- Metrics: completeness scores, ratings, reviews, photos, PageSpeed,
  load time, follower counts, platform coverage
- Benchmark data injected into all 3 AI prompts (SEO, GBP, Social)
- Excluded from cache hash to avoid unnecessary invalidation

All 4 phases of audit completeness plan now implemented.
Estimated completeness: 52% → ~93%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 11:53:59 +01:00

169 lines
6.2 KiB
Python

"""
Competitor Benchmarking Service
===============================
Calculates category averages across all 150 NordaBiz firms and provides
comparison data for individual company audits.
Usage:
from benchmark_service import BenchmarkService
benchmarks = BenchmarkService(db).get_benchmarks(company_id)
"""
import logging
from typing import Dict, Optional
from sqlalchemy import func
from sqlalchemy.orm import Session
from database import (
Company, Category, GBPAudit, CompanyWebsiteAnalysis, CompanySocialMedia
)
logger = logging.getLogger(__name__)
class BenchmarkService:
    """Calculate per-category average audit metrics (GBP, SEO, Social).

    Averages returned by :meth:`get_category_averages` are memoised on the
    instance in ``self._cache`` (keyed by category id), so a single service
    object runs the aggregate queries at most once per non-empty category.
    """

    def __init__(self, db: "Session"):
        """Keep the database session and start with an empty cache.

        Args:
            db: SQLAlchemy session used for every query issued by the service.
        """
        self.db = db
        self._cache = {}  # category_id -> averages dict; lives with this instance

    @staticmethod
    def _rounded(value, ndigits=None):
        """Turn an SQL aggregate result into a rounded plain number.

        Args:
            value: Aggregate value; may be ``None`` (no non-NULL inputs) or a
                non-float numeric such as ``Decimal``.
            ndigits: Passed to :func:`round`. ``None`` rounds to an ``int``.

        Returns:
            ``round(float(value or 0), ndigits)`` - ``None``/zero become 0.
        """
        return round(float(value or 0), ndigits)

    def get_category_averages(self, category_id: int) -> Dict:
        """Calculate average audit scores for a category.

        Args:
            category_id: Value matched against ``Company.category_id``.

        Returns:
            Dict with ``company_count`` plus ``gbp``, ``seo`` and ``social``
            sub-dicts of averages, or an empty dict when the category has no
            companies. Non-empty results are cached on the instance.
        """
        if category_id in self._cache:
            return self._cache[category_id]

        # Only the ids are needed; the helper queries filter on them.
        company_ids = [
            row.id
            for row in self.db.query(Company.id).filter(
                Company.category_id == category_id
            ).all()
        ]
        if not company_ids:
            return {}

        result = {
            'company_count': len(company_ids),
            'gbp': self._avg_gbp(company_ids),
            'seo': self._avg_seo(company_ids),
            'social': self._avg_social(company_ids),
        }
        self._cache[category_id] = result
        return result

    def _avg_gbp(self, company_ids: list) -> Dict:
        """Calculate average GBP metrics over each company's latest audit.

        Only one audit row per company contributes, so companies that were
        audited repeatedly are not over-weighted and ``audited_count`` is the
        number of audited companies rather than the number of audit rows.

        Args:
            company_ids: Company ids to include.

        Returns:
            Dict of rounded averages plus ``audited_count``, or an empty dict
            when none of the companies has an audit.
        """
        # Latest audit per company, picked by highest primary key.
        # NOTE(review): assumes GBPAudit.id grows with audit time - confirm, or
        # switch to the audit timestamp column if the model has one.
        latest_audit_ids = (
            self.db.query(func.max(GBPAudit.id))
            .filter(GBPAudit.company_id.in_(company_ids))
            .group_by(GBPAudit.company_id)
        )

        audits = self.db.query(
            func.avg(GBPAudit.completeness_score).label('avg_score'),
            func.avg(GBPAudit.average_rating).label('avg_rating'),
            func.avg(GBPAudit.review_count).label('avg_reviews'),
            func.avg(GBPAudit.photo_count).label('avg_photos'),
            func.count(GBPAudit.id).label('total'),
        ).filter(
            GBPAudit.id.in_(latest_audit_ids)
        ).first()

        if not audits or not audits.total:
            return {}

        return {
            'avg_completeness_score': self._rounded(audits.avg_score, 1),
            'avg_rating': self._rounded(audits.avg_rating, 2),
            'avg_review_count': self._rounded(audits.avg_reviews, 1),
            'avg_photo_count': self._rounded(audits.avg_photos, 1),
            'audited_count': audits.total,
        }

    def _avg_seo(self, company_ids: list) -> Dict:
        """Calculate average SEO metrics for given companies.

        Rows without a PageSpeed SEO score are excluded from every average.

        Args:
            company_ids: Company ids to include.

        Returns:
            Dict of rounded averages plus ``audited_count`` (number of
            analysis rows used), or an empty dict when no row qualifies.
        """
        analyses = self.db.query(
            func.avg(CompanyWebsiteAnalysis.pagespeed_seo_score).label('avg_seo'),
            func.avg(CompanyWebsiteAnalysis.pagespeed_performance_score).label('avg_perf'),
            func.avg(CompanyWebsiteAnalysis.pagespeed_accessibility_score).label('avg_acc'),
            func.avg(CompanyWebsiteAnalysis.load_time_ms).label('avg_load'),
            func.count(CompanyWebsiteAnalysis.id).label('total'),
        ).filter(
            CompanyWebsiteAnalysis.company_id.in_(company_ids),
            CompanyWebsiteAnalysis.pagespeed_seo_score.isnot(None),
        ).first()

        if not analyses or not analyses.total:
            return {}

        return {
            'avg_seo_score': self._rounded(analyses.avg_seo, 1),
            'avg_performance_score': self._rounded(analyses.avg_perf, 1),
            'avg_accessibility_score': self._rounded(analyses.avg_acc, 1),
            'avg_load_time_ms': self._rounded(analyses.avg_load),
            'audited_count': analyses.total,
        }

    def _avg_social(self, company_ids: list) -> Dict:
        """Calculate average social media metrics for given companies.

        Only profiles flagged ``is_valid`` are considered. Follower and
        completeness averages are per profile; ``avg_platform_count`` is per
        company that has at least one valid profile.

        Args:
            company_ids: Company ids to include.

        Returns:
            Dict of rounded averages plus ``companies_with_social``, or an
            empty dict when no company has a valid profile.
        """
        profiles = self.db.query(
            func.count(func.distinct(CompanySocialMedia.company_id)).label('companies_with_social'),
            func.count(CompanySocialMedia.id).label('profile_count'),
            func.avg(CompanySocialMedia.followers_count).label('avg_followers'),
            func.avg(CompanySocialMedia.profile_completeness_score).label('avg_completeness'),
        ).filter(
            CompanySocialMedia.company_id.in_(company_ids),
            # SQL expression, not a Python truth test - `==` is intentional.
            CompanySocialMedia.is_valid == True,  # noqa: E712
        ).first()

        if not profiles or not profiles.companies_with_social:
            return {}

        # Mean profiles per company = total valid profiles / distinct companies,
        # so no separate GROUP BY query is needed.
        avg_platforms = round(
            profiles.profile_count / profiles.companies_with_social, 1
        )

        return {
            'avg_followers': self._rounded(profiles.avg_followers),
            'avg_completeness': self._rounded(profiles.avg_completeness, 1),
            'avg_platform_count': avg_platforms,
            'companies_with_social': profiles.companies_with_social,
        }

    def get_benchmarks(self, company_id: int) -> Optional[Dict]:
        """Get the category benchmark data for a specific company.

        Args:
            company_id: Primary key of the company being audited.

        Returns:
            Dict with the category name, the number of companies in the
            category and the ``gbp``/``seo``/``social`` averages, or ``None``
            when the company does not exist, has no category, or no averages
            could be computed.
        """
        company = self.db.query(Company).filter(Company.id == company_id).first()
        if not company or not company.category_id:
            return None

        averages = self.get_category_averages(company.category_id)
        if not averages:
            return None

        # Looked up only once we know there is something to return.
        category = self.db.query(Category).filter(
            Category.id == company.category_id
        ).first()

        return {
            'category_name': category.name if category else 'Unknown',
            'category_company_count': averages.get('company_count', 0),
            'gbp': averages.get('gbp', {}),
            'seo': averages.get('seo', {}),
            'social': averages.get('social', {}),
        }