nordabiz/tests/test_seo_audit.py
Maciej Pienczyn 5030b71beb
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
chore: update Author to Maciej Pienczyn, InPi sp. z o.o. across all files
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 08:20:47 +02:00

1112 lines
39 KiB
Python

#!/usr/bin/env python3
"""
Unit Tests for SEO Audit Functionality
======================================
Tests for:
- PageSpeed API client (scripts/pagespeed_client.py)
- On-Page SEO Analyzer (scripts/seo_analyzer.py)
- Technical SEO Checker (scripts/seo_analyzer.py)
- SEO Audit database operations (scripts/seo_audit.py)
Run tests:
cd tests
python -m pytest test_seo_audit.py -v
Author: Maciej Pienczyn, InPi sp. z o.o.
Date: 2026-01-08
"""
import json
import sys
import unittest
from datetime import datetime, date
from pathlib import Path
from unittest.mock import Mock, MagicMock, patch, PropertyMock
# Add scripts directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
# Import modules to test
from pagespeed_client import (
GooglePageSpeedClient,
PageSpeedResult,
PageSpeedScore,
CoreWebVitals,
RateLimiter,
PageSpeedAPIError,
QuotaExceededError,
RateLimitError,
Strategy,
Category,
)
from seo_analyzer import (
OnPageSEOAnalyzer,
OnPageSEOResult,
MetaTags,
OpenGraphData,
TwitterCardData,
HeadingStructure,
ImageAnalysis,
LinkAnalysis,
StructuredData,
TechnicalSEOChecker,
TechnicalSEOResult,
RobotsTxtResult,
SitemapResult,
RedirectChainResult,
RedirectInfo,
CanonicalResult,
IndexabilityResult,
)
# ============================================================================
# PageSpeed Client Tests
# ============================================================================
class TestPageSpeedScore(unittest.TestCase):
    """Construction and serialisation checks for the ``PageSpeedScore`` dataclass."""

    def test_create_score(self):
        """Each category given to the constructor reads back unchanged as an attribute."""
        expected = {
            'performance': 95,
            'accessibility': 88,
            'best_practices': 92,
            'seo': 100,
        }
        score = PageSpeedScore(**expected)
        for category, value in expected.items():
            self.assertEqual(getattr(score, category), value)

    def test_score_to_dict(self):
        """``to_dict`` yields a dict; this test expects omitted categories to be ``None``."""
        as_dict = PageSpeedScore(performance=95, seo=100).to_dict()
        self.assertIsInstance(as_dict, dict)
        self.assertEqual(as_dict['performance'], 95)
        self.assertEqual(as_dict['seo'], 100)
        for omitted in ('accessibility', 'best_practices'):
            self.assertIsNone(as_dict[omitted])
class TestCoreWebVitals(unittest.TestCase):
    """Construction and serialisation checks for the ``CoreWebVitals`` dataclass."""

    def test_create_vitals(self):
        """Metrics passed to the constructor are exposed as same-named attributes."""
        metrics = CoreWebVitals(lcp_ms=1500, fid_ms=50, cls=0.05, fcp_ms=1200, ttfb_ms=200)
        self.assertEqual(metrics.lcp_ms, 1500)
        self.assertEqual(metrics.fid_ms, 50)
        self.assertEqual(metrics.cls, 0.05)

    def test_vitals_to_dict(self):
        """``to_dict`` carries the supplied metric values under their field names."""
        as_dict = CoreWebVitals(lcp_ms=1500, cls=0.1).to_dict()
        self.assertEqual(as_dict['lcp_ms'], 1500)
        self.assertEqual(as_dict['cls'], 0.1)
class TestRateLimiter(unittest.TestCase):
    """Tests for RateLimiter quota bookkeeping, backed by a throwaway quota file."""

    def setUp(self):
        """Create a temporary quota file and a 100-requests/day limiter that uses it."""
        import tempfile
        self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        self.temp_file.close()
        # Register removal immediately: unittest does not call tearDown() when
        # setUp() raises, so a failing RateLimiter constructor would otherwise
        # leave the delete=False file behind.
        self.addCleanup(self._remove_quota_file)
        self.limiter = RateLimiter(
            daily_limit=100,
            min_interval=0.1,
            quota_file=self.temp_file.name
        )

    def _remove_quota_file(self):
        """Delete the temporary quota file; a file that is already gone is fine."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass

    def test_initial_state(self):
        """Test initial state of rate limiter."""
        self.assertEqual(self.limiter.daily_limit, 100)
        self.assertEqual(self.limiter.requests_today, 0)
        self.assertEqual(self.limiter.get_remaining_quota(), 100)

    def test_can_make_request_when_under_quota(self):
        """Test can_make_request returns True when under quota."""
        self.assertTrue(self.limiter.can_make_request())

    def test_can_make_request_when_quota_exceeded(self):
        """Test can_make_request returns False when quota exceeded."""
        self.limiter.requests_today = 100
        self.assertFalse(self.limiter.can_make_request())

    def test_record_request(self):
        """Test recording a request increments the counter by one."""
        initial = self.limiter.requests_today
        self.limiter.record_request()
        self.assertEqual(self.limiter.requests_today, initial + 1)

    def test_get_remaining_quota(self):
        """Test remaining quota is the daily limit minus requests made today."""
        self.limiter.requests_today = 30
        self.assertEqual(self.limiter.get_remaining_quota(), 70)

    def test_get_usage_stats(self):
        """Test usage stats returns the expected keys and values."""
        self.limiter.requests_today = 25
        stats = self.limiter.get_usage_stats()
        self.assertEqual(stats['requests_today'], 25)
        self.assertEqual(stats['daily_limit'], 100)
        self.assertEqual(stats['remaining'], 75)
        self.assertEqual(stats['usage_percent'], 25.0)
class TestGooglePageSpeedClient(unittest.TestCase):
    """Tests for GooglePageSpeedClient wired to a file-backed RateLimiter."""

    def setUp(self):
        """Build a client with a dummy API key and a limiter on a temporary quota file."""
        import tempfile
        self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        self.temp_file.close()
        # Register removal immediately: unittest does not call tearDown() when
        # setUp() raises, so a failure in either constructor below would
        # otherwise leave the delete=False file behind.
        self.addCleanup(self._remove_quota_file)
        self.rate_limiter = RateLimiter(
            daily_limit=100,
            min_interval=0,
            quota_file=self.temp_file.name
        )
        self.client = GooglePageSpeedClient(
            api_key='test_api_key',
            rate_limiter=self.rate_limiter
        )

    def _remove_quota_file(self):
        """Delete the temporary quota file; a file that is already gone is fine."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass

    def test_client_initialization(self):
        """Test client stores the API key and the injected rate limiter."""
        self.assertEqual(self.client.api_key, 'test_api_key')
        self.assertIsNotNone(self.client.rate_limiter)

    def test_client_without_api_key(self):
        """Test an empty API key is accepted and stored as-is."""
        client = GooglePageSpeedClient(api_key='')
        self.assertEqual(client.api_key, '')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_analyze_url_success(self, mock_request):
        """Test successful URL analysis against a canned Lighthouse payload."""
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'performance': {'score': 0.95},
                    'accessibility': {'score': 0.88},
                    'best-practices': {'score': 0.92},
                    'seo': {'score': 1.0},
                },
                'audits': {
                    'largest-contentful-paint': {'numericValue': 1500},
                    'cumulative-layout-shift': {'numericValue': 0.05},
                },
                'lighthouseVersion': '11.0.0',
                'timing': {'total': 5000},
            }
        }
        mock_request.return_value = mock_response
        result = self.client.analyze_url('https://example.com')
        self.assertIsInstance(result, PageSpeedResult)
        self.assertEqual(result.url, 'https://example.com')
        self.assertEqual(result.scores.performance, 95)
        self.assertEqual(result.scores.seo, 100)
        self.assertEqual(result.core_web_vitals.lcp_ms, 1500)

    def test_analyze_url_quota_exceeded(self):
        """Test QuotaExceededError is raised once the daily limit is used up."""
        self.rate_limiter.requests_today = 100
        with self.assertRaises(QuotaExceededError):
            self.client.analyze_url('https://example.com')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_extract_score(self, mock_request):
        """Test score extraction converts 0-1 to 0-100."""
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'seo': {'score': 0.75},
                },
                'audits': {},
            }
        }
        mock_request.return_value = mock_response
        result = self.client.analyze_url('https://example.com')
        self.assertEqual(result.scores.seo, 75)

    def test_get_remaining_quota(self):
        """Test the client reports the remaining quota of its rate limiter."""
        self.rate_limiter.requests_today = 20
        self.assertEqual(self.client.get_remaining_quota(), 80)
# ============================================================================
# On-Page SEO Analyzer Tests
# ============================================================================
class TestOnPageSEOAnalyzer(unittest.TestCase):
    """Checks of ``OnPageSEOAnalyzer.analyze_html`` against small inline HTML documents.

    The HTML fixtures are flush-left triple-quoted strings so the markup handed
    to the analyzer is exactly the text written here.
    """

    def setUp(self):
        """Give every test a fresh analyzer instance."""
        self.analyzer = OnPageSEOAnalyzer()

    def test_analyze_empty_html(self):
        """An empty document still produces an ``OnPageSEOResult`` carrying the base URL."""
        report = self.analyzer.analyze_html('', base_url='https://example.com')
        self.assertIsInstance(report, OnPageSEOResult)
        self.assertEqual(report.base_url, 'https://example.com')

    def test_analyze_basic_html(self):
        """Title, description, viewport, canonical, doctype and lang are all extracted."""
        page = '''
<!DOCTYPE html>
<html lang="pl">
<head>
<meta charset="utf-8">
<title>Test Page Title</title>
<meta name="description" content="This is a test page description">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="canonical" href="https://example.com/page">
</head>
<body>
<h1>Main Heading</h1>
<p>Some content here.</p>
<h2>Section 1</h2>
<h2>Section 2</h2>
</body>
</html>
'''
        report = self.analyzer.analyze_html(page, base_url='https://example.com')
        # Meta tags found in <head>
        self.assertEqual(report.meta_tags.title, 'Test Page Title')
        self.assertEqual(report.meta_tags.description, 'This is a test page description')
        self.assertEqual(report.meta_tags.viewport, 'width=device-width, initial-scale=1')
        self.assertEqual(report.meta_tags.canonical_url, 'https://example.com/page')
        # Document-level flags
        self.assertTrue(report.has_doctype)
        self.assertTrue(report.has_lang_attribute)
        self.assertEqual(report.lang_attribute, 'pl')

    def test_analyze_headings(self):
        """One H1 followed by H2/H3 is counted per level and judged a proper hierarchy."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<h1>Main Heading</h1>
<h2>Section 1</h2>
<h2>Section 2</h2>
<h3>Subsection</h3>
</body>
</html>
'''
        headings = self.analyzer.analyze_html(page).headings
        self.assertEqual(headings.h1_count, 1)
        self.assertEqual(headings.h2_count, 2)
        self.assertEqual(headings.h3_count, 1)
        self.assertTrue(headings.has_single_h1)
        self.assertTrue(headings.has_proper_hierarchy)
        self.assertEqual(headings.h1_texts, ['Main Heading'])

    def test_analyze_multiple_h1s(self):
        """Two H1 elements are reported as a hierarchy issue (bad practice)."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<h1>First H1</h1>
<h1>Second H1</h1>
</body>
</html>
'''
        headings = self.analyzer.analyze_html(page).headings
        self.assertEqual(headings.h1_count, 2)
        self.assertFalse(headings.has_single_h1)
        self.assertFalse(headings.has_proper_hierarchy)
        self.assertIn('Multiple H1 headings (2)', headings.hierarchy_issues)

    def test_analyze_missing_h1(self):
        """A page without any H1 is reported as a hierarchy issue."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<h2>Section without H1</h2>
</body>
</html>
'''
        headings = self.analyzer.analyze_html(page).headings
        self.assertEqual(headings.h1_count, 0)
        self.assertFalse(headings.has_proper_hierarchy)
        self.assertIn('Missing H1 heading', headings.hierarchy_issues)

    def test_analyze_images(self):
        """Images are bucketed by alt attribute: present, empty, missing, low quality."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<img src="img1.jpg" alt="Good alt text">
<img src="img2.jpg" alt="">
<img src="img3.jpg">
<img src="img4.jpg" alt="image">
</body>
</html>
'''
        images = self.analyzer.analyze_html(page).images
        self.assertEqual(images.total_images, 4)
        # An empty alt="" still counts as "has an alt attribute".
        self.assertEqual(images.images_with_alt, 3)
        self.assertEqual(images.images_without_alt, 1)
        self.assertEqual(images.images_with_empty_alt, 1)
        # alt="image" is the single placeholder-quality alt text expected here.
        self.assertEqual(len(images.alt_text_quality_issues), 1)

    def test_analyze_links_internal_external(self):
        """Links are split into internal, external, nofollow and bare ``#`` anchors."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<a href="https://example.com/page1">Internal 1</a>
<a href="/page2">Internal 2</a>
<a href="./page3">Internal 3</a>
<a href="https://other.com">External</a>
<a href="https://facebook.com" rel="nofollow">Social</a>
<a href="#">Broken</a>
</body>
</html>
'''
        links = self.analyzer.analyze_html(page, base_url='https://example.com').links
        self.assertEqual(links.total_links, 6)
        self.assertEqual(links.internal_links, 3)
        self.assertEqual(links.external_links, 2)
        self.assertEqual(links.nofollow_links, 1)
        self.assertEqual(links.broken_anchor_links, 1)

    def test_analyze_open_graph(self):
        """``og:*`` meta properties are surfaced on ``result.open_graph``."""
        page = '''
<html>
<head>
<title>Test</title>
<meta property="og:title" content="OG Title">
<meta property="og:description" content="OG Description">
<meta property="og:image" content="https://example.com/image.jpg">
<meta property="og:type" content="website">
</head>
<body></body>
</html>
'''
        open_graph = self.analyzer.analyze_html(page).open_graph
        self.assertEqual(open_graph.og_title, 'OG Title')
        self.assertEqual(open_graph.og_description, 'OG Description')
        self.assertEqual(open_graph.og_image, 'https://example.com/image.jpg')
        self.assertEqual(open_graph.og_type, 'website')

    def test_analyze_twitter_card(self):
        """``twitter:*`` meta names are surfaced on ``result.twitter_card``."""
        page = '''
<html>
<head>
<title>Test</title>
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:title" content="Twitter Title">
<meta name="twitter:description" content="Twitter Description">
</head>
<body></body>
</html>
'''
        card = self.analyzer.analyze_html(page).twitter_card
        self.assertEqual(card.card_type, 'summary_large_image')
        self.assertEqual(card.title, 'Twitter Title')
        self.assertEqual(card.description, 'Twitter Description')

    def test_analyze_structured_data_json_ld(self):
        """A JSON-LD script block is counted and its ``@type`` recorded."""
        page = '''
<html>
<head>
<title>Test</title>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "LocalBusiness",
"name": "Test Business"
}
</script>
</head>
<body></body>
</html>
'''
        structured = self.analyzer.analyze_html(page).structured_data
        self.assertTrue(structured.has_structured_data)
        self.assertEqual(structured.json_ld_count, 1)
        self.assertIn('LocalBusiness', structured.json_ld_types)
        self.assertIn('LocalBusiness', structured.all_types)

    def test_analyze_structured_data_microdata(self):
        """An ``itemscope``/``itemtype`` element is counted and its type recorded."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<div itemscope itemtype="https://schema.org/Organization">
<span itemprop="name">Test Org</span>
</div>
</body>
</html>
'''
        structured = self.analyzer.analyze_html(page).structured_data
        self.assertTrue(structured.has_structured_data)
        self.assertEqual(structured.microdata_count, 1)
        self.assertIn('Organization', structured.microdata_types)

    def test_analyze_word_count(self):
        """The word count lands in a range consistent with counting visible text only."""
        page = '''
<html>
<head><title>Test</title></head>
<body>
<p>This is a sentence with seven words here.</p>
<script>var x = "not counted";</script>
<style>.hidden { display: none; }</style>
</body>
</html>
'''
        report = self.analyzer.analyze_html(page)
        # Loose bounds: the paragraph alone should be counted, not script/style text.
        self.assertGreater(report.word_count, 5)
        self.assertLess(report.word_count, 20)

    def test_result_to_dict(self):
        """``to_dict`` returns a dict exposing each analysis section as a key."""
        page = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        as_dict = self.analyzer.analyze_html(page).to_dict()
        self.assertIsInstance(as_dict, dict)
        for section in ('meta_tags', 'headings', 'images', 'links', 'structured_data'):
            self.assertIn(section, as_dict)
# ============================================================================
# Technical SEO Checker Tests
# ============================================================================
class TestTechnicalSEOChecker(unittest.TestCase):
    """Checks of ``TechnicalSEOChecker`` with ``requests.Session.get`` replaced by mocks.

    Multi-line response bodies are flush-left triple-quoted strings so the text
    handed to the checker is exactly what is written here.
    """

    def setUp(self):
        """Create a checker with a 5-second timeout."""
        self.checker = TechnicalSEOChecker(timeout=5)

    @staticmethod
    def _fake_response(**attributes):
        """Return a ``Mock`` with every keyword argument set as an attribute."""
        response = Mock()
        for attr, value in attributes.items():
            setattr(response, attr, value)
        return response

    @patch('requests.Session.get')
    def test_check_robots_txt_exists(self, mock_get):
        """A 200 robots.txt is parsed for its Disallow rules and Sitemap URLs."""
        mock_get.return_value = self._fake_response(
            status_code=200,
            text='''
User-agent: *
Disallow: /admin/
Sitemap: https://example.com/sitemap.xml
''',
        )
        robots = self.checker.check_robots_txt('https://example.com')
        self.assertTrue(robots.exists)
        self.assertEqual(robots.status_code, 200)
        self.assertIn('/admin/', robots.disallow_rules)
        self.assertIn('https://example.com/sitemap.xml', robots.sitemap_urls)

    @patch('requests.Session.get')
    def test_check_robots_txt_not_found(self, mock_get):
        """A 404 response is reported as a missing robots.txt."""
        mock_get.return_value = self._fake_response(status_code=404)
        robots = self.checker.check_robots_txt('https://example.com')
        self.assertFalse(robots.exists)
        self.assertEqual(robots.status_code, 404)

    @patch('requests.Session.get')
    def test_check_robots_txt_blocks_googlebot(self, mock_get):
        """``Disallow: /`` under ``User-agent: Googlebot`` is flagged as blocking Googlebot."""
        mock_get.return_value = self._fake_response(
            status_code=200,
            text='''
User-agent: Googlebot
Disallow: /
''',
        )
        robots = self.checker.check_robots_txt('https://example.com')
        self.assertTrue(robots.exists)
        self.assertTrue(robots.blocks_googlebot)

    @patch('requests.Session.get')
    def test_check_sitemap_valid_xml(self, mock_get):
        """A plain ``<urlset>`` document is valid, not an index, and its URLs are counted."""
        # The fixture deliberately omits the XML namespace for reliable parsing.
        mock_get.return_value = self._fake_response(
            status_code=200,
            content=b'''<?xml version="1.0" encoding="UTF-8"?>
<urlset>
<url><loc>https://example.com/</loc></url>
<url><loc>https://example.com/page1</loc></url>
</urlset>
''',
            headers={'Last-Modified': 'Tue, 07 Jan 2026 10:00:00 GMT'},
        )
        sitemap = self.checker.check_sitemap('https://example.com/sitemap.xml')
        self.assertTrue(sitemap.exists)
        self.assertTrue(sitemap.is_valid_xml)
        self.assertFalse(sitemap.is_sitemap_index)
        self.assertEqual(sitemap.url_count, 2)
        self.assertIn('https://example.com/', sitemap.sample_urls)

    @patch('requests.Session.get')
    def test_check_sitemap_index(self, mock_get):
        """A ``<sitemapindex>`` document is recognised and its child sitemaps counted."""
        mock_get.return_value = self._fake_response(
            status_code=200,
            content=b'''<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap><loc>https://example.com/sitemap1.xml</loc></sitemap>
<sitemap><loc>https://example.com/sitemap2.xml</loc></sitemap>
</sitemapindex>
''',
            headers={},
        )
        sitemap = self.checker.check_sitemap('https://example.com/sitemap.xml')
        self.assertTrue(sitemap.exists)
        self.assertTrue(sitemap.is_valid_xml)
        self.assertTrue(sitemap.is_sitemap_index)
        self.assertEqual(sitemap.sitemap_count, 2)

    @patch('requests.Session.get')
    def test_check_sitemap_not_found(self, mock_get):
        """A 404 response is reported as a missing sitemap."""
        mock_get.return_value = self._fake_response(status_code=404)
        sitemap = self.checker.check_sitemap('https://example.com/sitemap.xml')
        self.assertFalse(sitemap.exists)

    @patch('requests.Session.get')
    def test_check_redirect_chain_no_redirects(self, mock_get):
        """A direct 200 gives an empty chain ending at the requested URL."""
        mock_get.return_value = self._fake_response(status_code=200)
        chain = self.checker.check_redirect_chain('https://example.com')
        self.assertEqual(chain.chain_length, 0)
        self.assertEqual(chain.final_url, 'https://example.com')
        self.assertFalse(chain.has_redirect_loop)
        self.assertEqual(len(chain.redirects), 0)

    @patch('requests.Session.get')
    def test_check_redirect_chain_with_redirect(self, mock_get):
        """A single 301 hop is recorded and the Location target becomes the final URL."""
        hop = self._fake_response(
            status_code=301,
            headers={'Location': 'https://www.example.com/'},
        )
        landing = self._fake_response(status_code=200)
        # First GET answers with the redirect, the second with the final page.
        mock_get.side_effect = [hop, landing]
        chain = self.checker.check_redirect_chain('https://example.com')
        self.assertEqual(chain.chain_length, 1)
        self.assertEqual(chain.final_url, 'https://www.example.com/')
        self.assertEqual(len(chain.redirects), 1)
        self.assertEqual(chain.redirects[0].status_code, 301)

    @patch('requests.Session.get')
    def test_check_redirect_https_upgrade(self, mock_get):
        """An http:// to https:// redirect is flagged as an HTTPS upgrade."""
        hop = self._fake_response(
            status_code=301,
            headers={'Location': 'https://example.com/'},
        )
        landing = self._fake_response(status_code=200)
        mock_get.side_effect = [hop, landing]
        chain = self.checker.check_redirect_chain('http://example.com')
        self.assertEqual(chain.chain_length, 1)
        self.assertTrue(chain.redirects[0].is_https_upgrade)

    def test_check_canonical_self_referencing(self):
        """A canonical equal to the page URL is self-referencing and same-domain."""
        page = '''
<html>
<head>
<link rel="canonical" href="https://example.com/page">
</head>
<body></body>
</html>
'''
        canonical = self.checker._check_canonical(page, 'https://example.com/page')
        self.assertTrue(canonical.has_canonical)
        self.assertEqual(canonical.canonical_url, 'https://example.com/page')
        self.assertTrue(canonical.is_self_referencing)
        self.assertFalse(canonical.points_to_different_domain)

    def test_check_canonical_different_domain(self):
        """A canonical on another host is flagged as pointing to a different domain."""
        page = '''
<html>
<head>
<link rel="canonical" href="https://other.com/page">
</head>
<body></body>
</html>
'''
        canonical = self.checker._check_canonical(page, 'https://example.com/page')
        self.assertTrue(canonical.has_canonical)
        self.assertTrue(canonical.points_to_different_domain)

    def test_check_indexability_noindex_meta(self):
        """A robots meta tag containing ``noindex`` makes the page non-indexable via 'meta'."""
        response = self._fake_response(
            text='''
<html>
<head>
<meta name="robots" content="noindex, nofollow">
</head>
<body></body>
</html>
''',
            headers={},
        )
        verdict = self.checker._check_indexability(response)
        self.assertFalse(verdict.is_indexable)
        self.assertTrue(verdict.has_noindex_meta)
        self.assertEqual(verdict.noindex_source, 'meta')

    def test_check_indexability_noindex_header(self):
        """An ``X-Robots-Tag: noindex`` header makes the page non-indexable via 'header'."""
        response = self._fake_response(
            text='<html><head></head><body></body></html>',
            headers={'X-Robots-Tag': 'noindex'},
        )
        verdict = self.checker._check_indexability(response)
        self.assertFalse(verdict.is_indexable)
        self.assertTrue(verdict.has_noindex_header)
        self.assertEqual(verdict.noindex_source, 'header')

    def test_check_indexability_indexable(self):
        """``index, follow`` with no blocking header leaves the page indexable."""
        response = self._fake_response(
            text='''
<html>
<head>
<meta name="robots" content="index, follow">
</head>
<body></body>
</html>
''',
            headers={},
        )
        verdict = self.checker._check_indexability(response)
        self.assertTrue(verdict.is_indexable)
        self.assertFalse(verdict.has_noindex_meta)
        self.assertFalse(verdict.has_noindex_header)

    def test_result_to_dict(self):
        """``TechnicalSEOResult.to_dict`` exposes the URL and every sub-result key."""
        site = 'https://example.com'
        technical = TechnicalSEOResult(
            url=site,
            checked_at='2026-01-08T10:00:00',
            robots_txt=RobotsTxtResult(exists=True),
            sitemap=SitemapResult(exists=True),
            redirect_chain=RedirectChainResult(original_url=site, final_url=site),
            canonical=CanonicalResult(has_canonical=True),
            indexability=IndexabilityResult(is_indexable=True),
        )
        as_dict = technical.to_dict()
        self.assertIsInstance(as_dict, dict)
        self.assertEqual(as_dict['url'], 'https://example.com')
        for section in ('robots_txt', 'sitemap', 'redirect_chain', 'canonical', 'indexability'):
            self.assertIn(section, as_dict)
# ============================================================================
# SEO Audit Integration Tests
# ============================================================================
class TestSEOAuditScoreCalculation(unittest.TestCase):
    """Checks of the auditor's ``_calculate_onpage_score`` / ``_calculate_technical_score``."""

    def setUp(self):
        """Build an ``SEOAuditor`` without running ``__init__`` and attach stand-in collaborators."""
        # seo_audit is imported lazily, with its engine/session factories patched,
        # because the module may need a database.
        with patch('seo_audit.create_engine'), patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            auditor = SEOAuditor.__new__(SEOAuditor)
            auditor.engine = Mock()
            auditor.Session = Mock()
            auditor.pagespeed_client = Mock()
            auditor.onpage_analyzer = OnPageSEOAnalyzer()
            auditor.technical_checker = Mock()
            auditor.session = Mock()
            self.auditor = auditor

    @staticmethod
    def _technical_report(robots_txt):
        """Return a technical-audit dict in which only the ``robots_txt`` section varies."""
        return {
            'robots_txt': robots_txt,
            'sitemap': {'exists': True, 'is_valid_xml': True},
            'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
            'indexability': {'is_indexable': True},
            'canonical': {'has_canonical': True, 'points_to_different_domain': False},
        }

    def test_calculate_onpage_score_perfect(self):
        """A page with every on-page signal present scores at least 90."""
        # NOTE(review): the hard-coded lengths below do not equal len() of the
        # literal strings (the title is 34 characters, the description 103) —
        # confirm which of the two the scorer actually reads.
        page_signals = {
            'meta_tags': {
                'title': 'Perfect Title for SEO Optimization',
                'title_length': 38,
                'description': 'This is a perfect meta description that is between 120 and 160 characters long for optimal SEO results.',
                'description_length': 105,
                'canonical_url': 'https://example.com/page',
            },
            'headings': {'h1_count': 1, 'has_proper_hierarchy': True},
            'images': {'total_images': 10, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': True},
            'open_graph': {'og_title': 'OG Title'},
        }
        # Minor deductions are tolerated, hence a floor rather than an exact value.
        self.assertGreaterEqual(self.auditor._calculate_onpage_score(page_signals), 90)

    def test_calculate_onpage_score_missing_title(self):
        """A missing title costs enough to keep the score at or below 85."""
        page_signals = {
            'meta_tags': {
                'title': None,
                'description': 'Some description',
                'description_length': 50,
            },
            'headings': {'h1_count': 1, 'has_proper_hierarchy': True},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }
        # Expected: a significant deduction for the missing title (-15).
        self.assertLessEqual(self.auditor._calculate_onpage_score(page_signals), 85)

    def test_calculate_onpage_score_missing_h1(self):
        """A missing H1 plus no structured data keeps the score at or below 85."""
        page_signals = {
            'meta_tags': {
                'title': 'Good Title',
                'title_length': 10,
                'description': 'Good description',
                'description_length': 50,
            },
            'headings': {'h1_count': 0, 'has_proper_hierarchy': False},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }
        # Expected: deductions for missing H1 (-10) and no structured data (-5).
        self.assertLessEqual(self.auditor._calculate_onpage_score(page_signals), 85)

    def test_calculate_technical_score_perfect(self):
        """A fully healthy technical report scores exactly 100."""
        report = self._technical_report({'exists': True, 'blocks_googlebot': False})
        self.assertEqual(self.auditor._calculate_technical_score(report), 100)

    def test_calculate_technical_score_no_robots(self):
        """A missing robots.txt is expected to cost 10 points."""
        report = self._technical_report({'exists': False})
        self.assertEqual(self.auditor._calculate_technical_score(report), 90)

    def test_calculate_technical_score_blocks_googlebot(self):
        """A robots.txt that blocks Googlebot is expected to cost 20 points."""
        report = self._technical_report({'exists': True, 'blocks_googlebot': True})
        self.assertEqual(self.auditor._calculate_technical_score(report), 80)
class TestSEOAuditResultCategorization(unittest.TestCase):
    """Checks that ``_categorize_result`` maps audit result dicts to category names."""

    def setUp(self):
        """Build an ``SEOAuditor`` without running ``__init__``."""
        with patch('seo_audit.create_engine'), patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            self.auditor = SEOAuditor.__new__(SEOAuditor)

    def _assert_category(self, audit_result, expected):
        """Categorize ``audit_result`` and assert that the outcome equals ``expected``."""
        self.assertEqual(self.auditor._categorize_result(audit_result), expected)

    def test_categorize_success(self):
        """No errors and HTTP 200 is a ``'success'``."""
        self._assert_category(
            {
                'errors': [],
                'http_status': 200,
                'onpage': {'meta_tags': {}},
                'technical': {},
            },
            'success',
        )

    def test_categorize_no_website(self):
        """A missing website URL is ``'no_website'``."""
        self._assert_category({'errors': ['No website URL configured']}, 'no_website')

    def test_categorize_timeout(self):
        """A timeout message is ``'timeout'``."""
        self._assert_category({'errors': ['Timeout after 30s']}, 'timeout')

    def test_categorize_connection_error(self):
        """A connection failure message is ``'connection_error'``."""
        self._assert_category(
            {'errors': ['Connection error: Failed to establish connection']},
            'connection_error',
        )

    def test_categorize_ssl_error(self):
        """An SSL failure message is ``'ssl_error'``."""
        self._assert_category(
            {'errors': ['SSL Error: Certificate verify failed']},
            'ssl_error',
        )

    def test_categorize_http_error(self):
        """An HTTP 404 result is ``'unavailable'``."""
        self._assert_category({'errors': ['HTTP 404'], 'http_status': 404}, 'unavailable')
class TestParseBatchArgument(unittest.TestCase):
    """Checks of ``seo_audit.parse_batch_argument`` for valid and malformed ``START-END`` input."""

    @staticmethod
    def _parse(raw):
        """Import ``parse_batch_argument`` at call time and apply it to ``raw``."""
        from seo_audit import parse_batch_argument
        return parse_batch_argument(raw)

    def _assert_rejected(self, raw, message_fragment):
        """Assert that parsing ``raw`` raises ``ValueError`` mentioning ``message_fragment``."""
        with self.assertRaises(ValueError) as ctx:
            self._parse(raw)
        # Checked after the ``with`` block: code placed after the raising call
        # inside it would never run.
        self.assertIn(message_fragment, str(ctx.exception))

    def test_parse_valid_batch(self):
        """``'1-10'`` parses to start 1 and end 10."""
        first, last = self._parse('1-10')
        self.assertEqual(first, 1)
        self.assertEqual(last, 10)

    def test_parse_batch_with_spaces(self):
        """Whitespace around the numbers and the dash is tolerated."""
        first, last = self._parse(' 5 - 20 ')
        self.assertEqual(first, 5)
        self.assertEqual(last, 20)

    def test_parse_invalid_format_no_dash(self):
        """A value without a dash is rejected as an invalid format."""
        self._assert_rejected('10', 'Invalid batch format')

    def test_parse_invalid_format_multiple_dashes(self):
        """A value with more than one dash is rejected as an invalid format."""
        self._assert_rejected('1-5-10', 'Invalid batch format')

    def test_parse_invalid_values_not_numbers(self):
        """Non-numeric bounds are rejected as invalid values."""
        self._assert_rejected('a-b', 'Invalid batch values')

    def test_parse_invalid_start_less_than_one(self):
        """A start below 1 is rejected."""
        self._assert_rejected('0-10', 'Must be >= 1')

    def test_parse_invalid_end_less_than_start(self):
        """An end smaller than the start is rejected."""
        self._assert_rejected('10-5', 'END must be >= START')
# ============================================================================
# Helper Function Tests
# ============================================================================
class TestConvenienceFunctions(unittest.TestCase):
    """Checks of the module-level helpers exported by ``seo_analyzer``."""

    def test_analyze_html_function(self):
        """``analyze_html`` returns a plain dict containing the title and the H1 count."""
        from seo_analyzer import analyze_html
        page = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        summary = analyze_html(page, base_url='https://example.com')
        self.assertIsInstance(summary, dict)
        self.assertEqual(summary['meta_tags']['title'], 'Test')
        self.assertEqual(summary['headings']['h1_count'], 1)
# ============================================================================
# Run Tests
# ============================================================================
if __name__ == '__main__':
    # Executed as a script: run every test in this module, printing each test name.
    unittest.main(verbosity=2)