# NOTE: CI status text and page metadata from the hosting UI were pasted here
# during extraction; they are not part of this module and have been removed.
#!/usr/bin/env python3
"""
Unit Tests for SEO Audit Functionality
======================================

Tests for:
- PageSpeed API client (scripts/pagespeed_client.py)
- On-Page SEO Analyzer (scripts/seo_analyzer.py)
- Technical SEO Checker (scripts/seo_analyzer.py)
- SEO Audit database operations (scripts/seo_audit.py)

Run tests:
    cd tests
    python -m pytest test_seo_audit.py -v

Author: Maciej Pienczyn, InPi sp. z o.o.
Date: 2026-01-08
"""
|
|
|
|
import json
|
|
import sys
|
|
import unittest
|
|
from datetime import datetime, date
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, MagicMock, patch, PropertyMock
|
|
|
|
# Add scripts directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
|
|
|
|
# Import modules to test
|
|
from pagespeed_client import (
|
|
GooglePageSpeedClient,
|
|
PageSpeedResult,
|
|
PageSpeedScore,
|
|
CoreWebVitals,
|
|
RateLimiter,
|
|
PageSpeedAPIError,
|
|
QuotaExceededError,
|
|
RateLimitError,
|
|
Strategy,
|
|
Category,
|
|
)
|
|
from seo_analyzer import (
|
|
OnPageSEOAnalyzer,
|
|
OnPageSEOResult,
|
|
MetaTags,
|
|
OpenGraphData,
|
|
TwitterCardData,
|
|
HeadingStructure,
|
|
ImageAnalysis,
|
|
LinkAnalysis,
|
|
StructuredData,
|
|
TechnicalSEOChecker,
|
|
TechnicalSEOResult,
|
|
RobotsTxtResult,
|
|
SitemapResult,
|
|
RedirectChainResult,
|
|
RedirectInfo,
|
|
CanonicalResult,
|
|
IndexabilityResult,
|
|
)
|
|
|
|
|
|
# ============================================================================
|
|
# PageSpeed Client Tests
|
|
# ============================================================================
|
|
|
|
class TestPageSpeedScore(unittest.TestCase):
    """Unit tests for the PageSpeedScore dataclass."""

    def test_create_score(self):
        """A fully populated score exposes every category value."""
        populated = PageSpeedScore(
            performance=95,
            accessibility=88,
            best_practices=92,
            seo=100,
        )
        expected = {
            'performance': 95,
            'accessibility': 88,
            'best_practices': 92,
            'seo': 100,
        }
        for attr, value in expected.items():
            self.assertEqual(getattr(populated, attr), value)

    def test_score_to_dict(self):
        """to_dict() keeps set categories and leaves unset ones as None."""
        partial = PageSpeedScore(performance=95, seo=100)
        as_dict = partial.to_dict()
        self.assertIsInstance(as_dict, dict)
        self.assertEqual(as_dict['performance'], 95)
        self.assertEqual(as_dict['seo'], 100)
        # Categories never supplied should serialize as None, not be omitted.
        self.assertIsNone(as_dict['accessibility'])
        self.assertIsNone(as_dict['best_practices'])
|
|
|
class TestCoreWebVitals(unittest.TestCase):
    """Unit tests for the CoreWebVitals dataclass."""

    def test_create_vitals(self):
        """All supplied metric values are stored on the instance."""
        metrics = CoreWebVitals(
            lcp_ms=1500,
            fid_ms=50,
            cls=0.05,
            fcp_ms=1200,
            ttfb_ms=200,
        )
        self.assertEqual(metrics.lcp_ms, 1500)
        self.assertEqual(metrics.fid_ms, 50)
        self.assertEqual(metrics.cls, 0.05)

    def test_vitals_to_dict(self):
        """to_dict() round-trips the metric values it was built with."""
        metrics = CoreWebVitals(lcp_ms=1500, cls=0.1)
        payload = metrics.to_dict()
        self.assertEqual(payload['lcp_ms'], 1500)
        self.assertEqual(payload['cls'], 0.1)
|
|
|
class TestRateLimiter(unittest.TestCase):
    """Unit tests for the RateLimiter class.

    Each test gets a limiter persisting quota to a throwaway JSON file,
    which is removed again in tearDown.
    """

    def setUp(self):
        """Create a limiter backed by a temporary quota file."""
        import tempfile
        # delete=False so the file survives close(); cleaned up in tearDown.
        quota = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        quota.close()
        self.temp_file = quota
        self.limiter = RateLimiter(
            daily_limit=100,
            min_interval=0.1,
            quota_file=quota.name,
        )

    def tearDown(self):
        """Remove the quota file if it still exists."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass

    def test_initial_state(self):
        """A fresh limiter starts with zero usage and a full quota."""
        self.assertEqual(self.limiter.daily_limit, 100)
        self.assertEqual(self.limiter.requests_today, 0)
        self.assertEqual(self.limiter.get_remaining_quota(), 100)

    def test_can_make_request_when_under_quota(self):
        """Requests are allowed while quota remains."""
        self.assertTrue(self.limiter.can_make_request())

    def test_can_make_request_when_quota_exceeded(self):
        """Requests are refused once the daily quota is exhausted."""
        self.limiter.requests_today = 100
        self.assertFalse(self.limiter.can_make_request())

    def test_record_request(self):
        """record_request() bumps the daily counter by exactly one."""
        before = self.limiter.requests_today
        self.limiter.record_request()
        self.assertEqual(self.limiter.requests_today, before + 1)

    def test_get_remaining_quota(self):
        """Remaining quota is the daily limit minus recorded requests."""
        self.limiter.requests_today = 30
        self.assertEqual(self.limiter.get_remaining_quota(), 70)

    def test_get_usage_stats(self):
        """Usage stats report count, limit, remainder and percentage."""
        self.limiter.requests_today = 25
        stats = self.limiter.get_usage_stats()
        expected = {
            'requests_today': 25,
            'daily_limit': 100,
            'remaining': 75,
            'usage_percent': 25.0,
        }
        for key, value in expected.items():
            self.assertEqual(stats[key], value)
|
|
|
class TestGooglePageSpeedClient(unittest.TestCase):
    """Tests for GooglePageSpeedClient class.

    The HTTP layer is never exercised: `_make_request_with_retry` is
    patched in the relevant tests so each one controls the raw PageSpeed
    API payload directly.
    """

    def setUp(self):
        """Set up test with mocked dependencies."""
        import tempfile
        # delete=False so the file survives close(); removed in tearDown.
        self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        self.temp_file.close()

        # min_interval=0 avoids any real inter-request sleeping in tests.
        self.rate_limiter = RateLimiter(
            daily_limit=100,
            min_interval=0,
            quota_file=self.temp_file.name
        )
        self.client = GooglePageSpeedClient(
            api_key='test_api_key',
            rate_limiter=self.rate_limiter
        )

    def tearDown(self):
        """Clean up."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass  # already gone; nothing to clean up

    def test_client_initialization(self):
        """Test client initializes correctly."""
        self.assertEqual(self.client.api_key, 'test_api_key')
        self.assertIsNotNone(self.client.rate_limiter)

    def test_client_without_api_key(self):
        """Test client works without API key (with warning)."""
        client = GooglePageSpeedClient(api_key='')
        self.assertEqual(client.api_key, '')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_analyze_url_success(self, mock_request):
        """Test successful URL analysis."""
        # Minimal Lighthouse-shaped payload: category scores come back on a
        # 0-1 scale; audit metrics are numeric (milliseconds / unitless CLS).
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'performance': {'score': 0.95},
                    'accessibility': {'score': 0.88},
                    'best-practices': {'score': 0.92},
                    'seo': {'score': 1.0},
                },
                'audits': {
                    'largest-contentful-paint': {'numericValue': 1500},
                    'cumulative-layout-shift': {'numericValue': 0.05},
                },
                'lighthouseVersion': '11.0.0',
                'timing': {'total': 5000},
            }
        }
        mock_request.return_value = mock_response

        result = self.client.analyze_url('https://example.com')

        self.assertIsInstance(result, PageSpeedResult)
        self.assertEqual(result.url, 'https://example.com')
        # 0-1 API scores are expected to be scaled to 0-100 integers.
        self.assertEqual(result.scores.performance, 95)
        self.assertEqual(result.scores.seo, 100)
        self.assertEqual(result.core_web_vitals.lcp_ms, 1500)

    def test_analyze_url_quota_exceeded(self):
        """Test QuotaExceededError when quota is 0."""
        self.rate_limiter.requests_today = 100  # exhaust the daily quota

        with self.assertRaises(QuotaExceededError):
            self.client.analyze_url('https://example.com')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_extract_score(self, mock_request):
        """Test score extraction converts 0-1 to 0-100."""
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'seo': {'score': 0.75},
                },
                'audits': {},
            }
        }
        mock_request.return_value = mock_response

        result = self.client.analyze_url('https://example.com')
        self.assertEqual(result.scores.seo, 75)

    def test_get_remaining_quota(self):
        """Test getting remaining quota."""
        self.rate_limiter.requests_today = 20
        self.assertEqual(self.client.get_remaining_quota(), 80)
|
|
|
# ============================================================================
|
|
# On-Page SEO Analyzer Tests
|
|
# ============================================================================
|
|
|
|
class TestOnPageSEOAnalyzer(unittest.TestCase):
    """Tests for OnPageSEOAnalyzer class.

    Each test feeds a small hand-written HTML document to ``analyze_html``
    and asserts on one slice of the resulting OnPageSEOResult
    (meta tags, headings, images, links, social metadata, structured data).
    """

    def setUp(self):
        """Set up analyzer."""
        self.analyzer = OnPageSEOAnalyzer()

    def test_analyze_empty_html(self):
        """Test analysis of empty HTML."""
        result = self.analyzer.analyze_html('', base_url='https://example.com')
        self.assertIsInstance(result, OnPageSEOResult)
        self.assertEqual(result.base_url, 'https://example.com')

    def test_analyze_basic_html(self):
        """Test analysis of basic HTML page."""
        html = '''
        <!DOCTYPE html>
        <html lang="pl">
        <head>
            <meta charset="utf-8">
            <title>Test Page Title</title>
            <meta name="description" content="This is a test page description">
            <meta name="viewport" content="width=device-width, initial-scale=1">
            <link rel="canonical" href="https://example.com/page">
        </head>
        <body>
            <h1>Main Heading</h1>
            <p>Some content here.</p>
            <h2>Section 1</h2>
            <h2>Section 2</h2>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html, base_url='https://example.com')

        # Check meta tags
        self.assertEqual(result.meta_tags.title, 'Test Page Title')
        self.assertEqual(result.meta_tags.description, 'This is a test page description')
        self.assertEqual(result.meta_tags.viewport, 'width=device-width, initial-scale=1')
        self.assertEqual(result.meta_tags.canonical_url, 'https://example.com/page')
        self.assertTrue(result.has_doctype)
        self.assertTrue(result.has_lang_attribute)
        self.assertEqual(result.lang_attribute, 'pl')

    def test_analyze_headings(self):
        """Test heading structure analysis."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h1>Main Heading</h1>
            <h2>Section 1</h2>
            <h2>Section 2</h2>
            <h3>Subsection</h3>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 1)
        self.assertEqual(result.headings.h2_count, 2)
        self.assertEqual(result.headings.h3_count, 1)
        self.assertTrue(result.headings.has_single_h1)
        self.assertTrue(result.headings.has_proper_hierarchy)
        self.assertEqual(result.headings.h1_texts, ['Main Heading'])

    def test_analyze_multiple_h1s(self):
        """Test detection of multiple H1 headings (bad practice)."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h1>First H1</h1>
            <h1>Second H1</h1>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 2)
        self.assertFalse(result.headings.has_single_h1)
        self.assertFalse(result.headings.has_proper_hierarchy)
        self.assertIn('Multiple H1 headings (2)', result.headings.hierarchy_issues)

    def test_analyze_missing_h1(self):
        """Test detection of missing H1 heading."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h2>Section without H1</h2>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 0)
        self.assertFalse(result.headings.has_proper_hierarchy)
        self.assertIn('Missing H1 heading', result.headings.hierarchy_issues)

    def test_analyze_images(self):
        """Test image analysis."""
        # Four cases: good alt, empty alt, missing alt, placeholder alt.
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <img src="img1.jpg" alt="Good alt text">
            <img src="img2.jpg" alt="">
            <img src="img3.jpg">
            <img src="img4.jpg" alt="image">
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.images.total_images, 4)
        self.assertEqual(result.images.images_with_alt, 3)  # includes empty alt
        self.assertEqual(result.images.images_without_alt, 1)
        self.assertEqual(result.images.images_with_empty_alt, 1)
        self.assertEqual(len(result.images.alt_text_quality_issues), 1)  # "image" is placeholder

    def test_analyze_links_internal_external(self):
        """Test link analysis distinguishing internal/external."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <a href="https://example.com/page1">Internal 1</a>
            <a href="/page2">Internal 2</a>
            <a href="./page3">Internal 3</a>
            <a href="https://other.com">External</a>
            <a href="https://facebook.com" rel="nofollow">Social</a>
            <a href="#">Broken</a>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html, base_url='https://example.com')

        self.assertEqual(result.links.total_links, 6)
        # Absolute same-domain, root-relative and document-relative links
        # should all count as internal against base_url.
        self.assertEqual(result.links.internal_links, 3)
        self.assertEqual(result.links.external_links, 2)
        self.assertEqual(result.links.nofollow_links, 1)
        self.assertEqual(result.links.broken_anchor_links, 1)

    def test_analyze_open_graph(self):
        """Test Open Graph metadata extraction."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <meta property="og:title" content="OG Title">
            <meta property="og:description" content="OG Description">
            <meta property="og:image" content="https://example.com/image.jpg">
            <meta property="og:type" content="website">
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.open_graph.og_title, 'OG Title')
        self.assertEqual(result.open_graph.og_description, 'OG Description')
        self.assertEqual(result.open_graph.og_image, 'https://example.com/image.jpg')
        self.assertEqual(result.open_graph.og_type, 'website')

    def test_analyze_twitter_card(self):
        """Test Twitter Card metadata extraction."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <meta name="twitter:card" content="summary_large_image">
            <meta name="twitter:title" content="Twitter Title">
            <meta name="twitter:description" content="Twitter Description">
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.twitter_card.card_type, 'summary_large_image')
        self.assertEqual(result.twitter_card.title, 'Twitter Title')
        self.assertEqual(result.twitter_card.description, 'Twitter Description')

    def test_analyze_structured_data_json_ld(self):
        """Test JSON-LD structured data detection."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "LocalBusiness",
                "name": "Test Business"
            }
            </script>
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertTrue(result.structured_data.has_structured_data)
        self.assertEqual(result.structured_data.json_ld_count, 1)
        self.assertIn('LocalBusiness', result.structured_data.json_ld_types)
        self.assertIn('LocalBusiness', result.structured_data.all_types)

    def test_analyze_structured_data_microdata(self):
        """Test Microdata structured data detection."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <div itemscope itemtype="https://schema.org/Organization">
                <span itemprop="name">Test Org</span>
            </div>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertTrue(result.structured_data.has_structured_data)
        self.assertEqual(result.structured_data.microdata_count, 1)
        self.assertIn('Organization', result.structured_data.microdata_types)

    def test_analyze_word_count(self):
        """Test word count calculation."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <p>This is a sentence with seven words here.</p>
            <script>var x = "not counted";</script>
            <style>.hidden { display: none; }</style>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        # Should count visible text only; script/style content is excluded,
        # so a loose range is asserted rather than an exact count.
        self.assertGreater(result.word_count, 5)
        self.assertLess(result.word_count, 20)

    def test_result_to_dict(self):
        """Test converting result to dictionary."""
        html = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        result = self.analyzer.analyze_html(html)

        result_dict = result.to_dict()

        self.assertIsInstance(result_dict, dict)
        self.assertIn('meta_tags', result_dict)
        self.assertIn('headings', result_dict)
        self.assertIn('images', result_dict)
        self.assertIn('links', result_dict)
        self.assertIn('structured_data', result_dict)
|
|
|
# ============================================================================
|
|
# Technical SEO Checker Tests
|
|
# ============================================================================
|
|
|
|
class TestTechnicalSEOChecker(unittest.TestCase):
    """Tests for TechnicalSEOChecker class.

    All network access goes through ``requests.Session.get``, which is
    patched per test so no real HTTP requests are made.
    """

    def setUp(self):
        """Set up checker."""
        self.checker = TechnicalSEOChecker(timeout=5)

    @patch('requests.Session.get')
    def test_check_robots_txt_exists(self, mock_get):
        """Test robots.txt detection when it exists."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = '''
User-agent: *
Disallow: /admin/
Sitemap: https://example.com/sitemap.xml
'''
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertTrue(result.exists)
        self.assertEqual(result.status_code, 200)
        self.assertIn('/admin/', result.disallow_rules)
        self.assertIn('https://example.com/sitemap.xml', result.sitemap_urls)

    @patch('requests.Session.get')
    def test_check_robots_txt_not_found(self, mock_get):
        """Test robots.txt detection when it doesn't exist."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertFalse(result.exists)
        self.assertEqual(result.status_code, 404)

    @patch('requests.Session.get')
    def test_check_robots_txt_blocks_googlebot(self, mock_get):
        """Test detection of Googlebot blocking in robots.txt."""
        mock_response = Mock()
        mock_response.status_code = 200
        # "Disallow: /" under a Googlebot user-agent blocks the whole site.
        mock_response.text = '''
User-agent: Googlebot
Disallow: /
'''
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertTrue(result.exists)
        self.assertTrue(result.blocks_googlebot)

    @patch('requests.Session.get')
    def test_check_sitemap_valid_xml(self, mock_get):
        """Test valid sitemap.xml detection."""
        mock_response = Mock()
        mock_response.status_code = 200
        # Use simpler XML without namespace for reliable parsing
        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
<urlset>
    <url><loc>https://example.com/</loc></url>
    <url><loc>https://example.com/page1</loc></url>
</urlset>
'''
        mock_response.headers = {'Last-Modified': 'Tue, 07 Jan 2026 10:00:00 GMT'}
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertTrue(result.exists)
        self.assertTrue(result.is_valid_xml)
        self.assertFalse(result.is_sitemap_index)
        self.assertEqual(result.url_count, 2)
        self.assertIn('https://example.com/', result.sample_urls)

    @patch('requests.Session.get')
    def test_check_sitemap_index(self, mock_get):
        """Test sitemap index detection."""
        mock_response = Mock()
        mock_response.status_code = 200
        # A <sitemapindex> root should be recognized as an index, not a urlset.
        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    <sitemap><loc>https://example.com/sitemap1.xml</loc></sitemap>
    <sitemap><loc>https://example.com/sitemap2.xml</loc></sitemap>
</sitemapindex>
'''
        mock_response.headers = {}
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertTrue(result.exists)
        self.assertTrue(result.is_valid_xml)
        self.assertTrue(result.is_sitemap_index)
        self.assertEqual(result.sitemap_count, 2)

    @patch('requests.Session.get')
    def test_check_sitemap_not_found(self, mock_get):
        """Test sitemap.xml detection when not found."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertFalse(result.exists)

    @patch('requests.Session.get')
    def test_check_redirect_chain_no_redirects(self, mock_get):
        """Test URL with no redirects."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        result = self.checker.check_redirect_chain('https://example.com')

        self.assertEqual(result.chain_length, 0)
        self.assertEqual(result.final_url, 'https://example.com')
        self.assertFalse(result.has_redirect_loop)
        self.assertEqual(len(result.redirects), 0)

    @patch('requests.Session.get')
    def test_check_redirect_chain_with_redirect(self, mock_get):
        """Test URL with single redirect."""
        # First call returns redirect
        redirect_response = Mock()
        redirect_response.status_code = 301
        redirect_response.headers = {'Location': 'https://www.example.com/'}

        # Second call returns final page
        final_response = Mock()
        final_response.status_code = 200

        # side_effect serves the responses in call order, so the checker is
        # expected to follow the Location header exactly once.
        mock_get.side_effect = [redirect_response, final_response]

        result = self.checker.check_redirect_chain('https://example.com')

        self.assertEqual(result.chain_length, 1)
        self.assertEqual(result.final_url, 'https://www.example.com/')
        self.assertEqual(len(result.redirects), 1)
        self.assertEqual(result.redirects[0].status_code, 301)

    @patch('requests.Session.get')
    def test_check_redirect_https_upgrade(self, mock_get):
        """Test detection of HTTP to HTTPS upgrade redirect."""
        redirect_response = Mock()
        redirect_response.status_code = 301
        redirect_response.headers = {'Location': 'https://example.com/'}

        final_response = Mock()
        final_response.status_code = 200

        mock_get.side_effect = [redirect_response, final_response]

        # Start from plain http:// so the hop is a scheme upgrade.
        result = self.checker.check_redirect_chain('http://example.com')

        self.assertEqual(result.chain_length, 1)
        self.assertTrue(result.redirects[0].is_https_upgrade)

    def test_check_canonical_self_referencing(self):
        """Test detection of self-referencing canonical URL."""
        html = '''
        <html>
        <head>
            <link rel="canonical" href="https://example.com/page">
        </head>
        <body></body>
        </html>
        '''
        # Canonical href matches the page's own URL exactly.
        result = self.checker._check_canonical(html, 'https://example.com/page')

        self.assertTrue(result.has_canonical)
        self.assertEqual(result.canonical_url, 'https://example.com/page')
        self.assertTrue(result.is_self_referencing)
        self.assertFalse(result.points_to_different_domain)

    def test_check_canonical_different_domain(self):
        """Test detection of canonical pointing to different domain."""
        html = '''
        <html>
        <head>
            <link rel="canonical" href="https://other.com/page">
        </head>
        <body></body>
        </html>
        '''
        result = self.checker._check_canonical(html, 'https://example.com/page')

        self.assertTrue(result.has_canonical)
        self.assertTrue(result.points_to_different_domain)

    def test_check_indexability_noindex_meta(self):
        """Test detection of noindex meta tag."""
        mock_response = Mock()
        mock_response.text = '''
        <html>
        <head>
            <meta name="robots" content="noindex, nofollow">
        </head>
        <body></body>
        </html>
        '''
        mock_response.headers = {}

        result = self.checker._check_indexability(mock_response)

        self.assertFalse(result.is_indexable)
        self.assertTrue(result.has_noindex_meta)
        self.assertEqual(result.noindex_source, 'meta')

    def test_check_indexability_noindex_header(self):
        """Test detection of X-Robots-Tag noindex header."""
        mock_response = Mock()
        mock_response.text = '<html><head></head><body></body></html>'
        # noindex delivered via HTTP header rather than a meta tag.
        mock_response.headers = {'X-Robots-Tag': 'noindex'}

        result = self.checker._check_indexability(mock_response)

        self.assertFalse(result.is_indexable)
        self.assertTrue(result.has_noindex_header)
        self.assertEqual(result.noindex_source, 'header')

    def test_check_indexability_indexable(self):
        """Test page that is indexable."""
        mock_response = Mock()
        mock_response.text = '''
        <html>
        <head>
            <meta name="robots" content="index, follow">
        </head>
        <body></body>
        </html>
        '''
        mock_response.headers = {}

        result = self.checker._check_indexability(mock_response)

        self.assertTrue(result.is_indexable)
        self.assertFalse(result.has_noindex_meta)
        self.assertFalse(result.has_noindex_header)

    def test_result_to_dict(self):
        """Test converting technical result to dictionary."""
        result = TechnicalSEOResult(
            url='https://example.com',
            checked_at='2026-01-08T10:00:00',
            robots_txt=RobotsTxtResult(exists=True),
            sitemap=SitemapResult(exists=True),
            redirect_chain=RedirectChainResult(original_url='https://example.com', final_url='https://example.com'),
            canonical=CanonicalResult(has_canonical=True),
            indexability=IndexabilityResult(is_indexable=True),
        )

        result_dict = result.to_dict()

        self.assertIsInstance(result_dict, dict)
        self.assertEqual(result_dict['url'], 'https://example.com')
        self.assertIn('robots_txt', result_dict)
        self.assertIn('sitemap', result_dict)
        self.assertIn('redirect_chain', result_dict)
        self.assertIn('canonical', result_dict)
        self.assertIn('indexability', result_dict)
|
|
|
# ============================================================================
|
|
# SEO Audit Integration Tests
|
|
# ============================================================================
|
|
|
|
class TestSEOAuditScoreCalculation(unittest.TestCase):
    """Tests for SEO score calculation logic.

    The SEOAuditor is built via ``__new__`` (bypassing ``__init__``) so no
    database connection is ever opened; only the pure scoring methods
    ``_calculate_onpage_score`` / ``_calculate_technical_score`` are tested.
    """

    def setUp(self):
        """Set up with mocked auditor."""
        # Import SEOAuditor here since it may need database
        with patch('seo_audit.create_engine'), \
             patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            self.auditor = SEOAuditor.__new__(SEOAuditor)
            self.auditor.engine = Mock()
            self.auditor.Session = Mock()
            self.auditor.pagespeed_client = Mock()
            # Real analyzer: scoring may consult its output structure.
            self.auditor.onpage_analyzer = OnPageSEOAnalyzer()
            self.auditor.technical_checker = Mock()
            self.auditor.session = Mock()

    def test_calculate_onpage_score_perfect(self):
        """Test on-page score calculation with perfect page."""
        onpage = {
            'meta_tags': {
                'title': 'Perfect Title for SEO Optimization',
                'title_length': 38,
                'description': 'This is a perfect meta description that is between 120 and 160 characters long for optimal SEO results.',
                'description_length': 105,
                'canonical_url': 'https://example.com/page',
            },
            'headings': {
                'h1_count': 1,
                'has_proper_hierarchy': True,
            },
            'images': {
                'total_images': 10,
                'images_without_alt': 0,
            },
            'structured_data': {
                'has_structured_data': True,
            },
            'open_graph': {
                'og_title': 'OG Title',
            },
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should be high score with minor deductions
        self.assertGreaterEqual(score, 90)

    def test_calculate_onpage_score_missing_title(self):
        """Test on-page score with missing title."""
        onpage = {
            'meta_tags': {
                'title': None,
                'description': 'Some description',
                'description_length': 50,
            },
            'headings': {'h1_count': 1, 'has_proper_hierarchy': True},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should have significant deduction for missing title (-15)
        self.assertLessEqual(score, 85)

    def test_calculate_onpage_score_missing_h1(self):
        """Test on-page score with missing H1."""
        onpage = {
            'meta_tags': {
                'title': 'Good Title',
                'title_length': 10,
                'description': 'Good description',
                'description_length': 50,
            },
            'headings': {'h1_count': 0, 'has_proper_hierarchy': False},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should have deduction for missing H1 (-10) and no structured data (-5)
        self.assertLessEqual(score, 85)

    def test_calculate_technical_score_perfect(self):
        """Test technical score with perfect setup."""
        # All checks pass -> no deductions at all.
        technical = {
            'robots_txt': {
                'exists': True,
                'blocks_googlebot': False,
            },
            'sitemap': {
                'exists': True,
                'is_valid_xml': True,
            },
            'redirect_chain': {
                'chain_length': 0,
                'has_redirect_loop': False,
            },
            'indexability': {
                'is_indexable': True,
            },
            'canonical': {
                'has_canonical': True,
                'points_to_different_domain': False,
            },
        }

        score = self.auditor._calculate_technical_score(technical)

        self.assertEqual(score, 100)

    def test_calculate_technical_score_no_robots(self):
        """Test technical score without robots.txt."""
        technical = {
            'robots_txt': {'exists': False},
            'sitemap': {'exists': True, 'is_valid_xml': True},
            'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
            'indexability': {'is_indexable': True},
            'canonical': {'has_canonical': True, 'points_to_different_domain': False},
        }

        score = self.auditor._calculate_technical_score(technical)

        # -10 for missing robots.txt
        self.assertEqual(score, 90)

    def test_calculate_technical_score_blocks_googlebot(self):
        """Test technical score when blocking Googlebot."""
        technical = {
            'robots_txt': {'exists': True, 'blocks_googlebot': True},
            'sitemap': {'exists': True, 'is_valid_xml': True},
            'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
            'indexability': {'is_indexable': True},
            'canonical': {'has_canonical': True, 'points_to_different_domain': False},
        }

        score = self.auditor._calculate_technical_score(technical)

        # -20 for blocking Googlebot
        self.assertEqual(score, 80)
|
|
|
class TestSEOAuditResultCategorization(unittest.TestCase):
    """Tests for result categorization logic.

    An SEOAuditor is created via ``__new__`` with its DB machinery patched,
    since ``_categorize_result`` needs no database state.
    """

    def setUp(self):
        """Build an auditor instance without touching the database."""
        with patch('seo_audit.create_engine'), \
             patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            self.auditor = SEOAuditor.__new__(SEOAuditor)

    def _categorize(self, raw_result):
        # Convenience wrapper around the method under test.
        return self.auditor._categorize_result(raw_result)

    def test_categorize_success(self):
        """A clean 200 response with audit data categorizes as success."""
        raw = {
            'errors': [],
            'http_status': 200,
            'onpage': {'meta_tags': {}},
            'technical': {},
        }
        self.assertEqual(self._categorize(raw), 'success')

    def test_categorize_no_website(self):
        """A company without a configured website is flagged as no_website."""
        raw = {'errors': ['No website URL configured']}
        self.assertEqual(self._categorize(raw), 'no_website')

    def test_categorize_timeout(self):
        """A timeout error message maps to the timeout category."""
        raw = {'errors': ['Timeout after 30s']}
        self.assertEqual(self._categorize(raw), 'timeout')

    def test_categorize_connection_error(self):
        """A connection failure maps to connection_error."""
        raw = {'errors': ['Connection error: Failed to establish connection']}
        self.assertEqual(self._categorize(raw), 'connection_error')

    def test_categorize_ssl_error(self):
        """A certificate failure maps to ssl_error."""
        raw = {'errors': ['SSL Error: Certificate verify failed']}
        self.assertEqual(self._categorize(raw), 'ssl_error')

    def test_categorize_http_error(self):
        """A 4xx/5xx response maps to the unavailable category."""
        raw = {
            'errors': ['HTTP 404'],
            'http_status': 404,
        }
        self.assertEqual(self._categorize(raw), 'unavailable')
|
|
|
class TestParseBatchArgument(unittest.TestCase):
    """Tests for batch argument parsing."""

    def test_parse_valid_batch(self):
        """'1-10' parses into start=1, end=10."""
        from seo_audit import parse_batch_argument

        start, end = parse_batch_argument('1-10')
        self.assertEqual((start, end), (1, 10))

    def test_parse_batch_with_spaces(self):
        """Surrounding and internal whitespace is tolerated."""
        from seo_audit import parse_batch_argument

        start, end = parse_batch_argument(' 5 - 20 ')
        self.assertEqual((start, end), (5, 20))

    def test_parse_invalid_format_no_dash(self):
        """A value without a dash is rejected with a format error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('10')
        self.assertIn('Invalid batch format', str(ctx.exception))

    def test_parse_invalid_format_multiple_dashes(self):
        """More than one dash is rejected with a format error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('1-5-10')
        self.assertIn('Invalid batch format', str(ctx.exception))

    def test_parse_invalid_values_not_numbers(self):
        """Non-numeric bounds are rejected with a values error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('a-b')
        self.assertIn('Invalid batch values', str(ctx.exception))

    def test_parse_invalid_start_less_than_one(self):
        """A start bound below 1 is rejected."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('0-10')
        self.assertIn('Must be >= 1', str(ctx.exception))

    def test_parse_invalid_end_less_than_start(self):
        """An end bound below the start bound is rejected."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('10-5')
        self.assertIn('END must be >= START', str(ctx.exception))
|
|
|
|
|
|
# ============================================================================
|
|
# Helper Function Tests
|
|
# ============================================================================
|
|
|
|
class TestConvenienceFunctions(unittest.TestCase):
    """Tests for convenience functions."""

    def test_analyze_html_function(self):
        """analyze_html returns a dict exposing the page title and H1 count."""
        from seo_analyzer import analyze_html

        page = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        report = analyze_html(page, base_url='https://example.com')

        self.assertIsInstance(report, dict)
        self.assertEqual(report['meta_tags']['title'], 'Test')
        self.assertEqual(report['headings']['h1_count'], 1)
|
|
|
|
|
|
# ============================================================================
|
|
# Run Tests
|
|
# ============================================================================
|
|
|
|
if __name__ == '__main__':
    # Run the whole suite directly with verbose (per-test) output;
    # pytest invocation (see module docstring) works as well.
    unittest.main(verbosity=2)
|