# NOTE: CI status text and page metadata from the hosting UI were pasted here
# during extraction; they are not part of this module and have been removed.
#!/usr/bin/env python3
"""
Unit Tests for SEO Audit Functionality
======================================

Tests for:
- PageSpeed API client (scripts/pagespeed_client.py)
- On-Page SEO Analyzer (scripts/seo_analyzer.py)
- Technical SEO Checker (scripts/seo_analyzer.py)
- SEO Audit database operations (scripts/seo_audit.py)

Run tests:
    cd tests
    python -m pytest test_seo_audit.py -v

Author: Maciej Pienczyn, InPi sp. z o.o.
Date: 2026-01-08
"""
|
|
|
|
import json
|
|
import sys
|
|
import unittest
|
|
from datetime import datetime, date
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, MagicMock, patch, PropertyMock
|
|
|
|
# Add scripts directory to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
|
|
|
|
# Import modules to test
|
|
from pagespeed_client import (
|
|
GooglePageSpeedClient,
|
|
PageSpeedResult,
|
|
PageSpeedScore,
|
|
CoreWebVitals,
|
|
RateLimiter,
|
|
PageSpeedAPIError,
|
|
QuotaExceededError,
|
|
RateLimitError,
|
|
Strategy,
|
|
Category,
|
|
)
|
|
from seo_analyzer import (
|
|
OnPageSEOAnalyzer,
|
|
OnPageSEOResult,
|
|
MetaTags,
|
|
OpenGraphData,
|
|
TwitterCardData,
|
|
HeadingStructure,
|
|
ImageAnalysis,
|
|
LinkAnalysis,
|
|
StructuredData,
|
|
TechnicalSEOChecker,
|
|
TechnicalSEOResult,
|
|
RobotsTxtResult,
|
|
SitemapResult,
|
|
RedirectChainResult,
|
|
RedirectInfo,
|
|
CanonicalResult,
|
|
IndexabilityResult,
|
|
)
|
|
|
|
|
|
# ============================================================================
|
|
# PageSpeed Client Tests
|
|
# ============================================================================
|
|
|
|
class TestPageSpeedScore(unittest.TestCase):
    """Unit tests for the PageSpeedScore dataclass."""

    def test_create_score(self):
        """A fully populated score exposes every category value."""
        populated = PageSpeedScore(
            performance=95,
            accessibility=88,
            best_practices=92,
            seo=100,
        )
        expected = {
            'performance': 95,
            'accessibility': 88,
            'best_practices': 92,
            'seo': 100,
        }
        for attr, value in expected.items():
            self.assertEqual(getattr(populated, attr), value)

    def test_score_to_dict(self):
        """to_dict() keeps set categories and leaves unset ones as None."""
        partial = PageSpeedScore(performance=95, seo=100)
        as_dict = partial.to_dict()
        self.assertIsInstance(as_dict, dict)
        self.assertEqual(as_dict['performance'], 95)
        self.assertEqual(as_dict['seo'], 100)
        # Categories never supplied should serialize as None, not be omitted.
        self.assertIsNone(as_dict['accessibility'])
        self.assertIsNone(as_dict['best_practices'])
|
|
|
class TestCoreWebVitals(unittest.TestCase):
    """Unit tests for the CoreWebVitals dataclass."""

    def test_create_vitals(self):
        """All supplied metric values are stored on the instance."""
        metrics = CoreWebVitals(
            lcp_ms=1500,
            fid_ms=50,
            cls=0.05,
            fcp_ms=1200,
            ttfb_ms=200,
        )
        self.assertEqual(metrics.lcp_ms, 1500)
        self.assertEqual(metrics.fid_ms, 50)
        self.assertEqual(metrics.cls, 0.05)

    def test_vitals_to_dict(self):
        """to_dict() round-trips the metric values it was built with."""
        metrics = CoreWebVitals(lcp_ms=1500, cls=0.1)
        payload = metrics.to_dict()
        self.assertEqual(payload['lcp_ms'], 1500)
        self.assertEqual(payload['cls'], 0.1)
|
|
|
class TestRateLimiter(unittest.TestCase):
    """Unit tests for the RateLimiter class.

    Each test gets a limiter persisting quota to a throwaway JSON file,
    which is removed again in tearDown.
    """

    def setUp(self):
        """Create a limiter backed by a temporary quota file."""
        import tempfile
        # delete=False so the file survives close(); cleaned up in tearDown.
        quota = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        quota.close()
        self.temp_file = quota
        self.limiter = RateLimiter(
            daily_limit=100,
            min_interval=0.1,
            quota_file=quota.name,
        )

    def tearDown(self):
        """Remove the quota file if it still exists."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass

    def test_initial_state(self):
        """A fresh limiter starts with zero usage and a full quota."""
        self.assertEqual(self.limiter.daily_limit, 100)
        self.assertEqual(self.limiter.requests_today, 0)
        self.assertEqual(self.limiter.get_remaining_quota(), 100)

    def test_can_make_request_when_under_quota(self):
        """Requests are allowed while quota remains."""
        self.assertTrue(self.limiter.can_make_request())

    def test_can_make_request_when_quota_exceeded(self):
        """Requests are refused once the daily quota is exhausted."""
        self.limiter.requests_today = 100
        self.assertFalse(self.limiter.can_make_request())

    def test_record_request(self):
        """record_request() bumps the daily counter by exactly one."""
        before = self.limiter.requests_today
        self.limiter.record_request()
        self.assertEqual(self.limiter.requests_today, before + 1)

    def test_get_remaining_quota(self):
        """Remaining quota is the daily limit minus recorded requests."""
        self.limiter.requests_today = 30
        self.assertEqual(self.limiter.get_remaining_quota(), 70)

    def test_get_usage_stats(self):
        """Usage stats report count, limit, remainder and percentage."""
        self.limiter.requests_today = 25
        stats = self.limiter.get_usage_stats()
        expected = {
            'requests_today': 25,
            'daily_limit': 100,
            'remaining': 75,
            'usage_percent': 25.0,
        }
        for key, value in expected.items():
            self.assertEqual(stats[key], value)
|
|
|
class TestGooglePageSpeedClient(unittest.TestCase):
    """Tests for GooglePageSpeedClient class.

    The HTTP layer is never exercised: `_make_request_with_retry` is
    patched in the relevant tests so each one controls the raw PageSpeed
    API payload directly.
    """

    def setUp(self):
        """Set up test with mocked dependencies."""
        import tempfile
        # delete=False so the file survives close(); removed in tearDown.
        self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
        self.temp_file.close()

        # min_interval=0 avoids any real inter-request sleeping in tests.
        self.rate_limiter = RateLimiter(
            daily_limit=100,
            min_interval=0,
            quota_file=self.temp_file.name
        )
        self.client = GooglePageSpeedClient(
            api_key='test_api_key',
            rate_limiter=self.rate_limiter
        )

    def tearDown(self):
        """Clean up."""
        import os
        try:
            os.unlink(self.temp_file.name)
        except FileNotFoundError:
            pass  # already gone; nothing to clean up

    def test_client_initialization(self):
        """Test client initializes correctly."""
        self.assertEqual(self.client.api_key, 'test_api_key')
        self.assertIsNotNone(self.client.rate_limiter)

    def test_client_without_api_key(self):
        """Test client works without API key (with warning)."""
        client = GooglePageSpeedClient(api_key='')
        self.assertEqual(client.api_key, '')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_analyze_url_success(self, mock_request):
        """Test successful URL analysis."""
        # Minimal Lighthouse-shaped payload: category scores come back on a
        # 0-1 scale; audit metrics are numeric (milliseconds / unitless CLS).
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'performance': {'score': 0.95},
                    'accessibility': {'score': 0.88},
                    'best-practices': {'score': 0.92},
                    'seo': {'score': 1.0},
                },
                'audits': {
                    'largest-contentful-paint': {'numericValue': 1500},
                    'cumulative-layout-shift': {'numericValue': 0.05},
                },
                'lighthouseVersion': '11.0.0',
                'timing': {'total': 5000},
            }
        }
        mock_request.return_value = mock_response

        result = self.client.analyze_url('https://example.com')

        self.assertIsInstance(result, PageSpeedResult)
        self.assertEqual(result.url, 'https://example.com')
        # 0-1 API scores are expected to be scaled to 0-100 integers.
        self.assertEqual(result.scores.performance, 95)
        self.assertEqual(result.scores.seo, 100)
        self.assertEqual(result.core_web_vitals.lcp_ms, 1500)

    def test_analyze_url_quota_exceeded(self):
        """Test QuotaExceededError when quota is 0."""
        self.rate_limiter.requests_today = 100  # exhaust the daily quota

        with self.assertRaises(QuotaExceededError):
            self.client.analyze_url('https://example.com')

    @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
    def test_extract_score(self, mock_request):
        """Test score extraction converts 0-1 to 0-100."""
        mock_response = {
            'lighthouseResult': {
                'finalUrl': 'https://example.com',
                'categories': {
                    'seo': {'score': 0.75},
                },
                'audits': {},
            }
        }
        mock_request.return_value = mock_response

        result = self.client.analyze_url('https://example.com')
        self.assertEqual(result.scores.seo, 75)

    def test_get_remaining_quota(self):
        """Test getting remaining quota."""
        self.rate_limiter.requests_today = 20
        self.assertEqual(self.client.get_remaining_quota(), 80)
|
|
|
# ============================================================================
|
|
# On-Page SEO Analyzer Tests
|
|
# ============================================================================
|
|
|
|
class TestOnPageSEOAnalyzer(unittest.TestCase):
    """Tests for OnPageSEOAnalyzer class.

    Each test feeds a small hand-written HTML document to ``analyze_html``
    and asserts on one slice of the resulting OnPageSEOResult
    (meta tags, headings, images, links, social metadata, structured data).
    """

    def setUp(self):
        """Set up analyzer."""
        self.analyzer = OnPageSEOAnalyzer()

    def test_analyze_empty_html(self):
        """Test analysis of empty HTML."""
        result = self.analyzer.analyze_html('', base_url='https://example.com')
        self.assertIsInstance(result, OnPageSEOResult)
        self.assertEqual(result.base_url, 'https://example.com')

    def test_analyze_basic_html(self):
        """Test analysis of basic HTML page."""
        html = '''
        <!DOCTYPE html>
        <html lang="pl">
        <head>
            <meta charset="utf-8">
            <title>Test Page Title</title>
            <meta name="description" content="This is a test page description">
            <meta name="viewport" content="width=device-width, initial-scale=1">
            <link rel="canonical" href="https://example.com/page">
        </head>
        <body>
            <h1>Main Heading</h1>
            <p>Some content here.</p>
            <h2>Section 1</h2>
            <h2>Section 2</h2>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html, base_url='https://example.com')

        # Check meta tags
        self.assertEqual(result.meta_tags.title, 'Test Page Title')
        self.assertEqual(result.meta_tags.description, 'This is a test page description')
        self.assertEqual(result.meta_tags.viewport, 'width=device-width, initial-scale=1')
        self.assertEqual(result.meta_tags.canonical_url, 'https://example.com/page')
        self.assertTrue(result.has_doctype)
        self.assertTrue(result.has_lang_attribute)
        self.assertEqual(result.lang_attribute, 'pl')

    def test_analyze_headings(self):
        """Test heading structure analysis."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h1>Main Heading</h1>
            <h2>Section 1</h2>
            <h2>Section 2</h2>
            <h3>Subsection</h3>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 1)
        self.assertEqual(result.headings.h2_count, 2)
        self.assertEqual(result.headings.h3_count, 1)
        self.assertTrue(result.headings.has_single_h1)
        self.assertTrue(result.headings.has_proper_hierarchy)
        self.assertEqual(result.headings.h1_texts, ['Main Heading'])

    def test_analyze_multiple_h1s(self):
        """Test detection of multiple H1 headings (bad practice)."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h1>First H1</h1>
            <h1>Second H1</h1>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 2)
        self.assertFalse(result.headings.has_single_h1)
        self.assertFalse(result.headings.has_proper_hierarchy)
        self.assertIn('Multiple H1 headings (2)', result.headings.hierarchy_issues)

    def test_analyze_missing_h1(self):
        """Test detection of missing H1 heading."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <h2>Section without H1</h2>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.headings.h1_count, 0)
        self.assertFalse(result.headings.has_proper_hierarchy)
        self.assertIn('Missing H1 heading', result.headings.hierarchy_issues)

    def test_analyze_images(self):
        """Test image analysis."""
        # Four cases: good alt, empty alt, missing alt, placeholder alt.
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <img src="img1.jpg" alt="Good alt text">
            <img src="img2.jpg" alt="">
            <img src="img3.jpg">
            <img src="img4.jpg" alt="image">
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.images.total_images, 4)
        self.assertEqual(result.images.images_with_alt, 3)  # includes empty alt
        self.assertEqual(result.images.images_without_alt, 1)
        self.assertEqual(result.images.images_with_empty_alt, 1)
        self.assertEqual(len(result.images.alt_text_quality_issues), 1)  # "image" is placeholder

    def test_analyze_links_internal_external(self):
        """Test link analysis distinguishing internal/external."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <a href="https://example.com/page1">Internal 1</a>
            <a href="/page2">Internal 2</a>
            <a href="./page3">Internal 3</a>
            <a href="https://other.com">External</a>
            <a href="https://facebook.com" rel="nofollow">Social</a>
            <a href="#">Broken</a>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html, base_url='https://example.com')

        self.assertEqual(result.links.total_links, 6)
        # Absolute same-domain, root-relative and document-relative links
        # should all count as internal against base_url.
        self.assertEqual(result.links.internal_links, 3)
        self.assertEqual(result.links.external_links, 2)
        self.assertEqual(result.links.nofollow_links, 1)
        self.assertEqual(result.links.broken_anchor_links, 1)

    def test_analyze_open_graph(self):
        """Test Open Graph metadata extraction."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <meta property="og:title" content="OG Title">
            <meta property="og:description" content="OG Description">
            <meta property="og:image" content="https://example.com/image.jpg">
            <meta property="og:type" content="website">
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.open_graph.og_title, 'OG Title')
        self.assertEqual(result.open_graph.og_description, 'OG Description')
        self.assertEqual(result.open_graph.og_image, 'https://example.com/image.jpg')
        self.assertEqual(result.open_graph.og_type, 'website')

    def test_analyze_twitter_card(self):
        """Test Twitter Card metadata extraction."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <meta name="twitter:card" content="summary_large_image">
            <meta name="twitter:title" content="Twitter Title">
            <meta name="twitter:description" content="Twitter Description">
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertEqual(result.twitter_card.card_type, 'summary_large_image')
        self.assertEqual(result.twitter_card.title, 'Twitter Title')
        self.assertEqual(result.twitter_card.description, 'Twitter Description')

    def test_analyze_structured_data_json_ld(self):
        """Test JSON-LD structured data detection."""
        html = '''
        <html>
        <head>
            <title>Test</title>
            <script type="application/ld+json">
            {
                "@context": "https://schema.org",
                "@type": "LocalBusiness",
                "name": "Test Business"
            }
            </script>
        </head>
        <body></body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertTrue(result.structured_data.has_structured_data)
        self.assertEqual(result.structured_data.json_ld_count, 1)
        self.assertIn('LocalBusiness', result.structured_data.json_ld_types)
        self.assertIn('LocalBusiness', result.structured_data.all_types)

    def test_analyze_structured_data_microdata(self):
        """Test Microdata structured data detection."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <div itemscope itemtype="https://schema.org/Organization">
                <span itemprop="name">Test Org</span>
            </div>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        self.assertTrue(result.structured_data.has_structured_data)
        self.assertEqual(result.structured_data.microdata_count, 1)
        self.assertIn('Organization', result.structured_data.microdata_types)

    def test_analyze_word_count(self):
        """Test word count calculation."""
        html = '''
        <html>
        <head><title>Test</title></head>
        <body>
            <p>This is a sentence with seven words here.</p>
            <script>var x = "not counted";</script>
            <style>.hidden { display: none; }</style>
        </body>
        </html>
        '''
        result = self.analyzer.analyze_html(html)

        # Should count visible text only; script/style content is excluded,
        # so a loose range is asserted rather than an exact count.
        self.assertGreater(result.word_count, 5)
        self.assertLess(result.word_count, 20)

    def test_result_to_dict(self):
        """Test converting result to dictionary."""
        html = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        result = self.analyzer.analyze_html(html)

        result_dict = result.to_dict()

        self.assertIsInstance(result_dict, dict)
        self.assertIn('meta_tags', result_dict)
        self.assertIn('headings', result_dict)
        self.assertIn('images', result_dict)
        self.assertIn('links', result_dict)
        self.assertIn('structured_data', result_dict)
|
|
|
# ============================================================================
|
|
# Technical SEO Checker Tests
|
|
# ============================================================================
|
|
|
|
class TestTechnicalSEOChecker(unittest.TestCase):
    """Tests for TechnicalSEOChecker class.

    All network access goes through ``requests.Session.get``, which is
    patched per test so no real HTTP requests are made.
    """

    def setUp(self):
        """Set up checker."""
        self.checker = TechnicalSEOChecker(timeout=5)

    @patch('requests.Session.get')
    def test_check_robots_txt_exists(self, mock_get):
        """Test robots.txt detection when it exists."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.text = '''
User-agent: *
Disallow: /admin/
Sitemap: https://example.com/sitemap.xml
'''
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertTrue(result.exists)
        self.assertEqual(result.status_code, 200)
        self.assertIn('/admin/', result.disallow_rules)
        self.assertIn('https://example.com/sitemap.xml', result.sitemap_urls)

    @patch('requests.Session.get')
    def test_check_robots_txt_not_found(self, mock_get):
        """Test robots.txt detection when it doesn't exist."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertFalse(result.exists)
        self.assertEqual(result.status_code, 404)

    @patch('requests.Session.get')
    def test_check_robots_txt_blocks_googlebot(self, mock_get):
        """Test detection of Googlebot blocking in robots.txt."""
        mock_response = Mock()
        mock_response.status_code = 200
        # "Disallow: /" under a Googlebot user-agent blocks the whole site.
        mock_response.text = '''
User-agent: Googlebot
Disallow: /
'''
        mock_get.return_value = mock_response

        result = self.checker.check_robots_txt('https://example.com')

        self.assertTrue(result.exists)
        self.assertTrue(result.blocks_googlebot)

    @patch('requests.Session.get')
    def test_check_sitemap_valid_xml(self, mock_get):
        """Test valid sitemap.xml detection."""
        mock_response = Mock()
        mock_response.status_code = 200
        # Use simpler XML without namespace for reliable parsing
        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
<urlset>
    <url><loc>https://example.com/</loc></url>
    <url><loc>https://example.com/page1</loc></url>
</urlset>
'''
        mock_response.headers = {'Last-Modified': 'Tue, 07 Jan 2026 10:00:00 GMT'}
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertTrue(result.exists)
        self.assertTrue(result.is_valid_xml)
        self.assertFalse(result.is_sitemap_index)
        self.assertEqual(result.url_count, 2)
        self.assertIn('https://example.com/', result.sample_urls)

    @patch('requests.Session.get')
    def test_check_sitemap_index(self, mock_get):
        """Test sitemap index detection."""
        mock_response = Mock()
        mock_response.status_code = 200
        # A <sitemapindex> root should be recognized as an index, not a urlset.
        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    <sitemap><loc>https://example.com/sitemap1.xml</loc></sitemap>
    <sitemap><loc>https://example.com/sitemap2.xml</loc></sitemap>
</sitemapindex>
'''
        mock_response.headers = {}
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertTrue(result.exists)
        self.assertTrue(result.is_valid_xml)
        self.assertTrue(result.is_sitemap_index)
        self.assertEqual(result.sitemap_count, 2)

    @patch('requests.Session.get')
    def test_check_sitemap_not_found(self, mock_get):
        """Test sitemap.xml detection when not found."""
        mock_response = Mock()
        mock_response.status_code = 404
        mock_get.return_value = mock_response

        result = self.checker.check_sitemap('https://example.com/sitemap.xml')

        self.assertFalse(result.exists)

    @patch('requests.Session.get')
    def test_check_redirect_chain_no_redirects(self, mock_get):
        """Test URL with no redirects."""
        mock_response = Mock()
        mock_response.status_code = 200
        mock_get.return_value = mock_response

        result = self.checker.check_redirect_chain('https://example.com')

        self.assertEqual(result.chain_length, 0)
        self.assertEqual(result.final_url, 'https://example.com')
        self.assertFalse(result.has_redirect_loop)
        self.assertEqual(len(result.redirects), 0)

    @patch('requests.Session.get')
    def test_check_redirect_chain_with_redirect(self, mock_get):
        """Test URL with single redirect."""
        # First call returns redirect
        redirect_response = Mock()
        redirect_response.status_code = 301
        redirect_response.headers = {'Location': 'https://www.example.com/'}

        # Second call returns final page
        final_response = Mock()
        final_response.status_code = 200

        # side_effect serves the responses in call order, so the checker is
        # expected to follow the Location header exactly once.
        mock_get.side_effect = [redirect_response, final_response]

        result = self.checker.check_redirect_chain('https://example.com')

        self.assertEqual(result.chain_length, 1)
        self.assertEqual(result.final_url, 'https://www.example.com/')
        self.assertEqual(len(result.redirects), 1)
        self.assertEqual(result.redirects[0].status_code, 301)

    @patch('requests.Session.get')
    def test_check_redirect_https_upgrade(self, mock_get):
        """Test detection of HTTP to HTTPS upgrade redirect."""
        redirect_response = Mock()
        redirect_response.status_code = 301
        redirect_response.headers = {'Location': 'https://example.com/'}

        final_response = Mock()
        final_response.status_code = 200

        mock_get.side_effect = [redirect_response, final_response]

        # Start from plain http:// so the hop is a scheme upgrade.
        result = self.checker.check_redirect_chain('http://example.com')

        self.assertEqual(result.chain_length, 1)
        self.assertTrue(result.redirects[0].is_https_upgrade)

    def test_check_canonical_self_referencing(self):
        """Test detection of self-referencing canonical URL."""
        html = '''
        <html>
        <head>
            <link rel="canonical" href="https://example.com/page">
        </head>
        <body></body>
        </html>
        '''
        # Canonical href matches the page's own URL exactly.
        result = self.checker._check_canonical(html, 'https://example.com/page')

        self.assertTrue(result.has_canonical)
        self.assertEqual(result.canonical_url, 'https://example.com/page')
        self.assertTrue(result.is_self_referencing)
        self.assertFalse(result.points_to_different_domain)

    def test_check_canonical_different_domain(self):
        """Test detection of canonical pointing to different domain."""
        html = '''
        <html>
        <head>
            <link rel="canonical" href="https://other.com/page">
        </head>
        <body></body>
        </html>
        '''
        result = self.checker._check_canonical(html, 'https://example.com/page')

        self.assertTrue(result.has_canonical)
        self.assertTrue(result.points_to_different_domain)

    def test_check_indexability_noindex_meta(self):
        """Test detection of noindex meta tag."""
        mock_response = Mock()
        mock_response.text = '''
        <html>
        <head>
            <meta name="robots" content="noindex, nofollow">
        </head>
        <body></body>
        </html>
        '''
        mock_response.headers = {}

        result = self.checker._check_indexability(mock_response)

        self.assertFalse(result.is_indexable)
        self.assertTrue(result.has_noindex_meta)
        self.assertEqual(result.noindex_source, 'meta')

    def test_check_indexability_noindex_header(self):
        """Test detection of X-Robots-Tag noindex header."""
        mock_response = Mock()
        mock_response.text = '<html><head></head><body></body></html>'
        # noindex delivered via HTTP header rather than a meta tag.
        mock_response.headers = {'X-Robots-Tag': 'noindex'}

        result = self.checker._check_indexability(mock_response)

        self.assertFalse(result.is_indexable)
        self.assertTrue(result.has_noindex_header)
        self.assertEqual(result.noindex_source, 'header')

    def test_check_indexability_indexable(self):
        """Test page that is indexable."""
        mock_response = Mock()
        mock_response.text = '''
        <html>
        <head>
            <meta name="robots" content="index, follow">
        </head>
        <body></body>
        </html>
        '''
        mock_response.headers = {}

        result = self.checker._check_indexability(mock_response)

        self.assertTrue(result.is_indexable)
        self.assertFalse(result.has_noindex_meta)
        self.assertFalse(result.has_noindex_header)

    def test_result_to_dict(self):
        """Test converting technical result to dictionary."""
        result = TechnicalSEOResult(
            url='https://example.com',
            checked_at='2026-01-08T10:00:00',
            robots_txt=RobotsTxtResult(exists=True),
            sitemap=SitemapResult(exists=True),
            redirect_chain=RedirectChainResult(original_url='https://example.com', final_url='https://example.com'),
            canonical=CanonicalResult(has_canonical=True),
            indexability=IndexabilityResult(is_indexable=True),
        )

        result_dict = result.to_dict()

        self.assertIsInstance(result_dict, dict)
        self.assertEqual(result_dict['url'], 'https://example.com')
        self.assertIn('robots_txt', result_dict)
        self.assertIn('sitemap', result_dict)
        self.assertIn('redirect_chain', result_dict)
        self.assertIn('canonical', result_dict)
        self.assertIn('indexability', result_dict)
|
|
|
# ============================================================================
|
|
# SEO Audit Integration Tests
|
|
# ============================================================================
|
|
|
|
class TestSEOAuditScoreCalculation(unittest.TestCase):
    """Tests for SEO score calculation logic.

    The SEOAuditor is built via ``__new__`` (bypassing ``__init__``) so no
    database connection is ever opened; only the pure scoring methods
    ``_calculate_onpage_score`` / ``_calculate_technical_score`` are tested.
    """

    def setUp(self):
        """Set up with mocked auditor."""
        # Import SEOAuditor here since it may need database
        with patch('seo_audit.create_engine'), \
             patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            self.auditor = SEOAuditor.__new__(SEOAuditor)
            self.auditor.engine = Mock()
            self.auditor.Session = Mock()
            self.auditor.pagespeed_client = Mock()
            # Real analyzer: scoring may consult its output structure.
            self.auditor.onpage_analyzer = OnPageSEOAnalyzer()
            self.auditor.technical_checker = Mock()
            self.auditor.session = Mock()

    def test_calculate_onpage_score_perfect(self):
        """Test on-page score calculation with perfect page."""
        onpage = {
            'meta_tags': {
                'title': 'Perfect Title for SEO Optimization',
                'title_length': 38,
                'description': 'This is a perfect meta description that is between 120 and 160 characters long for optimal SEO results.',
                'description_length': 105,
                'canonical_url': 'https://example.com/page',
            },
            'headings': {
                'h1_count': 1,
                'has_proper_hierarchy': True,
            },
            'images': {
                'total_images': 10,
                'images_without_alt': 0,
            },
            'structured_data': {
                'has_structured_data': True,
            },
            'open_graph': {
                'og_title': 'OG Title',
            },
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should be high score with minor deductions
        self.assertGreaterEqual(score, 90)

    def test_calculate_onpage_score_missing_title(self):
        """Test on-page score with missing title."""
        onpage = {
            'meta_tags': {
                'title': None,
                'description': 'Some description',
                'description_length': 50,
            },
            'headings': {'h1_count': 1, 'has_proper_hierarchy': True},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should have significant deduction for missing title (-15)
        self.assertLessEqual(score, 85)

    def test_calculate_onpage_score_missing_h1(self):
        """Test on-page score with missing H1."""
        onpage = {
            'meta_tags': {
                'title': 'Good Title',
                'title_length': 10,
                'description': 'Good description',
                'description_length': 50,
            },
            'headings': {'h1_count': 0, 'has_proper_hierarchy': False},
            'images': {'total_images': 0, 'images_without_alt': 0},
            'structured_data': {'has_structured_data': False},
            'open_graph': {},
        }

        score = self.auditor._calculate_onpage_score(onpage)

        # Should have deduction for missing H1 (-10) and no structured data (-5)
        self.assertLessEqual(score, 85)

    def test_calculate_technical_score_perfect(self):
        """Test technical score with perfect setup."""
        # All checks pass -> no deductions at all.
        technical = {
            'robots_txt': {
                'exists': True,
                'blocks_googlebot': False,
            },
            'sitemap': {
                'exists': True,
                'is_valid_xml': True,
            },
            'redirect_chain': {
                'chain_length': 0,
                'has_redirect_loop': False,
            },
            'indexability': {
                'is_indexable': True,
            },
            'canonical': {
                'has_canonical': True,
                'points_to_different_domain': False,
            },
        }

        score = self.auditor._calculate_technical_score(technical)

        self.assertEqual(score, 100)

    def test_calculate_technical_score_no_robots(self):
        """Test technical score without robots.txt."""
        technical = {
            'robots_txt': {'exists': False},
            'sitemap': {'exists': True, 'is_valid_xml': True},
            'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
            'indexability': {'is_indexable': True},
            'canonical': {'has_canonical': True, 'points_to_different_domain': False},
        }

        score = self.auditor._calculate_technical_score(technical)

        # -10 for missing robots.txt
        self.assertEqual(score, 90)

    def test_calculate_technical_score_blocks_googlebot(self):
        """Test technical score when blocking Googlebot."""
        technical = {
            'robots_txt': {'exists': True, 'blocks_googlebot': True},
            'sitemap': {'exists': True, 'is_valid_xml': True},
            'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
            'indexability': {'is_indexable': True},
            'canonical': {'has_canonical': True, 'points_to_different_domain': False},
        }

        score = self.auditor._calculate_technical_score(technical)

        # -20 for blocking Googlebot
        self.assertEqual(score, 80)
|
|
|
class TestSEOAuditResultCategorization(unittest.TestCase):
    """Tests for result categorization logic.

    An SEOAuditor is created via ``__new__`` with its DB machinery patched,
    since ``_categorize_result`` needs no database state.
    """

    def setUp(self):
        """Build an auditor instance without touching the database."""
        with patch('seo_audit.create_engine'), \
             patch('seo_audit.sessionmaker'):
            from seo_audit import SEOAuditor
            self.auditor = SEOAuditor.__new__(SEOAuditor)

    def _categorize(self, raw_result):
        # Convenience wrapper around the method under test.
        return self.auditor._categorize_result(raw_result)

    def test_categorize_success(self):
        """A clean 200 response with audit data categorizes as success."""
        raw = {
            'errors': [],
            'http_status': 200,
            'onpage': {'meta_tags': {}},
            'technical': {},
        }
        self.assertEqual(self._categorize(raw), 'success')

    def test_categorize_no_website(self):
        """A company without a configured website is flagged as no_website."""
        raw = {'errors': ['No website URL configured']}
        self.assertEqual(self._categorize(raw), 'no_website')

    def test_categorize_timeout(self):
        """A timeout error message maps to the timeout category."""
        raw = {'errors': ['Timeout after 30s']}
        self.assertEqual(self._categorize(raw), 'timeout')

    def test_categorize_connection_error(self):
        """A connection failure maps to connection_error."""
        raw = {'errors': ['Connection error: Failed to establish connection']}
        self.assertEqual(self._categorize(raw), 'connection_error')

    def test_categorize_ssl_error(self):
        """A certificate failure maps to ssl_error."""
        raw = {'errors': ['SSL Error: Certificate verify failed']}
        self.assertEqual(self._categorize(raw), 'ssl_error')

    def test_categorize_http_error(self):
        """A 4xx/5xx response maps to the unavailable category."""
        raw = {
            'errors': ['HTTP 404'],
            'http_status': 404,
        }
        self.assertEqual(self._categorize(raw), 'unavailable')
|
|
|
class TestParseBatchArgument(unittest.TestCase):
    """Tests for batch argument parsing."""

    def test_parse_valid_batch(self):
        """'1-10' parses into start=1, end=10."""
        from seo_audit import parse_batch_argument

        start, end = parse_batch_argument('1-10')
        self.assertEqual((start, end), (1, 10))

    def test_parse_batch_with_spaces(self):
        """Surrounding and internal whitespace is tolerated."""
        from seo_audit import parse_batch_argument

        start, end = parse_batch_argument(' 5 - 20 ')
        self.assertEqual((start, end), (5, 20))

    def test_parse_invalid_format_no_dash(self):
        """A value without a dash is rejected with a format error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('10')
        self.assertIn('Invalid batch format', str(ctx.exception))

    def test_parse_invalid_format_multiple_dashes(self):
        """More than one dash is rejected with a format error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('1-5-10')
        self.assertIn('Invalid batch format', str(ctx.exception))

    def test_parse_invalid_values_not_numbers(self):
        """Non-numeric bounds are rejected with a values error."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('a-b')
        self.assertIn('Invalid batch values', str(ctx.exception))

    def test_parse_invalid_start_less_than_one(self):
        """A start bound below 1 is rejected."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('0-10')
        self.assertIn('Must be >= 1', str(ctx.exception))

    def test_parse_invalid_end_less_than_start(self):
        """An end bound below the start bound is rejected."""
        from seo_audit import parse_batch_argument

        with self.assertRaises(ValueError) as ctx:
            parse_batch_argument('10-5')
        self.assertIn('END must be >= START', str(ctx.exception))
|
|
|
|
|
|
# ============================================================================
|
|
# Helper Function Tests
|
|
# ============================================================================
|
|
|
|
class TestConvenienceFunctions(unittest.TestCase):
    """Tests for convenience functions."""

    def test_analyze_html_function(self):
        """analyze_html returns a dict exposing the page title and H1 count."""
        from seo_analyzer import analyze_html

        page = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
        report = analyze_html(page, base_url='https://example.com')

        self.assertIsInstance(report, dict)
        self.assertEqual(report['meta_tags']['title'], 'Test')
        self.assertEqual(report['headings']['h1_count'], 1)
|
|
|
|
|
|
# ============================================================================
|
|
# Run Tests
|
|
# ============================================================================
|
|
|
|
if __name__ == '__main__':
    # Run the whole suite directly with verbose (per-test) output;
    # pytest invocation (see module docstring) works as well.
    unittest.main(verbosity=2)
|