auto-claude: 3.1 - Create scripts/seo_analyzer.py with OnPageSEOAnalyzer

Add comprehensive on-page SEO analyzer that extracts:
- Meta tags (title, description, keywords, robots, viewport, canonical)
- Open Graph metadata (og:title, og:description, og:image, etc.)
- Twitter Card metadata (card type, site, creator, etc.)
- Heading structure (h1-h6 counts, hierarchy validation)
- Image alt text analysis (missing, empty, quality issues)
- Link analysis (internal/external/nofollow/broken)
- Structured data detection (JSON-LD, Microdata, RDFa)
- Word count and document attributes (DOCTYPE, lang)

Uses dataclasses for structured results following pagespeed_client.py pattern.
Includes CLI interface for testing individual URLs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-08 02:07:10 +01:00
parent 623ac284bf
commit 0c257f5e48

861
scripts/seo_analyzer.py Normal file
View File

@ -0,0 +1,861 @@
#!/usr/bin/env python3
"""
On-Page SEO Analyzer
====================
Analyzes HTML content for SEO factors including:
- Meta tags (title, description, keywords, robots, viewport)
- Heading structure (h1-h6 counts and hierarchy)
- Image alt text analysis
- Link analysis (internal vs external)
- Structured data detection (JSON-LD, Microdata, RDFa)
- Open Graph and Twitter Card metadata
Usage:
from seo_analyzer import OnPageSEOAnalyzer
analyzer = OnPageSEOAnalyzer()
result = analyzer.analyze_html(html_content, base_url='https://example.com')
Author: Claude Code
Date: 2026-01-08
"""
import json
import re
import logging
from typing import Optional, Dict, List, Any, Tuple
from dataclasses import dataclass, field, asdict
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup, Comment
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@dataclass
class MetaTags:
    """Container for meta tag information extracted from a document's <head>.

    Every field defaults to None, meaning the corresponding tag or
    attribute was not found in the document.
    """
    title: Optional[str] = None  # <title> text, whitespace-stripped
    title_length: Optional[int] = None  # character count of title (0 if <title> is empty)
    description: Optional[str] = None  # <meta name="description"> content
    description_length: Optional[int] = None  # character count of description
    keywords: Optional[str] = None  # <meta name="keywords"> content
    robots: Optional[str] = None  # <meta name="robots"> directives
    viewport: Optional[str] = None  # <meta name="viewport"> content
    charset: Optional[str] = None  # <meta charset> or legacy http-equiv content-type charset
    language: Optional[str] = None  # html[lang]/[xml:lang] or http-equiv content-language
    author: Optional[str] = None  # <meta name="author"> content
    generator: Optional[str] = None  # <meta name="generator"> content
    canonical_url: Optional[str] = None  # <link rel="canonical"> href

    def to_dict(self) -> Dict[str, Any]:
        """Return the meta tag data as a plain dict."""
        return asdict(self)
@dataclass
class OpenGraphData:
    """Open Graph protocol metadata (<meta property="og:*"> tags)."""
    og_title: Optional[str] = None  # og:title
    og_description: Optional[str] = None  # og:description
    og_image: Optional[str] = None  # og:image
    og_url: Optional[str] = None  # og:url
    og_type: Optional[str] = None  # og:type (e.g. website, article)
    og_site_name: Optional[str] = None  # og:site_name
    og_locale: Optional[str] = None  # og:locale

    def to_dict(self) -> Dict[str, Any]:
        """Return the Open Graph data as a plain dict."""
        return asdict(self)
@dataclass
class TwitterCardData:
    """Twitter Card metadata (<meta name="twitter:*"> tags)."""
    card_type: Optional[str] = None  # twitter:card
    site: Optional[str] = None  # twitter:site
    creator: Optional[str] = None  # twitter:creator
    title: Optional[str] = None  # twitter:title
    description: Optional[str] = None  # twitter:description
    image: Optional[str] = None  # twitter:image

    def to_dict(self) -> Dict[str, Any]:
        """Return the Twitter Card data as a plain dict."""
        return asdict(self)
@dataclass
class HeadingStructure:
    """Analysis of heading elements (h1-h6)."""
    h1_count: int = 0
    h2_count: int = 0
    h3_count: int = 0
    h4_count: int = 0
    h5_count: int = 0
    h6_count: int = 0
    h1_texts: List[str] = field(default_factory=list)  # every h1 text (truncated by the analyzer)
    h2_texts: List[str] = field(default_factory=list)  # first few h2 texts (capped by the analyzer)
    has_single_h1: bool = False  # exactly one h1 on the page
    has_proper_hierarchy: bool = False  # True iff hierarchy_issues is empty
    hierarchy_issues: List[str] = field(default_factory=list)  # human-readable problem descriptions

    def to_dict(self) -> Dict[str, Any]:
        """Return the heading analysis as a plain dict."""
        return asdict(self)
@dataclass
class ImageAnalysis:
    """Analysis of image elements and alt texts.

    Note: images with an empty alt attribute are counted in BOTH
    images_with_alt and images_with_empty_alt (empty alt is a valid
    attribute, possibly intentional for decorative images).
    """
    total_images: int = 0
    images_with_alt: int = 0  # alt attribute present (including empty)
    images_without_alt: int = 0  # alt attribute missing entirely
    images_with_empty_alt: int = 0  # alt present but whitespace/empty
    missing_alt_sources: List[str] = field(default_factory=list)  # src of images lacking alt (capped)
    alt_text_quality_issues: List[Dict[str, str]] = field(default_factory=list)  # src/alt/issue records (capped)

    def to_dict(self) -> Dict[str, Any]:
        """Return the image analysis as a plain dict."""
        return asdict(self)
@dataclass
class LinkAnalysis:
    """Analysis of anchor links."""
    total_links: int = 0  # all <a> elements that carry an href
    internal_links: int = 0  # same domain (or subdomain) as the analyzed page
    external_links: int = 0  # different domain
    nofollow_links: int = 0  # rel contains "nofollow"
    broken_anchor_links: int = 0  # href="#", empty, or javascript: pseudo-links
    links_without_text: int = 0  # no anchor text and no <img> child
    unique_internal_domains: List[str] = field(default_factory=list)  # sorted, capped by the analyzer
    unique_external_domains: List[str] = field(default_factory=list)  # sorted, capped by the analyzer

    def to_dict(self) -> Dict[str, Any]:
        """Return the link analysis as a plain dict."""
        return asdict(self)
@dataclass
class StructuredData:
    """Structured data (JSON-LD, Microdata, RDFa) analysis."""
    has_structured_data: bool = False  # any of the three formats present
    json_ld_count: int = 0  # <script type="application/ld+json"> blocks
    microdata_count: int = 0  # elements carrying itemscope
    rdfa_count: int = 0  # elements carrying typeof (plus vocab-only elements)
    json_ld_types: List[str] = field(default_factory=list)  # @type values found in JSON-LD
    microdata_types: List[str] = field(default_factory=list)  # deduplicated itemtype names
    rdfa_types: List[str] = field(default_factory=list)  # deduplicated typeof names
    all_types: List[str] = field(default_factory=list)  # sorted union of all schema type names
    json_ld_data: List[Dict[str, Any]] = field(default_factory=list)  # parsed JSON-LD payloads (capped)

    def to_dict(self) -> Dict[str, Any]:
        """Return the structured data analysis as a plain dict."""
        return asdict(self)
@dataclass
class OnPageSEOResult:
    """Complete on-page SEO analysis result.

    Aggregates the per-area analyses plus a few document-level attributes.
    """
    base_url: str
    meta_tags: MetaTags
    open_graph: OpenGraphData
    twitter_card: TwitterCardData
    headings: HeadingStructure
    images: ImageAnalysis
    links: LinkAnalysis
    structured_data: StructuredData
    word_count: int = 0  # visible-text word count
    has_doctype: bool = False  # '<!doctype' found near the start of the document
    has_lang_attribute: bool = False  # <html> carries lang/xml:lang
    lang_attribute: Optional[str] = None  # the lang value, if present
    errors: List[str] = field(default_factory=list)  # non-fatal analysis errors

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, recursing into nested dataclasses.

        asdict() emits keys in field-declaration order and converts each
        nested dataclass exactly as its own to_dict() would, so the output
        matches the previous hand-written mapping while automatically
        staying in sync when fields are added or renamed.
        """
        return asdict(self)
class OnPageSEOAnalyzer:
    """
    Analyzes HTML content for on-page SEO factors.

    This class parses HTML and extracts SEO-relevant information including
    meta tags, heading structure, image alt texts, links, and structured data.

    Usage:
        analyzer = OnPageSEOAnalyzer()
        result = analyzer.analyze_html(html_content, base_url='https://example.com')

        # Access specific metrics
        print(f"Title: {result.meta_tags.title}")
        print(f"H1 count: {result.headings.h1_count}")
        print(f"Images without alt: {result.images.images_without_alt}")
        print(f"External links: {result.links.external_links}")
        print(f"Has structured data: {result.structured_data.has_structured_data}")
    """
    # Recommended title/description character lengths for SEO best practices.
    # NOTE(review): these bounds are not referenced by any method in this
    # module — presumably intended for callers; confirm before removing.
    TITLE_MIN_LENGTH = 30
    TITLE_MAX_LENGTH = 60
    DESCRIPTION_MIN_LENGTH = 120
    DESCRIPTION_MAX_LENGTH = 160
    # Common placeholder alt texts that indicate poor SEO
    # (list includes Polish terms: grafika, zdjęcie, obrazek).
    PLACEHOLDER_ALT_TEXTS = [
        'image', 'img', 'photo', 'picture', 'pic', 'logo',
        'placeholder', 'untitled', 'no alt', 'alt', 'thumbnail',
        'icon', 'banner', 'grafika', 'zdjęcie', 'obrazek',
    ]

    def __init__(self):
        """Initialize the OnPageSEOAnalyzer (stateless; nothing to set up)."""
        pass
    def analyze_html(self, html: str, base_url: str = '') -> OnPageSEOResult:
        """
        Analyze HTML content for SEO factors.

        Args:
            html: Raw HTML content to analyze.
            base_url: Base URL for resolving relative links (e.g., 'https://example.com').

        Returns:
            OnPageSEOResult with comprehensive SEO analysis.
        """
        errors: List[str] = []
        # Parse HTML: try lxml first, fall back to the stdlib parser, and
        # bail out with an empty result (plus error message) if both fail.
        try:
            soup = BeautifulSoup(html, 'lxml')
        except Exception as e:
            logger.warning(f"lxml parser failed, falling back to html.parser: {e}")
            try:
                soup = BeautifulSoup(html, 'html.parser')
            except Exception as e2:
                logger.error(f"HTML parsing failed: {e2}")
                errors.append(f"HTML parsing failed: {str(e2)}")
                return self._empty_result(base_url, errors)
        # Check for DOCTYPE — only the first 100 characters are scanned,
        # since a valid doctype appears at the very start of the document.
        has_doctype = '<!doctype' in html.lower()[:100]
        # Check for a lang attribute on the <html> element.
        html_tag = soup.find('html')
        has_lang_attribute = False
        lang_attribute: Optional[str] = None
        if html_tag:
            lang_attribute = html_tag.get('lang') or html_tag.get('xml:lang')
            has_lang_attribute = bool(lang_attribute)
        # Parse base URL; its netloc is what classifies links as internal/external.
        parsed_base = urlparse(base_url) if base_url else None
        base_domain = parsed_base.netloc if parsed_base else ''
        # Perform analysis. Word counting runs LAST: _count_words may strip
        # elements from the tree, so the other analyses must see the intact
        # document first.
        meta_tags = self._analyze_meta_tags(soup)
        open_graph = self._analyze_open_graph(soup)
        twitter_card = self._analyze_twitter_card(soup)
        headings = self._analyze_headings(soup)
        images = self._analyze_images(soup, base_url)
        links = self._analyze_links(soup, base_domain, base_url)
        structured_data = self._analyze_structured_data(soup, html)
        word_count = self._count_words(soup)
        return OnPageSEOResult(
            base_url=base_url,
            meta_tags=meta_tags,
            open_graph=open_graph,
            twitter_card=twitter_card,
            headings=headings,
            images=images,
            links=links,
            structured_data=structured_data,
            word_count=word_count,
            has_doctype=has_doctype,
            has_lang_attribute=has_lang_attribute,
            lang_attribute=lang_attribute,
            errors=errors,
        )
def _empty_result(self, base_url: str, errors: List[str]) -> OnPageSEOResult:
"""Return an empty result when parsing fails."""
return OnPageSEOResult(
base_url=base_url,
meta_tags=MetaTags(),
open_graph=OpenGraphData(),
twitter_card=TwitterCardData(),
headings=HeadingStructure(),
images=ImageAnalysis(),
links=LinkAnalysis(),
structured_data=StructuredData(),
errors=errors,
)
def _analyze_meta_tags(self, soup: BeautifulSoup) -> MetaTags:
"""Extract and analyze meta tags."""
result = MetaTags()
# Title tag
title_tag = soup.find('title')
if title_tag:
result.title = title_tag.get_text(strip=True)
result.title_length = len(result.title) if result.title else 0
# Meta description
meta_desc = soup.find('meta', attrs={'name': re.compile(r'^description$', re.I)})
if meta_desc:
result.description = meta_desc.get('content', '')
result.description_length = len(result.description) if result.description else 0
# Meta keywords
meta_keywords = soup.find('meta', attrs={'name': re.compile(r'^keywords$', re.I)})
if meta_keywords:
result.keywords = meta_keywords.get('content', '')
# Meta robots
meta_robots = soup.find('meta', attrs={'name': re.compile(r'^robots$', re.I)})
if meta_robots:
result.robots = meta_robots.get('content', '')
# Viewport
meta_viewport = soup.find('meta', attrs={'name': re.compile(r'^viewport$', re.I)})
if meta_viewport:
result.viewport = meta_viewport.get('content', '')
# Charset
meta_charset = soup.find('meta', attrs={'charset': True})
if meta_charset:
result.charset = meta_charset.get('charset', '')
else:
# Check for http-equiv charset
meta_content_type = soup.find('meta', attrs={'http-equiv': re.compile(r'^content-type$', re.I)})
if meta_content_type:
content = meta_content_type.get('content', '')
charset_match = re.search(r'charset=([^\s;]+)', content, re.I)
if charset_match:
result.charset = charset_match.group(1)
# Language (html tag or meta)
html_tag = soup.find('html')
if html_tag:
result.language = html_tag.get('lang') or html_tag.get('xml:lang')
if not result.language:
meta_lang = soup.find('meta', attrs={'http-equiv': re.compile(r'^content-language$', re.I)})
if meta_lang:
result.language = meta_lang.get('content', '')
# Author
meta_author = soup.find('meta', attrs={'name': re.compile(r'^author$', re.I)})
if meta_author:
result.author = meta_author.get('content', '')
# Generator
meta_generator = soup.find('meta', attrs={'name': re.compile(r'^generator$', re.I)})
if meta_generator:
result.generator = meta_generator.get('content', '')
# Canonical URL
canonical = soup.find('link', attrs={'rel': 'canonical'})
if canonical:
result.canonical_url = canonical.get('href', '')
return result
def _analyze_open_graph(self, soup: BeautifulSoup) -> OpenGraphData:
"""Extract Open Graph metadata."""
result = OpenGraphData()
og_mappings = {
'og:title': 'og_title',
'og:description': 'og_description',
'og:image': 'og_image',
'og:url': 'og_url',
'og:type': 'og_type',
'og:site_name': 'og_site_name',
'og:locale': 'og_locale',
}
for og_property, attr_name in og_mappings.items():
meta_tag = soup.find('meta', attrs={'property': og_property})
if meta_tag:
setattr(result, attr_name, meta_tag.get('content', ''))
return result
def _analyze_twitter_card(self, soup: BeautifulSoup) -> TwitterCardData:
"""Extract Twitter Card metadata."""
result = TwitterCardData()
twitter_mappings = {
'twitter:card': 'card_type',
'twitter:site': 'site',
'twitter:creator': 'creator',
'twitter:title': 'title',
'twitter:description': 'description',
'twitter:image': 'image',
}
for twitter_name, attr_name in twitter_mappings.items():
meta_tag = soup.find('meta', attrs={'name': twitter_name})
if not meta_tag:
# Some sites use property instead of name
meta_tag = soup.find('meta', attrs={'property': twitter_name})
if meta_tag:
setattr(result, attr_name, meta_tag.get('content', ''))
return result
def _analyze_headings(self, soup: BeautifulSoup) -> HeadingStructure:
"""Analyze heading structure (h1-h6)."""
result = HeadingStructure()
# Count headings
for i in range(1, 7):
tag_name = f'h{i}'
headings = soup.find_all(tag_name)
count = len(headings)
setattr(result, f'h{i}_count', count)
# Store text for h1 and h2
if i == 1:
result.h1_texts = [h.get_text(strip=True)[:200] for h in headings]
elif i == 2:
result.h2_texts = [h.get_text(strip=True)[:200] for h in headings[:10]] # Limit to first 10
# Check for single H1
result.has_single_h1 = result.h1_count == 1
# Check heading hierarchy
result.has_proper_hierarchy = True
hierarchy_issues = []
# Issue: No H1
if result.h1_count == 0:
hierarchy_issues.append("Missing H1 heading")
result.has_proper_hierarchy = False
# Issue: Multiple H1s
if result.h1_count > 1:
hierarchy_issues.append(f"Multiple H1 headings ({result.h1_count})")
result.has_proper_hierarchy = False
# Issue: H2 before H1 (if both exist)
if result.h1_count > 0 and result.h2_count > 0:
all_headings = soup.find_all(['h1', 'h2'])
if all_headings:
first_h1_index = None
first_h2_index = None
for idx, h in enumerate(all_headings):
if h.name == 'h1' and first_h1_index is None:
first_h1_index = idx
if h.name == 'h2' and first_h2_index is None:
first_h2_index = idx
if first_h1_index is not None and first_h2_index is not None:
break
if first_h2_index is not None and first_h1_index is not None:
if first_h2_index < first_h1_index:
hierarchy_issues.append("H2 appears before H1")
result.has_proper_hierarchy = False
# Issue: Skipped heading levels (e.g., h1 -> h3 without h2)
heading_levels = []
for i in range(1, 7):
if getattr(result, f'h{i}_count') > 0:
heading_levels.append(i)
if heading_levels:
for i in range(len(heading_levels) - 1):
if heading_levels[i + 1] - heading_levels[i] > 1:
hierarchy_issues.append(
f"Skipped heading level: H{heading_levels[i]} to H{heading_levels[i + 1]}"
)
result.has_proper_hierarchy = False
result.hierarchy_issues = hierarchy_issues
return result
    def _analyze_images(self, soup: BeautifulSoup, base_url: str = '') -> ImageAnalysis:
        """Analyze image elements and alt text quality.

        Args:
            soup: Parsed document.
            base_url: Unused here; kept for signature symmetry with the other
                _analyze_* helpers.

        Returns:
            ImageAnalysis with counts plus capped lists of problem images.
        """
        result = ImageAnalysis()
        images = soup.find_all('img')
        result.total_images = len(images)
        for img in images:
            alt = img.get('alt')
            # Fall back to data-src when src is absent (lazy-loaded images).
            src = img.get('src', img.get('data-src', ''))
            if alt is None:
                # No alt attribute at all
                result.images_without_alt += 1
                if src:
                    # Truncate long URLs
                    result.missing_alt_sources.append(src[:200])
            elif alt.strip() == '':
                # Empty alt (might be intentional for decorative images).
                # NOTE: counted in BOTH images_with_empty_alt and images_with_alt.
                result.images_with_empty_alt += 1
                result.images_with_alt += 1
            else:
                result.images_with_alt += 1
                # Check for placeholder/poor quality alt texts
                alt_lower = alt.lower().strip()
                if alt_lower in self.PLACEHOLDER_ALT_TEXTS:
                    result.alt_text_quality_issues.append({
                        'src': src[:200] if src else '',
                        'alt': alt,
                        'issue': 'Placeholder/generic alt text'
                    })
                elif len(alt) < 5:
                    result.alt_text_quality_issues.append({
                        'src': src[:200] if src else '',
                        'alt': alt,
                        'issue': 'Very short alt text'
                    })
                elif len(alt) > 125:
                    result.alt_text_quality_issues.append({
                        'src': src[:200] if src else '',
                        'alt': alt[:50] + '...',
                        'issue': 'Alt text too long (>125 chars)'
                    })
        # Cap the reported lists so very large pages don't bloat the result.
        # Limit missing_alt_sources to first 20
        result.missing_alt_sources = result.missing_alt_sources[:20]
        # Limit quality issues to first 20
        result.alt_text_quality_issues = result.alt_text_quality_issues[:20]
        return result
def _analyze_links(self, soup: BeautifulSoup, base_domain: str, base_url: str = '') -> LinkAnalysis:
"""Analyze anchor links (internal vs external)."""
result = LinkAnalysis()
internal_domains = set()
external_domains = set()
anchors = soup.find_all('a', href=True)
result.total_links = len(anchors)
for anchor in anchors:
href = anchor.get('href', '')
rel = anchor.get('rel', [])
if isinstance(rel, str):
rel = rel.split()
text = anchor.get_text(strip=True)
# Check for empty/placeholder links
if not href or href == '#' or href.startswith('javascript:'):
result.broken_anchor_links += 1
continue
# Check for links without text
if not text and not anchor.find('img'):
result.links_without_text += 1
# Check for nofollow
if 'nofollow' in rel:
result.nofollow_links += 1
# Determine if internal or external
parsed_href = urlparse(href)
# Absolute URL
if parsed_href.netloc:
link_domain = parsed_href.netloc.lower()
# Remove www. prefix for comparison
link_domain_clean = link_domain.replace('www.', '')
base_domain_clean = base_domain.lower().replace('www.', '')
if link_domain_clean == base_domain_clean or link_domain_clean.endswith('.' + base_domain_clean):
result.internal_links += 1
internal_domains.add(link_domain)
else:
result.external_links += 1
external_domains.add(link_domain)
# Relative URL
elif href.startswith('/') or href.startswith('./') or href.startswith('../'):
result.internal_links += 1
# Protocol-relative URL
elif href.startswith('//'):
link_domain = href[2:].split('/')[0].lower()
link_domain_clean = link_domain.replace('www.', '')
base_domain_clean = base_domain.lower().replace('www.', '')
if link_domain_clean == base_domain_clean:
result.internal_links += 1
internal_domains.add(link_domain)
else:
result.external_links += 1
external_domains.add(link_domain)
# mailto:, tel:, etc.
elif ':' in href:
# These are not traditional links
pass
# Relative path without leading slash
else:
result.internal_links += 1
result.unique_internal_domains = sorted(list(internal_domains))[:20]
result.unique_external_domains = sorted(list(external_domains))[:50]
return result
def _analyze_structured_data(self, soup: BeautifulSoup, raw_html: str) -> StructuredData:
"""Detect and analyze structured data (JSON-LD, Microdata, RDFa)."""
result = StructuredData()
all_types = set()
# 1. JSON-LD
json_ld_scripts = soup.find_all('script', type='application/ld+json')
result.json_ld_count = len(json_ld_scripts)
for script in json_ld_scripts:
try:
content = script.string
if content:
data = json.loads(content)
result.json_ld_data.append(data)
# Extract types
types = self._extract_json_ld_types(data)
result.json_ld_types.extend(types)
all_types.update(types)
except json.JSONDecodeError as e:
logger.debug(f"Invalid JSON-LD: {e}")
except Exception as e:
logger.debug(f"Error parsing JSON-LD: {e}")
# 2. Microdata (itemscope, itemtype)
microdata_elements = soup.find_all(attrs={'itemscope': True})
result.microdata_count = len(microdata_elements)
for element in microdata_elements:
itemtype = element.get('itemtype', '')
if itemtype:
# Extract schema type from URL
# e.g., "https://schema.org/LocalBusiness" -> "LocalBusiness"
type_name = itemtype.rstrip('/').split('/')[-1]
if type_name and type_name not in result.microdata_types:
result.microdata_types.append(type_name)
all_types.add(type_name)
# 3. RDFa (typeof, vocab)
rdfa_elements = soup.find_all(attrs={'typeof': True})
result.rdfa_count = len(rdfa_elements)
for element in rdfa_elements:
typeof = element.get('typeof', '')
if typeof:
# RDFa typeof can be space-separated
for type_name in typeof.split():
# Extract just the type name (remove prefix if present)
type_clean = type_name.split(':')[-1] if ':' in type_name else type_name
if type_clean and type_clean not in result.rdfa_types:
result.rdfa_types.append(type_clean)
all_types.add(type_clean)
# Also check for vocab attribute (RDFa lite)
rdfa_vocab_elements = soup.find_all(attrs={'vocab': True})
for element in rdfa_vocab_elements:
if element not in rdfa_elements:
result.rdfa_count += 1
# Set has_structured_data flag
result.has_structured_data = (
result.json_ld_count > 0 or
result.microdata_count > 0 or
result.rdfa_count > 0
)
# Combine all unique types
result.all_types = sorted(list(all_types))
# Limit JSON-LD data to avoid huge results
result.json_ld_data = result.json_ld_data[:5]
return result
def _extract_json_ld_types(self, data: Any, depth: int = 0) -> List[str]:
"""Recursively extract @type values from JSON-LD data."""
types = []
if depth > 5: # Prevent infinite recursion
return types
if isinstance(data, dict):
if '@type' in data:
type_value = data['@type']
if isinstance(type_value, list):
types.extend(type_value)
elif isinstance(type_value, str):
types.append(type_value)
# Check @graph
if '@graph' in data:
for item in data['@graph']:
types.extend(self._extract_json_ld_types(item, depth + 1))
# Recursively check nested objects
for key, value in data.items():
if key not in ['@type', '@graph', '@context']:
types.extend(self._extract_json_ld_types(value, depth + 1))
elif isinstance(data, list):
for item in data:
types.extend(self._extract_json_ld_types(item, depth + 1))
return types
def _count_words(self, soup: BeautifulSoup) -> int:
"""Count words in visible text content."""
# Remove script and style elements
for element in soup(['script', 'style', 'head', 'meta', 'link', 'noscript']):
element.decompose()
# Remove comments
for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
comment.extract()
# Get text
text = soup.get_text(separator=' ')
# Clean up whitespace
text = re.sub(r'\s+', ' ', text).strip()
# Count words
if text:
words = text.split()
return len(words)
return 0
# Convenience function
def analyze_html(html: str, base_url: str = '') -> Dict[str, Any]:
    """Analyze HTML content and return the results as a plain dict.

    Thin module-level wrapper around OnPageSEOAnalyzer for one-off calls.

    Args:
        html: Raw HTML content.
        base_url: Base URL used for internal/external link classification.

    Returns:
        Dict with the full SEO analysis (see OnPageSEOResult.to_dict).
    """
    return OnPageSEOAnalyzer().analyze_html(html, base_url).to_dict()
if __name__ == '__main__':
    # CLI entry point for ad-hoc analysis of a single URL:
    #   python seo_analyzer.py <url>
    import sys
    import requests
    if len(sys.argv) < 2:
        print("Usage: python seo_analyzer.py <url>")
        print("Example: python seo_analyzer.py https://pixlab.pl")
        sys.exit(1)
    test_url = sys.argv[1]
    print(f"Analyzing: {test_url}")
    print("-" * 60)
    # Fetch the page (browser-like User-Agent header).
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(test_url, headers=headers, timeout=30)
        response.raise_for_status()
        html = response.text
    except Exception as e:
        print(f"Failed to fetch URL: {e}")
        sys.exit(1)
    # Analyze
    analyzer = OnPageSEOAnalyzer()
    result = analyzer.analyze_html(html, test_url)
    # Print a human-readable summary of each result section.
    print("\n=== META TAGS ===")
    print(f"Title: {result.meta_tags.title}")
    print(f"Title length: {result.meta_tags.title_length}")
    print(f"Description: {result.meta_tags.description[:100] if result.meta_tags.description else 'None'}...")
    print(f"Description length: {result.meta_tags.description_length}")
    print(f"Canonical: {result.meta_tags.canonical_url}")
    print(f"Robots: {result.meta_tags.robots}")
    print(f"Viewport: {result.meta_tags.viewport}")
    print("\n=== OPEN GRAPH ===")
    print(f"OG Title: {result.open_graph.og_title}")
    print(f"OG Description: {result.open_graph.og_description[:100] if result.open_graph.og_description else 'None'}...")
    print(f"OG Image: {result.open_graph.og_image}")
    print(f"OG Type: {result.open_graph.og_type}")
    print("\n=== TWITTER CARD ===")
    print(f"Card Type: {result.twitter_card.card_type}")
    print(f"Title: {result.twitter_card.title}")
    print("\n=== HEADINGS ===")
    print(f"H1: {result.headings.h1_count} ({result.headings.h1_texts})")
    print(f"H2: {result.headings.h2_count}")
    print(f"H3: {result.headings.h3_count}")
    print(f"H4: {result.headings.h4_count}")
    print(f"H5: {result.headings.h5_count}")
    print(f"H6: {result.headings.h6_count}")
    print(f"Has single H1: {result.headings.has_single_h1}")
    print(f"Has proper hierarchy: {result.headings.has_proper_hierarchy}")
    if result.headings.hierarchy_issues:
        print(f"Hierarchy issues: {result.headings.hierarchy_issues}")
    print("\n=== IMAGES ===")
    print(f"Total images: {result.images.total_images}")
    print(f"With alt: {result.images.images_with_alt}")
    print(f"Without alt: {result.images.images_without_alt}")
    print(f"With empty alt: {result.images.images_with_empty_alt}")
    if result.images.alt_text_quality_issues:
        print(f"Alt quality issues: {len(result.images.alt_text_quality_issues)}")
    print("\n=== LINKS ===")
    print(f"Total links: {result.links.total_links}")
    print(f"Internal: {result.links.internal_links}")
    print(f"External: {result.links.external_links}")
    print(f"Nofollow: {result.links.nofollow_links}")
    print(f"Broken anchor links: {result.links.broken_anchor_links}")
    print(f"External domains: {result.links.unique_external_domains[:5]}")
    print("\n=== STRUCTURED DATA ===")
    print(f"Has structured data: {result.structured_data.has_structured_data}")
    print(f"JSON-LD count: {result.structured_data.json_ld_count}")
    print(f"Microdata count: {result.structured_data.microdata_count}")
    print(f"RDFa count: {result.structured_data.rdfa_count}")
    print(f"Schema types: {result.structured_data.all_types}")
    print("\n=== OTHER ===")
    print(f"Word count: {result.word_count}")
    print(f"Has DOCTYPE: {result.has_doctype}")
    print(f"Has lang attribute: {result.has_lang_attribute} ({result.lang_attribute})")
    if result.errors:
        print(f"\nErrors: {result.errors}")