#!/usr/bin/env python3
"""
Verify all Mermaid diagrams in architecture documentation.

Checks for:
- A recognised Mermaid diagram type declaration
- Common syntax errors (unmatched double quotes)
- Readability issues (very long lines, very large diagrams)
- Diagram-type specific hints (sequence participants, flowchart nodes,
  ERD attribute blocks)

Usage:
    verify_diagrams.py [DIRECTORY]

DIRECTORY defaults to ./docs/architecture. The exit status is 0 when no
errors were found and 1 otherwise.
"""

import re
import os
import sys
from pathlib import Path
from typing import List, Dict, Optional, Tuple


class DiagramVerifier:
    """Scan Markdown files for ```mermaid blocks and lint each diagram."""

    # Diagram type keywords accepted as the first meaningful line.
    VALID_TYPES = (
        'graph', 'flowchart', 'sequenceDiagram', 'classDiagram',
        'stateDiagram', 'erDiagram', 'gantt', 'pie', 'journey',
        'gitGraph', 'requirementDiagram', 'mindmap', 'timeline',
        'quadrantChart', 'xychart-beta', 'sankey-beta', 'block-beta',
        'C4Context', 'C4Container', 'C4Component', 'C4Dynamic',
        'C4Deployment',
    )

    # A keyword must end at a token boundary so that "stateDiagram-v2",
    # "gitGraph:" and "graph TD;" match while "graphviz" does not.
    _TYPE_RE = re.compile(
        r'(?:' + '|'.join(map(re.escape, VALID_TYPES)) + r')(?![A-Za-z0-9_])'
    )

    # Fenced mermaid block; trailing blanks after the info string are allowed.
    _MERMAID_BLOCK_RE = re.compile(r'```mermaid[ \t]*\n(.*?)```', re.DOTALL)

    # Matches at least one bracketed node such as A[..], A(..) or A{..}.
    _NODE_RE = re.compile(r'\w+\[.*?\]|\w+\(.*?\)|\w+\{.*?\}')

    def __init__(self):
        self.errors = []
        self.warnings = []
        self.info = []
        self.diagrams_found = 0

    def verify_file(self, filepath: Path) -> Dict:
        """Verify a single markdown file.

        Args:
            filepath: Path of the Markdown file to scan.

        Returns:
            A dict with the keys 'file', 'diagrams', 'errors', 'warnings'
            and 'info'. A file that cannot be read or decoded as UTF-8 is
            reported through 'errors' instead of raising.
        """
        file_result = {
            'file': str(filepath),
            'diagrams': 0,
            'errors': [],
            'warnings': [],
            'info': []
        }

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as exc:
            # One unreadable file must not abort the whole verification run.
            file_result['errors'].append(f"Could not read file: {exc}")
            return file_result

        for idx, match in enumerate(self._MERMAID_BLOCK_RE.finditer(content), 1):
            self.diagrams_found += 1
            file_result['diagrams'] += 1

            issues = self.check_diagram(match.group(1), idx)
            file_result['errors'].extend(issues['errors'])
            file_result['warnings'].extend(issues['warnings'])
            file_result['info'].extend(issues['info'])

        return file_result

    @staticmethod
    def _find_type_line(lines: List[str]) -> str:
        """Return the first line that should declare the diagram type.

        Skips an optional leading YAML frontmatter block (delimited by
        '---' lines), blank lines, and '%%' comments/directives. Returns
        an empty string when no such line exists.
        """
        in_frontmatter = False
        for index, line in enumerate(lines):
            stripped = line.strip()
            if index == 0 and stripped == '---':
                in_frontmatter = True
                continue
            if in_frontmatter:
                if stripped == '---':
                    in_frontmatter = False
                continue
            if not stripped or stripped.startswith('%%'):
                continue
            return stripped
        return ''

    def check_diagram(self, diagram: str, diagram_num: int) -> Dict:
        """Check a single diagram for issues.

        Args:
            diagram: Text between the ```mermaid fence and its closing fence.
            diagram_num: 1-based index of the diagram within its file, used
                only to label messages.

        Returns:
            A dict with the lists 'errors', 'warnings' and 'info'.
        """
        issues = {'errors': [], 'warnings': [], 'info': []}

        body = diagram.strip()
        if not body:
            # str.split always returns at least one element, so emptiness
            # has to be tested before splitting.
            issues['errors'].append(f"Diagram {diagram_num}: Empty diagram")
            return issues

        lines = body.split('\n')
        first_line = lines[0].strip()

        # The type is validated on the first real statement, so a leading
        # comment or init directive cannot mask a wrong or missing type.
        diagram_type_line = self._find_type_line(lines)
        if not self._TYPE_RE.match(diagram_type_line):
            shown = diagram_type_line or first_line
            issues['errors'].append(
                f"Diagram {diagram_num}: Invalid or missing diagram type. "
                f"First non-comment line: '{shown[:50]}...'"
            )

        # Check for extremely long lines (readability)
        for line_num, line in enumerate(lines, 1):
            if len(line) > 200:
                issues['warnings'].append(
                    f"Diagram {diagram_num}, Line {line_num}: "
                    f"Very long line ({len(line)} chars) may affect readability"
                )

        # Unmatched double quotes (backslash-escaped quotes are ignored)
        for line_num, line in enumerate(lines, 1):
            quote_count = line.count('"') - line.count('\\"')
            if quote_count % 2 != 0:
                issues['warnings'].append(
                    f"Diagram {diagram_num}, Line {line_num}: "
                    f"Unmatched quotes detected"
                )

        # NOTE: no bracket-balance check is performed. Valid Mermaid can
        # contain unbalanced brackets (e.g. the "A>text]" node shape or the
        # "A-)B" async arrow), so such a check would mostly report noise.

        diagram_str = '\n'.join(lines)

        # For sequence diagrams, check for proper participant declarations
        if diagram_type_line.startswith('sequenceDiagram'):
            if 'participant' not in diagram_str and 'actor' not in diagram_str:
                issues['info'].append(
                    f"Diagram {diagram_num}: Sequence diagram without explicit "
                    f"participant/actor declarations (auto-generated)"
                )

        # For flowcharts, check for at least one node definition
        if diagram_type_line.startswith(('flowchart', 'graph')):
            if not self._NODE_RE.search(diagram_str):
                issues['warnings'].append(
                    f"Diagram {diagram_num}: Flowchart might be missing node definitions"
                )

        # For ERD, check for entity definitions
        if diagram_type_line.startswith('erDiagram'):
            if '{' not in diagram_str or '}' not in diagram_str:
                issues['warnings'].append(
                    f"Diagram {diagram_num}: ERD might be missing entity attribute blocks"
                )

        # Check diagram size (too many lines might be hard to render)
        if len(lines) > 500:
            issues['warnings'].append(
                f"Diagram {diagram_num}: Very large diagram ({len(lines)} lines) "
                f"might have rendering issues"
            )
        elif len(lines) > 200:
            issues['info'].append(
                f"Diagram {diagram_num}: Large diagram ({len(lines)} lines)"
            )

        return issues

    def verify_all(self, base_path: Path) -> List[Dict]:
        """Verify all markdown files below base_path, in sorted path order."""
        return [self.verify_file(md_file)
                for md_file in sorted(base_path.rglob('*.md'))]

    def print_report(self, results: List[Dict]):
        """Print a formatted report.

        Args:
            results: Per-file result dicts as returned by verify_file.

        Returns:
            True when no errors were recorded in any file, False otherwise.
        """
        print("=" * 80)
        print("MERMAID DIAGRAM VERIFICATION REPORT")
        print("=" * 80)
        print()

        total_files = len(results)
        total_diagrams = sum(r['diagrams'] for r in results)
        total_errors = sum(len(r['errors']) for r in results)
        total_warnings = sum(len(r['warnings']) for r in results)
        total_info = sum(len(r['info']) for r in results)

        print("📊 Summary:")
        print(f" - Files checked: {total_files}")
        print(f" - Diagrams found: {total_diagrams}")
        print(f" - Errors: {total_errors}")
        print(f" - Warnings: {total_warnings}")
        print(f" - Info messages: {total_info}")
        print()

        # Print details for each file
        for result in results:
            has_messages = (
                result['errors'] or result['warnings'] or result['info']
            )
            # Skip files with nothing to show, but never hide a file-level
            # error (e.g. an unreadable file) just because it has no diagrams.
            if result['diagrams'] == 0 and not has_messages:
                continue

            # Handle both relative and absolute paths
            try:
                filename = Path(result['file']).relative_to(Path.cwd())
            except ValueError:
                filename = result['file']

            print(f"\n{'─' * 80}")
            print(f"📄 {filename}")
            print(f" Diagrams: {result['diagrams']}")

            if result['errors']:
                print(f"\n ❌ ERRORS ({len(result['errors'])}):")
                for error in result['errors']:
                    print(f" • {error}")

            if result['warnings']:
                print(f"\n ⚠️ WARNINGS ({len(result['warnings'])}):")
                for warning in result['warnings']:
                    print(f" • {warning}")

            if result['info']:
                print(f"\n ℹ️ INFO ({len(result['info'])}):")
                for info in result['info']:
                    print(f" • {info}")

        print("\n" + "=" * 80)

        if total_errors == 0 and total_warnings == 0:
            print("✅ All diagrams passed verification!")
        elif total_errors == 0:
            print("✅ No errors found (warnings are informational)")
        else:
            print("❌ Errors found - please review and fix")

        print("=" * 80)

        return total_errors == 0


def main(base_path: Optional[Path] = None) -> bool:
    """Main entry point.

    Args:
        base_path: Directory to scan recursively for Markdown files.
            Defaults to ./docs/architecture when omitted.

    Returns:
        True when the directory exists and no diagram errors were found.
    """
    base_path = Path('./docs/architecture') if base_path is None else Path(base_path)

    if not base_path.is_dir():
        print(f"❌ Directory not found: {base_path}")
        return False

    verifier = DiagramVerifier()
    results = verifier.verify_all(base_path)
    return verifier.print_report(results)


if __name__ == '__main__':
    # An optional first CLI argument overrides the default directory.
    target = sys.argv[1] if len(sys.argv) > 1 else None
    sys.exit(0 if main(target) else 1)