#!/usr/bin/env python3
"""
Architecture Documentation Accuracy Verification Script

Cross-checks architecture documentation against the actual codebase to
verify accuracy. Results are accumulated as verified items, warnings, and
issues, then rendered as a Markdown report.
"""

import os
import re
import json
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Tuple


class ArchitectureVerifier:
    """Accumulates documentation-vs-code verification results and renders a report."""

    def __init__(self) -> None:
        # Each entry is a {'category': ..., 'message': ...} dict
        # (issues additionally carry a 'severity' key).
        self.issues: List[Dict[str, str]] = []
        self.warnings: List[Dict[str, str]] = []
        self.verified: List[Dict[str, str]] = []
        # All file checks are resolved relative to the current working directory.
        self.base_path = Path('.')

    def add_issue(self, category: str, message: str, severity: str = 'ERROR') -> None:
        """Record a hard failure: documentation contradicts the codebase."""
        self.issues.append({
            'category': category,
            'message': message,
            'severity': severity
        })

    def add_warning(self, category: str, message: str) -> None:
        """Record a soft mismatch that should be reviewed but is not fatal."""
        self.warnings.append({
            'category': category,
            'message': message
        })

    def add_verified(self, category: str, message: str) -> None:
        """Record a documentation claim that was confirmed against the code."""
        self.verified.append({
            'category': category,
            'message': message
        })

    def check_file_exists(self, filepath: str, doc_reference: str) -> bool:
        """Check if a file mentioned in docs actually exists.

        Args:
            filepath: Path relative to the repository root.
            doc_reference: Name of the doc section that references the file
                (used only in the recorded message).

        Returns:
            True if the file exists (recorded as verified), False otherwise
            (recorded as an issue).
        """
        full_path = self.base_path / filepath
        if full_path.exists():
            self.add_verified('File Existence', f'{filepath} exists (referenced in {doc_reference})')
            return True
        else:
            self.add_issue('File Existence', f'{filepath} NOT FOUND (referenced in {doc_reference})')
            return False

    def verify_core_files(self) -> None:
        """Verify core application files exist."""
        print("šŸ“ Verifying core application files...")
        core_files = [
            ('app.py', 'Flask Components'),
            ('database.py', 'Database Schema'),
            ('gemini_service.py', 'External Integrations'),
            ('nordabiz_chat.py', 'AI Chat Flow'),
            ('search_service.py', 'Search Flow'),
            ('email_service.py', 'External Integrations'),
            ('krs_api_service.py', 'External Integrations'),
            ('gbp_audit_service.py', 'External Integrations'),
            ('it_audit_service.py', 'External Integrations'),
        ]
        for filepath, doc_ref in core_files:
            self.check_file_exists(filepath, doc_ref)

    def verify_database_models(self) -> None:
        """Verify database models match documentation."""
        print("šŸ—„ļø Verifying database models...")
        if not self.check_file_exists('database.py', 'Database Schema'):
            return

        # Read database.py and extract model classes.
        with open('database.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all class definitions that inherit from db.Model.
        model_pattern = r'class\s+(\w+)\(.*?db\.Model.*?\):'
        models_found = re.findall(model_pattern, content)

        # Documented models from 05-database-schema.md.
        documented_models = [
            'User', 'Company', 'CompanyService', 'CompanyCompetency',
            'CompanyContact', 'CompanySocialMedia', 'CompanyNews',
            'CompanyWebsiteAnalysis', 'CompanyDigitalMaturityAssessment',
            'AIChatConversation', 'AIChatMessage', 'AIAPICost',
            'ForumPost', 'ForumComment', 'Event', 'EventAttendance',
            'Message', 'Conversation', 'ConversationParticipant',
            'Classified', 'Recommendation', 'MembershipFee',
            'UserNotification', 'NewsModeration'
        ]

        # Documented models that are missing from code are only warnings:
        # the docs may simply be ahead of (or behind) the schema.
        for model in documented_models:
            if model in models_found:
                self.add_verified('Database Model', f'Model {model} exists in database.py')
            else:
                self.add_warning('Database Model', f'Model {model} documented but not found in database.py')

        # Check for undocumented models ('Service' and 'Competency' are
        # deliberately excluded as known internal/legacy tables).
        for model in models_found:
            if model not in documented_models and model not in ['Service', 'Competency']:
                self.add_warning('Database Model', f'Model {model} exists in code but not documented')

        print(f"   Found {len(models_found)} models in database.py")
        print(f"   Documented: {len(documented_models)} models")

    def verify_api_endpoints(self) -> None:
        """Verify API endpoints match documentation."""
        print("🌐 Verifying API endpoints...")
        if not self.check_file_exists('app.py', 'API Endpoints'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all route decorators; each match is (path, methods-or-'').
        route_pattern = r'@app\.route\([\'"]([^\'"]+)[\'"](?:,\s*methods=\[([^\]]+)\])?'
        routes_found = re.findall(route_pattern, content)

        print(f"   Found {len(routes_found)} route definitions in app.py")

        # Sample critical endpoints to verify.
        # NOTE(review): URL converter tokens (e.g. '<int:id>') appear to have
        # been stripped from these literals, leaving bare '/' or '//' — confirm
        # the intended placeholders against the original documentation.
        critical_endpoints = [
            ('/', 'Homepage'),
            ('/search', 'Company Search'),
            ('/company/', 'Company Profile'),
            ('/login', 'Authentication'),
            ('/register', 'Authentication'),
            ('/api/chat//message', 'AI Chat'),
            ('/admin/seo', 'SEO Audit'),
            ('/health', 'Health Check'),
        ]

        for endpoint, description in critical_endpoints:
            # Normalize the endpoint into a loose regex: replace any surviving
            # Flask converter tokens with a one-segment wildcard, and treat a
            # stripped token ('//') the same way. (The previous implementation
            # used str.replace('', '.*'), which interleaves '.*' between every
            # character — an accidental no-op pattern, now fixed.)
            endpoint_normalized = re.sub(r'<[^>]+>', '[^/]+', endpoint).replace('//', '/[^/]+/')
            found = any(re.match(endpoint_normalized, route[0]) for route in routes_found)
            if found:
                self.add_verified('API Endpoint', f'{endpoint} ({description}) exists')
            else:
                self.add_issue('API Endpoint', f'{endpoint} ({description}) NOT FOUND')

    def verify_external_api_configs(self) -> None:
        """Verify external API configurations."""
        print("šŸ”Œ Verifying external API integrations...")

        # Check for API service files.
        api_services = {
            'gemini_service.py': 'Google Gemini AI',
            'krs_api_service.py': 'KRS Open API',
            'gbp_audit_service.py': 'Google Places API',
            'email_service.py': 'Microsoft Graph API',
        }

        for filepath, api_name in api_services.items():
            if self.check_file_exists(filepath, 'External Integrations'):
                # Heuristic: any API key/config token in the file counts as
                # "has configuration".
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
                if 'API_KEY' in content or 'api_key' in content or 'GOOGLE' in content or 'GEMINI' in content:
                    self.add_verified('API Integration', f'{api_name} has API configuration')
                else:
                    self.add_warning('API Integration', f'{api_name} service exists but no API key found')

    def verify_infrastructure_docs(self) -> None:
        """Verify infrastructure details match documentation."""
        print("šŸ—ļø Verifying infrastructure documentation...")

        # Check deployment architecture mentions.
        deployment_doc = self.base_path / 'docs/architecture/03-deployment-architecture.md'
        if deployment_doc.exists():
            with open(deployment_doc, 'r', encoding='utf-8') as f:
                content = f.read()

            # Verify critical configurations are mentioned.
            critical_items = [
                ('10.22.68.249', 'NORDABIZ-01 IP address'),
                ('10.22.68.250', 'R11-REVPROXY-01 IP address'),
                ('port 5000', 'Flask/Gunicorn port'),
                ('port 5432', 'PostgreSQL port'),
                ('NPM', 'Nginx Proxy Manager'),
            ]

            for item, description in critical_items:
                if item in content:
                    self.add_verified('Infrastructure', f'{description} documented')
                else:
                    self.add_warning('Infrastructure', f'{description} NOT found in deployment docs')
        else:
            self.add_issue('Infrastructure', 'Deployment architecture document not found')

    def verify_security_features(self) -> None:
        """Verify security features documented match implementation."""
        print("šŸ”’ Verifying security features...")
        if not self.check_file_exists('app.py', 'Security Architecture'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # (package, marker substring to grep for, human-readable feature name)
        security_features = [
            ('Flask-Login', 'login_required', 'Authentication'),
            ('Flask-WTF', 'csrf', 'CSRF Protection'),
            ('Flask-Limiter', 'limiter', 'Rate Limiting'),
            ('werkzeug.security', 'generate_password_hash', 'Password Hashing'),
        ]

        for package, marker, feature in security_features:
            if marker in content:
                self.add_verified('Security', f'{feature} ({package}) implemented')
            else:
                self.add_warning('Security', f'{feature} ({package}) not found in app.py')

    def verify_data_flows(self) -> None:
        """Verify data flow documentation accuracy."""
        print("šŸ”„ Verifying data flow documentation...")

        flow_docs = [
            'docs/architecture/flows/01-authentication-flow.md',
            'docs/architecture/flows/02-search-flow.md',
            'docs/architecture/flows/03-ai-chat-flow.md',
            'docs/architecture/flows/04-seo-audit-flow.md',
            'docs/architecture/flows/05-news-monitoring-flow.md',
            'docs/architecture/flows/06-http-request-flow.md',
        ]

        for doc in flow_docs:
            if (self.base_path / doc).exists():
                self.add_verified('Data Flow', f'{Path(doc).name} exists')
            else:
                self.add_issue('Data Flow', f'{Path(doc).name} NOT FOUND')

    def verify_scripts_directory(self) -> None:
        """Verify scripts mentioned in documentation exist."""
        print("šŸ“œ Verifying scripts directory...")

        expected_scripts = [
            'scripts/seo_audit.py',
            'scripts/social_media_audit.py',
        ]

        for script in expected_scripts:
            self.check_file_exists(script, 'SEO Audit Flow / Background Scripts')

    @staticmethod
    def _group_by_category(items: List[Dict[str, str]]) -> Dict[str, List[str]]:
        """Group result messages by their 'category' key."""
        categories: Dict[str, List[str]] = {}
        for item in items:
            categories.setdefault(item['category'], []).append(item['message'])
        return categories

    def generate_report(self) -> str:
        """Generate the Markdown verification report and return it as a string."""
        report = []
        report.append("# Architecture Documentation Accuracy Verification Report\n")
        # Previously this line labeled the working directory as "Generated:";
        # emit an actual timestamp and keep the cwd for context.
        report.append(f"Generated: {datetime.now().isoformat(timespec='seconds')} (cwd: {Path.cwd()})\n")
        report.append("\n## Executive Summary\n")
        report.append(f"- āœ… **Verified Items:** {len(self.verified)}\n")
        report.append(f"- āš ļø **Warnings:** {len(self.warnings)}\n")
        report.append(f"- āŒ **Issues:** {len(self.issues)}\n")

        if len(self.issues) == 0:
            report.append("\nāœ… **RESULT: PASS** - Documentation accurately reflects codebase\n")
        else:
            report.append("\nāš ļø **RESULT: ISSUES FOUND** - See details below\n")

        # Verified items (capped at 10 messages per category to keep the
        # report readable).
        if self.verified:
            report.append("\n## āœ… Verified Items\n")
            for cat, messages in sorted(self._group_by_category(self.verified).items()):
                report.append(f"\n### {cat} ({len(messages)} items)\n")
                for msg in messages[:10]:
                    report.append(f"- {msg}\n")
                if len(messages) > 10:
                    report.append(f"- ... and {len(messages) - 10} more\n")

        # Warnings (uncapped).
        if self.warnings:
            report.append("\n## āš ļø Warnings\n")
            for cat, messages in sorted(self._group_by_category(self.warnings).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Issues (uncapped).
        if self.issues:
            report.append("\n## āŒ Issues Found\n")
            for cat, messages in sorted(self._group_by_category(self.issues).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Recommendations.
        report.append("\n## šŸ“‹ Recommendations\n")
        if len(self.issues) == 0 and len(self.warnings) == 0:
            report.append("- Documentation is accurate and up-to-date\n")
            report.append("- No action required\n")
        else:
            if self.warnings:
                report.append("- Review warnings to ensure documentation completeness\n")
            if self.issues:
                report.append("- **CRITICAL:** Address issues found - documentation may be inaccurate\n")
                report.append("- Update documentation or fix code references\n")

        report.append("\n## Next Steps\n")
        report.append("1. Review all warnings and issues above\n")
        report.append("2. Update documentation or code as needed\n")
        report.append("3. Re-run this verification script\n")
        report.append("4. Proceed to subtask 8.3: Create maintenance checklist\n")

        return ''.join(report)

    def run_all_checks(self) -> str:
        """Run all verification checks and return the generated report."""
        print("\n" + "=" * 60)
        print("Architecture Documentation Accuracy Verification")
        print("=" * 60 + "\n")

        self.verify_core_files()
        self.verify_database_models()
        self.verify_api_endpoints()
        self.verify_external_api_configs()
        self.verify_infrastructure_docs()
        self.verify_security_features()
        self.verify_data_flows()
        self.verify_scripts_directory()

        print("\n" + "=" * 60)
        print("Verification Complete")
        print("=" * 60 + "\n")
        print(f"āœ… Verified: {len(self.verified)}")
        print(f"āš ļø Warnings: {len(self.warnings)}")
        print(f"āŒ Issues: {len(self.issues)}")

        return self.generate_report()


def main() -> None:
    """Run all checks, write the report to disk, and echo it to stdout."""
    verifier = ArchitectureVerifier()
    report = verifier.run_all_checks()

    # Write report to file.
    report_file = 'ARCHITECTURE_VERIFICATION_REPORT.md'
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)

    print(f"\nšŸ“„ Report saved to: {report_file}")

    # Also print summary.
    print("\n" + report)


if __name__ == '__main__':
    main()