#!/usr/bin/env python3
"""
AI Quality Test Runner for Norda Biznes Hub

Usage:
    python run_ai_quality_tests.py [--verbose] [--save]

Options:
    --verbose, -v    Show detailed output for each test
    --save, -s       Save results to JSON file
    --quick, -q      Run only high-priority tests
"""

import argparse
import json
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional

# Add project root to path
sys.path.insert(0, str(Path(__file__).parent))

from tests.ai_quality_evaluator import AIQualityEvaluator, EvaluationReport


def save_report(report: EvaluationReport, output_dir: Optional[Path] = None) -> Path:
    """Save an evaluation report as a timestamped JSON file.

    Args:
        report: Completed evaluation report to serialize.
        output_dir: Destination directory. Defaults to ``tests/results``
            next to this script; created (with parents) if missing.

    Returns:
        Path of the JSON file that was written.
    """
    if output_dir is None:
        output_dir = Path(__file__).parent / "tests" / "results"
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = report.timestamp.strftime("%Y%m%d_%H%M%S")
    filename = f"ai_quality_report_{timestamp}.json"
    filepath = output_dir / filename

    # Convert report to a JSON-serializable dict (datetime -> ISO string,
    # per-test results flattened into plain dicts).
    report_dict = {
        "timestamp": report.timestamp.isoformat(),
        "total_tests": report.total_tests,
        "passed_tests": report.passed_tests,
        "failed_tests": report.failed_tests,
        "pass_rate": report.pass_rate,
        "average_score": report.average_score,
        "summary_by_category": report.summary_by_category,
        "results": [
            {
                "test_id": r.test_id,
                "query": r.query,
                "expected_companies": r.expected_companies,
                "found_companies": r.found_companies,
                "matched_companies": r.matched_companies,
                "score": r.score,
                "passed": r.passed,
                "execution_time_ms": r.execution_time_ms,
                "error": r.error,
            }
            for r in report.results
        ],
    }

    # ensure_ascii=False keeps non-ASCII query/company text readable in the file
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(report_dict, f, indent=2, ensure_ascii=False)

    print(f"\nReport saved to: {filepath}")
    return filepath


def run_quick_tests(evaluator: AIQualityEvaluator, verbose: bool = False) -> EvaluationReport:
    """Run only the tests marked ``priority: high``.

    Temporarily narrows the evaluator's test-case list to the high-priority
    subset, runs the full evaluation, and restores the original list even
    if the evaluation raises.

    Args:
        evaluator: Evaluator whose ``test_cases`` dict holds the suite.
        verbose: Forwarded to ``evaluate_all``.

    Returns:
        The evaluation report for the high-priority subset.
    """
    test_cases = evaluator.test_cases.get("test_cases", [])
    high_priority = [tc for tc in test_cases if tc.get("priority") == "high"]

    # Temporarily replace test cases; the finally-block guarantees the
    # original suite is restored even when evaluate_all raises.
    original_cases = evaluator.test_cases["test_cases"]
    evaluator.test_cases["test_cases"] = high_priority
    print(f"Running {len(high_priority)} high-priority tests...")
    try:
        report = evaluator.evaluate_all(verbose=verbose)
    finally:
        # Restore original
        evaluator.test_cases["test_cases"] = original_cases
    return report


def main():
    """Parse CLI options, run the evaluation, and exit 0/1 on pass/fail."""
    parser = argparse.ArgumentParser(
        description="AI Quality Test Runner for Norda Biznes Hub",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python run_ai_quality_tests.py -v        # Verbose output
    python run_ai_quality_tests.py -v -s     # Verbose + save report
    python run_ai_quality_tests.py -q        # Quick (high-priority only)
""",
    )
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="Show detailed output for each test")
    parser.add_argument("--save", "-s", action="store_true",
                        help="Save results to JSON file")
    parser.add_argument("--quick", "-q", action="store_true",
                        help="Run only high-priority tests")
    parser.add_argument("--threshold", "-t", type=float, default=0.7,
                        help="Pass rate threshold (default: 0.7)")
    args = parser.parse_args()

    print(f"""
╔════════════════════════════════════════════════════════════╗
║          Norda Biznes Hub - AI Quality Tests               ║
╠════════════════════════════════════════════════════════════╣
║  Evaluating chat AI response quality                       ║
║  Pass threshold: {args.threshold:.0%}                      ║
╚════════════════════════════════════════════════════════════╝
""")

    evaluator = AIQualityEvaluator()
    try:
        if args.quick:
            report = run_quick_tests(evaluator, verbose=args.verbose)
        else:
            report = evaluator.evaluate_all(verbose=args.verbose)

        if args.save:
            save_report(report)

        # Final verdict: exit code 0 iff the pass rate meets the threshold,
        # so CI pipelines can gate on this script directly.
        print(f"\n{'='*60}")
        if report.pass_rate >= args.threshold:
            print("✓ EVALUATION PASSED")
            print(f"  Pass rate: {report.pass_rate:.1%} >= {args.threshold:.0%}")
            exit_code = 0
        else:
            print("✗ EVALUATION FAILED")
            print(f"  Pass rate: {report.pass_rate:.1%} < {args.threshold:.0%}")
            exit_code = 1
        print(f"  Tests: {report.passed_tests}/{report.total_tests} passed")
        print(f"  Average score: {report.average_score:.2f}")
        print(f"{'='*60}")

        sys.exit(exit_code)
    finally:
        # Always release evaluator resources (DB/HTTP connections), even
        # when sys.exit raises SystemExit above.
        evaluator.close()


if __name__ == "__main__":
    main()