-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_comprehensive_tests.py
More file actions
executable file
·172 lines (137 loc) · 5.28 KB
/
run_comprehensive_tests.py
File metadata and controls
executable file
·172 lines (137 loc) · 5.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python3
"""
Comprehensive test suite for the benchmarking system.
This script runs all validation tests and verifies the complete
benchmarking pipeline is ready for production use.
"""
import subprocess
import sys
from pathlib import Path
import time
def run_test(test_name: str, command: list, description: str = "", *, timeout: float = 300) -> bool:
    """Run a single test command as a subprocess and report the result.

    The command's output is captured and echoed after it finishes: the last
    500 characters of stdout on success, full stderr and stdout on failure.

    Args:
        test_name: Label printed in the report banner.
        command: Argument vector passed to ``subprocess.run`` (no shell).
        description: Optional one-line explanation printed under the banner.
        timeout: Seconds to wait before the command is aborted and the test
            is reported as timed out. Defaults to 300 (5 minutes).

    Returns:
        True if the command exits with status 0. False if it exits non-zero,
        exceeds ``timeout``, or raises any other error (for example the
        executable cannot be found).
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"TEST: {test_name}")
    if description:
        print(f"Description: {description}")
    print(f"Command: {' '.join(command)}")
    print(banner)

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments while the child process is running.
    start_time = time.perf_counter()

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        duration = time.perf_counter() - start_time

        if result.returncode == 0:
            print(f"✅ PASSED ({duration:.1f}s)")
            if result.stdout:
                print("STDOUT:")
                # Keep passing output short; only the tail is shown.
                print(result.stdout[-500:])
            return True

        print(f"❌ FAILED ({duration:.1f}s)")
        print("STDERR:")
        print(result.stderr)
        if result.stdout:
            print("STDOUT:")
            print(result.stdout)
        return False

    except subprocess.TimeoutExpired:
        print(f"❌ TIMEOUT (>{timeout}s)")
        return False
    except Exception as e:
        # Deliberately broad: a test that cannot even be launched is
        # reported as a failure instead of aborting the whole suite.
        print(f"❌ ERROR: {e}")
        return False
def main():
    """Run the comprehensive test suite and print a summary.

    Each check is executed through ``run_test`` in a fixed order, then a
    per-test PASS/FAIL listing and an overall assessment are printed.

    Returns:
        True when all critical tests ("list_tools", "pipeline_test",
        "format_validation") passed, even if non-critical tests failed;
        False when at least one critical test failed.
    """
    print("🧪 COMPREHENSIVE BENCHMARKING SYSTEM TEST SUITE")
    print("=" * 80)

    # Launch child scripts with the interpreter that is running this suite.
    # A bare "python" may be absent from PATH or point at a different
    # installation / virtual environment than the one being validated.
    python = sys.executable or "python"

    # (result key, display name, command, description), in execution order.
    test_plan = [
        (
            "list_tools",
            "List Available Tools",
            [python, "processing_config.py", "--list-tools"],
            "Verify tool configuration and basic imports",
        ),
        (
            "pipeline_test",
            "Benchmarking Pipeline",
            [python, "test_benchmarking_pipeline.py"],
            "Test complete pipeline with synthetic data",
        ),
        (
            "format_validation",
            "Result Format Validation",
            [python, "validate_result_format.py"],
            "Ensure standardized output format",
        ),
        (
            "dry_run",
            "SLURM Dry Run",
            [python, "processing_config.py", "--submit-tool-benchmark", "aging", "scanpy", "--dry-run"],
            "Test SLURM job submission without execution",
        ),
        (
            "list_configs",
            "List Configurations",
            [python, "processing_config.py", "--list"],
            "Verify dataset and parameter configurations",
        ),
        (
            "result_collection",
            "Result Collection Script",
            [python, "benchmarking/scripts/gather_benchmark_results.py", "--help"],
            "Test result collection and comparison tools",
        ),
        (
            "setup_script",
            "Setup Script Syntax",
            # "bash -n" parses the script without executing it.
            ["bash", "-n", "setup_environments.sh"],
            "Validate setup script syntax",
        ),
    ]

    # Track all test results, keyed by short name; tests run sequentially.
    test_results = {
        key: run_test(name, command, description)
        for key, name, command, description in test_plan
    }

    # Summary
    print("\n" + "=" * 80)
    print("🏁 TEST SUITE SUMMARY")
    print("=" * 80)

    for test_name, success in test_results.items():
        status = "PASS" if success else "FAIL"
        icon = "✅" if success else "❌"
        print(f"{icon} {test_name}: {status}")

    passed = sum(1 for success in test_results.values() if success)
    failed = len(test_results) - passed

    print(f"\nResults: {passed} passed, {failed} failed")

    # Overall assessment: only the critical tests decide the return value.
    critical_tests = ["list_tools", "pipeline_test", "format_validation"]
    failed_critical = [test for test in critical_tests if not test_results.get(test, False)]
    critical_passed = not failed_critical

    if critical_passed and failed == 0:
        print("\n🎉 ALL TESTS PASSED!")
        print("The benchmarking system is ready for production use.")
        print("\n📋 Next Steps:")
        print("1. Set up tool environments: ./setup_environments.sh")
        print("2. Submit first benchmark: python processing_config.py --submit-tool-benchmark aging scanpy")
        print("3. Monitor progress: squeue -u $USER")
        print("4. Analyze results: python benchmarking/scripts/gather_benchmark_results.py")
        return True

    if critical_passed:
        # Non-critical failures are reported but still count as success.
        print("\n⚠️ CRITICAL TESTS PASSED")
        print(f"Core functionality works, but {failed} non-critical tests failed.")
        print("The system may be usable but should be reviewed.")
        return True

    print("\n💥 CRITICAL TESTS FAILED")
    print("The benchmarking system is not ready for use.")
    print("Please review and fix the failed tests before proceeding.")
    print(f"Failed critical tests: {failed_critical}")
    return False
if __name__ == "__main__":
    # Propagate the suite outcome to the shell: exit status 0 when main()
    # reports success, 1 otherwise.
    sys.exit(0 if main() else 1)