Skip to content

Commit 4346aa5

Browse files
committed
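Improve error handling in convergence runners: print stderr on failure, add traceback, validate output sizes, fix silent PASS on zero data (run_sod.py now reports FAIL instead of PASS when no pair of consecutive 2x-apart resolutions is available to compute a rate)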
1 parent 5d2e710 commit 4346aa5

4 files changed

Lines changed: 47 additions & 22 deletions

File tree

toolchain/mfc/test/run_convergence.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def run_case(tmpdir: str, N: int, extra_args: list, num_ranks: int = 1):
142142
result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd(), check=False)
143143
if result.returncode != 0:
144144
print(result.stdout[-2000:])
145+
print(result.stderr)
145146
raise RuntimeError(f"./mfc.sh run failed for N={N}")
146147

147148
# Copy p_all to temp dir, then clean the case directory for next run
@@ -230,7 +231,10 @@ def main():
230231
try:
231232
passed = test_scheme(label, extra_args, expected_order, tol, args.resolutions, min_N, max_N, args.num_ranks)
232233
except Exception as e:
234+
import traceback
235+
233236
print(f" ERROR: {e}")
237+
traceback.print_exc()
234238
passed = False
235239
results[label] = passed
236240

toolchain/mfc/test/run_convergence_1d.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
]
7373

7474

75-
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) -> np.ndarray:
75+
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1, expected_size: int = None) -> np.ndarray:
7676
"""Read q_cons_vf{var_idx} from all MPI ranks and concatenate into one 1D array."""
7777
chunks = []
7878
for rank in range(num_ranks):
@@ -82,20 +82,23 @@ def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) ->
8282
data = np.frombuffer(f.read(rec_len), dtype=np.float64)
8383
f.read(4)
8484
chunks.append(data.copy())
85-
return np.concatenate(chunks)
85+
combined = np.concatenate(chunks)
86+
if expected_size is not None and combined.size != expected_size:
87+
raise ValueError(f"Expected {expected_size} values across {num_ranks} ranks, got {combined.size}")
88+
return combined
8689

8790

8891
# 1D single-fluid Euler (model_eqns=2, num_fluids=1): vf1=ρ, vf2=ρu, vf3=E
8992
CONS_VARS_1D = [("density", 1), ("x-momentum", 2), ("energy", 3)]
9093
CONS_TOL = 1e-10
9194

9295

93-
def conservation_errors(run_dir: str, Nt: int, cell_vol: float, var_list: list, num_ranks: int) -> dict:
96+
def conservation_errors(run_dir: str, Nt: int, cell_vol: float, var_list: list, num_ranks: int, expected_size: int = None) -> dict:
9497
"""Return relative conservation error |Σq(T) - Σq(0)| / |Σq(0)| for each variable."""
9598
errs = {}
9699
for name, idx in var_list:
97-
q0 = read_cons_var(run_dir, 0, idx, num_ranks)
98-
qT = read_cons_var(run_dir, Nt, idx, num_ranks)
100+
q0 = read_cons_var(run_dir, 0, idx, num_ranks, expected_size)
101+
qT = read_cons_var(run_dir, Nt, idx, num_ranks, expected_size)
99102
s0 = float(np.sum(q0)) * cell_vol
100103
sT = float(np.sum(qT)) * cell_vol
101104
errs[name] = abs(sT - s0) / (abs(s0) + 1e-300)
@@ -142,6 +145,7 @@ def run_case(tmpdir: str, N: int, extra_args: list, num_ranks: int = 1):
142145
result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd(), check=False)
143146
if result.returncode != 0:
144147
print(result.stdout[-3000:])
148+
print(result.stderr)
145149
raise RuntimeError(f"./mfc.sh run failed for N={N}")
146150

147151
case_dir = os.path.dirname(CASE)
@@ -173,11 +177,11 @@ def test_scheme(label, extra_args, expected_order, tol, resolutions, min_N=None,
173177
dx = 1.0 / N
174178
Nt, run_dir = run_case(tmpdir, N, extra_args, num_ranks)
175179
nts.append(Nt)
176-
vf0 = read_cons_var(run_dir, 0, 1, num_ranks)
177-
vfT = read_cons_var(run_dir, Nt, 1, num_ranks)
180+
vf0 = read_cons_var(run_dir, 0, 1, num_ranks, expected_size=N)
181+
vfT = read_cons_var(run_dir, Nt, 1, num_ranks, expected_size=N)
178182
err = l2_error(vfT, vf0, dx)
179183
errors.append(err)
180-
all_cons_errs.append(conservation_errors(run_dir, Nt, dx, CONS_VARS_1D, num_ranks))
184+
all_cons_errs.append(conservation_errors(run_dir, Nt, dx, CONS_VARS_1D, num_ranks, expected_size=N))
181185
print(f" N={N}: Nt={Nt}, |vf0|={len(vf0)}, err={err:.4e}")
182186

183187
rates = [None]
@@ -242,7 +246,10 @@ def main():
242246
try:
243247
passed = test_scheme(label, extra_args + muscl_extra, expected_order, tol, args.resolutions, min_N, max_N, args.num_ranks)
244248
except Exception as e:
249+
import traceback
250+
245251
print(f" ERROR: {e}")
252+
traceback.print_exc()
246253
passed = False
247254
results[label] = passed
248255

toolchain/mfc/test/run_sod.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
]
5555

5656

57-
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) -> np.ndarray:
57+
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1, expected_size: int = None) -> np.ndarray:
5858
"""Read q_cons_vf{var_idx} from all ranks in rank order (= spatial order for 1D)."""
5959
chunks = []
6060
for rank in range(num_ranks):
@@ -64,7 +64,10 @@ def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) ->
6464
data = np.frombuffer(f.read(rec_len), dtype=np.float64)
6565
f.read(4)
6666
chunks.append(data.copy())
67-
return np.concatenate(chunks)
67+
combined = np.concatenate(chunks)
68+
if expected_size is not None and combined.size != expected_size:
69+
raise ValueError(f"Expected {expected_size} values across {num_ranks} ranks, got {combined.size}")
70+
return combined
6871

6972

7073
def l1_self_error(coarse: np.ndarray, fine: np.ndarray, dx_coarse: float) -> float:
@@ -103,6 +106,7 @@ def run_case(tmpdir: str, N: int, extra_args: list, num_ranks: int = 1):
103106
result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd(), check=False)
104107
if result.returncode != 0:
105108
print(result.stdout[-3000:])
109+
print(result.stderr)
106110
raise RuntimeError(f"./mfc.sh run failed for N={N}")
107111

108112
case_dir = os.path.dirname(CASE)
@@ -142,8 +146,8 @@ def test_scheme(label, extra_args, expected_order, tol, resolutions, min_N=None,
142146
if N_f != 2 * N_c:
143147
continue # skip non-2x pairs
144148
dx_c = 1.0 / N_c
145-
rho_c = read_cons_var(run_dirs[i], nts[i], 1, num_ranks)
146-
rho_f = read_cons_var(run_dirs[i + 1], nts[i + 1], 1, num_ranks)
149+
rho_c = read_cons_var(run_dirs[i], nts[i], 1, num_ranks, expected_size=N_c)
150+
rho_f = read_cons_var(run_dirs[i + 1], nts[i + 1], 1, num_ranks, expected_size=N_f)
147151
err = l1_self_error(rho_c, rho_f, dx_c)
148152
errors.append(err)
149153
error_resolutions.append(N_c)
@@ -170,8 +174,8 @@ def test_scheme(label, extra_args, expected_order, tol, resolutions, min_N=None,
170174
print(f"\n Single pair rate: {rates[-1]:.2f} (need >= {expected_order - tol:.1f})")
171175
passed = rates[-1] >= expected_order - tol
172176
else:
173-
print("\n (need >= 2 consecutive resolutions to compute rate)")
174-
passed = True
177+
print("\n ERROR: need >= 2 consecutive 2x-apart resolutions to compute a rate")
178+
passed = False
175179

176180
print(f" {'PASS' if passed else 'FAIL'}")
177181
return passed
@@ -202,7 +206,10 @@ def main():
202206
try:
203207
passed = test_scheme(label, extra_args, expected_order, tol, args.resolutions, min_N, args.num_ranks)
204208
except Exception as e:
209+
import traceback
210+
205211
print(f" ERROR: {e}")
212+
traceback.print_exc()
206213
passed = False
207214
results[label] = passed
208215

toolchain/mfc/test/run_temporal_order.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
N_SPATIAL = 512 # fixed spatial resolution
5656

5757

58-
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) -> np.ndarray:
58+
def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1, expected_size: int = None) -> np.ndarray:
5959
"""Read q_cons_vf{var_idx} from all MPI ranks and concatenate into one 1D array."""
6060
chunks = []
6161
for rank in range(num_ranks):
@@ -65,20 +65,23 @@ def read_cons_var(run_dir: str, step: int, var_idx: int, num_ranks: int = 1) ->
6565
data = np.frombuffer(f.read(rec_len), dtype=np.float64)
6666
f.read(4)
6767
chunks.append(data.copy())
68-
return np.concatenate(chunks)
68+
combined = np.concatenate(chunks)
69+
if expected_size is not None and combined.size != expected_size:
70+
raise ValueError(f"Expected {expected_size} values across {num_ranks} ranks, got {combined.size}")
71+
return combined
6972

7073

7174
# 1D single-fluid Euler (model_eqns=2, num_fluids=1): vf1=ρ, vf2=ρu, vf3=E
7275
CONS_VARS_1D = [("density", 1), ("x-momentum", 2), ("energy", 3)]
7376
CONS_TOL = 1e-10
7477

7578

76-
def conservation_errors(run_dir: str, Nt: int, cell_vol: float, var_list: list, num_ranks: int) -> dict:
79+
def conservation_errors(run_dir: str, Nt: int, cell_vol: float, var_list: list, num_ranks: int, expected_size: int = None) -> dict:
7780
"""Return relative conservation error |Σq(T) - Σq(0)| / |Σq(0)| for each variable."""
7881
errs = {}
7982
for name, idx in var_list:
80-
q0 = read_cons_var(run_dir, 0, idx, num_ranks)
81-
qT = read_cons_var(run_dir, Nt, idx, num_ranks)
83+
q0 = read_cons_var(run_dir, 0, idx, num_ranks, expected_size)
84+
qT = read_cons_var(run_dir, Nt, idx, num_ranks, expected_size)
8285
s0 = float(np.sum(q0)) * cell_vol
8386
sT = float(np.sum(qT)) * cell_vol
8487
errs[name] = abs(sT - s0) / (abs(s0) + 1e-300)
@@ -121,6 +124,7 @@ def run_case(tmpdir: str, cfl: float, extra_args: list, num_ranks: int = 1):
121124
result = subprocess.run(cmd, capture_output=True, text=True, cwd=os.getcwd(), check=False)
122125
if result.returncode != 0:
123126
print(result.stdout[-3000:])
127+
print(result.stderr)
124128
raise RuntimeError(f"./mfc.sh run failed for CFL={cfl}")
125129

126130
case_dir = os.path.dirname(CASE)
@@ -152,11 +156,11 @@ def test_scheme(label, extra_args, expected_order, tol, cfls, num_ranks=1):
152156
dt, Nt, run_dir = run_case(tmpdir, cfl, extra_args, num_ranks)
153157
dts.append(dt)
154158
nts.append(Nt)
155-
vf0 = read_cons_var(run_dir, 0, 1, num_ranks)
156-
vfT = read_cons_var(run_dir, Nt, 1, num_ranks)
159+
vf0 = read_cons_var(run_dir, 0, 1, num_ranks, expected_size=N_SPATIAL)
160+
vfT = read_cons_var(run_dir, Nt, 1, num_ranks, expected_size=N_SPATIAL)
157161
err = l2_error(vfT, vf0, dx)
158162
errors.append(err)
159-
all_cons_errs.append(conservation_errors(run_dir, Nt, dx, CONS_VARS_1D, num_ranks))
163+
all_cons_errs.append(conservation_errors(run_dir, Nt, dx, CONS_VARS_1D, num_ranks, expected_size=N_SPATIAL))
160164

161165
rates = [None]
162166
for i in range(1, len(cfls)):
@@ -220,7 +224,10 @@ def main():
220224
try:
221225
passed = test_scheme(label, extra_args, expected_order, tol, cfls, args.num_ranks)
222226
except Exception as e:
227+
import traceback
228+
223229
print(f" ERROR: {e}")
230+
traceback.print_exc()
224231
passed = False
225232
results[label] = passed
226233

0 commit comments

Comments
 (0)