Skip to content

Commit 8e402c7

Browse files
baogorek and claude committed
Add volume-based verification after worker builds
Instead of trusting worker JSON results alone (which broke when stdout was polluted), the coordinator now reloads the volume after builds and counts the actual h5 files. The build fails if the volume has fewer files than expected, regardless of what workers reported. This makes the checkpoint system the source of truth for build completeness.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 422ba05 commit 8e402c7

1 file changed

Lines changed: 18 additions & 7 deletions

File tree

modal_app/local_area.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -562,25 +562,36 @@ def coordinate_publish(
562562
total_completed = sum(len(r["completed"]) for r in all_results)
563563
total_failed = sum(len(r["failed"]) for r in all_results)
564564

565-
print(f"\nBuild summary:")
565+
staging_volume.reload()
566+
volume_completed = get_completed_from_volume(version_dir)
567+
volume_new = volume_completed - completed
568+
print(f"\nBuild summary (worker-reported):")
566569
print(f" Completed: {total_completed}")
567570
print(f" Failed: {total_failed}")
568571
print(f" Previously completed: {len(completed)}")
572+
print(f"Build summary (volume verification):")
573+
print(f" Files on volume: {len(volume_completed)}")
574+
print(f" New files this run: {len(volume_new)}")
569575

570576
if all_errors:
571577
print(f"\nErrors ({len(all_errors)}):")
572578
for err in all_errors[:5]:
573579
err_msg = err.get("error", "Unknown")[:100]
574-
print(f" - {err.get('item', err.get('worker'))}: {err_msg}")
580+
print(
581+
f" - {err.get('item', err.get('worker'))}: "
582+
f"{err_msg}"
583+
)
575584
if len(all_errors) > 5:
576585
print(f" ... and {len(all_errors) - 5} more")
577586

578-
if total_failed > 0 or (
579-
all_errors and total_completed == 0
580-
):
587+
expected_total = len(states) + len(districts) + len(cities)
588+
if len(volume_completed) < expected_total:
589+
missing = expected_total - len(volume_completed)
581590
raise RuntimeError(
582-
f"Build incomplete: {total_failed} failures, "
583-
f"{len(all_errors)} errors. "
591+
f"Build incomplete: {missing} files missing from "
592+
f"volume ({len(volume_completed)}/{expected_total}). "
593+
f"Worker errors: {len(all_errors)}, "
594+
f"failures: {total_failed}. "
584595
f"Volume preserved for retry."
585596
)
586597

0 commit comments

Comments
 (0)