Update Modal runtime for Princeton leaderboard

msaroufim · msaroufim · commit 653761d346a8 · 2026-04-03T17:23:08.000-07:00
diff --git a/.claude/skills/modal-runtime-deploy-e2e.md b/.claude/skills/modal-runtime-deploy-e2e.md
@@ -0,0 +1,83 @@
+# Modal Runtime Deploy and E2E
+
+Use this when changing shared Modal dependencies in `kernelbot`, especially torch/CUDA, and when you need to prove the live leaderboard is actually using the new runtime.
+
+## Scope
+
+- Shared Modal image: `src/runners/modal_runner.py`
+- GPU-bound Modal functions: `src/runners/modal_runner_archs.py`
+- Live app name: `discord-bot-runner`
+- Popcorn e2e path: generate invite if needed, join closed leaderboard, submit with `popcorn-cli`
+
+## Workflow
+
+1. Make the smallest dependency change in `src/runners/modal_runner.py`.
+2. If changing torch/CUDA, inspect all later `.uv_pip_install(...)` blocks for conflicting CUDA/NCCL packages.
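+3. Deploy to the Modal `pytest` environment first (`--env pytest`), so the live `main` environment is untouched until the test passes.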
+4. Run the narrow Modal integration test:
+   ```bash
+   cd /Users/mark/Dev/kernelbot
+   env MODAL_TOKEN_ID=... MODAL_TOKEN_SECRET=... \
+     uv run --extra dev python -m pytest -s tests/test_modal.py -k 'test_modal_launcher_python_script and T4'
+   ```
+5. If that passes, deploy to the Modal `main` environment:
+   ```bash
+   cd /Users/mark/Dev/kernelbot/src/runners
+   env MODAL_TOKEN_ID=... MODAL_TOKEN_SECRET=... \
+     /Users/mark/Dev/kernelbot/.venv/bin/modal deploy --env main modal_runner_archs.py
+   ```
+6. Run a real `popcorn` submission in `test` mode against the target leaderboard.
+7. Confirm the returned report shows the expected `Torch:` version.
+8. Only then run `--mode leaderboard` if the user asked for a ranked submission.
+
+## Closed Leaderboards
+
+Generate an invite with an admin token:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... POPCORN_ADMIN_TOKEN=... \
+  cargo run --quiet -- admin generate-invites --leaderboards <leaderboard> --count 1
+```
+
+Join with the existing CLI identity in `~/.popcorn.yaml`:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- join '<invite_code>'
+```
+
+## Real E2E Submit
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submit --no-tui --leaderboard <leaderboard> --gpu A100 --mode test <submission.py>
+```
+
+Ranked submit:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submit --no-tui --leaderboard <leaderboard> --gpu A100 --mode leaderboard <submission.py>
+```
+
+Check recent runs:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submissions list --leaderboard <leaderboard> --limit 5
+```
+
+## Failure Mode To Remember
+
+If a Modal run fails with:
+
+```text
+libtorch_cuda.so: undefined symbol: ncclDevCommCreate
+```
+
+then a later package install likely replaced torch's expected CUDA/NCCL dependency set. The practical fix is to install `torch` last so its dependency versions win.
diff --git a/.codex/skills/modal-runtime-deploy-e2e/SKILL.md b/.codex/skills/modal-runtime-deploy-e2e/SKILL.md
@@ -0,0 +1,92 @@
+---
+name: modal-runtime-deploy-e2e
+description: Upgrade shared Modal runtime dependencies in kernelbot and verify them end to end. Use when changing torch/CUDA or other shared Modal image dependencies, deploying the Modal app, and validating with both Modal integration tests and real popcorn leaderboard submissions.
+---
+
+# Modal Runtime Deploy and E2E
+
+Use this when changing shared Modal dependencies in `kernelbot`, especially torch/CUDA, and when you need to prove the live leaderboard is actually using the new runtime.
+
+## Scope
+
+- Shared Modal image: `src/runners/modal_runner.py`
+- GPU-bound Modal functions: `src/runners/modal_runner_archs.py`
+- Live app name: `discord-bot-runner`
+- Popcorn e2e path: generate invite if needed, join closed leaderboard, submit with `popcorn-cli`
+
+## Workflow
+
+1. Make the smallest dependency change in `src/runners/modal_runner.py`.
+2. If changing torch/CUDA, inspect all later `.uv_pip_install(...)` blocks for conflicting CUDA/NCCL packages.
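+3. Deploy to the Modal `pytest` environment first (`--env pytest`), so the live `main` environment is untouched until the test passes.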
+4. Run the narrow Modal integration test:
+
+```bash
+cd /Users/mark/Dev/kernelbot
+env MODAL_TOKEN_ID=... MODAL_TOKEN_SECRET=... \
+  uv run --extra dev python -m pytest -s tests/test_modal.py -k 'test_modal_launcher_python_script and T4'
+```
+
+5. If that passes, deploy to the Modal `main` environment:
+
+```bash
+cd /Users/mark/Dev/kernelbot/src/runners
+env MODAL_TOKEN_ID=... MODAL_TOKEN_SECRET=... \
+  /Users/mark/Dev/kernelbot/.venv/bin/modal deploy --env main modal_runner_archs.py
+```
+
+6. Run a real `popcorn` submission in `test` mode against the target leaderboard.
+7. Confirm the returned report shows the expected `Torch:` version.
+8. Only then run `--mode leaderboard` if the user asked for a ranked submission.
+
+## Closed Leaderboards
+
+Generate an invite with an admin token:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... POPCORN_ADMIN_TOKEN=... \
+  cargo run --quiet -- admin generate-invites --leaderboards <leaderboard> --count 1
+```
+
+Join with the existing CLI identity in `~/.popcorn.yaml`:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- join '<invite_code>'
+```
+
+## Real E2E Submit
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submit --no-tui --leaderboard <leaderboard> --gpu A100 --mode test <submission.py>
+```
+
+Ranked submit:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submit --no-tui --leaderboard <leaderboard> --gpu A100 --mode leaderboard <submission.py>
+```
+
+Check recent runs:
+
+```bash
+cd /Users/mark/Dev/popcorn-cli
+env POPCORN_API_URL=... \
+  cargo run --quiet -- submissions list --leaderboard <leaderboard> --limit 5
+```
+
+## Failure Mode To Remember
+
+If a Modal run fails with:
+
+```text
+libtorch_cuda.so: undefined symbol: ncclDevCommCreate
+```
+
+then a later package install likely replaced torch's expected CUDA/NCCL dependency set. The practical fix is to install `torch` last so its dependency versions win.
diff --git a/src/runners/modal_runner.py b/src/runners/modal_runner.py
@@ -9,7 +9,7 @@
 # Create a stub for the Modal app
 # IMPORTANT: This has to stay in separate file or modal breaks
 app = App("discord-bot-runner")
-cuda_version = "13.1.0"
+cuda_version = "12.9.1"
 flavor = "devel"
 operating_sys = "ubuntu24.04"
 tag = f"{cuda_version}-{flavor}-{operating_sys}"
@@ -37,6 +37,7 @@
     .run_commands("ln -sf $(which python) /usr/local/bin/python3")
     .apt_install(
         "git",
+        "curl",
         "gcc-13",
         "g++-13",
         "clang-18",
@@ -50,12 +51,6 @@
         "pytest",
         "PyYAML",
     )
-    .uv_pip_install(
-        "torch==2.9.1",
-        "torchvision",
-        "torchaudio",
-        index_url="https://download.pytorch.org/whl/cu130",
-    )
     # other frameworks
     .uv_pip_install(
         "tinygrad~=0.10",
@@ -70,6 +65,11 @@
         # "nvmath-python[cu13]~=0.4",
         # "numba-cuda[cu13]~=0.15",
     )
+    # Install torch last so its CUDA/NCCL dependency set wins over broader CUDA Python packages.
+    .uv_pip_install(
+        "torch==2.11.0",
+        index_url="https://download.pytorch.org/whl/cu129",
+    )
     # CUTLASS C++ headers for #include <cutlass/...>
     .run_commands(
         "git clone --depth 1 --branch v4.3.5 https://github.com/NVIDIA/cutlass.git /opt/cutlass",