We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0b29d34, commit 27d50fc (Copy full SHA for 27d50fc)
1 file changed
runners/launch_h200-dgxc-slurm.sh
@@ -60,6 +60,15 @@ if [[ "$IS_MULTINODE" == "true" ]]; then
60
if [[ -d "$GPTOSS_LOCAL_DIR" && -n "$(ls -A "$GPTOSS_LOCAL_DIR" 2>/dev/null)" ]]; then
61
exit 0
62
fi
63
+ # Earlier dispatches that failed before staging may
64
+ # have left an empty $GPTOSS_LOCAL_DIR owned by root,
65
+ # because job.slurm's docker run bind-mounts MODEL_DIR
66
+ # with --user 0:0 and Docker auto-creates the source
67
+ # path when it does not exist. Remove the empty dir so
68
+ # `hf download` can recreate it with the right owner.
69
+ if [[ -d "$GPTOSS_LOCAL_DIR" ]]; then
70
+ rmdir "$GPTOSS_LOCAL_DIR" 2>/dev/null || true
71
+ fi
72
echo "Staging openai/gpt-oss-120b -> $GPTOSS_LOCAL_DIR (one-time, ~60 GB)"
73
ensure_hf_cli
74
hf download openai/gpt-oss-120b --local-dir "$GPTOSS_LOCAL_DIR"
0 commit comments