@@ -29,33 +29,47 @@ if [[ "$IS_MULTINODE" == "true" ]]; then
2929 # The download happens inline on the runner host so users with
3030 # only gh-dispatch access can stage the model without ssh.
3131 GPTOSS_LOCAL_DIR=" $HOME /inferencex-models/gpt-oss-120b"
32+ export GPTOSS_LOCAL_DIR
3233 if [[ -d " /models/gpt-oss-120b" ]]; then
3334 export MODEL_PATH=" /models/gpt-oss-120b"
3435 else
3536 mkdir -p " $HOME /inferencex-models"
#######################################
# Download openai/gpt-oss-120b into $GPTOSS_LOCAL_DIR while holding the
# download lock. Internal helper: the caller must already have opened the
# lock file on file descriptor 200.
# Globals:   GPTOSS_LOCAL_DIR (read), HOME (read), PATH (may be extended)
# Outputs:   progress messages on stdout
# Returns:   0 if the directory was already populated or the download
#            command succeeded, 1 otherwise
#######################################
_stage_gptoss_120b_locked() {
  # flock serializes concurrent dispatches so a second run waits for the
  # first download to finish instead of racing it. If the lock cannot be
  # taken, refuse to download unserialized.
  flock -x 200 || return 1

  # Re-check after acquiring the lock: another dispatch may have finished
  # the download while this one was waiting.
  # NOTE(review): a non-empty directory is treated as fully staged, so an
  # interrupted download is not detected here -- consider a completion
  # marker file if partial downloads are a concern.
  if [[ -d "$GPTOSS_LOCAL_DIR" && -n "$(ls -A "$GPTOSS_LOCAL_DIR" 2>/dev/null)" ]]; then
    return 0 # already staged
  fi

  echo "Staging openai/gpt-oss-120b -> $GPTOSS_LOCAL_DIR (one-time, ~60 GB)"

  # Bootstrap a downloader when neither the CLI nor the module is present.
  if ! command -v huggingface-cli >/dev/null 2>&1 \
    && ! python3 -c "import huggingface_hub" 2>/dev/null; then
    echo "Installing huggingface_hub via pip --user"
    python3 -m pip install --user --quiet huggingface_hub || return 1
    # pip --user places console scripts here; make huggingface-cli findable.
    export PATH="$HOME/.local/bin:$PATH"
  fi

  if command -v huggingface-cli >/dev/null 2>&1; then
    huggingface-cli download openai/gpt-oss-120b \
      --local-dir "$GPTOSS_LOCAL_DIR" || return 1
  else
    # Hand the target directory to Python explicitly so the fallback does
    # not depend on the variable having been exported by the caller.
    GPTOSS_LOCAL_DIR="$GPTOSS_LOCAL_DIR" python3 - <<'PY' || return 1
import os
from huggingface_hub import snapshot_download
snapshot_download(repo_id="openai/gpt-oss-120b",
                  local_dir=os.environ["GPTOSS_LOCAL_DIR"])
PY
  fi
}

#######################################
# Stage openai/gpt-oss-120b into $GPTOSS_LOCAL_DIR unless it is already
# there, serialized across concurrent runs by an exclusive flock.
#
# Deliberately does NOT use `set -euo pipefail`: this function runs in the
# caller's shell, so `set` would permanently change the caller's options,
# and bash ignores `-e` anyway when a function is invoked as
# `if ! stage_gptoss_120b`. Every step is checked explicitly instead.
#
# Globals:   GPTOSS_LOCAL_DIR (read), HOME (read), PATH (may be extended)
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, non-zero on any failure
#######################################
stage_gptoss_120b() {
  if [[ -z "${GPTOSS_LOCAL_DIR:-}" ]]; then
    echo "Error: GPTOSS_LOCAL_DIR is not set." >&2
    return 1
  fi

  local lock_file="$HOME/inferencex-models/.gpt-oss-120b.download.lock"
  mkdir -p -- "${lock_file%/*}" || return 1

  # The redirection is scoped to this single call: bash opens fd 200 for
  # the helper and closes it again when the helper returns, which drops
  # the lock. A bare `exec 200>...` here would keep the lock held (and
  # inherited by every child process) until the whole script exits.
  _stage_gptoss_120b_locked 200>"$lock_file"
}
65+ if ! stage_gptoss_120b; then
66+ echo " Error: failed to stage gpt-oss-120b on this runner." >&2
67+ exit 1
68+ fi
69+ if [[ ! -d " $GPTOSS_LOCAL_DIR " ]] || [[ -z " $( ls -A " $GPTOSS_LOCAL_DIR " 2> /dev/null) " ]]; then
70+ echo " Error: $GPTOSS_LOCAL_DIR is empty after staging step." >&2
71+ exit 1
72+ fi
5973 export MODEL_PATH=" $GPTOSS_LOCAL_DIR "
6074 fi
6175 export MODEL_NAME=" gpt-oss-120b"
0 commit comments