|
27 | 27 | group: nightly-test-cluster-group-gpu |
28 | 28 | cancel-in-progress: false |
29 | 29 | env: |
30 | | - GPU_CLUSTER_NAME: nightly-xpk-b200 |
| 30 | + GPU_CLUSTER_NAME: nightly-xpk-h100 |
31 | 31 | WORKLOAD_NAME: xpktest-gpu-nightly-${{ github.run_attempt }} |
32 | 32 | steps: |
33 | 33 | - uses: actions/download-artifact@v4 |
@@ -55,21 +55,21 @@ jobs: |
55 | 55 |
|
56 | 56 | # 4. Set Env Var for the host (GitHub Runner) |
57 | 57 | echo "GOOGLE_APPLICATION_CREDENTIALS=$HOME/.config/gcloud/application_default_credentials.json" >> $GITHUB_ENV |
58 | | - - name: Create an XPK Cluster with 1 x b200 GPU |
59 | | - run: xpk cluster create --cluster $GPU_CLUSTER_NAME --device-type=b200-8 --zone=us-east7-b --default-pool-cpu-machine-type=e2-standard-8 --spot |
| 58 | + - name: Create an XPK Cluster with 1 x h100 GPU |
| 59 | + run: xpk cluster create --cluster $GPU_CLUSTER_NAME --device-type=h100-mega-80gb-8 --zone=asia-southeast1-b --default-pool-cpu-machine-type=e2-standard-8 --spot |
60 | 60 | - name: Authenticate Docker |
61 | 61 | run: gcloud auth configure-docker --quiet |
62 | 62 | - name: Run a base-docker-image workload |
63 | | - run: xpk workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --docker-image='nvidia/cuda:12.1.0-base-ubuntu22.04' --command "nvidia-smi" --zone=us-east7-b --device-type=b200-8 |
| 63 | + run: xpk workload create --cluster $GPU_CLUSTER_NAME --workload $WORKLOAD_NAME --docker-image='nvidia/cuda:12.1.0-base-ubuntu22.04' --command "nvidia-smi" --zone=asia-southeast1-b --device-type=h100-mega-80gb-8 |
64 | 64 | - name: List out the workloads on the cluster |
65 | | - run: xpk workload list --cluster $GPU_CLUSTER_NAME --zone=us-east7-b |
| 65 | + run: xpk workload list --cluster $GPU_CLUSTER_NAME --zone=asia-southeast1-b |
66 | 66 | - name: Wait for workload completion and confirm it succeeded |
67 | | - run: xpk workload list --cluster $GPU_CLUSTER_NAME --zone=us-east7-b --wait-for-job-completion $WORKLOAD_NAME --timeout 600 |
| 67 | + run: xpk workload list --cluster $GPU_CLUSTER_NAME --zone=asia-southeast1-b --wait-for-job-completion $WORKLOAD_NAME --timeout 600 |
68 | 68 | - name: Delete the workload on the cluster |
69 | | - run: xpk workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --zone=us-east7-b |
| 69 | + run: xpk workload delete --workload $WORKLOAD_NAME --cluster $GPU_CLUSTER_NAME --zone=asia-southeast1-b |
70 | 70 | - name: Delete the cluster created |
71 | 71 | if: always() |
72 | | - run: xpk cluster delete --cluster $GPU_CLUSTER_NAME --zone=us-east7-b --force |
| 72 | + run: xpk cluster delete --cluster $GPU_CLUSTER_NAME --zone=asia-southeast1-b --force |
73 | 73 | - name: Upload cluster nodepool creation log |
74 | 74 | if: always() |
75 | 75 | uses: actions/upload-artifact@v4 |
|
0 commit comments