Skip to content

Commit 94ddada

Browse files
authored
add GCS service account name to job yamls (#95)
1 parent a70b701 commit 94ddada

10 files changed

Lines changed: 13 additions & 2 deletions

Ironwood/guides/automation/automation_launch.sh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
######################################################################
66
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
77
export GCS_BUCKET_ROOT_DIR=""
8+
export GCS_SA_NAME="gcs-writer" # Service account with write access to GCS_BUCKET_ROOT_DIR
89

910
MAX_RETRIES=3
1011
TIMEOUT_SECOND=3600
@@ -93,11 +94,12 @@ apply_and_wait() {
9394
local filepath="${SCRIPT_DIR}/${yaml_file}"
9495
# Derive job name: remove .yaml, lowercase, replace _ with -
9596
local job_name=$(basename "${yaml_file}" .yaml | tr '[:upper:]' '[:lower:]' | tr '_' '-')
96-
export JOB_NAME="${job_name}"
97+
random_suffix=$(head /dev/urandom | tr -dc a-z0-9 | head -c 5)
98+
export JOB_NAME="${job_name}-${random_suffix}"
9799
export GCS_PATH="${GCS_BUCKET_ROOT_DIR}/${job_name}"
98100

99101
echo "Launching job: ${filepath} (name: ${JOB_NAME})"
100-
envsubst '${JOB_NAME} ${GCS_PATH}' < "${filepath}" | kubectl apply -f -
102+
envsubst '${JOB_NAME} ${GCS_PATH} ${GCS_SA_NAME}' < "${filepath}" | kubectl apply -f -
101103
job_names_in_batch+=("${JOB_NAME}")
102104
done
103105

Ironwood/guides/automation/tpu7x-2x2x1-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x2x1-gemm.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x2x1-hbm.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x2x1-host_device.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x2x2-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x2x4-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-2x4x4-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-4x4x4-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

Ironwood/guides/automation/tpu7x-4x4x8-collectives.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ spec:
2222
template:
2323
spec:
2424
subdomain: headless-svc-${JOB_NAME}
25+
serviceAccountName: ${GCS_SA_NAME}
2526
restartPolicy: Never
2627
nodeSelector:
2728
cloud.google.com/gke-tpu-accelerator: tpu7x

0 commit comments

Comments
 (0)