Skip to content

Commit e816a15

Browse files
guptaaka authored and copybara-github committed
Restart the Pathways worker entrypoint on exit code 1
PiperOrigin-RevId: 911553878
1 parent f7ef6c0 commit e816a15

1 file changed

Lines changed: 28 additions & 4 deletions

File tree

pathwaysutils/experimental/shared_pathways_service/yamls/pw-service.yaml

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,10 +83,34 @@ spec:
8383
- name: pathways-worker
8484
image: ${SERVER_IMAGE}
8585
imagePullPolicy: Always
86-
args:
87-
- --server_port=29005
88-
- --resource_manager_address=$$(PATHWAYS_HEAD):29001
89-
- --gcs_scratch_location=${GCS_SCRATCH_LOCATION}
86+
command:
87+
- /bin/sh
88+
- -c
89+
- |
90+
# Supervisor loop for the Pathways worker: locate the server binary, run it,
# and start it again whenever it exits with status 1. Any other exit status
# ends the loop and becomes the exit status of this script.
# NOTE(review): the doubled dollar signs look like escapes for a templating
# step that renders this manifest -- confirm before changing them, and keep
# lone dollar signs out of these comments for the same reason.
while true; do
  echo "Spawning pathways server ..."
  # Pick the first regular, executable file matching the glob. The -f test
  # keeps a matching directory (directories also pass -x) from being chosen.
  # When nothing matches, the pattern stays literal, fails both tests, and
  # TARGET_BIN remains empty.
  TARGET_BIN=""
  for f in /usr/pathways/run/cloud_pathways_server*; do
    if [ -f "$$f" ] && [ -x "$$f" ]; then
      TARGET_BIN="$$f"
      break
    fi
  done
  if [ -z "$$TARGET_BIN" ]; then
    # Diagnostics go to stderr so they are not mixed into normal output.
    echo "Error: Could not find executable cloud_pathways_server* in /usr/pathways/run/" >&2
    exit 1
  fi
  echo "Found pathways server binary: $$TARGET_BIN"
  # Quote the binary path and every flag so whitespace or glob characters in
  # the matched filename or in substituted values cannot split the words.
  "$$TARGET_BIN" \
    "--server_port=29005" \
    "--resource_manager_address=$$(PATHWAYS_HEAD):29001" \
    "--gcs_scratch_location=${GCS_SCRATCH_LOCATION}"
  ret_code=$$?
  if [ "$$ret_code" -eq 1 ]; then
    # Status 1 is the only code treated as restartable.
    echo "Worker process exited with return code 1. Restarting the server..."
    continue
  else
    # Every other status (including 0) is passed through unchanged.
    echo "Worker process terminated with unhandled return code: $$ret_code"
    exit $$ret_code
  fi
done
90114
env:
91115
- name: TPU_MIN_LOG_LEVEL
92116
value: "0"

0 commit comments

Comments
 (0)