Skip to content

Commit a31d702

Browse files
committed
Added try/catch blocks and ignored non-zero exit code on kill
1 parent 2e145cc commit a31d702

File tree

3 files changed

+39
-29
lines changed

3 files changed

+39
-29
lines changed

js/tests/supervisord.test.ts

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,15 @@ import { sandboxTest, wait } from './setup'
33

44
async function waitForHealth(sandbox: any, maxRetries = 10, intervalMs = 100) {
55
for (let i = 0; i < maxRetries; i++) {
6-
const result = await sandbox.commands.run(
7-
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
8-
)
9-
if (result.stdout.trim() === '200') {
10-
return true
6+
try {
7+
const result = await sandbox.commands.run(
8+
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
9+
)
10+
if (result.stdout.trim() === '200') {
11+
return true
12+
}
13+
} catch {
14+
// Connection refused or other error, retry
1115
}
1216
await wait(intervalMs)
1317
}
@@ -20,12 +24,12 @@ sandboxTest('restart after jupyter kill', async ({ sandbox }) => {
2024
expect(initialHealth).toBe(true)
2125

2226
// Kill the jupyter process as root
23-
await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", {
27+
await sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", {
2428
user: 'root',
2529
})
2630

2731
// Wait for supervisord to restart it and health to come back
28-
const recovered = await waitForHealth(sandbox, 10, 100)
32+
const recovered = await waitForHealth(sandbox, 20, 100)
2933
expect(recovered).toBe(true)
3034

3135
// Verify code execution works after recovery
@@ -39,12 +43,12 @@ sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => {
3943
expect(initialHealth).toBe(true)
4044

4145
// Kill the code-interpreter process as root
42-
await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid)', {
46+
await sandbox.commands.run('kill -9 $(cat /var/run/code-interpreter.pid) || true', {
4347
user: 'root',
4448
})
4549

4650
// Wait for supervisord to restart it and health to come back
47-
const recovered = await waitForHealth(sandbox, 10, 100)
51+
const recovered = await waitForHealth(sandbox, 20, 100)
4852
expect(recovered).toBe(true)
4953

5054
// Verify code execution works after recovery

python/tests/async/test_async_supervisord.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55

66
async def wait_for_health(sandbox: AsyncSandbox, max_retries=10, interval_ms=100):
77
for _ in range(max_retries):
8-
result = await sandbox.commands.run(
9-
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
10-
)
11-
if result.stdout.strip() == "200":
12-
return True
8+
try:
9+
result = await sandbox.commands.run(
10+
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
11+
)
12+
if result.stdout.strip() == "200":
13+
return True
14+
except Exception:
15+
pass
1316
await asyncio.sleep(interval_ms / 1000)
1417
return False
1518

@@ -18,11 +21,11 @@ async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox):
1821
# Verify health is up initially
1922
assert await wait_for_health(async_sandbox)
2023

21-
# Kill the jupyter process by pid
22-
await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root")
24+
# Kill the jupyter process as root
25+
await async_sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root")
2326

2427
# Wait for supervisord to restart it and health to come back
25-
assert await wait_for_health(async_sandbox, 10, 100)
28+
assert await wait_for_health(async_sandbox, 20, 100)
2629

2730
# Verify code execution works after recovery
2831
result = await async_sandbox.run_code("x = 1; x")
@@ -33,11 +36,11 @@ async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox):
3336
# Verify health is up initially
3437
assert await wait_for_health(async_sandbox)
3538

36-
# Kill the code-interpreter process by pid
37-
await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid)", user="root")
39+
# Kill the code-interpreter process as root
40+
await async_sandbox.commands.run("kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root")
3841

3942
# Wait for supervisord to restart it and health to come back
40-
assert await wait_for_health(async_sandbox, 10, 100)
43+
assert await wait_for_health(async_sandbox, 20, 100)
4144

4245
# Verify code execution works after recovery
4346
result = await async_sandbox.run_code("x = 1; x")

python/tests/sync/test_supervisord.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55

66
def wait_for_health(sandbox: Sandbox, max_retries=10, interval_ms=100):
77
for _ in range(max_retries):
8-
result = sandbox.commands.run(
9-
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
10-
)
11-
if result.stdout.strip() == "200":
12-
return True
8+
try:
9+
result = sandbox.commands.run(
10+
'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'
11+
)
12+
if result.stdout.strip() == "200":
13+
return True
14+
except Exception:
15+
pass
1316
time.sleep(interval_ms / 1000)
1417
return False
1518

@@ -19,10 +22,10 @@ def test_restart_after_jupyter_kill(sandbox: Sandbox):
1922
assert wait_for_health(sandbox)
2023

2124
# Kill the jupyter process as root
22-
sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server')", user="root")
25+
sandbox.commands.run("kill -9 $(pgrep -f 'jupyter server') || true", user="root")
2326

2427
# Wait for supervisord to restart it and health to come back
25-
assert wait_for_health(sandbox, 10, 100)
28+
assert wait_for_health(sandbox, 20, 100)
2629

2730
# Verify code execution works after recovery
2831
result = sandbox.run_code("x = 1; x")
@@ -35,11 +38,11 @@ def test_restart_after_code_interpreter_kill(sandbox: Sandbox):
3538

3639
# Kill the code-interpreter process as root
3740
sandbox.commands.run(
38-
"kill -9 $(cat /var/run/code-interpreter.pid)", user="root"
41+
"kill -9 $(cat /var/run/code-interpreter.pid) || true", user="root"
3942
)
4043

4144
# Wait for supervisord to restart it and health to come back
42-
assert wait_for_health(sandbox, 10, 100)
45+
assert wait_for_health(sandbox, 20, 100)
4346

4447
# Verify code execution works after recovery
4548
result = sandbox.run_code("x = 1; x")

0 commit comments

Comments
 (0)