Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/empty-knives-make.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@e2b/code-interpreter-template': patch
---

Added systemd to handle process restarts.
67 changes: 67 additions & 0 deletions js/tests/systemd.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { expect } from 'vitest'
import { sandboxTest, wait } from './setup'

/**
 * Polls the health endpoint from inside the sandbox until it reports HTTP 200.
 *
 * @param sandbox    Sandbox whose `commands.run` is used to execute curl.
 * @param maxRetries Maximum number of probes before giving up.
 * @param intervalMs Pause between probes, in milliseconds.
 * @returns `true` as soon as a probe prints `200`, `false` once all probes failed.
 */
async function waitForHealth(sandbox: any, maxRetries = 10, intervalMs = 100) {
  const probeCommand =
    'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'

  let attempt = 0
  while (attempt < maxRetries) {
    let healthy = false
    try {
      const probe = await sandbox.commands.run(probeCommand)
      healthy = probe.stdout.trim() === '200'
    } catch {
      // A failed command (e.g. connection refused) counts as "not healthy yet".
    }

    if (healthy) {
      return true
    }

    await wait(intervalMs)
    attempt += 1
  }

  return false
}

sandboxTest('restart after jupyter kill', async ({ sandbox }) => {
  // Precondition: the health endpoint answers before anything is killed.
  expect(await waitForHealth(sandbox)).toBe(true)

  // SIGKILL the jupyter process as root.
  // Killing jupyter cascades to code-interpreter, so the command handle
  // itself may be terminated; that error is expected and ignored.
  const killJupyter = "kill -9 $(pgrep -f 'jupyter server')"
  try {
    await sandbox.commands.run(killJupyter, { user: 'root' })
  } catch {
    // Expected — the kill cascade may terminate the command handle
  }

  // systemd should bring both services back; poll up to 60 times, 500 ms apart.
  expect(await waitForHealth(sandbox, 60, 500)).toBe(true)

  // After recovery, code execution must work again.
  const execution = await sandbox.runCode('x = 1; x')
  expect(execution.text).toEqual('1')
})

sandboxTest('restart after code-interpreter kill', async ({ sandbox }) => {
  // Precondition: the health endpoint answers before anything is killed.
  expect(await waitForHealth(sandbox)).toBe(true)

  // SIGKILL the code-interpreter (uvicorn) process as root.
  const killCodeInterpreter = "kill -9 $(pgrep -f 'uvicorn main:app')"
  try {
    await sandbox.commands.run(killCodeInterpreter, { user: 'root' })
  } catch {
    // Expected — killing code-interpreter may terminate the command handle
  }

  // systemd should restart it; poll up to 60 times, 500 ms apart.
  expect(await waitForHealth(sandbox, 60, 500)).toBe(true)

  // After recovery, code execution must work again.
  const execution = await sandbox.runCode('x = 1; x')
  expect(execution.text).toEqual('1')
})
59 changes: 59 additions & 0 deletions python/tests/async/test_async_systemd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import asyncio

from e2b_code_interpreter.code_interpreter_async import AsyncSandbox


async def wait_for_health(sandbox: AsyncSandbox, max_retries=10, interval_ms=100):
    """Poll the health endpoint from inside the sandbox until it returns 200.

    Args:
        sandbox: Sandbox whose ``commands.run`` executes the curl probe.
        max_retries: Maximum number of probes before giving up.
        interval_ms: Pause after each unsuccessful probe, in milliseconds.

    Returns:
        True as soon as a probe prints ``200``; False if every probe failed.
    """
    probe_cmd = 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'

    for _attempt in range(max_retries):
        try:
            probe = await sandbox.commands.run(probe_cmd)
            healthy = probe.stdout.strip() == "200"
        except Exception:
            # Any failure while probing simply means "not healthy yet".
            healthy = False

        if healthy:
            return True

        await asyncio.sleep(interval_ms / 1000)

    return False


async def test_restart_after_jupyter_kill(async_sandbox: AsyncSandbox):
    """Kill the jupyter process and check the sandbox becomes usable again."""
    # Precondition: the health endpoint answers before anything is killed.
    healthy_before = await wait_for_health(async_sandbox)
    assert healthy_before

    # SIGKILL the jupyter process as root.
    # Killing jupyter cascades to code-interpreter, so the command handle
    # itself may be killed too; that error is expected and ignored.
    kill_cmd = "kill -9 $(pgrep -f 'jupyter server')"
    try:
        await async_sandbox.commands.run(kill_cmd, user="root")
    except Exception:
        pass

    # systemd should restart both services; poll up to 60 times, 500 ms apart.
    healthy_after = await wait_for_health(async_sandbox, 60, 500)
    assert healthy_after

    # After recovery, code execution must work again.
    execution = await async_sandbox.run_code("x = 1; x")
    assert execution.text == "1"


async def test_restart_after_code_interpreter_kill(async_sandbox: AsyncSandbox):
    """Kill the code-interpreter process and check the sandbox recovers."""
    # Precondition: the health endpoint answers before anything is killed.
    healthy_before = await wait_for_health(async_sandbox)
    assert healthy_before

    # SIGKILL the code-interpreter (uvicorn) process as root; the command
    # may fail as a consequence, which is ignored.
    kill_cmd = "kill -9 $(pgrep -f 'uvicorn main:app')"
    try:
        await async_sandbox.commands.run(kill_cmd, user="root")
    except Exception:
        pass

    # systemd should restart it; poll up to 60 times, 500 ms apart.
    healthy_after = await wait_for_health(async_sandbox, 60, 500)
    assert healthy_after

    # After recovery, code execution must work again.
    execution = await async_sandbox.run_code("x = 1; x")
    assert execution.text == "1"
55 changes: 55 additions & 0 deletions python/tests/sync/test_systemd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import time

from e2b_code_interpreter.code_interpreter_sync import Sandbox


def wait_for_health(sandbox: Sandbox, max_retries=10, interval_ms=100):
    """Poll the health endpoint from inside the sandbox until it returns 200.

    Args:
        sandbox: Sandbox whose ``commands.run`` executes the curl probe.
        max_retries: Maximum number of probes before giving up.
        interval_ms: Pause after each unsuccessful probe, in milliseconds.

    Returns:
        True as soon as a probe prints ``200``; False if every probe failed.
    """
    probe_cmd = 'curl -s -o /dev/null -w "%{http_code}" http://0.0.0.0:49999/health'

    for _attempt in range(max_retries):
        try:
            probe = sandbox.commands.run(probe_cmd)
            healthy = probe.stdout.strip() == "200"
        except Exception:
            # Any failure while probing simply means "not healthy yet".
            healthy = False

        if healthy:
            return True

        time.sleep(interval_ms / 1000)

    return False


def test_restart_after_jupyter_kill(sandbox: Sandbox):
    """Kill the jupyter process and check the sandbox becomes usable again."""
    # Precondition: the health endpoint answers before anything is killed.
    healthy_before = wait_for_health(sandbox)
    assert healthy_before

    # SIGKILL the jupyter process as root.
    # Killing jupyter cascades to code-interpreter, so the command handle
    # itself may be killed too; that error is expected and ignored.
    kill_cmd = "kill -9 $(pgrep -f 'jupyter server')"
    try:
        sandbox.commands.run(kill_cmd, user="root")
    except Exception:
        pass

    # systemd should restart both services; poll up to 60 times, 500 ms apart.
    healthy_after = wait_for_health(sandbox, 60, 500)
    assert healthy_after

    # After recovery, code execution must work again.
    execution = sandbox.run_code("x = 1; x")
    assert execution.text == "1"


def test_restart_after_code_interpreter_kill(sandbox: Sandbox):
    """Kill the code-interpreter process and check the sandbox recovers."""
    # Precondition: the health endpoint answers before anything is killed.
    healthy_before = wait_for_health(sandbox)
    assert healthy_before

    # SIGKILL the code-interpreter (uvicorn) process as root; the command
    # may fail as a consequence, which is ignored.
    kill_cmd = "kill -9 $(pgrep -f 'uvicorn main:app')"
    try:
        sandbox.commands.run(kill_cmd, user="root")
    except Exception:
        pass

    # systemd should restart it; poll up to 60 times, 500 ms apart.
    healthy_after = wait_for_health(sandbox, 60, 500)
    assert healthy_after

    # After recovery, code execution must work again.
    execution = sandbox.run_code("x = 1; x")
    assert execution.text == "1"
23 changes: 23 additions & 0 deletions template/jupyter-healthcheck.sh
Comment thread
jakubno marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Custom health check for Jupyter Server
# Verifies the server is responsive via the /api/status endpoint
#
# Exit status:
#   0  as soon as one probe returns HTTP 200
#   1  after MAX_RETRIES probes without a 200

MAX_RETRIES=50
RETRY_INTERVAL=0.2
# Upper bound (seconds) for a single probe, so a server that accepts the
# connection but never answers cannot stall the whole check.
REQUEST_TIMEOUT=2
STATUS_URL="http://localhost:8888/api/status"

for i in $(seq 1 "$MAX_RETRIES"); do
    # With -w "%{http_code}" curl prints 000 when no HTTP response was received.
    status_code=$(curl -s -o /dev/null --max-time "$REQUEST_TIMEOUT" -w "%{http_code}" "$STATUS_URL")

    # String comparison: `-eq` would print "integer expression expected"
    # if curl produced no output at all.
    if [ "$status_code" = "200" ]; then
        echo "Jupyter Server is healthy"
        exit 0
    fi

    # Only log every 10th attempt to keep the output short.
    if [ $((i % 10)) -eq 0 ]; then
        echo "Waiting for Jupyter Server to become healthy... (attempt $i/$MAX_RETRIES)"
    fi
    sleep "$RETRY_INTERVAL"
done

echo "Jupyter Server health check failed after $MAX_RETRIES attempts"
exit 1
20 changes: 7 additions & 13 deletions template/start-up.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
#!/bin/bash

function start_jupyter_server() {
counter=0
response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status")
while [[ ${response} -ne 200 ]]; do
let counter++
if ((counter % 20 == 0)); then
echo "Waiting for Jupyter Server to start..."
sleep 0.1
fi

response=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:8888/api/status")
done
function start_code_interpreter() {
/root/.jupyter/jupyter-healthcheck.sh
if [ $? -ne 0 ]; then
echo "Jupyter Server failed to start, aborting."
exit 1
fi

cd /root/.server/
.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640
}

echo "Starting Code Interpreter server..."
start_jupyter_server &
start_code_interpreter &
MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc jupyter server --IdentityProvider.token="" >/dev/null 2>&1
16 changes: 16 additions & 0 deletions template/systemd/code-interpreter.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[Unit]
Description=Code Interpreter Server
Documentation=https://github.com/e2b-dev/code-interpreter
Requires=jupyter.service
After=jupyter.service
BindsTo=jupyter.service
Comment thread
mishushakov marked this conversation as resolved.
Outdated

[Service]
Type=simple
WorkingDirectory=/root/.server
ExecStartPre=/root/.jupyter/jupyter-healthcheck.sh
ExecStart=/root/.server/.venv/bin/uvicorn main:app --host 0.0.0.0 --port 49999 --workers 1 --no-access-log --no-use-colors --timeout-keep-alive 640
Restart=always
Comment thread
mishushakov marked this conversation as resolved.
Outdated
RestartSec=1
StandardOutput=journal
StandardError=journal
14 changes: 14 additions & 0 deletions template/systemd/jupyter.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[Unit]
Description=Jupyter Server
Documentation=https://jupyter-server.readthedocs.io

[Service]
Type=simple
Environment=MATPLOTLIBRC=/root/.config/matplotlib/.matplotlibrc
Environment=JUPYTER_CONFIG_PATH=/root/.jupyter
ExecStart=/usr/local/bin/jupyter server --IdentityProvider.token=""
ExecStartPost=-/usr/bin/systemctl start --no-block code-interpreter
Comment thread
mishushakov marked this conversation as resolved.
Outdated
Restart=on-failure
RestartSec=1
StandardOutput=null
StandardError=journal
Comment thread
mishushakov marked this conversation as resolved.
27 changes: 24 additions & 3 deletions template/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,30 @@ def make_template(
# Copy configuration files
template = (
template.copy("matplotlibrc", ".config/matplotlib/.matplotlibrc")
.copy("start-up.sh", ".jupyter/start-up.sh")
.run_cmd("chmod +x .jupyter/start-up.sh")
.copy("jupyter-healthcheck.sh", ".jupyter/jupyter-healthcheck.sh")
.run_cmd("chmod +x .jupyter/jupyter-healthcheck.sh")
.copy("jupyter_server_config.py", ".jupyter/")
.make_dir(".ipython/profile_default/startup")
.copy("ipython_kernel_config.py", ".ipython/profile_default/")
.copy("startup_scripts", ".ipython/profile_default/startup")
)

if not is_docker:
template = (
template.copy(
"systemd/jupyter.service", "/etc/systemd/system/jupyter.service"
)
.copy(
"systemd/code-interpreter.service",
"/etc/systemd/system/code-interpreter.service",
)
.run_cmd("systemctl daemon-reload")
)
else:
template = template.copy("start-up.sh", ".jupyter/start-up.sh").run_cmd(
"chmod +x .jupyter/start-up.sh"
)

if is_docker:
# create user user and /home/user
template = template.run_cmd("useradd -m user")
Expand All @@ -130,6 +146,11 @@ def make_template(

template = template.set_user("user").set_workdir("/home/user")

if is_docker:
start_cmd = "sudo /root/.jupyter/start-up.sh"
else:
start_cmd = "sudo systemctl start jupyter"

return template.set_start_cmd(
"sudo /root/.jupyter/start-up.sh", wait_for_url("http://localhost:49999/health")
start_cmd, wait_for_url("http://localhost:49999/health")
)
Loading