Skip to content

Commit d370387

Browse files
committed
Lambda: Docker: use cgroupfs driver
Fixes: #2601
1 parent 488104a commit d370387

1 file changed

Lines changed: 11 additions & 6 deletions

File tree

  • src/dstack/_internal/core/backends/lambdalabs

src/dstack/_internal/core/backends/lambdalabs/compute.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -179,13 +179,18 @@ def _setup_instance(
179179
ssh_private_key: str,
180180
):
181181
setup_commands = (
182-
"mkdir /home/ubuntu/.dstack && "
183-
"sudo apt-get update && "
184-
"sudo apt-get install -y --no-install-recommends nvidia-container-toolkit && "
185-
"sudo nvidia-ctk runtime configure --runtime=docker && "
186-
"sudo pkill -SIGHUP dockerd"
182+
"mkdir /home/ubuntu/.dstack",
183+
"sudo apt-get update",
184+
"sudo apt-get install -y --no-install-recommends nvidia-container-toolkit",
185+
"sudo install -d -m 0755 /etc/docker",
186+
# Workaround for https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
187+
"""echo '{"exec-opts":["native.cgroupdriver=cgroupfs"]}' | sudo tee /etc/docker/daemon.json""",
188+
"sudo nvidia-ctk runtime configure --runtime=docker",
189+
"sudo systemctl restart docker.service", # `systemctl reload` (`kill -HUP`) won't work
190+
)
191+
_run_ssh_command(
192+
hostname=hostname, ssh_private_key=ssh_private_key, command=" && ".join(setup_commands)
187193
)
188-
_run_ssh_command(hostname=hostname, ssh_private_key=ssh_private_key, command=setup_commands)
189194

190195

191196
def _launch_runner(

0 commit comments

Comments
 (0)