Skip to content

Commit 5bd0bcc

Browse files
committed
Install CUDA driver into VM image for ARM64
1 parent db07882 commit 5bd0bcc

3 files changed

Lines changed: 41 additions & 3 deletions

File tree

vm_images/linux-arm64/bootstrap.sh

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,19 @@ sudo systemctl is-active --quiet docker.service || sudo systemctl start docker.s
3131
sudo systemctl is-enabled --quiet docker.service || sudo systemctl enable docker.service
3232
sleep 10 # Docker daemon takes time to come up after installing
3333
sudo docker info
34+
35+
## Install NVIDIA Container Toolkit
36+
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
37+
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
38+
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
39+
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
40+
sudo apt-get update
41+
sudo apt-get install -y nvidia-container-toolkit
42+
sudo nvidia-ctk runtime configure --runtime=docker
43+
sudo systemctl restart docker
44+
45+
sleep 10
46+
sudo docker run --rm --gpus all ubuntu nvidia-smi
3447
sudo systemctl stop docker
3548

3649
## Install AWS CLI v2

vm_images/linux-arm64/install_drivers.sh

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
2+
set -euo pipefail
3+
4+
## Install basic tools
5+
echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
6+
sudo apt-get update
7+
sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip
8+
9+
## Install CUDA Driver 580
10+
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb
11+
sudo dpkg -i cuda-keyring_1.1-1_all.deb
12+
sudo apt-get update
13+
sudo apt-get -y install nvidia-open-580
14+
rm cuda-keyring_1.1-1_all.deb

vm_images/linux-arm64/linux-arm64.pkr.hcl

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ packer {
99

1010
locals {
1111
ami_name_prefix = "xgboost-ci"
12-
image_name = "RunsOn worker with Ubuntu 24.04 ARM64"
12+
image_name = "RunsOn worker with Ubuntu 24.04 ARM64 + CUDA driver 580"
1313
region = "us-west-2"
1414
timestamp = regex_replace(timestamp(), "[- TZ:]", "")
1515
volume_size = 40
@@ -33,7 +33,7 @@ source "amazon-ebs" "runs-on-linux-arm64" {
3333
ami_virtualization_type = "hvm"
3434
associate_public_ip_address = true
3535
communicator = "ssh"
36-
instance_type = "c6g.4xlarge"
36+
instance_type = "g5g.xlarge"
3737
region = "${local.region}"
3838
ssh_timeout = "10m"
3939
ssh_username = "ubuntu"
@@ -63,6 +63,17 @@ build {
6363
sources = ["source.amazon-ebs.runs-on-linux-arm64"]
6464

6565
provisioner "shell" {
66-
script = "bootstrap.sh"
66+
script = "install_drivers.sh"
67+
pause_after = "30s"
68+
}
69+
70+
provisioner "shell" {
71+
expect_disconnect = true
72+
inline = ["echo 'Reboot VM'", "sudo reboot"]
73+
}
74+
75+
provisioner "shell" {
76+
pause_before = "1m0s"
77+
script = "bootstrap.sh"
6778
}
6879
}

0 commit comments

Comments
 (0)