File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -31,6 +31,19 @@ sudo systemctl is-active --quiet docker.service || sudo systemctl start docker.s
3131sudo systemctl is-enabled --quiet docker.service || sudo systemctl enable docker.service
3232sleep 10 # Docker daemon takes time to come up after installing
3333sudo docker info
34+
## Install NVIDIA Container Toolkit
# Registers NVIDIA's libnvidia-container apt repository (pinned to its own
# signing key), installs the toolkit, wires it into Docker as a runtime and
# finishes with a GPU smoke test.
nvidia_keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
nvidia_repo_url=https://nvidia.github.io/libnvidia-container

# The key import and the repo-list download are separate statements on
# purpose: when chained with `&&`, a failure of the first pipeline is exempt
# from `set -e` (if the script runs with it) and the install would carry on
# without a usable repository.
# --batch --yes: overwrite an existing keyring instead of asking for
# confirmation, which cannot be answered in a non-interactive run.
curl -fsSL "${nvidia_repo_url}/gpgkey" \
  | sudo gpg --batch --yes --dearmor -o "${nvidia_keyring}"

# -f: fail on an HTTP error instead of writing the error page into the apt
# source list. sed adds the signed-by option to every "deb https://" entry.
curl -fsSL "${nvidia_repo_url}/stable/deb/nvidia-container-toolkit.list" \
  | sed "s#deb https://#deb [signed-by=${nvidia_keyring}] https://#g" \
  | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker

sleep 10  # Give the Docker daemon time to come back up after the restart
# Smoke test: run nvidia-smi inside a container with all GPUs exposed.
sudo docker run --rm --gpus all ubuntu nvidia-smi
3447sudo systemctl stop docker
3548
3649# # Install AWS CLI v2
Original file line number Diff line number Diff line change 1+ #! /bin/bash
# Installs basic build tools and the NVIDIA open kernel driver (580 series)
# from NVIDIA's CUDA apt repository for Ubuntu 24.04 on sbsa (ARM64 server).
set -euo pipefail

## Install basic tools
# Preseed debconf so package configuration does not stop at an interactive prompt.
echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections
sudo apt-get update
sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip

## Install CUDA Driver 580
# Download the repository keyring package into a private temp directory that
# is removed on every exit path. Downloading into the current directory would
# leave the .deb behind on failure, and a re-run would then make wget save to
# "<name>.1" while dpkg installed the stale original.
keyring_deb=cuda-keyring_1.1-1_all.deb
workdir=$(mktemp -d)
trap 'rm -rf -- "${workdir}"' EXIT

wget -nv -O "${workdir}/${keyring_deb}" \
  "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/${keyring_deb}"
sudo dpkg -i "${workdir}/${keyring_deb}"
sudo apt-get update
sudo apt-get -y install nvidia-open-580
Original file line number Diff line number Diff line change 99
1010locals {
1111 ami_name_prefix = " xgboost-ci"
12- image_name = " RunsOn worker with Ubuntu 24.04 ARM64"
12+ image_name = " RunsOn worker with Ubuntu 24.04 ARM64 + CUDA driver 580 "
1313 region = " us-west-2"
1414 timestamp = regex_replace (timestamp (), " [- TZ:]" , " " )
1515 volume_size = 40
@@ -33,7 +33,7 @@ source "amazon-ebs" "runs-on-linux-arm64" {
3333 ami_virtualization_type = " hvm"
3434 associate_public_ip_address = true
3535 communicator = " ssh"
36- instance_type = " c6g.4xlarge "
36+ instance_type = " g5g.xlarge "
3737 region = " ${ local . region } "
3838 ssh_timeout = " 10m"
3939 ssh_username = " ubuntu"
@@ -63,6 +63,17 @@ build {
6363 sources = [" source.amazon-ebs.runs-on-linux-arm64" ]
6464
6565 provisioner "shell" {
66- script = " bootstrap.sh"
66+ script = " install_drivers.sh"
67+ pause_after = " 30s"
68+ }
69+
70+ provisioner "shell" {
71+ expect_disconnect = true
72+ inline = [" echo 'Reboot VM'" , " sudo reboot" ]
73+ }
74+
75+ provisioner "shell" {
76+ pause_before = " 1m0s"
77+ script = " bootstrap.sh"
6778 }
6879}
You can’t perform that action at this time.
0 commit comments