Skip to content

Commit 90e7983

Browse files
arsacclaude
andcommitted
feat(nvidia-dra-driver-gpu): add Talos OS support image
Builds a patched version of nvcr.io/nvidia/k8s-dra-driver-gpu that adds /usr/local/glibc/usr/lib and /usr/local/bin to the library/binary search paths, matching upstream kubernetes-sigs/dra-driver-nvidia-gpu#695. Renovate will track NVIDIA/k8s-dra-driver-gpu releases to keep the VERSION in sync. Remove this app once PR #695 is released upstream. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 340fa6a commit 90e7983

3 files changed

Lines changed: 147 additions & 0 deletions

File tree

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
ARG VERSION="v25.12.0"
2+
ARG SOURCE_IMAGE="nvcr.io/nvidia/k8s-dra-driver-gpu"
3+
4+
# =============================================================================
5+
# Builder stage: clone upstream source, apply Talos support patch, build binaries
6+
# =============================================================================
7+
FROM golang:1.26.1-bookworm AS builder
8+
9+
ARG VERSION
10+
11+
RUN mkdir -p /out && \
12+
git clone --depth 1 --branch "${VERSION}" \
13+
https://github.com/NVIDIA/k8s-dra-driver-gpu.git /src
14+
15+
WORKDIR /src
16+
17+
# Apply Talos OS support (upstream PR #695):
18+
# - adds /usr/local/glibc/usr/lib to library search paths
19+
# - adds /usr/local/bin to binary search paths
20+
COPY talos-support.patch /src/
21+
RUN patch -p1 < talos-support.patch
22+
23+
# Build kubelet plugin binaries using vendored dependencies
24+
RUN CGO_ENABLED=1 GOOS=linux \
25+
go build -mod=vendor \
26+
-o /out/gpu-kubelet-plugin \
27+
./cmd/gpu-kubelet-plugin/ && \
28+
CGO_ENABLED=1 GOOS=linux \
29+
go build -mod=vendor \
30+
-o /out/compute-domain-kubelet-plugin \
31+
./cmd/compute-domain-kubelet-plugin/
32+
33+
# =============================================================================
34+
# Final stage: official image with patched binaries and prestart script
35+
# =============================================================================
36+
FROM ${SOURCE_IMAGE}:${VERSION}
37+
38+
COPY --from=builder /out/gpu-kubelet-plugin /usr/bin/gpu-kubelet-plugin
39+
COPY --from=builder /out/compute-domain-kubelet-plugin /usr/bin/compute-domain-kubelet-plugin
40+
COPY --from=builder /src/hack/kubelet-plugin-prestart.sh /usr/bin/kubelet-plugin-prestart.sh
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
target "docker-metadata-action" {}
2+
3+
variable "APP" {
4+
default = "nvidia-dra-driver-gpu"
5+
}
6+
7+
variable "VERSION" {
8+
// renovate: datasource=github-releases depName=NVIDIA/k8s-dra-driver-gpu
9+
default = "v25.12.0"
10+
}
11+
12+
variable "SOURCE" {
13+
default = "https://github.com/NVIDIA/k8s-dra-driver-gpu"
14+
}
15+
16+
variable "SOURCE_IMAGE" {
17+
default = "nvcr.io/nvidia/k8s-dra-driver-gpu"
18+
}
19+
20+
group "default" {
21+
targets = ["image-local"]
22+
}
23+
24+
target "image" {
25+
inherits = ["docker-metadata-action"]
26+
args = {
27+
VERSION = "${VERSION}"
28+
SOURCE_IMAGE = "${SOURCE_IMAGE}"
29+
}
30+
labels = {
31+
"org.opencontainers.image.source" = "${SOURCE}"
32+
}
33+
}
34+
35+
target "image-local" {
36+
inherits = ["image"]
37+
output = ["type=docker"]
38+
tags = ["${APP}:${VERSION}"]
39+
}
40+
41+
target "image-all" {
42+
inherits = ["image"]
43+
platforms = [
44+
"linux/amd64"
45+
]
46+
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
Talos Linux installs NVIDIA libraries via a glibc compatibility layer at
2+
/usr/local/glibc/usr/lib/ and binaries at /usr/local/bin/, which are not
3+
in the standard search paths. Mirrors upstream PR NVIDIA/k8s-dra-driver-gpu#695.
4+
5+
diff --git a/cmd/gpu-kubelet-plugin/root.go b/cmd/gpu-kubelet-plugin/root.go
6+
--- a/cmd/gpu-kubelet-plugin/root.go
7+
+++ b/cmd/gpu-kubelet-plugin/root.go
8+
@@ -34,6 +34,7 @@ func (r root) getDriverLibraryPath() (string, error) {
9+
"/lib64",
10+
"/lib/x86_64-linux-gnu",
11+
"/lib/aarch64-linux-gnu",
12+
+ "/usr/local/glibc/usr/lib",
13+
}
14+
15+
libraryPath, err := r.findFile("libnvidia-ml.so.1", librarySearchPaths...)
16+
@@ -52,6 +53,7 @@ func (r root) getNvidiaSMIPath() (string, error) {
17+
"/usr/sbin",
18+
"/bin",
19+
"/sbin",
20+
+ "/usr/local/bin",
21+
}
22+
23+
binaryPath, err := r.findFile("nvidia-smi", binarySearchPaths...)
24+
diff --git a/cmd/compute-domain-kubelet-plugin/root.go b/cmd/compute-domain-kubelet-plugin/root.go
25+
--- a/cmd/compute-domain-kubelet-plugin/root.go
26+
+++ b/cmd/compute-domain-kubelet-plugin/root.go
27+
@@ -34,6 +34,7 @@ func (r root) getDriverLibraryPath() (string, error) {
28+
"/lib64",
29+
"/lib/x86_64-linux-gnu",
30+
"/lib/aarch64-linux-gnu",
31+
+ "/usr/local/glibc/usr/lib",
32+
}
33+
34+
libraryPath, err := r.findFile("libnvidia-ml.so.1", librarySearchPaths...)
35+
@@ -52,6 +53,7 @@ func (r root) getNvidiaSMIPath() (string, error) {
36+
"/usr/sbin",
37+
"/bin",
38+
"/sbin",
39+
+ "/usr/local/bin",
40+
}
41+
42+
binaryPath, err := r.findFile("nvidia-smi", binarySearchPaths...)
43+
diff --git a/hack/kubelet-plugin-prestart.sh b/hack/kubelet-plugin-prestart.sh
44+
--- a/hack/kubelet-plugin-prestart.sh
45+
+++ b/hack/kubelet-plugin-prestart.sh
46+
@@ -46,6 +46,7 @@ validate_and_exit_on_success () {
47+
/driver-root/usr/bin \
48+
/driver-root/usr/sbin \
49+
/driver-root/bin \
50+
+ /driver-root/usr/local/bin \
51+
/driver-root/sbin \
52+
-maxdepth 1 -type f -name "nvidia-smi" 2> /dev/null | head -n1
53+
)
54+
@@ -60,6 +61,7 @@ validate_and_exit_on_success () {
55+
/driver-root/usr/lib64 \
56+
/driver-root/usr/lib/x86_64-linux-gnu \
57+
/driver-root/usr/lib/aarch64-linux-gnu \
58+
+ /driver-root/usr/local/glibc/usr/lib \
59+
/driver-root/lib64 \
60+
/driver-root/lib/x86_64-linux-gnu \
61+
/driver-root/lib/aarch64-linux-gnu \

0 commit comments

Comments
 (0)