Skip to content

Commit 17d5e56

Browse files
authored
Merge pull request #1901 from NVIDIA/nri-additional-namespaces
2 parents 41dd444 + e950019 commit 17d5e56

3 files changed

Lines changed: 165 additions & 14 deletions

File tree

cmd/nvidia-ctk-installer/container/runtime/nri/plugin.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"os"
23+
"slices"
2324
"strings"
2425
"sync/atomic"
2526
"time"
@@ -48,8 +49,8 @@ type Plugin struct {
4849
ctx context.Context
4950
logger logger.Interface
5051

51-
namespace string
52-
stub stub.Stub
52+
namespaces []string
53+
stub stub.Stub
5354

5455
// stopped is set before Stop() so OnClose does not reconnect during shutdown.
5556
stopped atomic.Bool
@@ -58,11 +59,11 @@ type Plugin struct {
5859
}
5960

6061
// NewPlugin creates a new NRI plugin for injecting CDI devices
61-
func NewPlugin(ctx context.Context, logger logger.Interface, namespace string) *Plugin {
62+
func NewPlugin(ctx context.Context, logger logger.Interface, namespaces []string) *Plugin {
6263
return &Plugin{
63-
ctx: ctx,
64-
logger: logger,
65-
namespace: namespace,
64+
ctx: ctx,
65+
logger: logger,
66+
namespaces: namespaces,
6667
}
6768
}
6869

@@ -107,9 +108,9 @@ func (p *Plugin) parseCDIDevices(pod *api.PodSandbox, key, container string) []s
107108
}
108109

109110
if strings.Contains(cdiDeviceNames, "management.nvidia.com/gpu") {
110-
if p.namespace != pod.Namespace {
111-
p.logger.Infof("pod %s/%s is requesting one or more management CDI devices, but it is outside of the toolkit's "+
112-
"namespace %s. Skipping CDI device injection...", pod.Namespace, pod.Name, p.namespace)
111+
if !slices.Contains(p.namespaces, pod.Namespace) {
112+
p.logger.Infof("pod %s/%s is requesting one or more management CDI devices, but it is not in one of the allowed "+
113+
"namespaces %s. Skipping CDI device injection...", pod.Namespace, pod.Name, strings.Join(p.namespaces, ", "))
113114
return nil
114115
}
115116
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/**
2+
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
**/
16+
17+
package nri
18+
19+
import (
20+
"testing"
21+
22+
"github.com/containerd/nri/pkg/api"
23+
"github.com/stretchr/testify/require"
24+
)
25+
26+
// nullLogger is a no-op logger used by the tests in this file: it satisfies
// logger.Interface and discards every message it is handed.
type nullLogger struct{}

// Tracef discards the trace-level message.
func (nullLogger) Tracef(_ string, _ ...any) {}

// Debugf discards the debug-level message.
func (nullLogger) Debugf(_ string, _ ...any) {}

// Infof discards the info-level message.
func (nullLogger) Infof(_ string, _ ...any) {}

// Warningf discards the warning-level message.
func (nullLogger) Warningf(_ string, _ ...any) {}

// Errorf discards the error-level message.
func (nullLogger) Errorf(_ string, _ ...any) {}
34+
35+
func newTestPlugin(namespaces []string) *Plugin {
36+
return &Plugin{
37+
logger: nullLogger{},
38+
namespaces: namespaces,
39+
}
40+
}
41+
42+
func podWithAnnotation(namespace, annotation, value string) *api.PodSandbox {
43+
return &api.PodSandbox{
44+
Namespace: namespace,
45+
Annotations: map[string]string{annotation: value},
46+
}
47+
}
48+
49+
func TestParseCDIDevices(t *testing.T) {
50+
const (
51+
toolkitNamespace = "gpu-operator"
52+
additionalNamespace = "kube-system"
53+
unknownNamespace = "default"
54+
)
55+
56+
testCases := []struct {
57+
description string
58+
namespaces []string
59+
pod *api.PodSandbox
60+
container string
61+
expected []string
62+
}{
63+
{
64+
description: "no annotations returns nil",
65+
namespaces: []string{toolkitNamespace},
66+
pod: &api.PodSandbox{Namespace: toolkitNamespace},
67+
container: "ctr",
68+
expected: nil,
69+
},
70+
{
71+
description: "non-management CDI device is injected in any namespace",
72+
namespaces: []string{toolkitNamespace},
73+
pod: podWithAnnotation(unknownNamespace, nriCDIAnnotationDomain+"/pod", "nvidia.com/gpu=0"),
74+
container: "ctr",
75+
expected: []string{"nvidia.com/gpu=0"},
76+
},
77+
{
78+
description: "management CDI device injected when pod is in the toolkit namespace",
79+
namespaces: []string{toolkitNamespace},
80+
pod: podWithAnnotation(toolkitNamespace, nriCDIAnnotationDomain+"/pod", "management.nvidia.com/gpu=0"),
81+
container: "ctr",
82+
expected: []string{"management.nvidia.com/gpu=0"},
83+
},
84+
{
85+
description: "management CDI device blocked when pod is outside allowed namespaces",
86+
namespaces: []string{toolkitNamespace},
87+
pod: podWithAnnotation(unknownNamespace, nriCDIAnnotationDomain+"/pod", "management.nvidia.com/gpu=0"),
88+
container: "ctr",
89+
expected: nil,
90+
},
91+
{
92+
description: "management CDI device injected when pod is in an additional allowed namespace",
93+
namespaces: []string{toolkitNamespace, additionalNamespace},
94+
pod: podWithAnnotation(additionalNamespace, nriCDIAnnotationDomain+"/pod", "management.nvidia.com/gpu=0"),
95+
container: "ctr",
96+
expected: []string{"management.nvidia.com/gpu=0"},
97+
},
98+
{
99+
description: "management CDI device blocked even with additional namespaces when pod namespace not listed",
100+
namespaces: []string{toolkitNamespace, additionalNamespace},
101+
pod: podWithAnnotation(unknownNamespace, nriCDIAnnotationDomain+"/pod", "management.nvidia.com/gpu=0"),
102+
container: "ctr",
103+
expected: nil,
104+
},
105+
{
106+
description: "mixed management and non-management CDI devices are injected in allowed namespace",
107+
namespaces: []string{toolkitNamespace},
108+
pod: podWithAnnotation(toolkitNamespace, nriCDIAnnotationDomain+"/pod", "nvidia.com/gpu=0,management.nvidia.com/gpu=0"),
109+
container: "ctr",
110+
expected: []string{"nvidia.com/gpu=0", "management.nvidia.com/gpu=0"},
111+
},
112+
{
113+
description: "mixed management and non-management CDI devices are blocked in disallowed namespace",
114+
namespaces: []string{toolkitNamespace},
115+
pod: podWithAnnotation(unknownNamespace, nriCDIAnnotationDomain+"/pod", "nvidia.com/gpu=0,management.nvidia.com/gpu=0"),
116+
container: "ctr",
117+
expected: nil,
118+
},
119+
{
120+
description: "container-scoped annotation takes precedence over pod-scoped",
121+
namespaces: []string{toolkitNamespace},
122+
pod: &api.PodSandbox{
123+
Namespace: toolkitNamespace,
124+
Annotations: map[string]string{
125+
nriCDIAnnotationDomain + "/pod": "nvidia.com/gpu=0",
126+
nriCDIAnnotationDomain + "/container.ctr": "management.nvidia.com/gpu=0",
127+
},
128+
},
129+
container: "ctr",
130+
expected: []string{"management.nvidia.com/gpu=0"},
131+
},
132+
}
133+
134+
for _, tc := range testCases {
135+
t.Run(tc.description, func(t *testing.T) {
136+
plugin := newTestPlugin(tc.namespaces)
137+
got := plugin.parseCDIDevices(tc.pod, nriCDIAnnotationDomain, tc.container)
138+
require.Equal(t, tc.expected, got)
139+
})
140+
}
141+
}

cmd/nvidia-ctk-installer/main.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,11 @@ type options struct {
4747
sourceRoot string
4848
packageType string
4949

50-
enableNRIPlugin bool
51-
nriPluginIndex uint
52-
nriSocket string
53-
nriNamespace string
50+
enableNRIPlugin bool
51+
nriPluginIndex uint
52+
nriSocket string
53+
nriNamespace string
54+
nriManagementCDIDeviceNamespaces []string
5455

5556
toolkitOptions toolkit.Options
5657

@@ -159,6 +160,13 @@ func (a app) build() *cli.Command {
159160
Destination: &options.nriNamespace,
160161
Sources: cli.EnvVars("NRI_NAMESPACE"),
161162
},
163+
&cli.StringSliceFlag{
164+
Name: "nri-management-cdi-device-namespaces",
165+
Usage: "Specify the list of kubernetes namespaces (in addition to the nri-namespace) that are" +
166+
" allowed to receive management CDI devices through the NRI plugin",
167+
Destination: &options.nriManagementCDIDeviceNamespaces,
168+
Sources: cli.EnvVars("NRI_MANAGEMENT_CDI_DEVICE_NAMESPACES"),
169+
},
162170
&cli.StringFlag{
163171
Name: "runtime",
164172
Aliases: []string{"r"},
@@ -383,7 +391,8 @@ func (a *app) waitForSignal() error {
383391
func (a *app) startNRIPluginServer(ctx context.Context, opts *options) (*nri.Plugin, error) {
384392
a.logger.Infof("Starting the NRI Plugin server....")
385393

386-
plugin := nri.NewPlugin(ctx, a.logger, opts.nriNamespace)
394+
nriNamespaces := append([]string{opts.nriNamespace}, opts.nriManagementCDIDeviceNamespaces...)
395+
plugin := nri.NewPlugin(ctx, a.logger, nriNamespaces)
387396
err := plugin.Start(ctx, opts.nriSocket, fmt.Sprintf("%02d", opts.nriPluginIndex))
388397
if err != nil {
389398
return nil, err

0 commit comments

Comments
 (0)