Skip to content

Commit 1f5a950

Browse files
committed
feat(clusterpolicy): precompiled gdrcopy support
1 parent 9984cb8 commit 1f5a950

2 files changed

Lines changed: 24 additions & 5 deletions

File tree

api/nvidia/v1/clusterpolicy_types.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,12 @@ type GDRCopySpec struct {
15391539
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
15401540
Enabled *bool `json:"enabled,omitempty"`
15411541

1542+
// UsePrecompiled indicates if deployment of GDRCopy using pre-compiled modules is enabled
1543+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
1544+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable GDRCopy deployment using pre-compiled modules"
1545+
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch"
1546+
UsePrecompiled *bool `json:"usePrecompiled,omitempty"`
1547+
15421548
// NVIDIA GDRCopy driver image repository
15431549
// +kubebuilder:validation:Optional
15441550
Repository string `json:"repository,omitempty"`
@@ -2428,6 +2434,14 @@ func (gdrcopy *GDRCopySpec) IsEnabled() bool {
24282434
return *gdrcopy.Enabled
24292435
}
24302436

2437+
// UsePrecompiledDrivers reports whether the usePrecompiled option is enabled in the GDRCopy spec; an unset (nil) value is treated as false.
2438+
func (gdrcopy *GDRCopySpec) UsePrecompiledDrivers() bool {
2439+
if gdrcopy.UsePrecompiled == nil {
2440+
return false
2441+
}
2442+
return *gdrcopy.UsePrecompiled
2443+
}
2444+
24312445
// IsEnabled returns true if DCGM hostengine as a separate Pod is enabled through gpu-operator
24322446
func (dcgm *DCGMSpec) IsEnabled() bool {
24332447
if dcgm.Enabled == nil {

controllers/object_controls.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3140,8 +3140,8 @@ func transformGDRCopyContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolic
31403140
obj.Spec.Template.Spec.Containers = append(obj.Spec.Template.Spec.Containers[:i], obj.Spec.Template.Spec.Containers[i+1:]...)
31413141
return nil
31423142
}
3143-
if config.Driver.UsePrecompiledDrivers() {
3144-
return fmt.Errorf("GDRCopy is not supported along with pre-compiled NVIDIA drivers")
3143+
if config.Driver.UsePrecompiledDrivers() && !config.GDRCopy.UsePrecompiledDrivers() {
3144+
return fmt.Errorf("GDRCopy is not supported along with pre-compiled NVIDIA drivers unless gdrcopy.usePrecompiled is also enabled")
31453145
}
31463146

31473147
gdrcopyContainer := &obj.Spec.Template.Spec.Containers[i]
@@ -3425,9 +3425,14 @@ func resolveDriverTag(n ClusterPolicyController, driverSpec interface{}) (string
34253425
}
34263426
case *gpuv1.GDRCopySpec:
34273427
spec := driverSpec.(*gpuv1.GDRCopySpec)
3428-
image, err = gpuv1.ImagePath(spec)
3429-
if err != nil {
3430-
return "", err
3428+
if spec.UsePrecompiledDrivers() {
3429+
// use per kernel version tag
3430+
image = spec.Repository + "/" + spec.Image + ":" + spec.Version + "-" + n.currentKernelVersion
3431+
} else {
3432+
image, err = gpuv1.ImagePath(spec)
3433+
if err != nil {
3434+
return "", err
3435+
}
34313436
}
34323437
default:
34333438
return "", fmt.Errorf("invalid type to construct image path: %v", v)

0 commit comments

Comments
 (0)