@@ -714,7 +714,6 @@ func preProcessDaemonSet(obj *appsv1.DaemonSet, n ClusterPolicyController) error
714714 "nvidia-mig-manager" : TransformMIGManager ,
715715 "nvidia-operator-validator" : TransformValidator ,
716716 "nvidia-sandbox-validator" : TransformSandboxValidator ,
717- "nvidia-kata-manager" : TransformKataManager ,
718717 "nvidia-cc-manager" : TransformCCManager ,
719718 }
720719
@@ -2023,83 +2022,6 @@ func TransformMIGManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec,
20232022 return nil
20242023}
20252024
2026- // TransformKataManager transforms Kata Manager daemonset with required config as per ClusterPolicy
2027- func TransformKataManager (obj * appsv1.DaemonSet , config * gpuv1.ClusterPolicySpec , n ClusterPolicyController ) error {
2028- // update image
2029- image , err := gpuv1 .ImagePath (& config .KataManager )
2030- if err != nil {
2031- return err
2032- }
2033- obj .Spec .Template .Spec .Containers [0 ].Image = image
2034-
2035- // update image pull policy
2036- obj .Spec .Template .Spec .Containers [0 ].ImagePullPolicy = gpuv1 .ImagePullPolicy (config .KataManager .ImagePullPolicy )
2037-
2038- // set image pull secrets
2039- if len (config .KataManager .ImagePullSecrets ) > 0 {
2040- addPullSecrets (& obj .Spec .Template .Spec , config .KataManager .ImagePullSecrets )
2041- }
2042-
2043- // set resource limits
2044- if config .KataManager .Resources != nil {
2045- // apply resource limits to all containers
2046- for i := range obj .Spec .Template .Spec .Containers {
2047- obj .Spec .Template .Spec .Containers [i ].Resources .Requests = config .KataManager .Resources .Requests
2048- obj .Spec .Template .Spec .Containers [i ].Resources .Limits = config .KataManager .Resources .Limits
2049- }
2050- }
2051-
2052- // set arguments if specified for mig-manager container
2053- if len (config .KataManager .Args ) > 0 {
2054- obj .Spec .Template .Spec .Containers [0 ].Args = config .KataManager .Args
2055- }
2056-
2057- // mount artifactsDir
2058- artifactsDir := DefaultKataArtifactsDir
2059- if config .KataManager .Config .ArtifactsDir != "" {
2060- artifactsDir = config .KataManager .Config .ArtifactsDir
2061- }
2062-
2063- // set env used by readinessProbe to determine path to kata-manager pid file.
2064- setContainerEnv (& (obj .Spec .Template .Spec .Containers [0 ]), "KATA_ARTIFACTS_DIR" , artifactsDir )
2065-
2066- artifactsVolMount := corev1.VolumeMount {Name : "kata-artifacts" , MountPath : artifactsDir }
2067- obj .Spec .Template .Spec .Containers [0 ].VolumeMounts = append (obj .Spec .Template .Spec .Containers [0 ].VolumeMounts , artifactsVolMount )
2068-
2069- artifactsVol := corev1.Volume {Name : "kata-artifacts" , VolumeSource : corev1.VolumeSource {HostPath : & corev1.HostPathVolumeSource {Path : artifactsDir , Type : ptr .To (corev1 .HostPathDirectoryOrCreate )}}}
2070- obj .Spec .Template .Spec .Volumes = append (obj .Spec .Template .Spec .Volumes , artifactsVol )
2071-
2072- // Compute hash of kata manager config and add an annotation with the value.
2073- // If the kata config changes, a new revision of the daemonset will be
2074- // created and thus the kata-manager pods will restart with the updated config.
2075- hash := utils .GetObjectHash (config .KataManager .Config )
2076-
2077- if obj .Spec .Template .Annotations == nil {
2078- obj .Spec .Template .Annotations = make (map [string ]string )
2079- }
2080- obj .Spec .Template .Annotations [KataManagerAnnotationHashKey ] = hash
2081-
2082- if len (config .KataManager .Env ) > 0 {
2083- for _ , env := range config .KataManager .Env {
2084- setContainerEnv (& (obj .Spec .Template .Spec .Containers [0 ]), env .Name , env .Value )
2085- }
2086- }
2087-
2088- // mount containerd config and socket
2089- // setup mounts for runtime config file
2090- runtime := n .runtime .String ()
2091- // kata manager is the only container in this daemonset
2092- err = transformForRuntime (obj , config , runtime , & obj .Spec .Template .Spec .Containers [0 ])
2093- if err != nil {
2094- return fmt .Errorf ("error transforming kata-manager daemonset : %w" , err )
2095- }
2096-
2097- // set hostNetwork for kata-manager if specified
2098- applyHostNetworkConfig (& obj .Spec .Template .Spec , config .KataManager .HostNetwork )
2099-
2100- return nil
2101- }
2102-
21032025// TransformVFIOManager transforms VFIO-PCI Manager daemonset with required config as per ClusterPolicy
21042026func TransformVFIOManager (obj * appsv1.DaemonSet , config * gpuv1.ClusterPolicySpec , n ClusterPolicyController ) error {
21052027 // update k8s-driver-manager initContainer
@@ -5156,115 +5078,13 @@ func transformRuntimeClass(n ClusterPolicyController, spec nodev1.RuntimeClass)
51565078 return gpuv1 .Ready , nil
51575079}
51585080
5159- func transformKataRuntimeClasses (n ClusterPolicyController ) (gpuv1.State , error ) {
5160- ctx := n .ctx
5161- state := n .idx
5162- config := n .singleton .Spec
5163-
5164- // Get all existing Kata RuntimeClasses
5165- opts := []client.ListOption {& client.MatchingLabels {"nvidia.com/kata-runtime-class" : "true" }}
5166- list := & nodev1.RuntimeClassList {}
5167- err := n .client .List (ctx , list , opts ... )
5168- if err != nil {
5169- n .logger .Info ("Could not get Kata RuntimeClassList" , err )
5170- return gpuv1 .NotReady , fmt .Errorf ("error getting kata RuntimeClassList: %v" , err )
5171- }
5172- n .logger .V (1 ).Info ("Kata RuntimeClasses" , "Number" , len (list .Items ))
5173-
5174- if ! config .KataManager .IsEnabled () {
5175- // Delete all Kata RuntimeClasses
5176- n .logger .Info ("Kata Manager disabled, deleting all Kata RuntimeClasses" )
5177- for _ , rc := range list .Items {
5178- rc := rc
5179- n .logger .V (1 ).Info ("Deleting Kata RuntimeClass" , "Name" , rc .Name )
5180- err := n .client .Delete (ctx , & rc )
5181- if err != nil {
5182- return gpuv1 .NotReady , fmt .Errorf ("error deleting kata RuntimeClass '%s': %v" , rc .Name , err )
5183- }
5184- }
5185- return gpuv1 .Ready , nil
5186- }
5187-
5188- // Get names of desired kata RuntimeClasses
5189- rcNames := make (map [string ]struct {})
5190- for _ , rc := range config .KataManager .Config .RuntimeClasses {
5191- rcNames [rc .Name ] = struct {}{}
5192- }
5193-
5194- // Delete any existing Kata RuntimeClasses that are no longer specified in KataManager configuration
5195- for _ , rc := range list .Items {
5196- if _ , ok := rcNames [rc .Name ]; ! ok {
5197- rc := rc
5198- n .logger .Info ("Deleting Kata RuntimeClass" , "Name" , rc .Name )
5199- err := n .client .Delete (ctx , & rc )
5200- if err != nil {
5201- return gpuv1 .NotReady , fmt .Errorf ("error deleting kata RuntimeClass '%s': %v" , rc .Name , err )
5202- }
5203- }
5204- }
5205-
5206- // Using kata RuntimClass template, create / update RuntimeClass objects specified in KataManager configuration
5207- template := n .resources [state ].RuntimeClasses [0 ]
5208- for _ , rc := range config .KataManager .Config .RuntimeClasses {
5209- logger := n .logger .WithValues ("RuntimeClass" , rc .Name )
5210-
5211- if rc .Name == config .Operator .RuntimeClass {
5212- return gpuv1 .NotReady , fmt .Errorf ("error creating kata runtimeclass '%s' as it conflicts with the runtimeclass used for the gpu-operator operand pods itself" , rc .Name )
5213- }
5214-
5215- obj := nodev1.RuntimeClass {}
5216- obj .Name = rc .Name
5217- obj .Handler = rc .Name
5218- obj .Labels = template .Labels
5219- obj .Scheduling = & nodev1.Scheduling {}
5220- nodeSelector := make (map [string ]string )
5221- for k , v := range template .Scheduling .NodeSelector {
5222- nodeSelector [k ] = v
5223- }
5224- if rc .NodeSelector != nil {
5225- // append user provided selectors to default nodeSelector
5226- for k , v := range rc .NodeSelector {
5227- nodeSelector [k ] = v
5228- }
5229- }
5230- obj .Scheduling .NodeSelector = nodeSelector
5231-
5232- if err := controllerutil .SetControllerReference (n .singleton , & obj , n .scheme ); err != nil {
5233- return gpuv1 .NotReady , err
5234- }
5235-
5236- found := & nodev1.RuntimeClass {}
5237- err := n .client .Get (ctx , types.NamespacedName {Namespace : "" , Name : obj .Name }, found )
5238- if err != nil && apierrors .IsNotFound (err ) {
5239- logger .Info ("Not found, creating..." )
5240- err = n .client .Create (ctx , & obj )
5241- if err != nil {
5242- logger .Info ("Couldn't create" , "Error" , err )
5243- return gpuv1 .NotReady , err
5244- }
5245- continue
5246- } else if err != nil {
5247- return gpuv1 .NotReady , err
5248- }
5249-
5250- logger .Info ("Found Resource, updating..." )
5251- obj .ResourceVersion = found .ResourceVersion
5252-
5253- err = n .client .Update (ctx , & obj )
5254- if err != nil {
5255- logger .Info ("Couldn't update" , "Error" , err )
5256- return gpuv1 .NotReady , err
5257- }
5258- }
5259- return gpuv1 .Ready , nil
5260- }
5261-
52625081func RuntimeClasses (n ClusterPolicyController ) (gpuv1.State , error ) {
52635082 status := gpuv1 .Ready
52645083 state := n .idx
52655084
52665085 if n .stateNames [state ] == "state-kata-manager" {
5267- return transformKataRuntimeClasses (n )
5086+ // Kata Manager is deprecated, no need to process anything
5087+ return gpuv1 .Ready , nil
52685088 }
52695089
52705090 nvidiaRuntimeClasses := n .resources [state ].RuntimeClasses
0 commit comments