@@ -113,6 +113,11 @@ func (r *DRAReconciler) Reconcile(ctx context.Context, mod *kmmv1beta1.Module) (
113113 return res , fmt .Errorf ("could not handle DRA: %v" , err )
114114 }
115115
116+ err = r .reconHelperAPI .garbageCollectDRADaemonSets (ctx , mod , existingDRADS )
117+ if err != nil {
118+ return res , fmt .Errorf ("failed to run DRA garbage collection: %v" , err )
119+ }
120+
116121 err = r .reconHelperAPI .handleDeviceClasses (ctx , mod , existingDCs )
117122 if err != nil {
118123 return res , fmt .Errorf ("could not handle DeviceClasses: %v" , err )
@@ -133,6 +138,7 @@ func (r *DRAReconciler) Reconcile(ctx context.Context, mod *kmmv1beta1.Module) (
133138type draReconcilerHelperAPI interface {
134139 getModuleDRADaemonSets (ctx context.Context , name , namespace string ) ([]appsv1.DaemonSet , error )
135140 handleDRA (ctx context.Context , mod * kmmv1beta1.Module , existingDRADS []appsv1.DaemonSet ) error
141+ garbageCollectDRADaemonSets (ctx context.Context , mod * kmmv1beta1.Module , existingDS []appsv1.DaemonSet ) error
136142 deleteDRAResources (ctx context.Context , moduleName , moduleNamespace string ) error
137143 moduleUpdateDRAStatus (ctx context.Context , mod * kmmv1beta1.Module , existingDRADS []appsv1.DaemonSet ) error
138144 clearDRAStatus (ctx context.Context , mod * kmmv1beta1.Module ) error
@@ -181,11 +187,9 @@ func (drh *draReconcilerHelper) handleDRA(ctx context.Context, mod *kmmv1beta1.M
181187
182188 logger := log .FromContext (ctx )
183189
184- var ds * appsv1.DaemonSet
185- if len (existingDRADS ) > 0 {
186- ds = & existingDRADS [0 ]
187- } else {
188- logger .Info ("creating new DRA DaemonSet" )
190+ ds , version := getExistingDRADSFromVersion (existingDRADS , mod .Namespace , mod .Name , mod .Spec .ModuleLoader )
191+ if ds == nil {
192+ logger .Info ("creating new DRA DaemonSet" , "version" , version )
189193 ds = & appsv1.DaemonSet {
190194 ObjectMeta : metav1.ObjectMeta {Namespace : mod .Namespace , GenerateName : mod .Name + "-dra-" },
191195 }
@@ -202,6 +206,51 @@ func (drh *draReconcilerHelper) handleDRA(ctx context.Context, mod *kmmv1beta1.M
202206 return err
203207}
204208
209+ func (drh * draReconcilerHelper ) garbageCollectDRADaemonSets (ctx context.Context , mod * kmmv1beta1.Module , existingDS []appsv1.DaemonSet ) error {
210+ if mod .Spec .ModuleLoader == nil {
211+ return nil
212+ }
213+
214+ logger := log .FromContext (ctx )
215+ deleted := make ([]string , 0 )
216+ for _ , ds := range existingDS {
217+ if isOlderVersionUnusedDRADaemonSet (& ds , mod .Namespace , mod .Spec .ModuleLoader .Container .Version ) {
218+ deleted = append (deleted , ds .Name )
219+ if err := drh .client .Delete (ctx , & ds ); err != nil {
220+ return fmt .Errorf ("could not delete DRA DaemonSet %s: %v" , ds .Name , err )
221+ }
222+ }
223+ }
224+
225+ logger .Info ("garbage-collected DRA DaemonSets" , "names" , deleted )
226+ return nil
227+ }
228+
229+ func getExistingDRADSFromVersion (existingDS []appsv1.DaemonSet ,
230+ moduleNamespace string ,
231+ moduleName string ,
232+ moduleLoader * kmmv1beta1.ModuleLoaderSpec ) (* appsv1.DaemonSet , string ) {
233+ version := ""
234+ if moduleLoader != nil {
235+ version = moduleLoader .Container .Version
236+ }
237+
238+ versionLabel := utils .GetSchedulePluginVersionLabelName (moduleNamespace , moduleName )
239+ for _ , ds := range existingDS {
240+ dsModuleVersion := ds .GetLabels ()[versionLabel ]
241+ if dsModuleVersion == version {
242+ return & ds , version
243+ }
244+ }
245+ return nil , version
246+ }
247+
248+ func isOlderVersionUnusedDRADaemonSet (ds * appsv1.DaemonSet , moduleNamespace , moduleVersion string ) bool {
249+ moduleName := ds .Labels [constants .ModuleNameLabel ]
250+ versionLabel := utils .GetSchedulePluginVersionLabelName (moduleNamespace , moduleName )
251+ return ds .Labels [versionLabel ] != moduleVersion && ds .Status .DesiredNumberScheduled == 0
252+ }
253+
205254// deleteDRAResources deletes all DRA-owned DaemonSets and DeviceClasses using label-based bulk deletion.
206255func (drh * draReconcilerHelper ) deleteDRAResources (ctx context.Context , moduleName , moduleNamespace string ) error {
207256 var errs []error
@@ -403,6 +452,12 @@ func (dsci *draDaemonSetCreatorImpl) setDRAAsDesired(
403452 utils .GetKernelModuleReadyNodeLabel (mod .Namespace , mod .Name ): "" ,
404453 }
405454
455+ if mod .Spec .ModuleLoader != nil && mod .Spec .ModuleLoader .Container .Version != "" {
456+ versionLabel := utils .GetSchedulePluginVersionLabelName (mod .Namespace , mod .Name )
457+ standardLabels [versionLabel ] = mod .Spec .ModuleLoader .Container .Version
458+ nodeSelector [versionLabel ] = mod .Spec .ModuleLoader .Container .Version
459+ }
460+
406461 ds .SetLabels (
407462 overrideLabels (ds .GetLabels (), standardLabels ),
408463 )
0 commit comments