@@ -42,6 +42,8 @@ var NVMeNamespaceRegExp = regexp.MustCompile(`[^(\/vol\/.+\/.+)?$]`)
4242var (
4343 beforeNQNRemovalFromSubsystem = fiji .Register ("beforeNQNRemovalFromSubsystem" , "ontap_san_nvme" )
4444 beforeNamespaceUnmapFromSubsystem = fiji .Register ("beforeNamespaceUnmapFromSubsystem" , "ontap_san_nvme" )
45+ beforeNamespaceCreate = fiji .Register ("beforeNamespaceCreate" , "ontap_san_nvme" )
46+ afterNamespaceCreate = fiji .Register ("afterNamespaceCreate" , "ontap_san_nvme" )
4547)
4648
4749// NVMeStorageDriver is for NVMe storage provisioning.
@@ -267,36 +269,138 @@ func (d *NVMeStorageDriver) validate(ctx context.Context) error {
267269}
268270
269271// Create a Volume+Namespace with the specified options.
270- func (d * NVMeStorageDriver ) Create (
271- ctx context.Context , volConfig * storage.VolumeConfig , storagePool storage.Pool , volAttributes map [string ]sa.Request ,
272+ // cleanupIncompleteNamespace checks if a FlexVol exists without its NVMe namespace.
273+ // Returns (true, nil) if both volume and namespace exist (complete).
274+ // Returns (false, nil) if no volume exists or orphaned volume was cleaned up.
275+ // Returns (_, error) on API errors or if cleanup fails.
276+ func (d * NVMeStorageDriver ) cleanupIncompleteNamespace (
277+ ctx context.Context , volConfig * storage.VolumeConfig ,
272278) error {
273279 name := volConfig .InternalName
274280
275281 fields := LogFields {
276- "method" : "Create " ,
282+ "method" : "cleanupIncompleteNamespace " ,
277283 "type" : "NVMeStorageDriver" ,
278284 "name" : name ,
279- "attrs" : volAttributes ,
280285 }
281- Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (">>>> Create" )
282- defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace ("<<<< Create" )
286+ Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (
287+ ">>>> cleanupIncompleteNamespace" )
288+ defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (
289+ "<<<< cleanupIncompleteNamespace" )
283290
284- // If the volume already exists, bail out.
285291 volExists , err := d .API .VolumeExists (ctx , name )
286292 if err != nil {
287293 return fmt .Errorf ("error checking for existing volume: %v" , err )
288294 }
289- if volExists {
295+ if ! volExists {
296+ return nil
297+ }
298+ if volConfig .IsMirrorDestination {
290299 return drivers .NewVolumeExistsError (name )
291300 }
292301
293- // If volume shall be mirrored, check that the SVM is peered with the other side.
294- if volConfig .PeerVolumeHandle != "" {
295- if err = checkSVMPeered (ctx , volConfig , d .API .SVMName (), d .API ); err != nil {
296- return err
302+ nsName := extractNamespaceName (volConfig .InternalID )
303+ nsPath := createNamespacePath (name , nsName )
304+
305+ ns , nsErr := d .API .NVMeNamespaceGetByName (ctx , nsPath )
306+ if nsErr != nil && ! errors .IsNotFoundError (nsErr ) {
307+ return fmt .Errorf ("error checking for existing namespace %s: %v" , nsPath , nsErr )
308+ }
309+
310+ // If both volume and namespace exist and volConfig has all critical fields populated,
311+ // this is a fully formed volume from a prior successful create -- report it as existing.
312+ if ns != nil && volConfig .FileSystem != "" {
313+ Logc (ctx ).WithFields (LogFields {
314+ "volume" : name ,
315+ "namespace" : nsPath ,
316+ "uuid" : ns .UUID ,
317+ }).Debug ("Found existing volume and namespace." )
318+ volConfig .AccessInfo .NVMeNamespaceUUID = ns .UUID
319+ volConfig .InternalID = nsPath
320+ return drivers .NewVolumeExistsError (name )
321+ }
322+
323+ // Either the namespace is missing (partial create) or volConfig is incomplete (e.g. empty
324+ // FileSystem from a fresh CSI retry). In both cases, destroy the FlexVol so Create can
325+ // re-create it with a fully populated volConfig.
326+ reason := "namespace missing"
327+ if ns != nil {
328+ reason = "volConfig.FileSystem is empty"
329+ }
330+ Logc (ctx ).WithFields (LogFields {
331+ "volume" : name ,
332+ "reason" : reason ,
333+ }).Warning ("Cleaning up incomplete volume to allow re-creation." )
334+ // Destroying the FlexVol is safe: ONTAP implicitly removes any contained namespaces,
335+ // and no pod can be using this volume yet since Create has not returned success.
336+ if err = d .API .VolumeDestroy (ctx , name , true , true ); err != nil {
337+ return fmt .Errorf ("could not clean up incomplete volume %s: %v" , name , err )
338+ }
339+ return nil
340+ }
341+
342+ // getNamespaceWithRetry retrieves a namespace by path, retrying with linear backoff to handle
343+ // ONTAP eventual consistency where a namespace may not be visible immediately after creation.
344+ func (d * NVMeStorageDriver ) getNamespaceWithRetry (
345+ ctx context.Context , nsPath string ,
346+ ) (* api.NVMeNamespace , error ) {
347+ const maxRetries = 3
348+ var ns * api.NVMeNamespace
349+ var err error
350+
351+ for attempt := 1 ; attempt <= maxRetries ; attempt ++ {
352+ ns , err = d .API .NVMeNamespaceGetByName (ctx , nsPath )
353+ if err == nil && ns != nil {
354+ return ns , nil
355+ }
356+
357+ if attempt < maxRetries {
358+ Logc (ctx ).WithFields (LogFields {
359+ "namespace" : nsPath ,
360+ "attempt" : attempt ,
361+ "error" : err ,
362+ }).Trace ("Namespace not yet visible after creation, retrying." )
363+
364+ select {
365+ case <- time .After (time .Duration (attempt ) * time .Second ):
366+ case <- ctx .Done ():
367+ return nil , fmt .Errorf ("context cancelled while waiting for namespace visibility: %v" , ctx .Err ())
368+ }
369+ } else {
370+ Logc (ctx ).WithFields (LogFields {
371+ "namespace" : nsPath ,
372+ "attempt" : attempt ,
373+ "error" : err ,
374+ }).Error ("Namespace not visible after all retries." )
297375 }
298376 }
299377
378+ if err != nil {
379+ return nil , fmt .Errorf ("error retrieving namespace %s after creation: %v" , nsPath , err )
380+ }
381+ return nil , fmt .Errorf ("newly created namespace %s not found" , nsPath )
382+ }
383+
384+ func (d * NVMeStorageDriver ) Create (
385+ ctx context.Context , volConfig * storage.VolumeConfig , storagePool storage.Pool , volAttributes map [string ]sa.Request ,
386+ ) error {
387+ name := volConfig .InternalName
388+
389+ fields := LogFields {
390+ "method" : "Create" ,
391+ "type" : "NVMeStorageDriver" ,
392+ "name" : name ,
393+ "attrs" : volAttributes ,
394+ }
395+ Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (">>>> Create" )
396+ defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace ("<<<< Create" )
397+
398+ // Check if volume+namespace already exist. Clean up orphaned FlexVols (volume without namespace).
399+ // Returns VolumeExistsError if both are present, nil if creation should proceed.
400+ if err := d .cleanupIncompleteNamespace (ctx , volConfig ); err != nil {
401+ return err
402+ }
403+
300404 // Get candidate physical pools.
301405 physicalPools , err := getPoolsForCreate (ctx , volConfig , storagePool , volAttributes , d .physicalPools , d .virtualPools )
302406 if err != nil {
@@ -403,6 +507,13 @@ func (d *NVMeStorageDriver) Create(
403507 volConfig .LUKSEncryption = luksEncryption
404508 volConfig .FileSystem = fstype
405509
510+ // If volume shall be mirrored, check that the SVM is peered with the other side.
511+ if volConfig .PeerVolumeHandle != "" {
512+ if err = checkSVMPeered (ctx , volConfig , d .API .SVMName (), d .API ); err != nil {
513+ return err
514+ }
515+ }
516+
406517 Logc (ctx ).WithFields (LogFields {
407518 "name" : name ,
408519 "namespaceSize" : namespaceSize ,
@@ -472,20 +583,20 @@ func (d *NVMeStorageDriver) Create(
472583 DPVolume : volConfig .IsMirrorDestination ,
473584 })
474585 if err != nil {
475- if api .IsVolumeCreateJobExistsError (err ) {
476- // TODO(sphadnis): If it was decided that iSCSI has a bug here, make similar changes for NVMe.
477- return nil
478- }
479-
480- errMessage := fmt .Sprintf (
481- "ONTAP-NVMe pool %s/%s; error creating volume %s: %v" , storagePool .Name (),
482- aggregate , name , err ,
483- )
484- Logc (ctx ).Error (errMessage )
485- createErrors = append (createErrors , errors .New (errMessage ))
586+ if ! api .IsVolumeCreateJobExistsError (err ) {
587+ errMessage := fmt .Sprintf (
588+ "ONTAP-NVMe pool %s/%s; error creating volume %s: %v" , storagePool .Name (),
589+ aggregate , name , err ,
590+ )
591+ Logc (ctx ).Error (errMessage )
592+ createErrors = append (createErrors , errors .New (errMessage ))
486593
487- // Move on to the next pool.
488- continue
594+ // Move on to the next pool.
595+ continue
596+ }
597+ // Log a message. Proceed to create Namespace, hoping volume would have been created by the time
598+ // we send Namespace create request.
599+ Logc (ctx ).WithField ("volume" , name ).Debug ("Volume create is already in progress." )
489600 }
490601
491602 osType := "linux"
@@ -509,6 +620,10 @@ func (d *NVMeStorageDriver) Create(
509620 return err
510621 }
511622
623+ if err := beforeNamespaceCreate .Inject (); err != nil {
624+ return err
625+ }
626+
512627 // Create namespace. If this fails, clean up and move on to the next pool.
513628 err = d .API .NVMeNamespaceCreate (
514629 ctx , api.NVMeNamespace {
@@ -537,17 +652,11 @@ func (d *NVMeStorageDriver) Create(
537652 continue
538653 }
539654
540- // Get the newly created namespace and save the UUID
541- newNamespace , err := d .API .NVMeNamespaceGetByName (ctx , nsPath )
655+ newNamespace , err := d .getNamespaceWithRetry (ctx , nsPath )
542656 if err != nil {
543- return fmt .Errorf ("failure checking for existence of volume: %v" , err )
544- }
545-
546- if newNamespace == nil {
547- return fmt .Errorf ("newly created volume %s not found" , name )
657+ return err
548658 }
549659
550- // Store the Namespace UUID and Namespace Path for future operations.
551660 volConfig .AccessInfo .NVMeNamespaceUUID = newNamespace .UUID
552661 volConfig .InternalID = nsPath
553662
@@ -558,6 +667,11 @@ func (d *NVMeStorageDriver) Create(
558667 "internalID" : volConfig .InternalID ,
559668 }).Debug ("Created FlexVol with NVMe namespace." )
560669 }
670+
671+ if err := afterNamespaceCreate .Inject (); err != nil {
672+ return err
673+ }
674+
561675 return nil
562676 }
563677
0 commit comments