@@ -18,6 +18,7 @@ import (
1818 "github.com/RoaringBitmap/roaring/v2"
1919
2020 tridentconfig "github.com/netapp/trident/config"
21+ "github.com/netapp/trident/internal/fiji"
2122 . "github.com/netapp/trident/logging"
2223 "github.com/netapp/trident/pkg/capacity"
2324 "github.com/netapp/trident/pkg/collection"
@@ -38,6 +39,11 @@ import (
3839// string of the form /vol/<flexVolName>/<Namespacename>
3940var NVMeNamespaceRegExp = regexp .MustCompile (`[^(\/vol\/.+\/.+)?$]` )
4041
42+ var (
43+ beforeNamespaceCreate = fiji .Register ("beforeNamespaceCreate" , "ontap_san_nvme" )
44+ afterNamespaceCreate = fiji .Register ("afterNamespaceCreate" , "ontap_san_nvme" )
45+ )
46+
4147// NVMeStorageDriver is for NVMe storage provisioning.
4248type NVMeStorageDriver struct {
4349 initialized bool
@@ -260,37 +266,139 @@ func (d *NVMeStorageDriver) validate(ctx context.Context) error {
260266 return nil
261267}
262268
263- // Create a Volume+Namespace with the specified options.
264- func (d * NVMeStorageDriver ) Create (
265- ctx context.Context , volConfig * storage.VolumeConfig , storagePool storage.Pool , volAttributes map [string ]sa.Request ,
269+ // cleanupIncompleteNamespace checks for an existing FlexVol and handles three cases:
270+ // - No volume exists: returns nil so Create proceeds normally.
271+ // - Volume + namespace exist with a fully populated volConfig: returns VolumeExistsError.
272+ // - Volume exists but namespace is missing, or volConfig.FileSystem is empty: destroys the
273+ // FlexVol so Create can re-create it with a fully populated volConfig.
274+ func (d * NVMeStorageDriver ) cleanupIncompleteNamespace (
275+ ctx context.Context , volConfig * storage.VolumeConfig ,
266276) error {
267277 name := volConfig .InternalName
268278
269279 fields := LogFields {
270- "method" : "Create " ,
280+ "method" : "cleanupIncompleteNamespace " ,
271281 "type" : "NVMeStorageDriver" ,
272282 "name" : name ,
273- "attrs" : volAttributes ,
274283 }
275- Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (">>>> Create" )
276- defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace ("<<<< Create" )
284+ Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (
285+ ">>>> cleanupIncompleteNamespace" )
286+ defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (
287+ "<<<< cleanupIncompleteNamespace" )
277288
278- // If the volume already exists, bail out.
279289 volExists , err := d .API .VolumeExists (ctx , name )
280290 if err != nil {
281291 return fmt .Errorf ("error checking for existing volume: %v" , err )
282292 }
283- if volExists {
293+ if ! volExists {
294+ return nil
295+ }
296+ if volConfig .IsMirrorDestination {
284297 return drivers .NewVolumeExistsError (name )
285298 }
286299
287- // If volume shall be mirrored, check that the SVM is peered with the other side.
288- if volConfig .PeerVolumeHandle != "" {
289- if err = checkSVMPeered (ctx , volConfig , d .API .SVMName (), d .API ); err != nil {
290- return err
300+ nsName := extractNamespaceName (volConfig .InternalID )
301+ nsPath := createNamespacePath (name , nsName )
302+
303+ ns , nsErr := d .API .NVMeNamespaceGetByName (ctx , nsPath )
304+ if nsErr != nil && ! errors .IsNotFoundError (nsErr ) {
305+ return fmt .Errorf ("error checking for existing namespace %s: %v" , nsPath , nsErr )
306+ }
307+
308+ // If both volume and namespace exist and volConfig has all critical fields populated,
309+ // this is a fully formed volume from a prior successful create -- report it as existing.
310+ if ns != nil && volConfig .FileSystem != "" {
311+ Logc (ctx ).WithFields (LogFields {
312+ "volume" : name ,
313+ "namespace" : nsPath ,
314+ "uuid" : ns .UUID ,
315+ }).Debug ("Found existing volume and namespace." )
316+ volConfig .AccessInfo .NVMeNamespaceUUID = ns .UUID
317+ volConfig .InternalID = nsPath
318+ return drivers .NewVolumeExistsError (name )
319+ }
320+
321+ // Either the namespace is missing (partial create) or volConfig is incomplete (e.g. empty
322+ // FileSystem from a fresh CSI retry). In both cases, destroy the FlexVol so Create can
323+ // re-create it with a fully populated volConfig.
324+ reason := "namespace missing"
325+ if ns != nil {
326+ reason = "volConfig.FileSystem is empty"
327+ }
328+ Logc (ctx ).WithFields (LogFields {
329+ "volume" : name ,
330+ "reason" : reason ,
331+ }).Warning ("Cleaning up incomplete volume to allow re-creation." )
332+ // Destroying the FlexVol is safe: ONTAP implicitly removes any contained namespaces,
333+ // and no pod can be using this volume yet since Create has not returned success.
334+ if err = d .API .VolumeDestroy (ctx , name , true , true ); err != nil {
335+ return fmt .Errorf ("could not clean up incomplete volume %s: %v" , name , err )
336+ }
337+ return nil
338+ }
339+
340+ // getNamespaceWithRetry retrieves a namespace by path, retrying with linear backoff to handle
341+ // ONTAP eventual consistency where a namespace may not be visible immediately after creation.
342+ func (d * NVMeStorageDriver ) getNamespaceWithRetry (
343+ ctx context.Context , nsPath string ,
344+ ) (* api.NVMeNamespace , error ) {
345+ const maxRetries = 3
346+ var ns * api.NVMeNamespace
347+ var err error
348+
349+ for attempt := 1 ; attempt <= maxRetries ; attempt ++ {
350+ ns , err = d .API .NVMeNamespaceGetByName (ctx , nsPath )
351+ if err == nil && ns != nil {
352+ return ns , nil
353+ }
354+
355+ if attempt < maxRetries {
356+ Logc (ctx ).WithFields (LogFields {
357+ "namespace" : nsPath ,
358+ "attempt" : attempt ,
359+ "error" : err ,
360+ }).Trace ("Namespace not yet visible after creation, retrying." )
361+
362+ select {
363+ case <- time .After (time .Duration (attempt ) * time .Second ):
364+ case <- ctx .Done ():
365+ return nil , fmt .Errorf ("context cancelled while waiting for namespace visibility: %v" , ctx .Err ())
366+ }
367+ } else {
368+ Logc (ctx ).WithFields (LogFields {
369+ "namespace" : nsPath ,
370+ "attempt" : attempt ,
371+ "error" : err ,
372+ }).Error ("Namespace not visible after all retries." )
291373 }
292374 }
293375
376+ if err != nil {
377+ return nil , fmt .Errorf ("error retrieving namespace %s after creation: %v" , nsPath , err )
378+ }
379+ return nil , fmt .Errorf ("newly created namespace %s not found" , nsPath )
380+ }
381+
382+ func (d * NVMeStorageDriver ) Create (
383+ ctx context.Context , volConfig * storage.VolumeConfig , storagePool storage.Pool , volAttributes map [string ]sa.Request ,
384+ ) error {
385+ name := volConfig .InternalName
386+
387+ fields := LogFields {
388+ "method" : "Create" ,
389+ "type" : "NVMeStorageDriver" ,
390+ "name" : name ,
391+ "attrs" : volAttributes ,
392+ }
393+ Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace (">>>> Create" )
394+ defer Logd (ctx , d .Name (), d .Config .DebugTraceFlags ["method" ]).WithFields (fields ).Trace ("<<<< Create" )
395+
396+ // Check if volume+namespace already exist. Clean up orphaned FlexVols (volume without namespace).
397+ // Returns VolumeExistsError if both are present, nil if creation should proceed.
398+ if err := d .cleanupIncompleteNamespace (ctx , volConfig ); err != nil {
399+ return err
400+ }
401+
294402 // Get candidate physical pools.
295403 physicalPools , err := getPoolsForCreate (ctx , volConfig , storagePool , volAttributes , d .physicalPools , d .virtualPools )
296404 if err != nil {
@@ -397,6 +505,13 @@ func (d *NVMeStorageDriver) Create(
397505 volConfig .LUKSEncryption = luksEncryption
398506 volConfig .FileSystem = fstype
399507
508+ // If volume shall be mirrored, check that the SVM is peered with the other side.
509+ if volConfig .PeerVolumeHandle != "" {
510+ if err = checkSVMPeered (ctx , volConfig , d .API .SVMName (), d .API ); err != nil {
511+ return err
512+ }
513+ }
514+
400515 Logc (ctx ).WithFields (LogFields {
401516 "name" : name ,
402517 "namespaceSize" : namespaceSize ,
@@ -466,20 +581,20 @@ func (d *NVMeStorageDriver) Create(
466581 DPVolume : volConfig .IsMirrorDestination ,
467582 })
468583 if err != nil {
469- if api .IsVolumeCreateJobExistsError (err ) {
470- // TODO(sphadnis): If it was decided that iSCSI has a bug here, make similar changes for NVMe.
471- return nil
472- }
473-
474- errMessage := fmt .Sprintf (
475- "ONTAP-NVMe pool %s/%s; error creating volume %s: %v" , storagePool .Name (),
476- aggregate , name , err ,
477- )
478- Logc (ctx ).Error (errMessage )
479- createErrors = append (createErrors , errors .New (errMessage ))
584+ if ! api .IsVolumeCreateJobExistsError (err ) {
585+ errMessage := fmt .Sprintf (
586+ "ONTAP-NVMe pool %s/%s; error creating volume %s: %v" , storagePool .Name (),
587+ aggregate , name , err ,
588+ )
589+ Logc (ctx ).Error (errMessage )
590+ createErrors = append (createErrors , errors .New (errMessage ))
480591
481- // Move on to the next pool.
482- continue
592+ // Move on to the next pool.
593+ continue
594+ }
595+ // Log a message. Proceed to create Namespace, hoping volume would have been created by the time
596+ // we send Namespace create request.
597+ Logc (ctx ).WithField ("volume" , name ).Debug ("Volume create is already in progress." )
483598 }
484599
485600 osType := "linux"
@@ -503,6 +618,10 @@ func (d *NVMeStorageDriver) Create(
503618 return err
504619 }
505620
621+ if err := beforeNamespaceCreate .Inject (); err != nil {
622+ return err
623+ }
624+
506625 // Create namespace. If this fails, clean up and move on to the next pool.
507626 err = d .API .NVMeNamespaceCreate (
508627 ctx , api.NVMeNamespace {
@@ -531,17 +650,11 @@ func (d *NVMeStorageDriver) Create(
531650 continue
532651 }
533652
534- // Get the newly created namespace and save the UUID
535- newNamespace , err := d .API .NVMeNamespaceGetByName (ctx , nsPath )
653+ newNamespace , err := d .getNamespaceWithRetry (ctx , nsPath )
536654 if err != nil {
537- return fmt .Errorf ("failure checking for existence of volume: %v" , err )
538- }
539-
540- if newNamespace == nil {
541- return fmt .Errorf ("newly created volume %s not found" , name )
655+ return err
542656 }
543657
544- // Store the Namespace UUID and Namespace Path for future operations.
545658 volConfig .AccessInfo .NVMeNamespaceUUID = newNamespace .UUID
546659 volConfig .InternalID = nsPath
547660
@@ -552,6 +665,11 @@ func (d *NVMeStorageDriver) Create(
552665 "internalID" : volConfig .InternalID ,
553666 }).Debug ("Created FlexVol with NVMe namespace." )
554667 }
668+
669+ if err := afterNamespaceCreate .Inject (); err != nil {
670+ return err
671+ }
672+
555673 return nil
556674 }
557675
0 commit comments