Skip to content

Commit f85785a

Browse files
authored
[Backport 26.02] Fix NVMe namespace race condition during concurrent volume creation
1 parent e586ce1 commit f85785a

File tree

2 files changed

+442
-42
lines changed

2 files changed

+442
-42
lines changed

storage_drivers/ontap/ontap_san_nvme.go

Lines changed: 152 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/RoaringBitmap/roaring/v2"
1919

2020
tridentconfig "github.com/netapp/trident/config"
21+
"github.com/netapp/trident/internal/fiji"
2122
. "github.com/netapp/trident/logging"
2223
"github.com/netapp/trident/pkg/capacity"
2324
"github.com/netapp/trident/pkg/collection"
@@ -38,6 +39,11 @@ import (
3839
// string of the form /vol/<flexVolName>/<Namespacename>
3940
var NVMeNamespaceRegExp = regexp.MustCompile(`[^(\/vol\/.+\/.+)?$]`)
4041

42+
var (
43+
beforeNamespaceCreate = fiji.Register("beforeNamespaceCreate", "ontap_san_nvme")
44+
afterNamespaceCreate = fiji.Register("afterNamespaceCreate", "ontap_san_nvme")
45+
)
46+
4147
// NVMeStorageDriver is for NVMe storage provisioning.
4248
type NVMeStorageDriver struct {
4349
initialized bool
@@ -260,37 +266,139 @@ func (d *NVMeStorageDriver) validate(ctx context.Context) error {
260266
return nil
261267
}
262268

263-
// Create a Volume+Namespace with the specified options.
264-
func (d *NVMeStorageDriver) Create(
265-
ctx context.Context, volConfig *storage.VolumeConfig, storagePool storage.Pool, volAttributes map[string]sa.Request,
269+
// cleanupIncompleteNamespace checks for an existing FlexVol and handles three cases:
270+
// - No volume exists: returns nil so Create proceeds normally.
271+
// - Volume + namespace exist with a fully populated volConfig: returns VolumeExistsError.
272+
// - Volume exists but namespace is missing, or volConfig.FileSystem is empty: destroys the
273+
// FlexVol so Create can re-create it with a fully populated volConfig.
274+
func (d *NVMeStorageDriver) cleanupIncompleteNamespace(
275+
ctx context.Context, volConfig *storage.VolumeConfig,
266276
) error {
267277
name := volConfig.InternalName
268278

269279
fields := LogFields{
270-
"method": "Create",
280+
"method": "cleanupIncompleteNamespace",
271281
"type": "NVMeStorageDriver",
272282
"name": name,
273-
"attrs": volAttributes,
274283
}
275-
Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace(">>>> Create")
276-
defer Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace("<<<< Create")
284+
Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace(
285+
">>>> cleanupIncompleteNamespace")
286+
defer Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace(
287+
"<<<< cleanupIncompleteNamespace")
277288

278-
// If the volume already exists, bail out.
279289
volExists, err := d.API.VolumeExists(ctx, name)
280290
if err != nil {
281291
return fmt.Errorf("error checking for existing volume: %v", err)
282292
}
283-
if volExists {
293+
if !volExists {
294+
return nil
295+
}
296+
if volConfig.IsMirrorDestination {
284297
return drivers.NewVolumeExistsError(name)
285298
}
286299

287-
// If volume shall be mirrored, check that the SVM is peered with the other side.
288-
if volConfig.PeerVolumeHandle != "" {
289-
if err = checkSVMPeered(ctx, volConfig, d.API.SVMName(), d.API); err != nil {
290-
return err
300+
nsName := extractNamespaceName(volConfig.InternalID)
301+
nsPath := createNamespacePath(name, nsName)
302+
303+
ns, nsErr := d.API.NVMeNamespaceGetByName(ctx, nsPath)
304+
if nsErr != nil && !errors.IsNotFoundError(nsErr) {
305+
return fmt.Errorf("error checking for existing namespace %s: %v", nsPath, nsErr)
306+
}
307+
308+
// If both volume and namespace exist and volConfig has all critical fields populated,
309+
// this is a fully formed volume from a prior successful create -- report it as existing.
310+
if ns != nil && volConfig.FileSystem != "" {
311+
Logc(ctx).WithFields(LogFields{
312+
"volume": name,
313+
"namespace": nsPath,
314+
"uuid": ns.UUID,
315+
}).Debug("Found existing volume and namespace.")
316+
volConfig.AccessInfo.NVMeNamespaceUUID = ns.UUID
317+
volConfig.InternalID = nsPath
318+
return drivers.NewVolumeExistsError(name)
319+
}
320+
321+
// Either the namespace is missing (partial create) or volConfig is incomplete (e.g. empty
322+
// FileSystem from a fresh CSI retry). In both cases, destroy the FlexVol so Create can
323+
// re-create it with a fully populated volConfig.
324+
reason := "namespace missing"
325+
if ns != nil {
326+
reason = "volConfig.FileSystem is empty"
327+
}
328+
Logc(ctx).WithFields(LogFields{
329+
"volume": name,
330+
"reason": reason,
331+
}).Warning("Cleaning up incomplete volume to allow re-creation.")
332+
// Destroying the FlexVol is safe: ONTAP implicitly removes any contained namespaces,
333+
// and no pod can be using this volume yet since Create has not returned success.
334+
if err = d.API.VolumeDestroy(ctx, name, true, true); err != nil {
335+
return fmt.Errorf("could not clean up incomplete volume %s: %v", name, err)
336+
}
337+
return nil
338+
}
339+
340+
// getNamespaceWithRetry retrieves a namespace by path, retrying with linear backoff to handle
341+
// ONTAP eventual consistency where a namespace may not be visible immediately after creation.
342+
func (d *NVMeStorageDriver) getNamespaceWithRetry(
343+
ctx context.Context, nsPath string,
344+
) (*api.NVMeNamespace, error) {
345+
const maxRetries = 3
346+
var ns *api.NVMeNamespace
347+
var err error
348+
349+
for attempt := 1; attempt <= maxRetries; attempt++ {
350+
ns, err = d.API.NVMeNamespaceGetByName(ctx, nsPath)
351+
if err == nil && ns != nil {
352+
return ns, nil
353+
}
354+
355+
if attempt < maxRetries {
356+
Logc(ctx).WithFields(LogFields{
357+
"namespace": nsPath,
358+
"attempt": attempt,
359+
"error": err,
360+
}).Trace("Namespace not yet visible after creation, retrying.")
361+
362+
select {
363+
case <-time.After(time.Duration(attempt) * time.Second):
364+
case <-ctx.Done():
365+
return nil, fmt.Errorf("context cancelled while waiting for namespace visibility: %v", ctx.Err())
366+
}
367+
} else {
368+
Logc(ctx).WithFields(LogFields{
369+
"namespace": nsPath,
370+
"attempt": attempt,
371+
"error": err,
372+
}).Error("Namespace not visible after all retries.")
291373
}
292374
}
293375

376+
if err != nil {
377+
return nil, fmt.Errorf("error retrieving namespace %s after creation: %v", nsPath, err)
378+
}
379+
return nil, fmt.Errorf("newly created namespace %s not found", nsPath)
380+
}
381+
382+
func (d *NVMeStorageDriver) Create(
383+
ctx context.Context, volConfig *storage.VolumeConfig, storagePool storage.Pool, volAttributes map[string]sa.Request,
384+
) error {
385+
name := volConfig.InternalName
386+
387+
fields := LogFields{
388+
"method": "Create",
389+
"type": "NVMeStorageDriver",
390+
"name": name,
391+
"attrs": volAttributes,
392+
}
393+
Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace(">>>> Create")
394+
defer Logd(ctx, d.Name(), d.Config.DebugTraceFlags["method"]).WithFields(fields).Trace("<<<< Create")
395+
396+
// Check if volume+namespace already exist. Clean up orphaned FlexVols (volume without namespace).
397+
// Returns VolumeExistsError if both are present, nil if creation should proceed.
398+
if err := d.cleanupIncompleteNamespace(ctx, volConfig); err != nil {
399+
return err
400+
}
401+
294402
// Get candidate physical pools.
295403
physicalPools, err := getPoolsForCreate(ctx, volConfig, storagePool, volAttributes, d.physicalPools, d.virtualPools)
296404
if err != nil {
@@ -397,6 +505,13 @@ func (d *NVMeStorageDriver) Create(
397505
volConfig.LUKSEncryption = luksEncryption
398506
volConfig.FileSystem = fstype
399507

508+
// If volume shall be mirrored, check that the SVM is peered with the other side.
509+
if volConfig.PeerVolumeHandle != "" {
510+
if err = checkSVMPeered(ctx, volConfig, d.API.SVMName(), d.API); err != nil {
511+
return err
512+
}
513+
}
514+
400515
Logc(ctx).WithFields(LogFields{
401516
"name": name,
402517
"namespaceSize": namespaceSize,
@@ -466,20 +581,20 @@ func (d *NVMeStorageDriver) Create(
466581
DPVolume: volConfig.IsMirrorDestination,
467582
})
468583
if err != nil {
469-
if api.IsVolumeCreateJobExistsError(err) {
470-
// TODO(sphadnis): If it was decided that iSCSI has a bug here, make similar changes for NVMe.
471-
return nil
472-
}
473-
474-
errMessage := fmt.Sprintf(
475-
"ONTAP-NVMe pool %s/%s; error creating volume %s: %v", storagePool.Name(),
476-
aggregate, name, err,
477-
)
478-
Logc(ctx).Error(errMessage)
479-
createErrors = append(createErrors, errors.New(errMessage))
584+
if !api.IsVolumeCreateJobExistsError(err) {
585+
errMessage := fmt.Sprintf(
586+
"ONTAP-NVMe pool %s/%s; error creating volume %s: %v", storagePool.Name(),
587+
aggregate, name, err,
588+
)
589+
Logc(ctx).Error(errMessage)
590+
createErrors = append(createErrors, errors.New(errMessage))
480591

481-
// Move on to the next pool.
482-
continue
592+
// Move on to the next pool.
593+
continue
594+
}
595+
// Log a message. Proceed to create Namespace, hoping volume would have been created by the time
596+
// we send Namespace create request.
597+
Logc(ctx).WithField("volume", name).Debug("Volume create is already in progress.")
483598
}
484599

485600
osType := "linux"
@@ -503,6 +618,10 @@ func (d *NVMeStorageDriver) Create(
503618
return err
504619
}
505620

621+
if err := beforeNamespaceCreate.Inject(); err != nil {
622+
return err
623+
}
624+
506625
// Create namespace. If this fails, clean up and move on to the next pool.
507626
err = d.API.NVMeNamespaceCreate(
508627
ctx, api.NVMeNamespace{
@@ -531,17 +650,11 @@ func (d *NVMeStorageDriver) Create(
531650
continue
532651
}
533652

534-
// Get the newly created namespace and save the UUID
535-
newNamespace, err := d.API.NVMeNamespaceGetByName(ctx, nsPath)
653+
newNamespace, err := d.getNamespaceWithRetry(ctx, nsPath)
536654
if err != nil {
537-
return fmt.Errorf("failure checking for existence of volume: %v", err)
538-
}
539-
540-
if newNamespace == nil {
541-
return fmt.Errorf("newly created volume %s not found", name)
655+
return err
542656
}
543657

544-
// Store the Namespace UUID and Namespace Path for future operations.
545658
volConfig.AccessInfo.NVMeNamespaceUUID = newNamespace.UUID
546659
volConfig.InternalID = nsPath
547660

@@ -552,6 +665,11 @@ func (d *NVMeStorageDriver) Create(
552665
"internalID": volConfig.InternalID,
553666
}).Debug("Created FlexVol with NVMe namespace.")
554667
}
668+
669+
if err := afterNamespaceCreate.Inject(); err != nil {
670+
return err
671+
}
672+
555673
return nil
556674
}
557675

0 commit comments

Comments
 (0)