diff --git a/frontend/csi/controller_helpers/kubernetes/config.go b/frontend/csi/controller_helpers/kubernetes/config.go index ba269353f..c039e3420 100644 --- a/frontend/csi/controller_helpers/kubernetes/config.go +++ b/frontend/csi/controller_helpers/kubernetes/config.go @@ -81,6 +81,18 @@ const ( AnnTieringPolicy = prefix + "/tieringPolicy" AnnTieringMinimumCoolingDays = prefix + "/tieringMinimumCoolingDays" + // Shift/MTV StorageClass annotations + AnnShiftStorageClassType = "shift.netapp.io/storage-class-type" + AnnShiftTridentBackendUUID = "shift.netapp.io/trident-backend-uuid" + AnnShiftEndpoint = "shift.netapp.io/endpoint" + + // MTV PVC annotations + AnnMTVDiskPath = "mtv.redhat.com/disk-path" + AnnMTVNFSServer = "mtv.redhat.com/nfs-server" + AnnMTVNFSPath = "mtv.redhat.com/nfs-path" + AnnMTVVMID = "mtv.redhat.com/vm-id" + AnnMTVVMUUID = "mtv.redhat.com/vm-uuid" + // Pod remediation policy annotation and values AnnPodRemediationPolicyAnnotation = prefix + "/podRemediationPolicy" PodRemediationPolicyDelete = "delete" diff --git a/frontend/csi/controller_helpers/kubernetes/helper.go b/frontend/csi/controller_helpers/kubernetes/helper.go index 13fda5a28..ac293d969 100644 --- a/frontend/csi/controller_helpers/kubernetes/helper.go +++ b/frontend/csi/controller_helpers/kubernetes/helper.go @@ -4,6 +4,7 @@ package kubernetes import ( "context" + "encoding/json" "fmt" "strconv" "strings" @@ -99,6 +100,32 @@ func (h *helper) GetVolumeConfig( volumeConfig := getVolumeConfig(ctx, pvc, pvName, pvcSize, annotations, sc, requisiteTopology, preferredTopology) + // Detect Shift StorageClass and populate ShiftConfig with ONTAP credentials + MTV metadata + if scAnnotations[AnnShiftStorageClassType] == "shift" { + Logc(ctx).WithFields(LogFields{ + "storageClass": sc.Name, + "pvc": pvc.Name, + }).Info("Shift StorageClass detected, resolving ONTAP credentials for Shift integration.") + + shiftCfg, shiftErr := h.buildShiftConfig(ctx, pvc, scAnnotations) + if shiftErr != nil { + return nil, fmt.Errorf("failed to build Shift config for PVC %s: %v", pvc.Name, shiftErr) + } + volumeConfig.Shift = shiftCfg + + Logc(ctx).WithFields(LogFields{ + "endpoint": shiftCfg.Endpoint, + "backendUUID": shiftCfg.BackendUUID, + "managementLIF": shiftCfg.ManagementLIF, + "svm": shiftCfg.SVM, + "diskPath": shiftCfg.DiskPath, + "nfsServer": shiftCfg.NFSServer, + "nfsPath": shiftCfg.NFSPath, + "vmID": shiftCfg.VMID, + "vmUUID": shiftCfg.VMUUID, + }).Info("Shift config populated on VolumeConfig.") + } + // Update the volume config with the Access Control only if the storage class nasType parameter is SMB if sc.Parameters[SCParameterNASType] == NASTypeSMB { err = h.updateVolumeConfigWithSecureSMBAccessControl(ctx, volumeConfig, sc, annotations, scAnnotations, secrets) @@ -965,6 +992,143 @@ func processSCAnnotations(sc *k8sstoragev1.StorageClass) map[string]string { return annotations } +// buildShiftConfig resolves all information needed for the Shift integration: +// ONTAP credentials from the TBC secret, MTV metadata from PVC annotations, +// and the Shift endpoint from StorageClass annotations. +func (h *helper) buildShiftConfig( + ctx context.Context, + pvc *v1.PersistentVolumeClaim, + scAnnotations map[string]string, +) (*storage.ShiftConfig, error) { + endpoint := scAnnotations[AnnShiftEndpoint] + if endpoint == "" { + return nil, fmt.Errorf("StorageClass missing %s annotation", AnnShiftEndpoint) + } + backendUUID := scAnnotations[AnnShiftTridentBackendUUID] + if backendUUID == "" { + return nil, fmt.Errorf("StorageClass missing %s annotation", AnnShiftTridentBackendUUID) + } + + pvcAnn := pvc.Annotations + diskPath := pvcAnn[AnnMTVDiskPath] + nfsServer := pvcAnn[AnnMTVNFSServer] + nfsPath := pvcAnn[AnnMTVNFSPath] + + if diskPath == "" || nfsServer == "" || nfsPath == "" { + return nil, fmt.Errorf("PVC %s missing required MTV annotations (disk-path, nfs-server, nfs-path)", pvc.Name) + } + + // Resolve ONTAP connection info from the backend + mgmtLIF, svm, username, password, err := h.resolveOntapCredentials(ctx, backendUUID) + if err != nil { + return nil, fmt.Errorf("failed to resolve ONTAP credentials for backend %s: %v", backendUUID, err) + } + + return &storage.ShiftConfig{ + Endpoint: endpoint, + BackendUUID: backendUUID, + ManagementLIF: mgmtLIF, + SVM: svm, + Username: username, + Password: password, + DiskPath: diskPath, + NFSServer: nfsServer, + NFSPath: nfsPath, + VMID: pvcAnn[AnnMTVVMID], + VMUUID: pvcAnn[AnnMTVVMUUID], + }, nil +} + +// resolveOntapCredentials fetches the ManagementLIF and SVM from the BackendExternal, +// then reads the ONTAP username/password from the Kubernetes Secret referenced by the TBC. +func (h *helper) resolveOntapCredentials( + ctx context.Context, backendUUID string, +) (mgmtLIF, svm, username, password string, err error) { + + backendExt, err := h.orchestrator.GetBackendByBackendUUID(ctx, backendUUID) + if err != nil { + return "", "", "", "", fmt.Errorf("backend %s not found: %v", backendUUID, err) + } + + Logc(ctx).WithFields(LogFields{ + "backendName": backendExt.Name, + "backendUUID": backendExt.BackendUUID, + "configRef": backendExt.ConfigRef, + }).Debug("Shift: resolved backend for credential lookup.") + + // Extract non-sensitive fields (ManagementLIF, SVM) from the external config. + // The external config is a map[string]interface{} when JSON-round-tripped. + configJSON, jsonErr := json.Marshal(backendExt.Config) + if jsonErr != nil { + return "", "", "", "", fmt.Errorf("cannot marshal backend config: %v", jsonErr) + } + var parsed map[string]interface{} + if jsonErr = json.Unmarshal(configJSON, &parsed); jsonErr != nil { + return "", "", "", "", fmt.Errorf("cannot unmarshal backend config: %v", jsonErr) + } + + if v, ok := parsed["managementLIF"].(string); ok { + mgmtLIF = v + } + if v, ok := parsed["svm"].(string); ok { + svm = v + } + + Logc(ctx).WithFields(LogFields{ + "managementLIF": mgmtLIF, + "svm": svm, + }).Debug("Shift: extracted ONTAP connection info from backend config.") + + // Look up the TBC CRD to find the credentials secret reference + configRef := backendExt.ConfigRef + if configRef == "" { + return "", "", "", "", fmt.Errorf("backend %s has no configRef (TBC)", backendUUID) + } + + tbc, tbcErr := h.tridentClient.TridentV1().TridentBackendConfigs(h.namespace).Get(ctx, configRef, getOpts) + if tbcErr != nil { + return "", "", "", "", fmt.Errorf("failed to get TBC %s: %v", configRef, tbcErr) + } + + var tbcSpec map[string]interface{} + if jsonErr = json.Unmarshal(tbc.Spec.Raw, &tbcSpec); jsonErr != nil { + return "", "", "", "", fmt.Errorf("failed to parse TBC spec: %v", jsonErr) + } + + credsRaw, ok := tbcSpec["credentials"] + if !ok { + return "", "", "", "", fmt.Errorf("TBC %s has no credentials field", configRef) + } + credsMap, ok := credsRaw.(map[string]interface{}) + if !ok { + return "", "", "", "", fmt.Errorf("TBC %s credentials field is not a map", configRef) + } + + secretName, _ := credsMap["name"].(string) + if secretName == "" { + return "", "", "", "", fmt.Errorf("TBC %s credentials missing secret name", configRef) + } + + Logc(ctx).WithFields(LogFields{ + "secretName": secretName, + "namespace": h.namespace, + }).Debug("Shift: reading credentials secret.") + + secret, secretErr := h.kubeClient.CoreV1().Secrets(h.namespace).Get(ctx, secretName, getOpts) + if secretErr != nil { + return "", "", "", "", fmt.Errorf("failed to read secret %s/%s: %v", h.namespace, secretName, secretErr) + } + + username = string(secret.Data["username"]) + password = string(secret.Data["password"]) + if username == "" || password == "" { + return "", "", "", "", fmt.Errorf("secret %s missing username or password", secretName) + } + + Logc(ctx).Debug("Shift: successfully resolved ONTAP credentials from TBC secret.") + return mgmtLIF, svm, username, password, nil +} + // getSMBShareAccessControlFromPVCAnnotation parses the smbShareAccessControl annotation and updates the smbShareACL map func getSMBShareAccessControlFromPVCAnnotation(smbShareAccessControlAnn string) (map[string]string, error) { // Structure to hold the parsed smbShareAccessControlAnnotation diff --git a/frontend/csi/controller_server.go b/frontend/csi/controller_server.go index 42a65e542..4d73767eb 100644 --- a/frontend/csi/controller_server.go +++ b/frontend/csi/controller_server.go @@ -20,6 +20,7 @@ import ( tridentconfig "github.com/netapp/trident/config" controllerhelpers "github.com/netapp/trident/frontend/csi/controller_helpers" + "github.com/netapp/trident/frontend/csi/shift" . "github.com/netapp/trident/logging" "github.com/netapp/trident/pkg/capacity" "github.com/netapp/trident/pkg/collection" @@ -239,6 +240,76 @@ func (p *Plugin) CreateVolume( return nil, p.getCSIErrorForOrchestratorError(err) } + // --- Shift integration: intercept before clone/import/create decision --- + // Only triggered when StorageClass has annotation shift.netapp.io/storage-class-type: "shift". + // Normal PVCs (without that annotation) will have volConfig.Shift == nil and skip this block entirely. + if volConfig.Shift != nil { + Logc(ctx).WithFields(LogFields{ + "pvcName": volConfig.RequestName, + "pvcNamespace": volConfig.Namespace, + "storageClass": volConfig.StorageClass, + "endpoint": volConfig.Shift.Endpoint, + "backendUUID": volConfig.Shift.BackendUUID, + "managementLIF": volConfig.Shift.ManagementLIF, + "svm": volConfig.Shift.SVM, + "hasUsername": volConfig.Shift.Username != "", + "hasPassword": volConfig.Shift.Password != "", + "diskPath": volConfig.Shift.DiskPath, + "nfsServer": volConfig.Shift.NFSServer, + "nfsPath": volConfig.Shift.NFSPath, + "vmID": volConfig.Shift.VMID, + "vmUUID": volConfig.Shift.VMUUID, + }).Info("Shift: PVC targets a Shift StorageClass -- entering Shift flow.") + + shiftResp, shiftErr := p.shiftClient.InvokeShiftJob(ctx, volConfig.Shift, req.Name, volConfig.Namespace) + if shiftErr != nil { + msg := fmt.Sprintf("Shift API call failed for PVC %s: %v", req.Name, shiftErr) + Logc(ctx).Error(msg) + p.controllerHelper.RecordVolumeEvent(ctx, req.Name, controllerhelpers.EventTypeWarning, + "ShiftJobFailed", msg) + return nil, status.Error(codes.Internal, msg) + } + + switch shiftResp.Status { + case shift.JobStatusSuccess: + Logc(ctx).WithFields(LogFields{ + "volumeName": shiftResp.VolumeName, + "jobID": shiftResp.JobID, + }).Info("Shift: job succeeded, converting to volume import.") + + volConfig.ImportOriginalName = shiftResp.VolumeName + volConfig.ImportBackendUUID = volConfig.Shift.BackendUUID + volConfig.ImportNotManaged = false + + p.controllerHelper.RecordVolumeEvent(ctx, req.Name, controllerhelpers.EventTypeNormal, + "ShiftJobSucceeded", + fmt.Sprintf("Shift job completed, importing volume %s", shiftResp.VolumeName)) + + case shift.JobStatusRunning: + Logc(ctx).WithFields(LogFields{ + "jobID": shiftResp.JobID, + }).Info("Shift: job still running, returning retryable error.") + + p.controllerHelper.RecordVolumeEvent(ctx, req.Name, controllerhelpers.EventTypeNormal, + "ShiftJobRunning", + fmt.Sprintf("Shift job %s still running, will retry", shiftResp.JobID)) + return nil, status.Errorf(codes.DeadlineExceeded, + "shift job %s still running for PVC %s, will retry", shiftResp.JobID, req.Name) + + case shift.JobStatusFailed: + msg := fmt.Sprintf("Shift job failed for PVC %s: %s", req.Name, shiftResp.Message) + Logc(ctx).Error(msg) + p.controllerHelper.RecordVolumeEvent(ctx, req.Name, controllerhelpers.EventTypeWarning, + "ShiftJobFailed", msg) + return nil, status.Error(codes.Internal, msg) + + default: + msg := fmt.Sprintf("Shift returned unknown status %q for PVC %s", shiftResp.Status, req.Name) + Logc(ctx).Error(msg) + return nil, status.Error(codes.Internal, msg) + } + } + // Check if CSI asked for a clone (overrides trident.netapp.io/cloneFromPVC PVC annotation, if present) if req.VolumeContentSource != nil { switch contentSource := req.VolumeContentSource.Type.(type) { diff --git a/frontend/csi/plugin.go b/frontend/csi/plugin.go index fba9c092e..96b3620f1 100644 --- a/frontend/csi/plugin.go +++ b/frontend/csi/plugin.go @@ -20,6 +20,7 @@ import ( controllerAPI "github.com/netapp/trident/frontend/csi/controller_api" controllerhelpers "github.com/netapp/trident/frontend/csi/controller_helpers" nodehelpers "github.com/netapp/trident/frontend/csi/node_helpers" + "github.com/netapp/trident/frontend/csi/shift" . "github.com/netapp/trident/logging" "github.com/netapp/trident/utils/devices" "github.com/netapp/trident/utils/errors" @@ -60,6 +61,7 @@ type Plugin struct { restClient controllerAPI.TridentController controllerHelper controllerhelpers.ControllerHelper nodeHelper nodehelpers.NodeHelper + shiftClient shift.Client aesKey []byte @@ -126,6 +128,7 @@ func NewControllerPlugin( command: execCmd.NewCommand(), osutils: osutils.New(), activatedChan: make(chan struct{}, 1), + shiftClient: shift.NewClient(), } var err error @@ -336,6 +339,7 @@ func NewAllInOnePlugin( command: execCmd.NewCommand(), osutils: osutils.New(), activatedChan: make(chan struct{}, 1), + shiftClient: shift.NewClient(), } port := "34571" diff --git a/frontend/csi/shift/client.go b/frontend/csi/shift/client.go new file mode 100644 index 000000000..d1f4afb43 --- /dev/null +++ b/frontend/csi/shift/client.go @@ -0,0 +1,188 @@ +// Copyright 2025 NetApp, Inc. All Rights Reserved. + +package shift + +import ( + "context" + + . "github.com/netapp/trident/logging" + "github.com/netapp/trident/storage" +) + +// JobStatus represents the status of a Shift job. +type JobStatus string + +const ( + JobStatusSuccess JobStatus = "success" + JobStatusRunning JobStatus = "running" + JobStatusFailed JobStatus = "failed" +) + +// Request is the payload that will be sent to the Shift REST endpoint. +type Request struct { + PVCName string `json:"pvcName"` + PVCNamespace string `json:"pvcNamespace"` + BackendUUID string `json:"backendUUID"` + ManagementLIF string `json:"managementLIF"` + SVM string `json:"svm"` + Username string `json:"username"` + Password string `json:"password"` + DiskPath string `json:"diskPath"` + NFSServer string `json:"nfsServer"` + NFSPath string `json:"nfsPath"` + VMID string `json:"vmId"` + VMUUID string `json:"vmUuid"` +} + +// Response is the payload returned by the Shift REST endpoint. +type Response struct { + Status JobStatus `json:"status"` + VolumeName string `json:"volumeName"` + Message string `json:"message"` + JobID string `json:"jobId"` +} + +// Client is the interface for invoking Shift jobs. +type Client interface { + InvokeShiftJob(ctx context.Context, shiftCfg *storage.ShiftConfig, pvcName, pvcNamespace string) (*Response, error) +} + +// dryRunClient is a log-only stub used for testing the integration wiring. +// It logs every field that would be sent to the Shift endpoint and returns +// a "running" status so the PVC stays Pending and Kubernetes retries. +// TODO: Replace with real httpClient once the Shift endpoint is available. +type dryRunClient struct{} + +// NewClient returns a dry-run Shift client that only logs. +// Swap this to newHTTPClient() when ready to call the real endpoint. +func NewClient() Client { + return &dryRunClient{} +} + +func (c *dryRunClient) InvokeShiftJob( + ctx context.Context, shiftCfg *storage.ShiftConfig, pvcName, pvcNamespace string, +) (*Response, error) { + + Logc(ctx).Info("========== SHIFT DRY-RUN: START ==========") + Logc(ctx).WithFields(LogFields{ + "pvcName": pvcName, + "pvcNamespace": pvcNamespace, + }).Info("SHIFT DRY-RUN: PVC identification") + + Logc(ctx).WithFields(LogFields{ + "endpoint": shiftCfg.Endpoint, + "backendUUID": shiftCfg.BackendUUID, + "managementLIF": shiftCfg.ManagementLIF, + "svm": shiftCfg.SVM, + "hasUsername": shiftCfg.Username != "", + "hasPassword": shiftCfg.Password != "", + }).Info("SHIFT DRY-RUN: ONTAP credentials (from TBC secret)") + + Logc(ctx).WithFields(LogFields{ + "diskPath": shiftCfg.DiskPath, + "nfsServer": shiftCfg.NFSServer, + "nfsPath": shiftCfg.NFSPath, + "vmID": shiftCfg.VMID, + "vmUUID": shiftCfg.VMUUID, + }).Info("SHIFT DRY-RUN: MTV VM metadata (from PVC annotations)") + + Logc(ctx).Info("SHIFT DRY-RUN: Would POST to endpoint: " + shiftCfg.Endpoint) + Logc(ctx).Info("SHIFT DRY-RUN: Returning 'running' status so PVC stays Pending (dry-run mode).") + Logc(ctx).Info("========== SHIFT DRY-RUN: END ==========") + + return &Response{ + Status: JobStatusRunning, + JobID: "dry-run-job-001", + Message: "dry-run mode: Shift endpoint not called", + }, nil +} + +// TODO: Uncomment and wire this in when the real Shift endpoint is available. +// +// import ( +// "bytes" +// "crypto/tls" +// "encoding/json" +// "fmt" +// "io" +// "net/http" +// "time" +// ) +// +// type httpClient struct { +// client *http.Client +// } +// +// func newHTTPClient() Client { +// return &httpClient{ +// client: &http.Client{ +// Timeout: 30 * time.Second, +// Transport: &http.Transport{ +// TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, +// }, +// }, +// } +// } +// +// func (c *httpClient) InvokeShiftJob( +// ctx context.Context, shiftCfg *storage.ShiftConfig, pvcName, pvcNamespace string, +// ) (*Response, error) { +// reqBody := &Request{ +// PVCName: pvcName, +// PVCNamespace: pvcNamespace, +// BackendUUID: shiftCfg.BackendUUID, +// ManagementLIF: shiftCfg.ManagementLIF, +// SVM: shiftCfg.SVM, +// Username: shiftCfg.Username, +// Password: shiftCfg.Password, +// DiskPath: shiftCfg.DiskPath, +// NFSServer: shiftCfg.NFSServer, +// NFSPath: shiftCfg.NFSPath, +// VMID: shiftCfg.VMID, +// VMUUID: shiftCfg.VMUUID, +// } +// +// payload, err := json.Marshal(reqBody) +// if err != nil { +// return nil, fmt.Errorf("shift: failed to marshal request: %v", err) +// } +// +// Logc(ctx).WithFields(LogFields{ +// "endpoint": shiftCfg.Endpoint, +// "pvcName": pvcName, +// }).Info("Shift: invoking Shift REST endpoint.") +// +// httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, shiftCfg.Endpoint, bytes.NewReader(payload)) +// if err != nil { +// return nil, fmt.Errorf("shift: failed to create HTTP request: %v", err) +// } +// httpReq.Header.Set("Content-Type", "application/json") +// +// httpResp, err := c.client.Do(httpReq) +// if err != nil { +// return nil, fmt.Errorf("shift: HTTP request failed: %v", err) +// } +// defer httpResp.Body.Close() +// +// body, err := io.ReadAll(httpResp.Body) +// if err != nil { +// return nil, fmt.Errorf("shift: failed to read response body: %v", err) +// } +// +// if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { +// return nil, fmt.Errorf("shift: endpoint returned HTTP %d: %s", httpResp.StatusCode, string(body)) +// } +// +// var resp Response +// if err = json.Unmarshal(body, &resp); err != nil { +// return nil, fmt.Errorf("shift: failed to unmarshal response: %v", err) +// } +// +// Logc(ctx).WithFields(LogFields{ +// "jobStatus": resp.Status, +// "volumeName": resp.VolumeName, +// "jobID": resp.JobID, +// }).Info("Shift: parsed Shift response.") +// +// return &resp, nil +// } diff --git a/storage/volume.go b/storage/volume.go index f02d64080..1a4f0c63b 100644 --- a/storage/volume.go +++ b/storage/volume.go @@ -14,6 +14,22 @@ import ( "github.com/netapp/trident/utils/models" ) +// ShiftConfig holds all metadata needed for the NetApp Shift integration. +// Fields are transient (json:"-") so they are never persisted to CRDs. +type ShiftConfig struct { + Endpoint string `json:"-"` + BackendUUID string `json:"-"` + ManagementLIF string `json:"-"` + SVM string `json:"-"` + Username string `json:"-"` + Password string `json:"-"` + DiskPath string `json:"-"` + NFSServer string `json:"-"` + NFSPath string `json:"-"` + VMID string `json:"-"` + VMUUID string `json:"-"` +} + type VolumeConfig struct { Version string `json:"version"` Name string `json:"name"` @@ -80,6 +96,8 @@ type VolumeConfig struct { // RequestedAutogrowPolicy stores the autogrow policy on volume // This IS persisted so we can recompute the effective policy after restart RequestedAutogrowPolicy string `json:"requestedAutogrowPolicy,omitempty"` + // Shift holds transient shift-integration metadata; never persisted. + Shift *ShiftConfig `json:"-"` } type VolumeCreatingConfig struct {