diff --git a/pkg/provider/azure/action/rhel-ai/rhelai.go b/pkg/provider/azure/action/rhel-ai/rhelai.go index 3d2cd8e59..edc462347 100644 --- a/pkg/provider/azure/action/rhel-ai/rhelai.go +++ b/pkg/provider/azure/action/rhel-ai/rhelai.go @@ -47,6 +47,11 @@ func Create(mCtxArgs *maptContext.ContextArgs, args *apiRHELAI.RHELAIArgs) (err Spot: args.Spot, ImageRef: &data.ImageReference{ SharedImageID: sharedImageID, + // Belt-and-suspenders: set SCSI explicitly so Azure never infers a + // conflicting default. resolveImageRef will also derive this from the + // gallery image's Features, but the static value protects against API + // failures or future images with multiple supported types. + DiskControllerType: "SCSI", }, Username: username, ReadinessCommand: command.CommandPing} diff --git a/pkg/provider/azure/data/compute-request.go b/pkg/provider/azure/data/compute-request.go index 0b50e61a9..bd7aec7df 100644 --- a/pkg/provider/azure/data/compute-request.go +++ b/pkg/provider/azure/data/compute-request.go @@ -33,7 +33,10 @@ func (c *ComputeSelector) Select(ctx context.Context, args *cr.ComputeRequestArg return getAzureVMSKUs(ctx, args) } -func FilterComputeSizesByLocation(ctx context.Context, location *string, computeSizes []string) ([]string, error) { +// FilterComputeSizesByDiskControllerType returns the subset of computeSizes that are +// available in location AND support requiredType. Sizes without a DiskControllerTypes +// capability are assumed to support only SCSI (Azure historical default). +func FilterComputeSizesByDiskControllerType(ctx context.Context, location *string, computeSizes []string, requiredType string) ([]string, error) { creds, subscriptionID, err := getCredentials() if err != nil { return nil, err @@ -43,27 +46,66 @@ func FilterComputeSizesByLocation(ctx context.Context, location *string, compute return nil, err } pager := client.NewListPager(nil) - supportedSizes := []string{} + supported := []string{} for pager.More() { page, err := pager.NextPage(ctx) if err != nil { return nil, err } for _, sku := range page.Value { - if sku.ResourceType != nil && - *sku.ResourceType == string(RTVirtualMachines) { - if sku.Name != nil && slices.Contains(computeSizes, *sku.Name) { - for _, loc := range sku.Locations { - if strings.EqualFold(*loc, *location) { - supportedSizes = append(supportedSizes, *sku.Name) - break - } - } + if sku.ResourceType == nil || *sku.ResourceType != string(RTVirtualMachines) { + continue + } + if sku.Name == nil || !slices.Contains(computeSizes, *sku.Name) { + continue + } + inLocation := false + for _, loc := range sku.Locations { + if loc != nil && strings.EqualFold(*loc, *location) { + inLocation = true + break } } + if !inLocation { + continue + } + diskTypes := diskControllerTypesFromCapabilities(sku.Capabilities) + if diskControllerTypeSupported(diskTypes, requiredType) && !slices.Contains(supported, *sku.Name) { + supported = append(supported, *sku.Name) + } } } - return supportedSizes, nil + return supported, nil +} + +// diskControllerTypesFromCapabilities extracts the DiskControllerTypes value from SKU +// capabilities. Returns nil when the capability is absent. +func diskControllerTypesFromCapabilities(caps []*armcompute.ResourceSKUCapabilities) []string { + for _, c := range caps { + if c.Name != nil && *c.Name == "DiskControllerTypes" && c.Value != nil { + return splitDiskControllerTypes(*c.Value) + } + } + return nil +} + +// diskControllerTypeSupported reports whether requiredType is satisfied by the supported +// set. Empty requiredType means no restriction (always passes). A nil/empty supported +// set means the capability is absent; Azure sizes that predate NVMe default to SCSI, so +// absence is treated as SCSI-only. +func diskControllerTypeSupported(supported []string, requiredType string) bool { + if requiredType == "" { + return true + } + if len(supported) == 0 { + return strings.EqualFold(requiredType, "SCSI") + } + for _, t := range supported { + if strings.EqualFold(t, requiredType) { + return true + } + } + return false } func getAzureVMSKUs(ctx context.Context, args *cr.ComputeRequestArgs) ([]string, error) { diff --git a/pkg/provider/azure/data/imageref.go b/pkg/provider/azure/data/imageref.go index 30faebf57..fb483c833 100644 --- a/pkg/provider/azure/data/imageref.go +++ b/pkg/provider/azure/data/imageref.go @@ -28,8 +28,11 @@ type ImageReference struct { Sku string // Community CommunityImageID string - // // Private Shared + // Private Shared SharedImageID string + // Required disk controller type for this image (e.g. "SCSI", "NVMe"). + // Empty means no specific requirement; Azure uses the VM size default. + DiskControllerType string } var ( diff --git a/pkg/provider/azure/data/images.go b/pkg/provider/azure/data/images.go index aced2ddac..0d0134d6f 100644 --- a/pkg/provider/azure/data/images.go +++ b/pkg/provider/azure/data/images.go @@ -85,6 +85,40 @@ func getSharedImage(ctx context.Context, c *armcompute.ClientFactory, id *string return &res, nil } +// GetSharedImageDiskControllerTypes returns the disk controller types listed in the +// gallery image definition's features (e.g. ["SCSI"] for RHEL AI images). Returns nil +// when the feature is absent. Uses the gallery owner's subscription (parts[2] of the +// ARM resource ID) so the API path resolves to where the resource actually lives. +// The image ID must be a full ARM resource ID with 13 slash-separated parts. +func GetSharedImageDiskControllerTypes(ctx context.Context, id *string) ([]string, error) { + cred, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, err + } + parts := strings.Split(*id, "/") + if len(parts) != 13 { + return nil, fmt.Errorf("invalid shared image ID: %s", *id) + } + c, err := armcompute.NewClientFactory(parts[2], cred, nil) + if err != nil { + return nil, err + } + // Query the image definition, not the version — Features live on the definition. + res, err := c.NewGalleryImagesClient().Get(ctx, parts[4], parts[8], parts[10], nil) + if err != nil { + return nil, err + } + if res.Properties == nil { + return nil, nil + } + for _, f := range res.Properties.Features { + if f.Name != nil && *f.Name == "DiskControllerTypes" && f.Value != nil { + return splitDiskControllerTypes(*f.Value), nil + } + } + return nil, nil +} + func SkuG2Support(ctx context.Context, location string, publisher string, offer string, sku string) (string, error) { cred, err := azidentity.NewDefaultAzureCredential(nil) if err != nil { diff --git a/pkg/provider/azure/data/util.go b/pkg/provider/azure/data/util.go index cbb9aa20a..417956806 100644 --- a/pkg/provider/azure/data/util.go +++ b/pkg/provider/azure/data/util.go @@ -2,6 +2,7 @@ package data import ( "os" + "strings" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph" @@ -21,6 +22,17 @@ func getCredentials() (cred *azidentity.DefaultAzureCredential, subscriptionID * return } +// splitDiskControllerTypes splits a comma-separated disk controller type string +// (e.g. "SCSI,NVMe") and trims whitespace from each element. +func splitDiskControllerTypes(s string) []string { + raw := strings.Split(s, ",") + out := make([]string, 0, len(raw)) + for _, t := range raw { + out = append(out, strings.TrimSpace(t)) + } + return out +} + func getGraphClientFactory() (*armresourcegraph.ClientFactory, error) { cred, err := azidentity.NewDefaultAzureCredential(nil) if err != nil { diff --git a/pkg/provider/azure/modules/allocation/allocation.go b/pkg/provider/azure/modules/allocation/allocation.go index eca0929f3..ac35da00a 100644 --- a/pkg/provider/azure/modules/allocation/allocation.go +++ b/pkg/provider/azure/modules/allocation/allocation.go @@ -7,6 +7,7 @@ import ( cr "github.com/redhat-developer/mapt/pkg/provider/api/compute-request" spotTypes "github.com/redhat-developer/mapt/pkg/provider/api/spot" "github.com/redhat-developer/mapt/pkg/provider/azure/data" + "github.com/redhat-developer/mapt/pkg/util/logging" ) type AllocationArgs struct { @@ -34,6 +35,17 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro return nil, err } } + + // Derive the effective image reference. For shared gallery images, resolveImageRef + // queries the gallery API and sets DiskControllerType when the image supports + // exactly one type; the caller's explicit value is preserved in all other cases. + ir := resolveImageRef(mCtx, args.ImageRef) + + diskControllerType := "" + if ir != nil { + diskControllerType = ir.DiskControllerType + } + if args.Spot != nil && args.Spot.Spot { sArgs := &data.SpotInfoArgs{ ComputeSizes: computeSizes, @@ -42,15 +54,29 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro ExcludedLocations: args.Spot.ExcludedHostingPlaces, SpotPriceIncreaseRate: &args.Spot.IncreaseRate, } - if args.ImageRef != nil { - sArgs.ImageRef = args.ImageRef + if ir != nil { + sArgs.ImageRef = ir } bsc, err := data.SpotInfo(mCtx, sArgs) if err != nil { return nil, err } + // Filter the spot-selected sizes by disk controller type compatibility. + if diskControllerType != "" { + spotLocation := bsc.HostingPlace + bsc.ComputeType, err = data.FilterComputeSizesByDiskControllerType( + mCtx.Context(), &spotLocation, bsc.ComputeType, diskControllerType) + if err != nil { + return nil, err + } + if len(bsc.ComputeType) == 0 { + return nil, fmt.Errorf( + "spot compute sizes in location %q do not support disk controller type %q required by the selected image", + bsc.HostingPlace, diskControllerType) + } + } return &AllocationResult{ - ImageRef: args.ImageRef, + ImageRef: ir, Location: &bsc.HostingPlace, Price: &bsc.Price, ComputeSizes: bsc.ComputeType, @@ -65,20 +91,48 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro } location = &hp } - // Filter for current location the computesizes - supportedComputeSizes, err := data.FilterComputeSizesByLocation( - mCtx.Context(), location, computeSizes) + // Single SKU enumeration: filter by location and disk controller type together. + // When diskControllerType is empty the type check is a no-op (any type passes). + supportedComputeSizes, err := data.FilterComputeSizesByDiskControllerType( + mCtx.Context(), location, computeSizes, diskControllerType) if err != nil { return nil, err } if len(supportedComputeSizes) == 0 { + if diskControllerType != "" { + return nil, fmt.Errorf( + "no compute sizes in location %q support disk controller type %q required by the selected image", + *location, diskControllerType) + } return nil, fmt.Errorf("no compute sizes available for location %q", *location) } return &AllocationResult{ - ImageRef: args.ImageRef, + ImageRef: ir, Location: location, ComputeSizes: supportedComputeSizes, }, nil + } +} +// resolveImageRef returns a copy of the image reference, optionally enriched with the +// disk controller type read from the gallery image definition. The gallery Features value +// lists types the image *supports*, not what it *requires*. We only override the caller's +// value when the gallery returns exactly one supported type — that uniquely identifies the +// requirement. When the gallery returns multiple types the image is flexible, so the +// caller's explicit value (if any) is preserved unchanged. On fetch failure the caller's +// value is also preserved. +func resolveImageRef(mCtx *mc.Context, ir *data.ImageReference) *data.ImageReference { + if ir == nil || ir.SharedImageID == "" { + return ir + } + enriched := *ir + types, err := data.GetSharedImageDiskControllerTypes(mCtx.Context(), &ir.SharedImageID) + if err != nil { + logging.Debugf("could not fetch disk controller types for image %s: %v", ir.SharedImageID, err) + return &enriched + } + if len(types) == 1 && enriched.DiskControllerType == "" { + enriched.DiskControllerType = types[0] } + return &enriched } diff --git a/pkg/provider/azure/modules/virtual-machine/virtual-machine.go b/pkg/provider/azure/modules/virtual-machine/virtual-machine.go index c700b1603..60141ac96 100644 --- a/pkg/provider/azure/modules/virtual-machine/virtual-machine.go +++ b/pkg/provider/azure/modules/virtual-machine/virtual-machine.go @@ -82,6 +82,7 @@ func Create(ctx *pulumi.Context, mCtx *mc.Context, args *VirtualMachineArgs) (Vi StorageAccountType: pulumi.String("Standard_LRS"), }, }, + DiskControllerType: diskControllerTypePtr(args.Image.DiskControllerType), }, // Try to improve provisioning time DiagnosticsProfile: compute.DiagnosticsProfileArgs{ @@ -163,3 +164,10 @@ func isSelfOwned(sharedImageId *string) bool { sharedImageParams := strings.Split(*sharedImageId, "/") return os.Getenv("AZURE_SUBSCRIPTION_ID") == sharedImageParams[2] } + +func diskControllerTypePtr(t string) pulumi.StringPtrInput { + if t == "" { + return nil + } + return pulumi.StringPtr(t) +}