Skip to content

Commit 9da27f0

Browse files
rishupkclaude
andcommitted
fix(azure): set DiskControllerType in StorageProfile for RHEL AI VMs
- Add DiskControllerType field to ImageReference; rhelai sets it to "SCSI" as a fallback when the gallery API is unavailable - Wire ImageReference.DiskControllerType into StorageProfile in virtual-machine.go via diskControllerTypePtr helper - Add GetSharedImageDiskControllerTypes to read disk controller types from gallery image definition features (not the version) - Add FilterComputeSizesByDiskControllerType combining location and disk controller type filtering in a single SKU enumeration; replaces the previous two-pass approach in allocation.go - resolveImageRef enriches DiskControllerType from gallery API only when the image supports exactly one type (capability != requirement) - Filter both spot and non-spot compute sizes; return a clear error if no compatible sizes remain after filtering Co-Authored-By: Claude <Sonnet 4.6> <noreply@anthropic.com> Signed-off-by: Rishabh Kothari <rkothari@redhat.com>
1 parent 7c5d50d commit 9da27f0

7 files changed

Lines changed: 178 additions & 20 deletions

File tree

pkg/provider/azure/action/rhel-ai/rhelai.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ func Create(mCtxArgs *maptContext.ContextArgs, args *apiRHELAI.RHELAIArgs) (err
4747
Spot: args.Spot,
4848
ImageRef: &data.ImageReference{
4949
SharedImageID: sharedImageID,
50+
// Belt-and-suspenders: set SCSI explicitly so Azure never infers a
51+
// conflicting default. resolveImageRef will also derive this from the
52+
// gallery image's Features, but the static value protects against API
53+
// failures or future images with multiple supported types.
54+
DiskControllerType: "SCSI",
5055
},
5156
Username: username,
5257
ReadinessCommand: command.CommandPing}

pkg/provider/azure/data/compute-request.go

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ func (c *ComputeSelector) Select(ctx context.Context, args *cr.ComputeRequestArg
3333
return getAzureVMSKUs(ctx, args)
3434
}
3535

36-
func FilterComputeSizesByLocation(ctx context.Context, location *string, computeSizes []string) ([]string, error) {
36+
// FilterComputeSizesByDiskControllerType returns the subset of computeSizes that are
37+
// available in location AND support requiredType. Sizes without a DiskControllerTypes
38+
// capability are assumed to support only SCSI (Azure historical default).
39+
func FilterComputeSizesByDiskControllerType(ctx context.Context, location *string, computeSizes []string, requiredType string) ([]string, error) {
3740
creds, subscriptionID, err := getCredentials()
3841
if err != nil {
3942
return nil, err
@@ -43,27 +46,66 @@ func FilterComputeSizesByLocation(ctx context.Context, location *string, compute
4346
return nil, err
4447
}
4548
pager := client.NewListPager(nil)
46-
supportedSizes := []string{}
49+
supported := []string{}
4750
for pager.More() {
4851
page, err := pager.NextPage(ctx)
4952
if err != nil {
5053
return nil, err
5154
}
5255
for _, sku := range page.Value {
53-
if sku.ResourceType != nil &&
54-
*sku.ResourceType == string(RTVirtualMachines) {
55-
if sku.Name != nil && slices.Contains(computeSizes, *sku.Name) {
56-
for _, loc := range sku.Locations {
57-
if strings.EqualFold(*loc, *location) {
58-
supportedSizes = append(supportedSizes, *sku.Name)
59-
break
60-
}
61-
}
56+
if sku.ResourceType == nil || *sku.ResourceType != string(RTVirtualMachines) {
57+
continue
58+
}
59+
if sku.Name == nil || !slices.Contains(computeSizes, *sku.Name) {
60+
continue
61+
}
62+
inLocation := false
63+
for _, loc := range sku.Locations {
64+
if loc != nil && strings.EqualFold(*loc, *location) {
65+
inLocation = true
66+
break
6267
}
6368
}
69+
if !inLocation {
70+
continue
71+
}
72+
diskTypes := diskControllerTypesFromCapabilities(sku.Capabilities)
73+
if diskControllerTypeSupported(diskTypes, requiredType) && !slices.Contains(supported, *sku.Name) {
74+
supported = append(supported, *sku.Name)
75+
}
6476
}
6577
}
66-
return supportedSizes, nil
78+
return supported, nil
79+
}
80+
81+
// diskControllerTypesFromCapabilities extracts the DiskControllerTypes value from SKU
82+
// capabilities. Returns nil when the capability is absent.
83+
func diskControllerTypesFromCapabilities(caps []*armcompute.ResourceSKUCapabilities) []string {
84+
for _, c := range caps {
85+
if c.Name != nil && *c.Name == "DiskControllerTypes" && c.Value != nil {
86+
return splitDiskControllerTypes(*c.Value)
87+
}
88+
}
89+
return nil
90+
}
91+
92+
// diskControllerTypeSupported reports whether requiredType is satisfied by the supported
93+
// set. Empty requiredType means no restriction (always passes). A nil/empty supported
94+
// set means the capability is absent; Azure sizes that predate NVMe default to SCSI, so
95+
// absence is treated as SCSI-only.
96+
func diskControllerTypeSupported(supported []string, requiredType string) bool {
97+
if requiredType == "" {
98+
return true
99+
}
100+
if len(supported) == 0 {
101+
return strings.EqualFold(requiredType, "SCSI")
102+
}
103+
for _, t := range supported {
104+
if strings.EqualFold(t, requiredType) {
105+
return true
106+
}
107+
}
108+
return false
67109
}
68110

69111
func getAzureVMSKUs(ctx context.Context, args *cr.ComputeRequestArgs) ([]string, error) {

pkg/provider/azure/data/imageref.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@ type ImageReference struct {
2828
Sku string
2929
// Community
3030
CommunityImageID string
31-
// // Private Shared
31+
// Private Shared
3232
SharedImageID string
33+
// Required disk controller type for this image (e.g. "SCSI", "NVMe").
34+
// Empty means no specific requirement; Azure uses the VM size default.
35+
DiskControllerType string
3336
}
3437

3538
var (

pkg/provider/azure/data/images.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,40 @@ func getSharedImage(ctx context.Context, c *armcompute.ClientFactory, id *string
8585
return &res, nil
8686
}
8787

88+
// GetSharedImageDiskControllerTypes returns the disk controller types listed in the
89+
// gallery image definition's features (e.g. ["SCSI"] for RHEL AI images). Returns nil
90+
// when the feature is absent. Uses the gallery owner's subscription (parts[2] of the
91+
// ARM resource ID) so the API path resolves to where the resource actually lives.
92+
// The image ID must be a full ARM resource ID with 13 slash-separated parts.
93+
func GetSharedImageDiskControllerTypes(ctx context.Context, id *string) ([]string, error) {
94+
cred, err := azidentity.NewDefaultAzureCredential(nil)
95+
if err != nil {
96+
return nil, err
97+
}
98+
parts := strings.Split(*id, "/")
99+
if len(parts) != 13 {
100+
return nil, fmt.Errorf("invalid shared image ID: %s", *id)
101+
}
102+
c, err := armcompute.NewClientFactory(parts[2], cred, nil)
103+
if err != nil {
104+
return nil, err
105+
}
106+
// Query the image definition, not the version — Features live on the definition.
107+
res, err := c.NewGalleryImagesClient().Get(ctx, parts[4], parts[8], parts[10], nil)
108+
if err != nil {
109+
return nil, err
110+
}
111+
if res.Properties == nil {
112+
return nil, nil
113+
}
114+
for _, f := range res.Properties.Features {
115+
if f.Name != nil && *f.Name == "DiskControllerTypes" && f.Value != nil {
116+
return splitDiskControllerTypes(*f.Value), nil
117+
}
118+
}
119+
return nil, nil
120+
}
121+
88122
func SkuG2Support(ctx context.Context, location string, publisher string, offer string, sku string) (string, error) {
89123
cred, err := azidentity.NewDefaultAzureCredential(nil)
90124
if err != nil {

pkg/provider/azure/data/util.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package data
22

33
import (
44
"os"
5+
"strings"
56

67
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
78
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph"
@@ -21,6 +22,17 @@ func getCredentials() (cred *azidentity.DefaultAzureCredential, subscriptionID *
2122
return
2223
}
2324

25+
// splitDiskControllerTypes splits a comma-separated disk controller type string
26+
// (e.g. "SCSI,NVMe") and trims whitespace from each element.
27+
func splitDiskControllerTypes(s string) []string {
28+
raw := strings.Split(s, ",")
29+
out := make([]string, 0, len(raw))
30+
for _, t := range raw {
31+
out = append(out, strings.TrimSpace(t))
32+
}
33+
return out
34+
}
35+
2436
func getGraphClientFactory() (*armresourcegraph.ClientFactory, error) {
2537
cred, err := azidentity.NewDefaultAzureCredential(nil)
2638
if err != nil {

pkg/provider/azure/modules/allocation/allocation.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
cr "github.com/redhat-developer/mapt/pkg/provider/api/compute-request"
88
spotTypes "github.com/redhat-developer/mapt/pkg/provider/api/spot"
99
"github.com/redhat-developer/mapt/pkg/provider/azure/data"
10+
"github.com/redhat-developer/mapt/pkg/util/logging"
1011
)
1112

1213
type AllocationArgs struct {
@@ -34,6 +35,17 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
3435
return nil, err
3536
}
3637
}
38+
39+
// Derive the effective image reference. For shared gallery images, resolveImageRef
40+
// queries the gallery API and sets DiskControllerType when the image supports
41+
// exactly one type; the caller's explicit value is preserved in all other cases.
42+
ir := resolveImageRef(mCtx, args.ImageRef)
43+
44+
diskControllerType := ""
45+
if ir != nil {
46+
diskControllerType = ir.DiskControllerType
47+
}
48+
3749
if args.Spot != nil && args.Spot.Spot {
3850
sArgs := &data.SpotInfoArgs{
3951
ComputeSizes: computeSizes,
@@ -42,15 +54,29 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
4254
ExcludedLocations: args.Spot.ExcludedHostingPlaces,
4355
SpotPriceIncreaseRate: &args.Spot.IncreaseRate,
4456
}
45-
if args.ImageRef != nil {
46-
sArgs.ImageRef = args.ImageRef
57+
if ir != nil {
58+
sArgs.ImageRef = ir
4759
}
4860
bsc, err := data.SpotInfo(mCtx, sArgs)
4961
if err != nil {
5062
return nil, err
5163
}
64+
// Filter the spot-selected sizes by disk controller type compatibility.
65+
if diskControllerType != "" {
66+
spotLocation := bsc.HostingPlace
67+
bsc.ComputeType, err = data.FilterComputeSizesByDiskControllerType(
68+
mCtx.Context(), &spotLocation, bsc.ComputeType, diskControllerType)
69+
if err != nil {
70+
return nil, err
71+
}
72+
if len(bsc.ComputeType) == 0 {
73+
return nil, fmt.Errorf(
74+
"spot compute sizes in location %q do not support disk controller type %q required by the selected image",
75+
bsc.HostingPlace, diskControllerType)
76+
}
77+
}
5278
return &AllocationResult{
53-
ImageRef: args.ImageRef,
79+
ImageRef: ir,
5480
Location: &bsc.HostingPlace,
5581
Price: &bsc.Price,
5682
ComputeSizes: bsc.ComputeType,
@@ -65,20 +91,48 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
6591
}
6692
location = &hp
6793
}
68-
// Filter for current location the computesizes
69-
supportedComputeSizes, err := data.FilterComputeSizesByLocation(
70-
mCtx.Context(), location, computeSizes)
94+
// Single SKU enumeration: filter by location and disk controller type together.
95+
// When diskControllerType is empty the type check is a no-op (any type passes).
96+
supportedComputeSizes, err := data.FilterComputeSizesByDiskControllerType(
97+
mCtx.Context(), location, computeSizes, diskControllerType)
7198
if err != nil {
7299
return nil, err
73100
}
74101
if len(supportedComputeSizes) == 0 {
102+
if diskControllerType != "" {
103+
return nil, fmt.Errorf(
104+
"no compute sizes in location %q support disk controller type %q required by the selected image",
105+
*location, diskControllerType)
106+
}
75107
return nil, fmt.Errorf("no compute sizes available for location %q", *location)
76108
}
77109
return &AllocationResult{
78-
ImageRef: args.ImageRef,
110+
ImageRef: ir,
79111
Location: location,
80112
ComputeSizes: supportedComputeSizes,
81113
}, nil
114+
}
115+
}
82116

117+
// resolveImageRef returns a copy of the image reference, optionally enriched with the
118+
// disk controller type read from the gallery image definition. The gallery Features value
119+
// lists types the image *supports*, not what it *requires*. We only override the caller's
120+
// value when the gallery returns exactly one supported type — that uniquely identifies the
121+
// requirement. When the gallery returns multiple types the image is flexible, so the
122+
// caller's explicit value (if any) is preserved unchanged. On fetch failure the caller's
123+
// value is also preserved.
124+
func resolveImageRef(mCtx *mc.Context, ir *data.ImageReference) *data.ImageReference {
125+
if ir == nil || ir.SharedImageID == "" {
126+
return ir
127+
}
128+
enriched := *ir
129+
types, err := data.GetSharedImageDiskControllerTypes(mCtx.Context(), &ir.SharedImageID)
130+
if err != nil {
131+
logging.Debugf("could not fetch disk controller types for image %s: %v", ir.SharedImageID, err)
132+
return &enriched
133+
}
134+
if len(types) == 1 && enriched.DiskControllerType == "" {
135+
enriched.DiskControllerType = types[0]
83136
}
137+
return &enriched
84138
}

pkg/provider/azure/modules/virtual-machine/virtual-machine.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func Create(ctx *pulumi.Context, mCtx *mc.Context, args *VirtualMachineArgs) (Vi
8282
StorageAccountType: pulumi.String("Standard_LRS"),
8383
},
8484
},
85+
DiskControllerType: diskControllerTypePtr(args.Image.DiskControllerType),
8586
},
8687
// Try to improve provisioning time
8788
DiagnosticsProfile: compute.DiagnosticsProfileArgs{
@@ -163,3 +164,10 @@ func isSelfOwned(sharedImageId *string) bool {
163164
sharedImageParams := strings.Split(*sharedImageId, "/")
164165
return os.Getenv("AZURE_SUBSCRIPTION_ID") == sharedImageParams[2]
165166
}
167+
168+
func diskControllerTypePtr(t string) pulumi.StringPtrInput {
169+
if t == "" {
170+
return nil
171+
}
172+
return pulumi.StringPtr(t)
173+
}

0 commit comments

Comments
 (0)