Skip to content

Commit 35fba64

Browse files
rishupkclaude
andcommitted
fix(azure): set DiskControllerType in StorageProfile for RHEL AI VMs
- Add DiskControllerType field to ImageReference; rhelai sets it to "SCSI" as a fallback when the gallery API is unavailable - Wire ImageReference.DiskControllerType into StorageProfile in virtual-machine.go via diskControllerTypePtr helper - Add GetSharedImageDiskControllerTypes to read disk controller types from gallery image definition features (not the version) - Add FilterComputeSizesByDiskControllerType combining location and disk controller type filtering in a single SKU enumeration; replaces the previous two-pass approach in allocation.go - resolveImageRef enriches DiskControllerType from gallery API only when the image supports exactly one type (capability != requirement) - Filter both spot and non-spot compute sizes; return a clear error if no compatible sizes remain after filtering Co-Authored-By: Claude <Sonnet 4.6> <noreply@anthropic.com> Signed-off-by: Rishabh Kothari <rkothari@redhat.com>
1 parent 7c5d50d commit 35fba64

7 files changed

Lines changed: 196 additions & 9 deletions

File tree

pkg/provider/azure/action/rhel-ai/rhelai.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ func Create(mCtxArgs *maptContext.ContextArgs, args *apiRHELAI.RHELAIArgs) (err
4646
ComputeRequest: args.ComputeRequest,
4747
Spot: args.Spot,
4848
ImageRef: &data.ImageReference{
49-
SharedImageID: sharedImageID,
49+
SharedImageID: sharedImageID,
50+
DiskControllerType: "SCSI",
5051
},
5152
Username: username,
5253
ReadinessCommand: command.CommandPing}

pkg/provider/azure/data/compute-request.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,81 @@ func FilterComputeSizesByLocation(ctx context.Context, location *string, compute
6666
return supportedSizes, nil
6767
}
6868

69+
// FilterComputeSizesByDiskControllerType returns the subset of computeSizes that are
70+
// available in location AND support requiredType. Sizes without a DiskControllerTypes
71+
// capability are assumed to support only SCSI (Azure historical default).
72+
func FilterComputeSizesByDiskControllerType(ctx context.Context, location *string, computeSizes []string, requiredType string) ([]string, error) {
73+
creds, subscriptionID, err := getCredentials()
74+
if err != nil {
75+
return nil, err
76+
}
77+
client, err := armcompute.NewResourceSKUsClient(*subscriptionID, creds, nil)
78+
if err != nil {
79+
return nil, err
80+
}
81+
pager := client.NewListPager(nil)
82+
supported := []string{}
83+
for pager.More() {
84+
page, err := pager.NextPage(ctx)
85+
if err != nil {
86+
return nil, err
87+
}
88+
for _, sku := range page.Value {
89+
if sku.ResourceType == nil || *sku.ResourceType != string(RTVirtualMachines) {
90+
continue
91+
}
92+
if sku.Name == nil || !slices.Contains(computeSizes, *sku.Name) {
93+
continue
94+
}
95+
inLocation := false
96+
for _, loc := range sku.Locations {
97+
if loc != nil && strings.EqualFold(*loc, *location) {
98+
inLocation = true
99+
break
100+
}
101+
}
102+
if !inLocation {
103+
continue
104+
}
105+
diskTypes := diskControllerTypesFromCapabilities(sku.Capabilities)
106+
if diskControllerTypeSupported(diskTypes, requiredType) && !slices.Contains(supported, *sku.Name) {
107+
supported = append(supported, *sku.Name)
108+
}
109+
}
110+
}
111+
return supported, nil
112+
}
113+
114+
// diskControllerTypesFromCapabilities extracts the DiskControllerTypes value from SKU
115+
// capabilities. Returns nil when the capability is absent.
116+
func diskControllerTypesFromCapabilities(caps []*armcompute.ResourceSKUCapabilities) []string {
117+
for _, cap := range caps {
118+
if cap.Name != nil && *cap.Name == "DiskControllerTypes" && cap.Value != nil {
119+
return splitDiskControllerTypes(*cap.Value)
120+
}
121+
}
122+
return nil
123+
}
124+
125+
// diskControllerTypeSupported reports whether requiredType is satisfied by the supported
126+
// set. Empty requiredType means no restriction (always passes). A nil/empty supported
127+
// set means the capability is absent; Azure sizes that predate NVMe default to SCSI, so
128+
// absence is treated as SCSI-only.
129+
func diskControllerTypeSupported(supported []string, requiredType string) bool {
130+
if requiredType == "" {
131+
return true
132+
}
133+
if len(supported) == 0 {
134+
return strings.EqualFold(requiredType, "SCSI")
135+
}
136+
for _, t := range supported {
137+
if strings.EqualFold(t, requiredType) {
138+
return true
139+
}
140+
}
141+
return false
142+
}
143+
69144
func getAzureVMSKUs(ctx context.Context, args *cr.ComputeRequestArgs) ([]string, error) {
70145
cred, err := azidentity.NewDefaultAzureCredential(nil)
71146
if err != nil {

pkg/provider/azure/data/imageref.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@ type ImageReference struct {
2828
Sku string
2929
// Community
3030
CommunityImageID string
31-
// // Private Shared
31+
// Private Shared
3232
SharedImageID string
33+
// Required disk controller type for this image (e.g. "SCSI", "NVMe").
34+
// Empty means no specific requirement; Azure uses the VM size default.
35+
DiskControllerType string
3336
}
3437

3538
var (

pkg/provider/azure/data/images.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,40 @@ func getSharedImage(ctx context.Context, c *armcompute.ClientFactory, id *string
8585
return &res, nil
8686
}
8787

88+
// GetSharedImageDiskControllerTypes returns the disk controller types listed in the
89+
// gallery image definition's features (e.g. ["SCSI"] for RHEL AI images). Returns nil
90+
// when the feature is absent. Uses the caller's subscription, matching GetSharedImage.
91+
// The image ID must be a full ARM resource ID with 13 slash-separated parts.
92+
func GetSharedImageDiskControllerTypes(ctx context.Context, id *string) ([]string, error) {
93+
cred, err := azidentity.NewDefaultAzureCredential(nil)
94+
if err != nil {
95+
return nil, err
96+
}
97+
parts := strings.Split(*id, "/")
98+
if len(parts) != 13 {
99+
return nil, fmt.Errorf("invalid shared image ID: %s", *id)
100+
}
101+
subscriptionId := os.Getenv("AZURE_SUBSCRIPTION_ID")
102+
c, err := armcompute.NewClientFactory(subscriptionId, cred, nil)
103+
if err != nil {
104+
return nil, err
105+
}
106+
// Query the image definition, not the version — Features live on the definition.
107+
res, err := c.NewGalleryImagesClient().Get(ctx, parts[4], parts[8], parts[10], nil)
108+
if err != nil {
109+
return nil, err
110+
}
111+
if res.Properties == nil {
112+
return nil, nil
113+
}
114+
for _, f := range res.Properties.Features {
115+
if f.Name != nil && *f.Name == "DiskControllerTypes" && f.Value != nil {
116+
return splitDiskControllerTypes(*f.Value), nil
117+
}
118+
}
119+
return nil, nil
120+
}
121+
88122
func SkuG2Support(ctx context.Context, location string, publisher string, offer string, sku string) (string, error) {
89123
cred, err := azidentity.NewDefaultAzureCredential(nil)
90124
if err != nil {

pkg/provider/azure/data/util.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package data
22

33
import (
44
"os"
5+
"strings"
56

67
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
78
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph"
@@ -21,6 +22,17 @@ func getCredentials() (cred *azidentity.DefaultAzureCredential, subscriptionID *
2122
return
2223
}
2324

25+
// splitDiskControllerTypes splits a comma-separated disk controller type string
26+
// (e.g. "SCSI,NVMe") and trims whitespace from each element.
27+
func splitDiskControllerTypes(s string) []string {
28+
raw := strings.Split(s, ",")
29+
out := make([]string, 0, len(raw))
30+
for _, t := range raw {
31+
out = append(out, strings.TrimSpace(t))
32+
}
33+
return out
34+
}
35+
2436
func getGraphClientFactory() (*armresourcegraph.ClientFactory, error) {
2537
cred, err := azidentity.NewDefaultAzureCredential(nil)
2638
if err != nil {

pkg/provider/azure/modules/allocation/allocation.go

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
cr "github.com/redhat-developer/mapt/pkg/provider/api/compute-request"
88
spotTypes "github.com/redhat-developer/mapt/pkg/provider/api/spot"
99
"github.com/redhat-developer/mapt/pkg/provider/azure/data"
10+
"github.com/redhat-developer/mapt/pkg/util/logging"
1011
)
1112

1213
type AllocationArgs struct {
@@ -34,6 +35,17 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
3435
return nil, err
3536
}
3637
}
38+
39+
// Derive the effective image reference. For shared gallery images, resolveImageRef
40+
// queries the gallery API and sets DiskControllerType when the image supports
41+
// exactly one type; the caller's explicit value is preserved in all other cases.
42+
ir := resolveImageRef(mCtx, args.ImageRef)
43+
44+
diskControllerType := ""
45+
if ir != nil {
46+
diskControllerType = ir.DiskControllerType
47+
}
48+
3749
if args.Spot != nil && args.Spot.Spot {
3850
sArgs := &data.SpotInfoArgs{
3951
ComputeSizes: computeSizes,
@@ -42,15 +54,29 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
4254
ExcludedLocations: args.Spot.ExcludedHostingPlaces,
4355
SpotPriceIncreaseRate: &args.Spot.IncreaseRate,
4456
}
45-
if args.ImageRef != nil {
46-
sArgs.ImageRef = args.ImageRef
57+
if ir != nil {
58+
sArgs.ImageRef = ir
4759
}
4860
bsc, err := data.SpotInfo(mCtx, sArgs)
4961
if err != nil {
5062
return nil, err
5163
}
64+
// Filter the spot-selected sizes by disk controller type compatibility.
65+
if diskControllerType != "" {
66+
spotLocation := bsc.HostingPlace
67+
bsc.ComputeType, err = data.FilterComputeSizesByDiskControllerType(
68+
mCtx.Context(), &spotLocation, bsc.ComputeType, diskControllerType)
69+
if err != nil {
70+
return nil, err
71+
}
72+
if len(bsc.ComputeType) == 0 {
73+
return nil, fmt.Errorf(
74+
"spot compute sizes in location %q do not support disk controller type %q required by the selected image",
75+
bsc.HostingPlace, diskControllerType)
76+
}
77+
}
5278
return &AllocationResult{
53-
ImageRef: args.ImageRef,
79+
ImageRef: ir,
5480
Location: &bsc.HostingPlace,
5581
Price: &bsc.Price,
5682
ComputeSizes: bsc.ComputeType,
@@ -65,20 +91,48 @@ func Allocation(mCtx *mc.Context, args *AllocationArgs) (*AllocationResult, erro
6591
}
6692
location = &hp
6793
}
68-
// Filter for current location the computesizes
69-
supportedComputeSizes, err := data.FilterComputeSizesByLocation(
70-
mCtx.Context(), location, computeSizes)
94+
// Single SKU enumeration: filter by location and disk controller type together.
95+
// When diskControllerType is empty the type check is a no-op (any type passes).
96+
supportedComputeSizes, err := data.FilterComputeSizesByDiskControllerType(
97+
mCtx.Context(), location, computeSizes, diskControllerType)
7198
if err != nil {
7299
return nil, err
73100
}
74101
if len(supportedComputeSizes) == 0 {
102+
if diskControllerType != "" {
103+
return nil, fmt.Errorf(
104+
"no compute sizes in location %q support disk controller type %q required by the selected image",
105+
*location, diskControllerType)
106+
}
75107
return nil, fmt.Errorf("no compute sizes available for location %q", *location)
76108
}
77109
return &AllocationResult{
78-
ImageRef: args.ImageRef,
110+
ImageRef: ir,
79111
Location: location,
80112
ComputeSizes: supportedComputeSizes,
81113
}, nil
114+
}
115+
}
82116

117+
// resolveImageRef returns a copy of the image reference, optionally enriched with the
118+
// disk controller type read from the gallery image definition. The gallery Features value
119+
// lists types the image *supports*, not what it *requires*. We only override the caller's
120+
// value when the gallery returns exactly one supported type — that uniquely identifies the
121+
// requirement. When the gallery returns multiple types the image is flexible, so the
122+
// caller's explicit value (if any) is preserved unchanged. On fetch failure the caller's
123+
// value is also preserved.
124+
func resolveImageRef(mCtx *mc.Context, ir *data.ImageReference) *data.ImageReference {
125+
if ir == nil || ir.SharedImageID == "" {
126+
return ir
127+
}
128+
enriched := *ir
129+
types, err := data.GetSharedImageDiskControllerTypes(mCtx.Context(), &ir.SharedImageID)
130+
if err != nil {
131+
logging.Debugf("could not fetch disk controller types for image %s: %v", ir.SharedImageID, err)
132+
return &enriched
133+
}
134+
if len(types) == 1 {
135+
enriched.DiskControllerType = types[0]
83136
}
137+
return &enriched
84138
}

pkg/provider/azure/modules/virtual-machine/virtual-machine.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func Create(ctx *pulumi.Context, mCtx *mc.Context, args *VirtualMachineArgs) (Vi
8282
StorageAccountType: pulumi.String("Standard_LRS"),
8383
},
8484
},
85+
DiskControllerType: diskControllerTypePtr(args.Image.DiskControllerType),
8586
},
8687
// Try to improve provisioning time
8788
DiagnosticsProfile: compute.DiagnosticsProfileArgs{
@@ -163,3 +164,10 @@ func isSelfOwned(sharedImageId *string) bool {
163164
sharedImageParams := strings.Split(*sharedImageId, "/")
164165
return os.Getenv("AZURE_SUBSCRIPTION_ID") == sharedImageParams[2]
165166
}
167+
168+
func diskControllerTypePtr(t string) pulumi.StringPtrInput {
169+
if t == "" {
170+
return nil
171+
}
172+
return pulumi.StringPtr(t)
173+
}

0 commit comments

Comments
 (0)