From 1346d0e90e252333bb312eec292944636db6e723 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 23 Apr 2026 14:34:15 -0700 Subject: [PATCH 1/2] simplify image selection --- v1/providers/nebius/instance.go | 149 +++++---------------------- v1/providers/nebius/instance_test.go | 40 +++++++ 2 files changed, 66 insertions(+), 123 deletions(-) diff --git a/v1/providers/nebius/instance.go b/v1/providers/nebius/instance.go index b4747730..89825ad0 100644 --- a/v1/providers/nebius/instance.go +++ b/v1/providers/nebius/instance.go @@ -15,7 +15,9 @@ import ( ) const ( - platformTypeCPU = "cpu" + platformTypeCPU = "cpu" + nebiusGPUImageFamily = "ubuntu24.04-cuda13.0" + nebiusCPUImageFamily = "ubuntu24.04-driverless" ) //nolint:gocyclo,funlen // Complex instance creation with resource management @@ -1180,8 +1182,8 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan return diskID, nil } -// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID -func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) { +// buildDiskCreateRequest builds a disk creation request using the fixed Nebius image family for the instance type. +func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) { if attrs.DiskSize == 0 { attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console } @@ -1204,138 +1206,39 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri }, } - // First, try to resolve and use image family - if imageFamily, err := c.resolveImageFamily(ctx, attrs.ImageID); err == nil { - publicImagesParent := c.getPublicImagesParent() - - // Skip validation for known-good common families to speed up instance start - knownFamilies := []string{"ubuntu22.04-cuda12", "mk8s-worker-node-v-1-32-ubuntu24.04", "mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8"} - isKnownFamily := false - for _, known := range knownFamilies { - if imageFamily == known { - isKnownFamily = true - break - } - } - - if isKnownFamily { - // Use known family without validation - baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ - SourceImageFamily: &compute.SourceImageFamily{ - ImageFamily: imageFamily, - ParentId: publicImagesParent, - }, - } - baseReq.Metadata.Labels["image-family"] = imageFamily - return baseReq, nil - } - - // For unknown families, validate first - _, err := c.sdk.Services().Compute().V1().Image().GetLatestByFamily(ctx, &compute.GetImageLatestByFamilyRequest{ - ParentId: publicImagesParent, + imageFamily := getNebiusBootImageFamily(attrs.InstanceType) + baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ + SourceImageFamily: &compute.SourceImageFamily{ ImageFamily: imageFamily, - }) - if err == nil { - // Family works, use it - baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ - SourceImageFamily: &compute.SourceImageFamily{ - ImageFamily: imageFamily, - ParentId: publicImagesParent, - }, - } - baseReq.Metadata.Labels["image-family"] = imageFamily - return baseReq, nil - } - } - - // Family approach failed, try to use a known working public image ID - publicImageID, err := c.getWorkingPublicImageID(ctx, attrs.ImageID) - if err == nil { - baseReq.Spec.Source = &compute.DiskSpec_SourceImageId{ - SourceImageId: publicImageID, - } - baseReq.Metadata.Labels["source-image-id"] = publicImageID - return baseReq, nil + ParentId: c.getPublicImagesParent(), + }, } + baseReq.Metadata.Labels["image-family"] = imageFamily - // Both approaches failed - return nil, fmt.Errorf("could not resolve image %s to either a working family or image ID: %w", attrs.ImageID, err) + return baseReq, nil } -// getWorkingPublicImageID gets a working public image ID based on the requested image type -// -//nolint:gocognit,gocyclo // Complex function trying multiple image resolution strategies -func (c *NebiusClient) getWorkingPublicImageID(ctx context.Context, requestedImage string) (string, error) { - // Get available public images from the correct region - publicImagesParent := c.getPublicImagesParent() - imagesResp, err := c.sdk.Services().Compute().V1().Image().List(ctx, &compute.ListImagesRequest{ - ParentId: publicImagesParent, - }) - if err != nil { - return "", fmt.Errorf("failed to list public images: %w", err) +func getNebiusBootImageFamily(instanceType string) string { + if isNebiusGPUInstanceType(instanceType) { + return nebiusGPUImageFamily } + return nebiusCPUImageFamily +} - if len(imagesResp.GetItems()) == 0 { - return "", fmt.Errorf("no public images available") +func isNebiusGPUInstanceType(instanceType string) bool { + instanceTypeLower := strings.ToLower(instanceType) + if instanceTypeLower == "" { + return false } - // Try to find the best match based on the requested image - requestedLower := strings.ToLower(requestedImage) - - var bestMatch *compute.Image - var fallbackImage *compute.Image - - for _, image := range imagesResp.GetItems() { - if image.Metadata == nil { - continue - } - - imageName := strings.ToLower(image.Metadata.Name) - - // Set fallback to first available image - if fallbackImage == nil { - fallbackImage = image - } - - // Look for Ubuntu matches - if strings.Contains(requestedLower, "ubuntu") && strings.Contains(imageName, "ubuntu") { - // Prefer specific version matches - //nolint:gocritic // if-else chain is clearer than switch for version matching logic - if strings.Contains(requestedLower, "24.04") || strings.Contains(requestedLower, "24") { - if strings.Contains(imageName, "ubuntu24.04") { - bestMatch = image - break - } - } else if strings.Contains(requestedLower, "22.04") || strings.Contains(requestedLower, "22") { - if strings.Contains(imageName, "ubuntu22.04") { - bestMatch = image - break - } - } else if strings.Contains(requestedLower, "20.04") || strings.Contains(requestedLower, "20") { - if strings.Contains(imageName, "ubuntu20.04") { - bestMatch = image - break - } - } - - // Any Ubuntu image is better than non-Ubuntu - if bestMatch == nil { - bestMatch = image - } + if strings.Contains(instanceTypeLower, ".") { + parts := strings.SplitN(instanceTypeLower, ".", 2) + if len(parts) == 2 { + return !strings.HasPrefix(parts[0], "cpu-") } } - // Use best match if found, otherwise fallback - selectedImage := bestMatch - if selectedImage == nil { - selectedImage = fallbackImage - } - - if selectedImage == nil { - return "", fmt.Errorf("no suitable public image found") - } - - return selectedImage.Metadata.Id, nil + return !strings.Contains(instanceTypeLower, "-cpu-") } // getPublicImagesParent determines the correct public images parent ID based on project routing code diff --git a/v1/providers/nebius/instance_test.go b/v1/providers/nebius/instance_test.go index 389dea26..b207a2ad 100644 --- a/v1/providers/nebius/instance_test.go +++ b/v1/providers/nebius/instance_test.go @@ -270,6 +270,46 @@ func TestExtractGPUTypeAndName(t *testing.T) { } } +func TestGetNebiusBootImageFamily(t *testing.T) { + tests := []struct { + name string + instanceType string + expected string + }{ + { + name: "gpu dot format uses cuda image", + instanceType: "gpu-h100-sxm.8gpu-128vcpu-1600gb", + expected: nebiusGPUImageFamily, + }, + { + name: "cpu dot format uses driverless image", + instanceType: "cpu-e2.4vcpu-16gb", + expected: nebiusCPUImageFamily, + }, + { + name: "gpu legacy format uses cuda image", + instanceType: "nebius-eu-north1-l40s-4gpu-96vcpu-768gb", + expected: nebiusGPUImageFamily, + }, + { + name: "cpu legacy format uses driverless image", + instanceType: "nebius-eu-north1-cpu-4vcpu-16gb", + expected: nebiusCPUImageFamily, + }, + { + name: "empty instance type defaults to cpu image", + instanceType: "", + expected: nebiusCPUImageFamily, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, getNebiusBootImageFamily(tt.instanceType)) + }) + } +} + func TestIsPlatformSupported(t *testing.T) { client := createTestClient() From f5d599dc20667559ab29bbe6084ebe184203bcf9 Mon Sep 17 00:00:00 2001 From: Drew Malin Date: Thu, 23 Apr 2026 15:27:38 -0700 Subject: [PATCH 2/2] fix --- v1/providers/nebius/instance.go | 104 +++++---------------------- v1/providers/nebius/instance_test.go | 10 --- 2 files changed, 16 insertions(+), 98 deletions(-) diff --git a/v1/providers/nebius/instance.go b/v1/providers/nebius/instance.go index 89825ad0..6bc058ea 100644 --- a/v1/providers/nebius/instance.go +++ b/v1/providers/nebius/instance.go @@ -1153,10 +1153,7 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan diskName := fmt.Sprintf("%s-boot-disk", attrs.RefID) // Try to use image family first, then fallback to specific image ID - createReq, err := c.buildDiskCreateRequest(ctx, diskName, attrs) - if err != nil { - return "", fmt.Errorf("failed to build disk create request: %w", err) - } + createReq := c.buildDiskCreateRequest(ctx, diskName, attrs) operation, err := c.sdk.Services().Compute().V1().Disk().Create(ctx, createReq) if err != nil { @@ -1183,11 +1180,13 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan } // buildDiskCreateRequest builds a disk creation request using the fixed Nebius image family for the instance type. -func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) { +func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string, attrs v1.CreateInstanceAttrs) *compute.CreateDiskRequest { if attrs.DiskSize == 0 { attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console } + imageFamily := getNebiusBootImageFamily(attrs.InstanceType) + baseReq := &compute.CreateDiskRequest{ Metadata: &common.ResourceMetadata{ ParentId: c.projectID, @@ -1196,6 +1195,7 @@ func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string "created-by": "brev-cloud-sdk", "brev-user": c.refID, "environment-id": attrs.RefID, + "image-family": imageFamily, }, }, Spec: &compute.DiskSpec{ @@ -1203,19 +1203,16 @@ func (c *NebiusClient) buildDiskCreateRequest(_ context.Context, diskName string SizeGibibytes: int64(attrs.DiskSize / units.Gibibyte), }, Type: compute.DiskSpec_NETWORK_SSD, + Source: &compute.DiskSpec_SourceImageFamily{ + SourceImageFamily: &compute.SourceImageFamily{ + ImageFamily: imageFamily, + ParentId: c.getPublicImagesParent(), + }, + }, }, } - imageFamily := getNebiusBootImageFamily(attrs.InstanceType) - baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{ - SourceImageFamily: &compute.SourceImageFamily{ - ImageFamily: imageFamily, - ParentId: c.getPublicImagesParent(), - }, - } - baseReq.Metadata.Labels["image-family"] = imageFamily - - return baseReq, nil + return baseReq } func getNebiusBootImageFamily(instanceType string) string { @@ -1231,14 +1228,12 @@ func isNebiusGPUInstanceType(instanceType string) bool { return false } - if strings.Contains(instanceTypeLower, ".") { - parts := strings.SplitN(instanceTypeLower, ".", 2) - if len(parts) == 2 { - return !strings.HasPrefix(parts[0], "cpu-") - } + // GPU instance types start with "gpu-" (see getInstanceTypesForLocation) + if strings.HasPrefix(instanceTypeLower, "gpu-") { + return true } - return !strings.Contains(instanceTypeLower, "-cpu-") + return false } // getPublicImagesParent determines the correct public images parent ID based on project routing code @@ -1478,73 +1473,6 @@ func (c *NebiusClient) parseInstanceType(ctx context.Context, instanceTypeID str return "", "", fmt.Errorf("could not parse instance type %s or find suitable platform/preset", instanceTypeID) } -// resolveImageFamily resolves an ImageID to an image family name -// If ImageID is already a family name, use it directly -// Otherwise, try to get the image and extract its family -// -//nolint:gocyclo,unparam // Complex image family resolution with fallback logic -func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (string, error) { - // Common Nebius image families - if ImageID matches one of these, use it directly - commonFamilies := []string{ - "ubuntu22.04-cuda12", - "mk8s-worker-node-v-1-32-ubuntu24.04", - "mk8s-worker-node-v-1-32-ubuntu24.04-cuda12.8", - "mk8s-worker-node-v-1-31-ubuntu24.04-cuda12", - "ubuntu22.04", - "ubuntu20.04", - } - - // Check if ImageID is already a known family name - for _, family := range commonFamilies { - if imageID == family { - return family, nil - } - } - - // If ImageID looks like a family name pattern (contains dots, dashes, no UUIDs) - // and doesn't look like a UUID, assume it's a family name - if !strings.Contains(imageID, "-") || len(imageID) < 32 { - // Likely a family name, use it directly - return imageID, nil - } - - // If it looks like a UUID/ID, try to get the image and extract its family - image, err := c.sdk.Services().Compute().V1().Image().Get(ctx, &compute.GetImageRequest{ - Id: imageID, - }) - if err != nil { - // If we can't get the image, try using the ID as a family name anyway - // This allows for custom family names that don't match our patterns - return imageID, nil - } - - // Extract family from image metadata/labels if available - if image.Metadata != nil && image.Metadata.Labels != nil { - if family, exists := image.Metadata.Labels["family"]; exists && family != "" { - return family, nil - } - if family, exists := image.Metadata.Labels["image-family"]; exists && family != "" { - return family, nil - } - } - - // Extract family from image name as fallback - if image.Metadata != nil && image.Metadata.Name != "" { - // Try to extract a reasonable family name from the image name - name := strings.ToLower(image.Metadata.Name) - if strings.Contains(name, "ubuntu22") || strings.Contains(name, "ubuntu-22") { - return "ubuntu22.04", nil - } - if strings.Contains(name, "ubuntu20") || strings.Contains(name, "ubuntu-20") { - return "ubuntu20.04", nil - } - } - - // Default fallback - use the original ImageID as family - // This handles cases where users provide custom family names - return imageID, nil -} - // deleteBootDisk deletes a boot disk by ID func (c *NebiusClient) deleteBootDisk(ctx context.Context, diskID string) error { operation, err := c.sdk.Services().Compute().V1().Disk().Delete(ctx, &compute.DeleteDiskRequest{ diff --git a/v1/providers/nebius/instance_test.go b/v1/providers/nebius/instance_test.go index b207a2ad..65504849 100644 --- a/v1/providers/nebius/instance_test.go +++ b/v1/providers/nebius/instance_test.go @@ -286,16 +286,6 @@ func TestGetNebiusBootImageFamily(t *testing.T) { instanceType: "cpu-e2.4vcpu-16gb", expected: nebiusCPUImageFamily, }, - { - name: "gpu legacy format uses cuda image", - instanceType: "nebius-eu-north1-l40s-4gpu-96vcpu-768gb", - expected: nebiusGPUImageFamily, - }, - { - name: "cpu legacy format uses driverless image", - instanceType: "nebius-eu-north1-cpu-4vcpu-16gb", - expected: nebiusCPUImageFamily, - }, { name: "empty instance type defaults to cpu image", instanceType: "",