@@ -1198,6 +1198,12 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan
11981198
11991199// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID
12001200func (c * NebiusClient ) buildDiskCreateRequest (ctx context.Context , diskName string , attrs v1.CreateInstanceAttrs ) (* compute.CreateDiskRequest , error ) {
1201+ c .logger .Info (ctx , "buildDiskCreateRequest: start" ,
1202+ v1 .LogField ("diskName" , diskName ),
1203+ v1 .LogField ("attrs.ImageID" , attrs .ImageID ),
1204+ v1 .LogField ("attrs.RefID" , attrs .RefID ),
1205+ v1 .LogField ("attrs.DiskSize" , attrs .DiskSize ))
1206+
12011207 if attrs .DiskSize == 0 {
12021208 attrs .DiskSize = 1280 * units .Gibibyte // Defaulted by the Nebius Console
12031209 }
@@ -1221,7 +1227,13 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12211227 }
12221228
12231229 // First, try to resolve and use image family
1224- if imageFamily , err := c .resolveImageFamily (ctx , attrs .ImageID ); err == nil {
1230+ imageFamily , resolveErr := c .resolveImageFamily (ctx , attrs .ImageID )
1231+ c .logger .Info (ctx , "buildDiskCreateRequest: resolveImageFamily result" ,
1232+ v1 .LogField ("attrs.ImageID" , attrs .ImageID ),
1233+ v1 .LogField ("resolvedFamily" , imageFamily ),
1234+ v1 .LogField ("err" , fmt .Sprintf ("%v" , resolveErr )))
1235+
1236+ if resolveErr == nil {
12251237 publicImagesParent := c .getPublicImagesParent ()
12261238
12271239 // Skip validation for known-good common families to speed up instance start
@@ -1233,8 +1245,14 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12331245 break
12341246 }
12351247 }
1248+ c .logger .Info (ctx , "buildDiskCreateRequest: known-family check" ,
1249+ v1 .LogField ("imageFamily" , imageFamily ),
1250+ v1 .LogField ("isKnownFamily" , isKnownFamily ),
1251+ v1 .LogField ("publicImagesParent" , publicImagesParent ))
12361252
12371253 if isKnownFamily {
1254+ c .logger .Info (ctx , "buildDiskCreateRequest: BRANCH=known-family (skipping validation)" ,
1255+ v1 .LogField ("imageFamily" , imageFamily ))
12381256 // Use known family without validation
12391257 baseReq .Spec .Source = & compute.DiskSpec_SourceImageFamily {
12401258 SourceImageFamily : & compute.SourceImageFamily {
@@ -1251,9 +1269,29 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12511269 ParentId : publicImagesParent ,
12521270 ImageFamily : imageFamily ,
12531271 })
1272+ latestName , latestID , latestArch := "" , "" , ""
1273+ if latestImage != nil {
1274+ if latestImage .Metadata != nil {
1275+ latestName = latestImage .Metadata .Name
1276+ latestID = latestImage .Metadata .Id
1277+ }
1278+ if latestImage .Spec != nil {
1279+ latestArch = latestImage .Spec .GetCpuArchitecture ().String ()
1280+ }
1281+ }
1282+ c .logger .Info (ctx , "buildDiskCreateRequest: GetLatestByFamily result" ,
1283+ v1 .LogField ("imageFamily" , imageFamily ),
1284+ v1 .LogField ("err" , fmt .Sprintf ("%v" , err )),
1285+ v1 .LogField ("latestImageID" , latestID ),
1286+ v1 .LogField ("latestImageName" , latestName ),
1287+ v1 .LogField ("latestImageArch" , latestArch ))
1288+
12541289 if err == nil {
12551290 isARM64 := latestImage .Spec != nil && latestImage .Spec .GetCpuArchitecture () == compute .ImageSpec_ARM64
12561291 if ! isARM64 {
1292+ c .logger .Info (ctx , "buildDiskCreateRequest: BRANCH=validated-family (non-ARM64)" ,
1293+ v1 .LogField ("imageFamily" , imageFamily ),
1294+ v1 .LogField ("latestImageID" , latestID ))
12571295 baseReq .Spec .Source = & compute.DiskSpec_SourceImageFamily {
12581296 SourceImageFamily : & compute.SourceImageFamily {
12591297 ImageFamily : imageFamily ,
@@ -1263,12 +1301,20 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12631301 baseReq .Metadata .Labels ["image-family" ] = imageFamily
12641302 return baseReq , nil
12651303 }
1304+ c .logger .Info (ctx , "buildDiskCreateRequest: validated-family is ARM64, falling through to scoring" ,
1305+ v1 .LogField ("imageFamily" , imageFamily ))
12661306 // ARM64 family — fall through to getWorkingPublicImageID which filters by architecture
12671307 }
12681308 }
12691309
12701310 // Family approach failed, try to use a known working public image ID
1311+ c .logger .Info (ctx , "buildDiskCreateRequest: BRANCH=scoring (falling back to getWorkingPublicImageID)" ,
1312+ v1 .LogField ("attrs.ImageID" , attrs .ImageID ))
12711313 publicImageID , err := c .getWorkingPublicImageID (ctx , attrs .ImageID )
1314+ c .logger .Info (ctx , "buildDiskCreateRequest: getWorkingPublicImageID result" ,
1315+ v1 .LogField ("publicImageID" , publicImageID ),
1316+ v1 .LogField ("err" , fmt .Sprintf ("%v" , err )))
1317+
12721318 if err == nil {
12731319 baseReq .Spec .Source = & compute.DiskSpec_SourceImageId {
12741320 SourceImageId : publicImageID ,
@@ -1285,41 +1331,81 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12851331// It scores every non-ARM64 image and returns the highest-scored one; this is done to handle changes in the ordering of images returned by the Nebius API.
12861332func (c * NebiusClient ) getWorkingPublicImageID (ctx context.Context , requestedImage string ) (string , error ) {
12871333 publicImagesParent := c .getPublicImagesParent ()
1334+ c .logger .Info (ctx , "getWorkingPublicImageID: listing images" ,
1335+ v1 .LogField ("requestedImage" , requestedImage ),
1336+ v1 .LogField ("publicImagesParent" , publicImagesParent ))
1337+
12881338 imagesResp , err := c .sdk .Services ().Compute ().V1 ().Image ().List (ctx , & compute.ListImagesRequest {
12891339 ParentId : publicImagesParent ,
12901340 })
12911341 if err != nil {
1342+ c .logger .Error (ctx , fmt .Errorf ("failed to list public images: %w" , err ),
1343+ v1 .LogField ("publicImagesParent" , publicImagesParent ))
12921344 return "" , fmt .Errorf ("failed to list public images: %w" , err )
12931345 }
12941346
1295- if len (imagesResp .GetItems ()) == 0 {
1347+ totalCount := len (imagesResp .GetItems ())
1348+ c .logger .Info (ctx , "getWorkingPublicImageID: list returned" ,
1349+ v1 .LogField ("totalImages" , totalCount ))
1350+
1351+ if totalCount == 0 {
12961352 return "" , fmt .Errorf ("no public images available" )
12971353 }
12981354
12991355 requestedLower := strings .ToLower (requestedImage )
13001356
13011357 var bestImage * compute.Image
13021358 bestScore := - 1
1359+ consideredCount , arm64Skipped , nilMetadataSkipped := 0 , 0 , 0
13031360
13041361 for _ , image := range imagesResp .GetItems () {
13051362 if image .Metadata == nil {
1363+ nilMetadataSkipped ++
13061364 continue
13071365 }
13081366 if image .Spec != nil && image .Spec .GetCpuArchitecture () == compute .ImageSpec_ARM64 {
1367+ arm64Skipped ++
13091368 continue
13101369 }
1370+ consideredCount ++
13111371
13121372 score := scoreImage (image , requestedLower )
1373+ family := ""
1374+ if image .Spec != nil {
1375+ family = image .Spec .GetImageFamily ()
1376+ }
1377+ c .logger .Info (ctx , "getWorkingPublicImageID: scored" ,
1378+ v1 .LogField ("id" , image .Metadata .Id ),
1379+ v1 .LogField ("name" , image .Metadata .Name ),
1380+ v1 .LogField ("family" , family ),
1381+ v1 .LogField ("score" , score ))
1382+
13131383 if score > bestScore {
13141384 bestScore = score
13151385 bestImage = image
13161386 }
13171387 }
13181388
1389+ c .logger .Info (ctx , "getWorkingPublicImageID: scoring summary" ,
1390+ v1 .LogField ("consideredCount" , consideredCount ),
1391+ v1 .LogField ("arm64Skipped" , arm64Skipped ),
1392+ v1 .LogField ("nilMetadataSkipped" , nilMetadataSkipped ),
1393+ v1 .LogField ("bestScore" , bestScore ))
1394+
13191395 if bestImage == nil {
13201396 return "" , fmt .Errorf ("no suitable public image found" )
13211397 }
13221398
1399+ winnerFamily := ""
1400+ if bestImage .Spec != nil {
1401+ winnerFamily = bestImage .Spec .GetImageFamily ()
1402+ }
1403+ c .logger .Info (ctx , "getWorkingPublicImageID: winner" ,
1404+ v1 .LogField ("id" , bestImage .Metadata .Id ),
1405+ v1 .LogField ("name" , bestImage .Metadata .Name ),
1406+ v1 .LogField ("family" , winnerFamily ),
1407+ v1 .LogField ("score" , bestScore ))
1408+
13231409 return bestImage .Metadata .Id , nil
13241410}
13251411
@@ -1625,6 +1711,10 @@ func (c *NebiusClient) parseInstanceType(ctx context.Context, instanceTypeID str
16251711//
16261712//nolint:gocyclo,unparam // Complex image family resolution with fallback logic
16271713func (c * NebiusClient ) resolveImageFamily (ctx context.Context , imageID string ) (string , error ) {
1714+ c .logger .Info (ctx , "resolveImageFamily: start" ,
1715+ v1 .LogField ("imageID" , imageID ),
1716+ v1 .LogField ("imageIDLen" , len (imageID )))
1717+
16281718 // Common Nebius image families - if ImageID matches one of these, use it directly
16291719 commonFamilies := []string {
16301720 "ubuntu24.04-cuda13.0" ,
@@ -1641,14 +1731,17 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16411731 // Check if ImageID is already a known family name
16421732 for _ , family := range commonFamilies {
16431733 if imageID == family {
1734+ c .logger .Info (ctx , "resolveImageFamily: matched commonFamilies" ,
1735+ v1 .LogField ("family" , family ))
16441736 return family , nil
16451737 }
16461738 }
16471739
16481740 // If ImageID looks like a family name pattern (contains dots, dashes, no UUIDs)
16491741 // and doesn't look like a UUID, assume it's a family name
16501742 if ! strings .Contains (imageID , "-" ) || len (imageID ) < 32 {
1651- // Likely a family name, use it directly
1743+ c .logger .Info (ctx , "resolveImageFamily: treating as family (short/no-dash)" ,
1744+ v1 .LogField ("returnValue" , imageID ))
16521745 return imageID , nil
16531746 }
16541747
@@ -1657,17 +1750,22 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16571750 Id : imageID ,
16581751 })
16591752 if err != nil {
1660- // If we can't get the image, try using the ID as a family name anyway
1661- // This allows for custom family names that don't match our patterns
1753+ c .logger .Info (ctx , "resolveImageFamily: Get failed, returning imageID as family" ,
1754+ v1 .LogField ("imageID" , imageID ),
1755+ v1 .LogField ("err" , fmt .Sprintf ("%v" , err )))
16621756 return imageID , nil
16631757 }
16641758
16651759 // Extract family from image metadata/labels if available
16661760 if image .Metadata != nil && image .Metadata .Labels != nil {
16671761 if family , exists := image .Metadata .Labels ["family" ]; exists && family != "" {
1762+ c .logger .Info (ctx , "resolveImageFamily: resolved via labels[family]" ,
1763+ v1 .LogField ("family" , family ))
16681764 return family , nil
16691765 }
16701766 if family , exists := image .Metadata .Labels ["image-family" ]; exists && family != "" {
1767+ c .logger .Info (ctx , "resolveImageFamily: resolved via labels[image-family]" ,
1768+ v1 .LogField ("family" , family ))
16711769 return family , nil
16721770 }
16731771 }
@@ -1677,15 +1775,21 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16771775 // Try to extract a reasonable family name from the image name
16781776 name := strings .ToLower (image .Metadata .Name )
16791777 if strings .Contains (name , "ubuntu22" ) || strings .Contains (name , "ubuntu-22" ) {
1778+ c .logger .Info (ctx , "resolveImageFamily: inferred ubuntu22 from name" ,
1779+ v1 .LogField ("name" , image .Metadata .Name ))
16801780 return "ubuntu22.04" , nil
16811781 }
16821782 if strings .Contains (name , "ubuntu20" ) || strings .Contains (name , "ubuntu-20" ) {
1783+ c .logger .Info (ctx , "resolveImageFamily: inferred ubuntu20 from name" ,
1784+ v1 .LogField ("name" , image .Metadata .Name ))
16831785 return "ubuntu20.04" , nil
16841786 }
16851787 }
16861788
16871789 // Default fallback - use the original ImageID as family
16881790 // This handles cases where users provide custom family names
1791+ c .logger .Info (ctx , "resolveImageFamily: default fallback, returning imageID as family" ,
1792+ v1 .LogField ("imageID" , imageID ))
16891793 return imageID , nil
16901794}
16911795
0 commit comments