Skip to content

Commit 758e933

Browse files
committed
chore(nebius): adding better logging
1 parent dceb545 commit 758e933

1 file changed

Lines changed: 109 additions & 5 deletions

File tree

v1/providers/nebius/instance.go

Lines changed: 109 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,6 +1198,12 @@ func (c *NebiusClient) createBootDisk(ctx context.Context, attrs v1.CreateInstan
11981198

11991199
// buildDiskCreateRequest builds a disk creation request, trying image family first, then image ID
12001200
func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName string, attrs v1.CreateInstanceAttrs) (*compute.CreateDiskRequest, error) {
1201+
c.logger.Info(ctx, "buildDiskCreateRequest: start",
1202+
v1.LogField("diskName", diskName),
1203+
v1.LogField("attrs.ImageID", attrs.ImageID),
1204+
v1.LogField("attrs.RefID", attrs.RefID),
1205+
v1.LogField("attrs.DiskSize", attrs.DiskSize))
1206+
12011207
if attrs.DiskSize == 0 {
12021208
attrs.DiskSize = 1280 * units.Gibibyte // Defaulted by the Nebius Console
12031209
}
@@ -1221,7 +1227,13 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12211227
}
12221228

12231229
// First, try to resolve and use image family
1224-
if imageFamily, err := c.resolveImageFamily(ctx, attrs.ImageID); err == nil {
1230+
imageFamily, resolveErr := c.resolveImageFamily(ctx, attrs.ImageID)
1231+
c.logger.Info(ctx, "buildDiskCreateRequest: resolveImageFamily result",
1232+
v1.LogField("attrs.ImageID", attrs.ImageID),
1233+
v1.LogField("resolvedFamily", imageFamily),
1234+
v1.LogField("err", fmt.Sprintf("%v", resolveErr)))
1235+
1236+
if resolveErr == nil {
12251237
publicImagesParent := c.getPublicImagesParent()
12261238

12271239
// Skip validation for known-good common families to speed up instance start
@@ -1233,8 +1245,14 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12331245
break
12341246
}
12351247
}
1248+
c.logger.Info(ctx, "buildDiskCreateRequest: known-family check",
1249+
v1.LogField("imageFamily", imageFamily),
1250+
v1.LogField("isKnownFamily", isKnownFamily),
1251+
v1.LogField("publicImagesParent", publicImagesParent))
12361252

12371253
if isKnownFamily {
1254+
c.logger.Info(ctx, "buildDiskCreateRequest: BRANCH=known-family (skipping validation)",
1255+
v1.LogField("imageFamily", imageFamily))
12381256
// Use known family without validation
12391257
baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{
12401258
SourceImageFamily: &compute.SourceImageFamily{
@@ -1251,9 +1269,29 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12511269
ParentId: publicImagesParent,
12521270
ImageFamily: imageFamily,
12531271
})
1272+
latestName, latestID, latestArch := "", "", ""
1273+
if latestImage != nil {
1274+
if latestImage.Metadata != nil {
1275+
latestName = latestImage.Metadata.Name
1276+
latestID = latestImage.Metadata.Id
1277+
}
1278+
if latestImage.Spec != nil {
1279+
latestArch = latestImage.Spec.GetCpuArchitecture().String()
1280+
}
1281+
}
1282+
c.logger.Info(ctx, "buildDiskCreateRequest: GetLatestByFamily result",
1283+
v1.LogField("imageFamily", imageFamily),
1284+
v1.LogField("err", fmt.Sprintf("%v", err)),
1285+
v1.LogField("latestImageID", latestID),
1286+
v1.LogField("latestImageName", latestName),
1287+
v1.LogField("latestImageArch", latestArch))
1288+
12541289
if err == nil {
12551290
isARM64 := latestImage.Spec != nil && latestImage.Spec.GetCpuArchitecture() == compute.ImageSpec_ARM64
12561291
if !isARM64 {
1292+
c.logger.Info(ctx, "buildDiskCreateRequest: BRANCH=validated-family (non-ARM64)",
1293+
v1.LogField("imageFamily", imageFamily),
1294+
v1.LogField("latestImageID", latestID))
12571295
baseReq.Spec.Source = &compute.DiskSpec_SourceImageFamily{
12581296
SourceImageFamily: &compute.SourceImageFamily{
12591297
ImageFamily: imageFamily,
@@ -1263,12 +1301,20 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12631301
baseReq.Metadata.Labels["image-family"] = imageFamily
12641302
return baseReq, nil
12651303
}
1304+
c.logger.Info(ctx, "buildDiskCreateRequest: validated-family is ARM64, falling through to scoring",
1305+
v1.LogField("imageFamily", imageFamily))
12661306
// ARM64 family — fall through to getWorkingPublicImageID which filters by architecture
12671307
}
12681308
}
12691309

12701310
// Family approach failed, try to use a known working public image ID
1311+
c.logger.Info(ctx, "buildDiskCreateRequest: BRANCH=scoring (falling back to getWorkingPublicImageID)",
1312+
v1.LogField("attrs.ImageID", attrs.ImageID))
12711313
publicImageID, err := c.getWorkingPublicImageID(ctx, attrs.ImageID)
1314+
c.logger.Info(ctx, "buildDiskCreateRequest: getWorkingPublicImageID result",
1315+
v1.LogField("publicImageID", publicImageID),
1316+
v1.LogField("err", fmt.Sprintf("%v", err)))
1317+
12721318
if err == nil {
12731319
baseReq.Spec.Source = &compute.DiskSpec_SourceImageId{
12741320
SourceImageId: publicImageID,
@@ -1285,41 +1331,81 @@ func (c *NebiusClient) buildDiskCreateRequest(ctx context.Context, diskName stri
12851331
// It scores every non-ARM64 image and returns the highest-scored one; this is done to handle changes in the ordering of images returned by the Nebius API.
12861332
func (c *NebiusClient) getWorkingPublicImageID(ctx context.Context, requestedImage string) (string, error) {
12871333
publicImagesParent := c.getPublicImagesParent()
1334+
c.logger.Info(ctx, "getWorkingPublicImageID: listing images",
1335+
v1.LogField("requestedImage", requestedImage),
1336+
v1.LogField("publicImagesParent", publicImagesParent))
1337+
12881338
imagesResp, err := c.sdk.Services().Compute().V1().Image().List(ctx, &compute.ListImagesRequest{
12891339
ParentId: publicImagesParent,
12901340
})
12911341
if err != nil {
1342+
c.logger.Error(ctx, fmt.Errorf("failed to list public images: %w", err),
1343+
v1.LogField("publicImagesParent", publicImagesParent))
12921344
return "", fmt.Errorf("failed to list public images: %w", err)
12931345
}
12941346

1295-
if len(imagesResp.GetItems()) == 0 {
1347+
totalCount := len(imagesResp.GetItems())
1348+
c.logger.Info(ctx, "getWorkingPublicImageID: list returned",
1349+
v1.LogField("totalImages", totalCount))
1350+
1351+
if totalCount == 0 {
12961352
return "", fmt.Errorf("no public images available")
12971353
}
12981354

12991355
requestedLower := strings.ToLower(requestedImage)
13001356

13011357
var bestImage *compute.Image
13021358
bestScore := -1
1359+
consideredCount, arm64Skipped, nilMetadataSkipped := 0, 0, 0
13031360

13041361
for _, image := range imagesResp.GetItems() {
13051362
if image.Metadata == nil {
1363+
nilMetadataSkipped++
13061364
continue
13071365
}
13081366
if image.Spec != nil && image.Spec.GetCpuArchitecture() == compute.ImageSpec_ARM64 {
1367+
arm64Skipped++
13091368
continue
13101369
}
1370+
consideredCount++
13111371

13121372
score := scoreImage(image, requestedLower)
1373+
family := ""
1374+
if image.Spec != nil {
1375+
family = image.Spec.GetImageFamily()
1376+
}
1377+
c.logger.Info(ctx, "getWorkingPublicImageID: scored",
1378+
v1.LogField("id", image.Metadata.Id),
1379+
v1.LogField("name", image.Metadata.Name),
1380+
v1.LogField("family", family),
1381+
v1.LogField("score", score))
1382+
13131383
if score > bestScore {
13141384
bestScore = score
13151385
bestImage = image
13161386
}
13171387
}
13181388

1389+
c.logger.Info(ctx, "getWorkingPublicImageID: scoring summary",
1390+
v1.LogField("consideredCount", consideredCount),
1391+
v1.LogField("arm64Skipped", arm64Skipped),
1392+
v1.LogField("nilMetadataSkipped", nilMetadataSkipped),
1393+
v1.LogField("bestScore", bestScore))
1394+
13191395
if bestImage == nil {
13201396
return "", fmt.Errorf("no suitable public image found")
13211397
}
13221398

1399+
winnerFamily := ""
1400+
if bestImage.Spec != nil {
1401+
winnerFamily = bestImage.Spec.GetImageFamily()
1402+
}
1403+
c.logger.Info(ctx, "getWorkingPublicImageID: winner",
1404+
v1.LogField("id", bestImage.Metadata.Id),
1405+
v1.LogField("name", bestImage.Metadata.Name),
1406+
v1.LogField("family", winnerFamily),
1407+
v1.LogField("score", bestScore))
1408+
13231409
return bestImage.Metadata.Id, nil
13241410
}
13251411

@@ -1625,6 +1711,10 @@ func (c *NebiusClient) parseInstanceType(ctx context.Context, instanceTypeID str
16251711
//
16261712
//nolint:gocyclo,unparam // Complex image family resolution with fallback logic
16271713
func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (string, error) {
1714+
c.logger.Info(ctx, "resolveImageFamily: start",
1715+
v1.LogField("imageID", imageID),
1716+
v1.LogField("imageIDLen", len(imageID)))
1717+
16281718
// Common Nebius image families - if ImageID matches one of these, use it directly
16291719
commonFamilies := []string{
16301720
"ubuntu24.04-cuda13.0",
@@ -1641,14 +1731,17 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16411731
// Check if ImageID is already a known family name
16421732
for _, family := range commonFamilies {
16431733
if imageID == family {
1734+
c.logger.Info(ctx, "resolveImageFamily: matched commonFamilies",
1735+
v1.LogField("family", family))
16441736
return family, nil
16451737
}
16461738
}
16471739

16481740
// If ImageID looks like a family name pattern (no dashes, or shorter than 32 characters)
16491741
// and doesn't look like a UUID, assume it's a family name
16501742
if !strings.Contains(imageID, "-") || len(imageID) < 32 {
1651-
// Likely a family name, use it directly
1743+
c.logger.Info(ctx, "resolveImageFamily: treating as family (short/no-dash)",
1744+
v1.LogField("returnValue", imageID))
16521745
return imageID, nil
16531746
}
16541747

@@ -1657,17 +1750,22 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16571750
Id: imageID,
16581751
})
16591752
if err != nil {
1660-
// If we can't get the image, try using the ID as a family name anyway
1661-
// This allows for custom family names that don't match our patterns
1753+
c.logger.Info(ctx, "resolveImageFamily: Get failed, returning imageID as family",
1754+
v1.LogField("imageID", imageID),
1755+
v1.LogField("err", fmt.Sprintf("%v", err)))
16621756
return imageID, nil
16631757
}
16641758

16651759
// Extract family from image metadata/labels if available
16661760
if image.Metadata != nil && image.Metadata.Labels != nil {
16671761
if family, exists := image.Metadata.Labels["family"]; exists && family != "" {
1762+
c.logger.Info(ctx, "resolveImageFamily: resolved via labels[family]",
1763+
v1.LogField("family", family))
16681764
return family, nil
16691765
}
16701766
if family, exists := image.Metadata.Labels["image-family"]; exists && family != "" {
1767+
c.logger.Info(ctx, "resolveImageFamily: resolved via labels[image-family]",
1768+
v1.LogField("family", family))
16711769
return family, nil
16721770
}
16731771
}
@@ -1677,15 +1775,21 @@ func (c *NebiusClient) resolveImageFamily(ctx context.Context, imageID string) (
16771775
// Try to extract a reasonable family name from the image name
16781776
name := strings.ToLower(image.Metadata.Name)
16791777
if strings.Contains(name, "ubuntu22") || strings.Contains(name, "ubuntu-22") {
1778+
c.logger.Info(ctx, "resolveImageFamily: inferred ubuntu22 from name",
1779+
v1.LogField("name", image.Metadata.Name))
16801780
return "ubuntu22.04", nil
16811781
}
16821782
if strings.Contains(name, "ubuntu20") || strings.Contains(name, "ubuntu-20") {
1783+
c.logger.Info(ctx, "resolveImageFamily: inferred ubuntu20 from name",
1784+
v1.LogField("name", image.Metadata.Name))
16831785
return "ubuntu20.04", nil
16841786
}
16851787
}
16861788

16871789
// Default fallback - use the original ImageID as family
16881790
// This handles cases where users provide custom family names
1791+
c.logger.Info(ctx, "resolveImageFamily: default fallback, returning imageID as family",
1792+
v1.LogField("imageID", imageID))
16891793
return imageID, nil
16901794
}
16911795

0 commit comments

Comments
 (0)