Skip to content

Commit 79f0500

Browse files
wbreza and Copilot authored
feat: add telemetry, documentation, and edge case handling for extension upgrade (#7853)
* feat: add telemetry, documentation, and edge case handling for extension upgrade

  - Add per-extension telemetry events (ext.upgrade) with name, version from/to, source, duration_ms, and outcome attributes
  - Add distinct promotion telemetry events (ext.promote) with source.from and source.to attributes for tracking registry promotion adoption
  - Update azd extension upgrade help text to document default registry behavior, auto-promotion, --source override, continue-on-error in --all mode, and --output json for structured reports
  - Handle delisted extensions as skipped (not failed) so batch continues
  - Detect network errors from FindExtensions/Upgrade and show retry suggestion distinguishing connectivity issues from missing extensions
  - Implement atomic config writes (write-to-temp-then-rename) to prevent corruption during interrupted batch upgrades
  - Add telemetry field constants for extension upgrade tracking
  - Update snapshot tests for new help text

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: address review findings for telemetry and config atomicity

  - Verify atomic write already uses os.CreateTemp (no code change needed)
  - Add tests for temp file cleanup: success path, directory creation failure with clear error, and original file preservation on write failure
  - Add doc comments on upgradeOneExtension and emitPromotionEvent documenting the telemetry testing strategy

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* fix: simplify network error detection and address review feedback

  Remove redundant net.DNSError and net.OpError type assertions from isNetworkError - both implement net.Error, which is already checked. Keep the interface check and string-matching fallback.

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent d14ecbe commit 79f0500

9 files changed

Lines changed: 445 additions & 25 deletions

File tree

cli/azd/.vscode/cspell.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ words:
107107
- mintty
108108
- dockerenv
109109
- dedup
110+
- delisted
110111
- exfiltration
111112
- Fprintf
112113
- gocritic

cli/azd/cmd/extension.go

Lines changed: 150 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,25 @@ import (
88
"errors"
99
"fmt"
1010
"io"
11+
"net"
1112
"slices"
1213
"strings"
1314
"text/tabwriter"
15+
"time"
1416

1517
"github.com/Masterminds/semver/v3"
1618
"github.com/azure/azure-dev/cli/azd/cmd/actions"
1719
"github.com/azure/azure-dev/cli/azd/internal"
20+
"github.com/azure/azure-dev/cli/azd/internal/tracing"
21+
"github.com/azure/azure-dev/cli/azd/internal/tracing/events"
22+
"github.com/azure/azure-dev/cli/azd/internal/tracing/fields"
1823
"github.com/azure/azure-dev/cli/azd/pkg/extensions"
1924
"github.com/azure/azure-dev/cli/azd/pkg/input"
2025
"github.com/azure/azure-dev/cli/azd/pkg/output"
2126
"github.com/azure/azure-dev/cli/azd/pkg/output/ux"
2227
uxlib "github.com/azure/azure-dev/cli/azd/pkg/ux"
2328
"github.com/spf13/cobra"
29+
"go.opentelemetry.io/otel/codes"
2430
)
2531

2632
// Register extension commands
@@ -84,7 +90,19 @@ func extensionActions(root *actions.ActionDescriptor) *actions.ActionDescriptor
8490
group.Add("upgrade", &actions.ActionDescriptorOptions{
8591
Command: &cobra.Command{
8692
Use: "upgrade [extension-id]",
87-
Short: "Upgrade specified extensions.",
93+
Short: "Upgrade installed extensions to the latest version.",
94+
Long: `Upgrade one or more installed extensions.
95+
96+
By default, uses the stored registry source for each extension. If the stored
97+
source is unavailable, falls back to the main (azd) registry. Extensions that
98+
were installed from a non-main registry (e.g., dev) are automatically promoted
99+
to the main registry when a newer version is available there.
100+
101+
Use --source to explicitly override the registry source for the upgrade. Use
102+
--all to upgrade all installed extensions in a single batch; failures in one
103+
extension do not prevent the remaining extensions from being upgraded.
104+
105+
Use --output json for a structured report of all upgrade results.`,
88106
},
89107
OutputFormats: []output.Format{output.JsonFormat, output.NoneFormat},
90108
DefaultFormat: output.NoneFormat,
@@ -1079,16 +1097,50 @@ loop:
10791097

10801098
// upgradeOneExtension processes a single extension upgrade and returns
10811099
// the result. It never returns an error — failures are captured in
1082-
// the returned UpgradeResult.
1100+
// the returned UpgradeResult. A telemetry span is emitted for every
1101+
// attempt.
1102+
//
1103+
// Telemetry attributes use constants from internal/tracing/fields to ensure consistency.
1104+
// Integration testing of telemetry output is done via the tracing test infrastructure
1105+
// in internal/tracing/ — unit tests here verify the upgrade logic, not span emission.
10831106
func (a *extensionUpgradeAction) upgradeOneExtension(
10841107
ctx context.Context,
10851108
extensionId string,
10861109
index int,
10871110
azdVersion *semver.Version,
10881111
isJsonOutput bool,
10891112
) extensions.UpgradeResult {
1113+
startTime := time.Now()
10901114
baseResult := extensions.UpgradeResult{ExtensionId: extensionId}
10911115

1116+
// Start a telemetry span for this individual extension upgrade.
1117+
ctx, span := tracing.Start(ctx, events.ExtensionUpgradeEvent)
1118+
defer func() {
1119+
elapsed := time.Since(startTime).Milliseconds()
1120+
span.SetAttributes(
1121+
fields.ExtensionId.String(extensionId),
1122+
fields.ExtensionVersionFrom.String(
1123+
baseResult.FromVersion,
1124+
),
1125+
fields.ExtensionVersionTo.String(
1126+
baseResult.ToVersion,
1127+
),
1128+
fields.ExtensionSource.String(
1129+
baseResult.ToSource,
1130+
),
1131+
fields.ExtensionUpgradeDurationMs.Int64(elapsed),
1132+
fields.ExtensionUpgradeOutcome.String(
1133+
baseResult.Status.String(),
1134+
),
1135+
)
1136+
if baseResult.Status == extensions.UpgradeStatusFailed {
1137+
span.SetStatus(codes.Error, "upgrade.failed")
1138+
} else {
1139+
span.SetStatus(codes.Ok, "")
1140+
}
1141+
span.End()
1142+
}()
1143+
10921144
if !isJsonOutput && index > 0 {
10931145
a.console.Message(ctx, "")
10941146
}
@@ -1148,15 +1200,35 @@ func (a *extensionUpgradeAction) upgradeOneExtension(
11481200
ctx, allMatchOptions,
11491201
)
11501202
if err != nil {
1203+
if isNetworkError(err) {
1204+
return fail(fmt.Errorf(
1205+
"network error looking up extension %s "+
1206+
"(check your connection and retry): %w",
1207+
extensionId, err,
1208+
))
1209+
}
11511210
return fail(fmt.Errorf(
11521211
"failed to find extension %s: %w", extensionId, err,
11531212
))
11541213
}
11551214
if len(matches) == 0 {
1156-
return fail(fmt.Errorf(
1157-
"extension '%s' not found in any configured registry",
1158-
extensionId,
1159-
))
1215+
// Delisted or unavailable — skip instead of fail so
1216+
// the batch continues.
1217+
baseResult.Status = extensions.UpgradeStatusSkipped
1218+
baseResult.SkipReason = "extension no longer available " +
1219+
"in any configured registry"
1220+
if !isJsonOutput {
1221+
skipMsg := fmt.Sprintf(
1222+
"Upgrading %s extension",
1223+
output.WithHighLightFormat(extensionId),
1224+
) + output.WithGrayFormat(
1225+
" (No longer available in any registry)",
1226+
)
1227+
a.console.StopSpinner(
1228+
ctx, skipMsg, input.StepSkipped,
1229+
)
1230+
}
1231+
return baseResult
11601232
}
11611233

11621234
var selectedExt *extensions.ExtensionMetadata
@@ -1287,15 +1359,27 @@ func (a *extensionUpgradeAction) upgradeOneExtension(
12871359
ctx, compatExt, a.flags.version,
12881360
)
12891361
if err != nil {
1362+
if isNetworkError(err) {
1363+
return fail(fmt.Errorf(
1364+
"network error upgrading %s "+
1365+
"(check your connection and retry): %w",
1366+
extensionId, err,
1367+
))
1368+
}
12901369
return fail(fmt.Errorf(
12911370
"failed to upgrade extension: %w", err,
12921371
))
12931372
}
12941373
baseResult.ToVersion = extVersion.Version
12951374

1296-
// Handle promotion display
1375+
// Handle promotion display and distinct telemetry
12971376
if isPromotion {
12981377
baseResult.Status = extensions.UpgradeStatusPromoted
1378+
emitPromotionEvent(
1379+
ctx, extensionId,
1380+
installed.Version, extVersion.Version,
1381+
oldSource, newSource,
1382+
)
12991383
if !isJsonOutput {
13001384
a.displayPromotionWarning(
13011385
ctx, stepMsg, extensionId,
@@ -1439,6 +1523,65 @@ func upgradeActionResult(
14391523
}, nil
14401524
}
14411525

1526+
// emitPromotionEvent fires a distinct telemetry span for a registry
1527+
// promotion (e.g., dev → main). This allows tracking promotion
1528+
// adoption rates separately from regular upgrades.
1529+
//
1530+
// Telemetry attributes use constants from internal/tracing/fields to ensure consistency.
1531+
// Integration testing of telemetry output is done via the tracing test infrastructure
1532+
// in internal/tracing/ — unit tests here verify the upgrade logic, not span emission.
1533+
func emitPromotionEvent(
1534+
ctx context.Context,
1535+
extensionId string,
1536+
fromVersion string,
1537+
toVersion string,
1538+
oldSource string,
1539+
newSource string,
1540+
) {
1541+
_, promSpan := tracing.Start(ctx, events.ExtensionPromoteEvent)
1542+
promSpan.SetAttributes(
1543+
fields.ExtensionId.String(extensionId),
1544+
fields.ExtensionVersionFrom.String(fromVersion),
1545+
fields.ExtensionVersionTo.String(toVersion),
1546+
fields.ExtensionSourceFrom.String(oldSource),
1547+
fields.ExtensionSourceTo.String(newSource),
1548+
)
1549+
promSpan.SetStatus(codes.Ok, "")
1550+
promSpan.End()
1551+
}
1552+
1553+
// isNetworkError reports whether err is caused by a network or
// transport-level failure (DNS resolution, TCP connection, TLS
// handshake, timeout). This distinguishes connectivity issues from
// "extension not found" errors so callers can suggest a retry.
//
// An error counts as a network error when any error in its chain
// implements net.Error, or, as a fallback for errors whose concrete
// type was lost (for example, flattened into a plain string), when its
// message contains a well-known network failure phrase. The phrase
// match is case-insensitive because the wording and capitalization of
// these messages differ between platforms. A nil err returns false.
func isNetworkError(err error) bool {
	if err == nil {
		return false
	}

	var netErr net.Error
	if errors.As(err, &netErr) {
		return true
	}

	// String-matching fallback for wrapped errors that may not implement
	// net.Error. Keywords are lower-case; the message is lower-cased once
	// so that variants such as "No such host is known." still match.
	msg := strings.ToLower(err.Error())
	networkKeywords := []string{
		"connection refused",
		"actively refused", // Windows wording for a refused connection
		"no such host",
		"i/o timeout",
		"tls handshake timeout",
		"network is unreachable",
	}
	for _, kw := range networkKeywords {
		if strings.Contains(msg, kw) {
			return true
		}
	}

	return false
}
1584+
14421585
type extensionSourceListAction struct {
14431586
formatter output.Formatter
14441587
writer io.Writer

0 commit comments

Comments
 (0)