From 3350917031021e071f9261caae4a2a1080033c1b Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Mon, 1 Jun 2026 07:52:52 +0000 Subject: [PATCH 1/5] Add upgrade detection for registry workloads CLI and API users have no way to discover when a newer version of a registry-sourced MCP server is available; only Studio implements drift detection, in its frontend. Introduce a backend package that all clients can consume. Add pkg/workloads/upgrade with a Checker that compares a running workload's image tag against its registry entry (semver-aware, with a string fallback) and reports environment-variable and configuration (transport / permission-profile / network-isolation) drift. Comparison degrades safely to "unknown" for :latest, digest refs, repository changes, and non-registry-sourced workloads, so only a strictly-newer tag on the same repository yields "upgrade-available". This is the read-only detection core (RFC THV-0068, phase A); the apply path, API endpoints, and CLI follow in later changes. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/workloads/upgrade/checker.go | 218 +++++++++++++++++++ pkg/workloads/upgrade/checker_test.go | 298 ++++++++++++++++++++++++++ pkg/workloads/upgrade/doc.go | 25 +++ pkg/workloads/upgrade/types.go | 169 +++++++++++++++ pkg/workloads/upgrade/types_test.go | 124 +++++++++++ pkg/workloads/upgrade/version.go | 119 ++++++++++ pkg/workloads/upgrade/version_test.go | 205 ++++++++++++++++++ 7 files changed, 1158 insertions(+) create mode 100644 pkg/workloads/upgrade/checker.go create mode 100644 pkg/workloads/upgrade/checker_test.go create mode 100644 pkg/workloads/upgrade/doc.go create mode 100644 pkg/workloads/upgrade/types.go create mode 100644 pkg/workloads/upgrade/types_test.go create mode 100644 pkg/workloads/upgrade/version.go create mode 100644 pkg/workloads/upgrade/version_test.go diff --git a/pkg/workloads/upgrade/checker.go b/pkg/workloads/upgrade/checker.go new file mode 100644 index 0000000000..5a7ce1f28c --- /dev/null +++ b/pkg/workloads/upgrade/checker.go @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "context" + "errors" + "fmt" + + regtypes "github.com/stacklok/toolhive-core/registry/types" + "github.com/stacklok/toolhive/pkg/registry" + "github.com/stacklok/toolhive/pkg/runner" + "github.com/stacklok/toolhive/pkg/secrets" +) + +// Checker determines whether registry-sourced workloads have an available +// upgrade by comparing their current image and configuration against the +// metadata the injected registry provider reports. +type Checker struct { + provider registry.Provider +} + +// NewChecker creates a Checker backed by the given registry provider. +// +// The provider is the source of truth for candidate image metadata; callers +// typically pass the shared singleton from registry.GetDefaultProvider so the +// provider's response cache is reused across checks. It returns an error if the +// provider is nil. +func NewChecker(provider registry.Provider) (*Checker, error) { + if provider == nil { + return nil, fmt.Errorf("registry provider must not be nil") + } + return &Checker{provider: provider}, nil +} + +// Check evaluates a single workload's RunConfig against the registry and +// returns the upgrade status. It never mutates the supplied config. Per-item +// problems (missing server, unparsable tags, non-image entries) are encoded in +// the returned CheckResult's Status/Reason rather than returned as an error; +// an error is returned only for an invalid call (nil config). +func (c *Checker) Check(_ context.Context, cfg *runner.RunConfig) (*CheckResult, error) { + if cfg == nil { + return nil, fmt.Errorf("run config must not be nil") + } + + result := &CheckResult{ + WorkloadName: cfg.Name, + RegistryServer: cfg.RegistryServerName, + CurrentImage: cfg.Image, + } + + if cfg.RegistryServerName == "" { + result.Status = StatusNotRegistrySourced + return result, nil + } + + server, err := c.provider.GetServer(cfg.RegistryServerName) + if err != nil { + if errors.Is(err, registry.ErrServerNotFound) { + result.Status = StatusServerNotFound + return result, nil + } + result.Status = StatusUnknown + result.Reason = fmt.Sprintf("registry lookup failed: %v", err) + return result, nil + } + + imgMeta, ok := server.(*regtypes.ImageMetadata) + if !ok { + result.Status = StatusUnknown + result.Reason = fmt.Sprintf("registry entry %q is not a container image (cannot determine upgrade)", cfg.RegistryServerName) + return result, nil + } + + result.CandidateImage = imgMeta.Image + + comparison, reason := compareImageTags(cfg.Image, imgMeta.Image) + switch comparison { + case comparisonNewer: + result.Status = StatusUpgradeAvailable + result.EnvVarDrift = computeEnvDrift(cfg, imgMeta) + result.ConfigDrift = computeConfigDrift(cfg, imgMeta) + case comparisonSameOrOlder: + result.Status = StatusUpToDate + case comparisonUndecidable: + result.Status = StatusUnknown + result.Reason = reason + } + + return result, nil +} + +// CheckAll evaluates a batch of workloads. It never returns an error: each +// workload's outcome (including per-item failures) is encoded in its own +// CheckResult. The returned slice preserves the input order. Nil entries in the +// input are skipped. +func (c *Checker) CheckAll(ctx context.Context, configs []*runner.RunConfig) []*CheckResult { + results := make([]*CheckResult, 0, len(configs)) + for _, cfg := range configs { + if cfg == nil { + continue + } + // Check only errors on a nil config, which we already guarded against, + // so the error here is unreachable; encode defensively rather than drop. + res, err := c.Check(ctx, cfg) + if err != nil { + res = &CheckResult{ + WorkloadName: cfg.Name, + Status: StatusUnknown, + Reason: fmt.Sprintf("check failed: %v", err), + } + } + results = append(results, res) + } + return results +} + +// computeEnvDrift reports the candidate environment variables the workload does +// not currently satisfy. A variable is considered satisfied if it appears as a +// plain env var key in the config, or as the target of one of the config's +// secret parameters. Removed is left unpopulated (best-effort, forward-compat). +// +// It treats the config as read-only. Returns nil when there is no drift. +func computeEnvDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *EnvVarDrift { + satisfied := make(map[string]struct{}, len(cfg.EnvVars)+len(cfg.Secrets)) + for k := range cfg.EnvVars { + satisfied[k] = struct{}{} + } + for _, s := range cfg.Secrets { + parsed, err := secrets.ParseSecretParameter(s) + if err != nil { + // Malformed secret parameters can't satisfy a variable; skip them. + continue + } + if parsed.Target != "" { + satisfied[parsed.Target] = struct{}{} + } + } + + var added []EnvVarInfo + for _, ev := range imgMeta.EnvVars { + if ev == nil { + continue + } + if _, ok := satisfied[ev.Name]; ok { + continue + } + added = append(added, toEnvVarInfo(ev)) + } + + if len(added) == 0 { + return nil + } + return &EnvVarDrift{Added: added} +} + +// computeConfigDrift reports posture differences between the workload's current +// configuration and the candidate registry entry. Each field is nil when that +// aspect did not drift or could not be compared. +// +// The permission profile is compared against imgMeta.Permissions.Name (a +// *permissions.Profile, not a string). Comparison degrades gracefully: when the +// candidate has no profile, or the workload's profile is a custom name/path +// that has no registry analogue, that dimension is not reported as drift unless +// both sides are known and differ. It treats the config as read-only. +func computeConfigDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *ConfigDrift { + drift := &ConfigDrift{} + + // Transport: compare the workload's transport string against the registry + // entry's transport. GetTransport() may return an empty string when the + // registry entry does not declare one; only report drift when both are set. + currentTransport := cfg.Transport.String() + candidateTransport := imgMeta.GetTransport() + if candidateTransport != "" && currentTransport != "" && currentTransport != candidateTransport { + drift.Transport = &StringChange{From: currentTransport, To: candidateTransport} + } + + // Network isolation is a plain boolean on the config; the registry entry has + // no explicit network-isolation field, so the candidate posture is the + // default (false). Report drift only when the workload currently isolates. + if cfg.IsolateNetwork { + drift.NetworkIsolation = &BoolChange{From: true, To: false} + } + + // Permission profile: compare names. The candidate name is only known when + // the registry entry carries a profile with a non-empty Name. + candidateProfile := "" + if imgMeta.Permissions != nil { + candidateProfile = imgMeta.Permissions.Name + } + currentProfile := cfg.PermissionProfileNameOrPath + if candidateProfile != "" && currentProfile != "" && currentProfile != candidateProfile { + drift.PermissionProfile = &StringChange{From: currentProfile, To: candidateProfile} + } + + if drift.Transport == nil && drift.NetworkIsolation == nil && drift.PermissionProfile == nil { + return nil + } + return drift +} + +// toEnvVarInfo converts a registry EnvVar into the drift-report shape, clearing +// the Default value when the variable is a secret to avoid leaking sensitive +// data into reports that may be logged or returned over the API. +func toEnvVarInfo(ev *regtypes.EnvVar) EnvVarInfo { + info := EnvVarInfo{ + Name: ev.Name, + Description: ev.Description, + Required: ev.Required, + Secret: ev.Secret, + Default: ev.Default, + } + if info.Secret { + info.Default = "" + } + return info +} diff --git a/pkg/workloads/upgrade/checker_test.go b/pkg/workloads/upgrade/checker_test.go new file mode 100644 index 0000000000..af23776b36 --- /dev/null +++ b/pkg/workloads/upgrade/checker_test.go @@ -0,0 +1,298 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + + "github.com/stacklok/toolhive-core/permissions" + regtypes "github.com/stacklok/toolhive-core/registry/types" + "github.com/stacklok/toolhive/pkg/registry" + registrymocks "github.com/stacklok/toolhive/pkg/registry/mocks" + "github.com/stacklok/toolhive/pkg/runner" + transporttypes "github.com/stacklok/toolhive/pkg/transport/types" +) + +const testServerName = "example-server" + +func TestNewChecker(t *testing.T) { + t.Parallel() + + t.Run("nil provider fails", func(t *testing.T) { + t.Parallel() + c, err := NewChecker(nil) + require.Error(t, err) + assert.Nil(t, c) + }) + + t.Run("valid provider succeeds", func(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + c, err := NewChecker(registrymocks.NewMockProvider(ctrl)) + require.NoError(t, err) + assert.NotNil(t, c) + }) +} + +func TestChecker_Check(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + cfg *runner.RunConfig + setupMock func(m *registrymocks.MockProvider) + wantStatus UpgradeStatus + wantReason bool // true if Reason must be non-empty + assertMore func(t *testing.T, res *CheckResult) + }{ + { + name: "not registry sourced", + cfg: &runner.RunConfig{ + Name: "raw", + Image: "ghcr.io/example/server:1.0.0", + }, + setupMock: func(_ *registrymocks.MockProvider) {}, + wantStatus: StatusNotRegistrySourced, + }, + { + name: "server not found", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.0.0", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(nil, registry.ErrServerNotFound) + }, + wantStatus: StatusServerNotFound, + }, + { + name: "registry lookup error is unknown", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.0.0", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(nil, fmt.Errorf("network down")) + }, + wantStatus: StatusUnknown, + wantReason: true, + }, + { + name: "non-image entry is unknown", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.0.0", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(®types.RemoteServerMetadata{}, nil) + }, + wantStatus: StatusUnknown, + wantReason: true, + }, + { + name: "up to date", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.2.0", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(imageMeta("ghcr.io/example/server:1.2.0"), nil) + }, + wantStatus: StatusUpToDate, + }, + { + name: "undecidable tags is unknown", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:latest", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(imageMeta("ghcr.io/example/server:1.2.0"), nil) + }, + wantStatus: StatusUnknown, + wantReason: true, + }, + { + name: "upgrade available with no drift", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.0.0", + RegistryServerName: testServerName, + }, + setupMock: func(m *registrymocks.MockProvider) { + m.EXPECT().GetServer(testServerName).Return(imageMeta("ghcr.io/example/server:1.2.0"), nil) + }, + wantStatus: StatusUpgradeAvailable, + assertMore: func(t *testing.T, res *CheckResult) { + t.Helper() + assert.Equal(t, "ghcr.io/example/server:1.2.0", res.CandidateImage) + assert.Nil(t, res.EnvVarDrift) + assert.Nil(t, res.ConfigDrift) + }, + }, + { + name: "upgrade available surfaces env and config drift", + cfg: &runner.RunConfig{ + Name: "wl", + Image: "ghcr.io/example/server:1.0.0", + RegistryServerName: testServerName, + Transport: transporttypes.TransportTypeStdio, + IsolateNetwork: true, + PermissionProfileNameOrPath: "none", + EnvVars: map[string]string{"LOG_LEVEL": "info"}, + Secrets: []string{"mykey,target=API_KEY"}, + }, + setupMock: func(m *registrymocks.MockProvider) { + meta := imageMeta("ghcr.io/example/server:1.2.0") + meta.Transport = string(transporttypes.TransportTypeStreamableHTTP) + meta.Permissions = &permissions.Profile{Name: "network"} + meta.EnvVars = []*regtypes.EnvVar{ + {Name: "LOG_LEVEL"}, // satisfied via EnvVars + {Name: "API_KEY", Secret: true}, // satisfied via secret target + {Name: "NEW_VAR", Required: true}, // drift: added + {Name: "NEW_SECRET", Secret: true, Default: "leak-me"}, // drift: added, default cleared + } + m.EXPECT().GetServer(testServerName).Return(meta, nil) + }, + wantStatus: StatusUpgradeAvailable, + assertMore: func(t *testing.T, res *CheckResult) { + t.Helper() + require.NotNil(t, res.EnvVarDrift) + names := make(map[string]EnvVarInfo, len(res.EnvVarDrift.Added)) + for _, e := range res.EnvVarDrift.Added { + names[e.Name] = e + } + assert.Len(t, names, 2) + assert.Contains(t, names, "NEW_VAR") + require.Contains(t, names, "NEW_SECRET") + assert.Empty(t, names["NEW_SECRET"].Default, "secret default must be cleared in drift") + + require.NotNil(t, res.ConfigDrift) + require.NotNil(t, res.ConfigDrift.Transport) + assert.Equal(t, "stdio", res.ConfigDrift.Transport.From) + assert.Equal(t, "streamable-http", res.ConfigDrift.Transport.To) + require.NotNil(t, res.ConfigDrift.NetworkIsolation) + assert.True(t, res.ConfigDrift.NetworkIsolation.From) + assert.False(t, res.ConfigDrift.NetworkIsolation.To) + require.NotNil(t, res.ConfigDrift.PermissionProfile) + assert.Equal(t, "none", res.ConfigDrift.PermissionProfile.From) + assert.Equal(t, "network", res.ConfigDrift.PermissionProfile.To) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + mockProvider := registrymocks.NewMockProvider(ctrl) + tt.setupMock(mockProvider) + + checker, err := NewChecker(mockProvider) + require.NoError(t, err) + + res, err := checker.Check(t.Context(), tt.cfg) + require.NoError(t, err) + require.NotNil(t, res) + + assert.Equal(t, tt.wantStatus, res.Status) + if tt.wantReason { + assert.NotEmpty(t, res.Reason) + } + if tt.assertMore != nil { + tt.assertMore(t, res) + } + }) + } +} + +func TestChecker_Check_NilConfig(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + checker, err := NewChecker(registrymocks.NewMockProvider(ctrl)) + require.NoError(t, err) + + res, err := checker.Check(t.Context(), nil) + require.Error(t, err) + assert.Nil(t, res) +} + +func TestChecker_CheckAll(t *testing.T) { + t.Parallel() + ctrl := gomock.NewController(t) + mockProvider := registrymocks.NewMockProvider(ctrl) + mockProvider.EXPECT().GetServer(testServerName).Return(imageMeta("ghcr.io/example/server:1.2.0"), nil) + + checker, err := NewChecker(mockProvider) + require.NoError(t, err) + + configs := []*runner.RunConfig{ + nil, // skipped + {Name: "raw", Image: "ghcr.io/example/server:1.0.0"}, // not-registry-sourced + {Name: "wl", Image: "ghcr.io/example/server:1.0.0", RegistryServerName: testServerName}, // upgrade-available + } + + results := checker.CheckAll(t.Context(), configs) + require.Len(t, results, 2, "nil config must be skipped") + assert.Equal(t, StatusNotRegistrySourced, results[0].Status) + assert.Equal(t, StatusUpgradeAvailable, results[1].Status) +} + +func TestComputeEnvDrift_NoDrift(t *testing.T) { + t.Parallel() + cfg := &runner.RunConfig{ + EnvVars: map[string]string{"FOO": "bar"}, + Secrets: []string{"sk,target=TOKEN"}, + } + meta := imageMeta("ghcr.io/example/server:1.0.0") + meta.EnvVars = []*regtypes.EnvVar{ + {Name: "FOO"}, + {Name: "TOKEN", Secret: true}, + nil, // tolerated + } + assert.Nil(t, computeEnvDrift(cfg, meta)) +} + +func TestComputeConfigDrift_GracefulDegradation(t *testing.T) { + t.Parallel() + + t.Run("nil permissions and empty transport do not drift", func(t *testing.T) { + t.Parallel() + cfg := &runner.RunConfig{ + Transport: transporttypes.TransportTypeStdio, + PermissionProfileNameOrPath: "custom-profile", + } + meta := imageMeta("ghcr.io/example/server:1.0.0") + meta.Transport = "" // registry declares no transport + meta.Permissions = nil + assert.Nil(t, computeConfigDrift(cfg, meta)) + }) + + t.Run("network isolation alone drifts", func(t *testing.T) { + t.Parallel() + cfg := &runner.RunConfig{IsolateNetwork: true} + meta := imageMeta("ghcr.io/example/server:1.0.0") + meta.Transport = "" + drift := computeConfigDrift(cfg, meta) + require.NotNil(t, drift) + require.NotNil(t, drift.NetworkIsolation) + assert.Nil(t, drift.Transport) + assert.Nil(t, drift.PermissionProfile) + }) +} + +// imageMeta builds a minimal ImageMetadata for the given image reference. +func imageMeta(image string) *regtypes.ImageMetadata { + return ®types.ImageMetadata{Image: image} +} diff --git a/pkg/workloads/upgrade/doc.go b/pkg/workloads/upgrade/doc.go new file mode 100644 index 0000000000..be7d699c35 --- /dev/null +++ b/pkg/workloads/upgrade/doc.go @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Package upgrade implements registry-sourced workload upgrade checks for +// ToolHive (RFC THV-0068). It compares the image and configuration of a +// running workload against the latest metadata served by the registry and +// reports whether an upgrade is available, along with any environment-variable +// or posture (transport/network/permission) drift. +// +// # Dependency direction (cycle guard) +// +// This package depends on pkg/workloads (for the apply path in later phases) +// and pkg/runner. To avoid an import cycle, pkg/workloads MUST NEVER import +// this package. Higher-level entry points (CLI, API handlers) wire the two +// together; the manager itself stays unaware of upgrade logic. +// +// # No rollback +// +// The apply path (Phase D) intentionally provides no rollback. It resolves, +// verifies, and pulls the candidate image BEFORE destroying the existing +// workload so that a failure during preparation leaves the running workload +// untouched. Once the workload is deleted and recreated, there is no automatic +// revert to the previous image or configuration; recovery is a forward +// operation (re-running the previous configuration explicitly). +package upgrade diff --git a/pkg/workloads/upgrade/types.go b/pkg/workloads/upgrade/types.go new file mode 100644 index 0000000000..95a88bd705 --- /dev/null +++ b/pkg/workloads/upgrade/types.go @@ -0,0 +1,169 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "github.com/stacklok/toolhive/pkg/runner" + "github.com/stacklok/toolhive/pkg/runner/retriever" +) + +// UpgradeStatus is the outcome of an upgrade check for a single workload. +// +//revive:disable-next-line:exported // "UpgradeStatus" reads better than "Status" at call sites. +type UpgradeStatus string + +const ( + // StatusUpToDate indicates the workload is already running the candidate + // image reported by the registry (or a newer one). + StatusUpToDate UpgradeStatus = "up-to-date" + + // StatusUpgradeAvailable indicates the registry reports a newer image than + // the one the workload is currently running. + StatusUpgradeAvailable UpgradeStatus = "upgrade-available" + + // StatusNotRegistrySourced indicates the workload was not created from a + // registry entry (no RegistryServerName), so no upgrade can be determined. + StatusNotRegistrySourced UpgradeStatus = "not-registry-sourced" + + // StatusServerNotFound indicates the workload references a registry server + // name that no longer exists in the configured registry. + StatusServerNotFound UpgradeStatus = "server-not-found" + + // StatusUnknown indicates the upgrade status could not be determined, e.g. + // the registry lookup failed, the entry is a remote (non-image) server, or + // the image tags are not comparable. The Reason field explains why. + StatusUnknown UpgradeStatus = "unknown" +) + +// CheckResult is the outcome of checking a single workload for an available +// upgrade. EnvVarDrift and ConfigDrift are only populated when an upgrade is +// available and the relevant drift was detected. +type CheckResult struct { + // WorkloadName is the name of the workload that was checked. + WorkloadName string `json:"workload_name"` + + // RegistryServer is the registry entry name the workload was sourced from. + // Empty when the workload is not registry-sourced. + RegistryServer string `json:"registry_server,omitempty"` + + // Status is the upgrade status for the workload. + Status UpgradeStatus `json:"status"` + + // CurrentImage is the image reference the workload is currently running. + CurrentImage string `json:"current_image,omitempty"` + + // CandidateImage is the image reference the registry currently reports. + CandidateImage string `json:"candidate_image,omitempty"` + + // Reason provides additional context, primarily for StatusUnknown. + Reason string `json:"reason,omitempty"` + + // EnvVarDrift describes environment variables the candidate registry entry + // declares that differ from the workload's current configuration. + EnvVarDrift *EnvVarDrift `json:"env_var_drift,omitempty"` + + // ConfigDrift describes posture differences (transport, network isolation, + // permission profile) between the workload and the candidate registry entry. + ConfigDrift *ConfigDrift `json:"config_drift,omitempty"` +} + +// EnvVarDrift describes how the candidate registry entry's declared +// environment variables differ from those the workload currently satisfies. +type EnvVarDrift struct { + // Added lists environment variables the candidate declares that the + // workload does not currently supply (via plain env vars or secrets). + Added []EnvVarInfo `json:"added,omitempty"` + + // Removed lists environment variables the workload supplies that the + // candidate no longer declares. Populated on a best-effort basis; may be + // empty even when removals exist (forward-compatible field). + Removed []EnvVarInfo `json:"removed,omitempty"` +} + +// EnvVarInfo is a registry-declared environment variable surfaced in drift. +type EnvVarInfo struct { + // Name is the environment variable name. + Name string `json:"name"` + + // Description is the human-readable purpose of the variable. + Description string `json:"description,omitempty"` + + // Required indicates whether the candidate marks the variable as required. + Required bool `json:"required"` + + // Secret indicates whether the variable holds sensitive data. + Secret bool `json:"secret,omitempty"` + + // Default is the candidate's default value. It is cleared (left empty) + // whenever Secret is true: a secret env var's default could carry sensitive + // data, and surfacing it in a drift report (which may be logged or returned + // over the API) would leak it. Non-secret defaults are safe to display. + Default string `json:"default,omitempty"` +} + +// ConfigDrift describes posture differences between a workload's current +// configuration and the candidate registry entry. A nil field means that +// aspect did not drift (or could not be compared). +type ConfigDrift struct { + // Transport is set when the candidate's transport differs from the + // workload's current transport. + Transport *StringChange `json:"transport,omitempty"` + + // NetworkIsolation is set when the candidate's network isolation posture + // differs from the workload's current setting. + NetworkIsolation *BoolChange `json:"network_isolation,omitempty"` + + // PermissionProfile is set when the candidate's permission profile differs + // from the workload's current profile. + PermissionProfile *StringChange `json:"permission_profile,omitempty"` +} + +// StringChange records a string-valued configuration change from the +// workload's current value to the candidate registry value. +type StringChange struct { + From string `json:"from"` + To string `json:"to"` +} + +// BoolChange records a boolean-valued configuration change from the workload's +// current value to the candidate registry value. +type BoolChange struct { + From bool `json:"from"` + To bool `json:"to"` +} + +// ApplyOptions controls how an upgrade is applied to a workload. +// +// It is defined here for use by the Applier (Phase D). No apply logic is +// implemented in this phase. +type ApplyOptions struct { + // EnvVars are additional or overriding environment variables to merge into + // the upgraded workload's configuration. + EnvVars map[string]string + + // Secrets are additional secret parameters (`,target=`) to merge + // into the upgraded workload's configuration. + Secrets []string + + // EnvVarValidator validates that required environment variables and secrets + // are supplied for the candidate registry entry. + EnvVarValidator runner.EnvVarValidator + + // VerifySetting controls image signature verification. Empty defaults to + // retriever.VerifyImageWarn. + VerifySetting string + + // CACertPath is an optional path to a CA certificate bundle used when + // resolving the candidate image from a registry over TLS. + CACertPath string +} + +// defaultVerifySetting returns the configured verification setting, falling +// back to retriever.VerifyImageWarn when unset. +func (o ApplyOptions) defaultVerifySetting() string { + if o.VerifySetting == "" { + return retriever.VerifyImageWarn + } + return o.VerifySetting +} diff --git a/pkg/workloads/upgrade/types_test.go b/pkg/workloads/upgrade/types_test.go new file mode 100644 index 0000000000..2acc01a514 --- /dev/null +++ b/pkg/workloads/upgrade/types_test.go @@ -0,0 +1,124 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" + + regtypes "github.com/stacklok/toolhive-core/registry/types" + "github.com/stacklok/toolhive/pkg/runner/retriever" +) + +// TestEnvVarInfoMirrorsRegistryEnvVar is a drift guard: EnvVarInfo carries a +// curated, display-safe subset of regtypes.EnvVar. If a new field is added to +// regtypes.EnvVar, this test fails so a maintainer consciously decides whether +// the new field should be surfaced (and whether it is safe to display). +func TestEnvVarInfoMirrorsRegistryEnvVar(t *testing.T) { + t.Parallel() + + regFields := structFieldNames(reflect.TypeOf(regtypes.EnvVar{})) + infoFields := structFieldNames(reflect.TypeOf(EnvVarInfo{})) + + // Every field of regtypes.EnvVar that we expect to mirror must be present in + // EnvVarInfo. If regtypes.EnvVar grows, update this expectation deliberately. + expectedRegFields := []string{"Name", "Description", "Required", "Default", "Secret"} + assert.ElementsMatch(t, expectedRegFields, regFields, + "regtypes.EnvVar fields changed; review toEnvVarInfo and the EnvVarInfo drift surface") + + for _, f := range expectedRegFields { + assert.Contains(t, infoFields, f, "EnvVarInfo must mirror regtypes.EnvVar field %q", f) + } +} + +func TestToEnvVarInfo(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + in *regtypes.EnvVar + want EnvVarInfo + }{ + { + name: "non-secret keeps default", + in: ®types.EnvVar{ + Name: "LOG_LEVEL", + Description: "logging verbosity", + Required: false, + Default: "info", + Secret: false, + }, + want: EnvVarInfo{ + Name: "LOG_LEVEL", + Description: "logging verbosity", + Required: false, + Default: "info", + Secret: false, + }, + }, + { + name: "secret clears default", + in: ®types.EnvVar{ + Name: "API_KEY", + Description: "service api key", + Required: true, + Default: "super-secret-default", + Secret: true, + }, + want: EnvVarInfo{ + Name: "API_KEY", + Description: "service api key", + Required: true, + Default: "", // cleared because Secret == true + Secret: true, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := toEnvVarInfo(tt.in) + assert.Equal(t, tt.want, got) + if tt.in.Secret { + assert.Empty(t, got.Default, "secret env var default must be cleared") + } + }) + } +} + +func TestApplyOptionsDefaultVerifySetting(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + opts ApplyOptions + want string + }{ + {name: "empty defaults to warn", opts: ApplyOptions{}, want: retriever.VerifyImageWarn}, + {name: "explicit setting preserved", opts: ApplyOptions{VerifySetting: retriever.VerifyImageEnabled}, want: retriever.VerifyImageEnabled}, + {name: "disabled preserved", opts: ApplyOptions{VerifySetting: retriever.VerifyImageDisabled}, want: retriever.VerifyImageDisabled}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, tt.opts.defaultVerifySetting()) + }) + } +} + +// structFieldNames returns the exported field names of a struct type. +func structFieldNames(t reflect.Type) []string { + names := make([]string, 0, t.NumField()) + for i := 0; i < t.NumField(); i++ { + f := t.Field(i) + if f.IsExported() { + names = append(names, f.Name) + } + } + return names +} diff --git a/pkg/workloads/upgrade/version.go b/pkg/workloads/upgrade/version.go new file mode 100644 index 0000000000..64067cd906 --- /dev/null +++ b/pkg/workloads/upgrade/version.go @@ -0,0 +1,119 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "fmt" + + "github.com/google/go-containerregistry/pkg/name" + "golang.org/x/mod/semver" +) + +// tagComparison is the result of comparing a workload's current image tag +// against a candidate registry image tag. +type tagComparison int + +const ( + // comparisonSameOrOlder means the candidate is the same as, or older than, + // the current image (no upgrade available). + comparisonSameOrOlder tagComparison = iota + + // comparisonNewer means the candidate is strictly newer than the current + // image (an upgrade is available). + comparisonNewer + + // comparisonUndecidable means the two references cannot be meaningfully + // compared (different repositories, a "latest" tag, or non-semver tags that + // are not equal). The accompanying reason explains why. + comparisonUndecidable +) + +// latestTag is the floating tag that defers to the registry and therefore +// cannot be compared for ordering. +const latestTag = "latest" + +// compareImageTags compares a workload's current image against a candidate +// registry image and reports whether the candidate represents an upgrade. +// +// Both references are parsed and their repositories are compared using the +// host-normalized fully-qualified name. The comparison is intentionally +// conservative: any situation it cannot reason about confidently (parse +// failure, differing repositories, a "latest" tag on either side, or +// non-equal non-semver tags) yields comparisonUndecidable with a reason rather +// than a guess. Equal semver or equal literal tags yield comparisonSameOrOlder. +func compareImageTags(current, candidate string) (tagComparison, string) { + curRepo, curTag, err := splitRepoTag(current) + if err != nil { + return comparisonUndecidable, fmt.Sprintf("cannot parse current image %q: %v", current, err) + } + candRepo, candTag, err := splitRepoTag(candidate) + if err != nil { + return comparisonUndecidable, fmt.Sprintf("cannot parse candidate image %q: %v", candidate, err) + } + + if curRepo != candRepo { + return comparisonUndecidable, + fmt.Sprintf("repository differs: current %q vs candidate %q", curRepo, candRepo) + } + + if curTag == latestTag || candTag == latestTag { + return comparisonUndecidable, "image uses the \"latest\" tag, which defers to the registry and cannot be ordered" + } + + curSemver := normalizeSemver(curTag) + candSemver := normalizeSemver(candTag) + if semver.IsValid(curSemver) && semver.IsValid(candSemver) { + switch semver.Compare(semver.Canonical(curSemver), semver.Canonical(candSemver)) { + case -1: + return comparisonNewer, "" + default: + // Equal or current is newer: nothing to upgrade to. + return comparisonSameOrOlder, "" + } + } + + // Fallback for non-semver tags: only an exact match is decidable. + if curTag == candTag { + return comparisonSameOrOlder, "" + } + return comparisonUndecidable, + fmt.Sprintf("tags are not comparable: current %q vs candidate %q", curTag, candTag) +} + +// splitRepoTag parses an image reference and returns its host-normalized +// fully-qualified repository name and tag string. +// +// It uses name.ParseReference and asserts the result to name.Tag, then reads +// the repository via Context().Name() and the tag via TagStr() (not +// Identifier(), which returns a digest for digest references). A reference that +// is not a tag (e.g. a bare digest) is rejected so the caller can treat it as +// undecidable rather than mis-compare a digest against a tag. +func splitRepoTag(ref string) (repo, tag string, err error) { + parsed, err := name.ParseReference(ref) + if err != nil { + return "", "", err + } + tagged, ok := parsed.(name.Tag) + if !ok { + return "", "", fmt.Errorf("reference %q is not a tag", ref) + } + return tagged.Context().Name(), tagged.TagStr(), nil +} + +// normalizeSemver prepends a "v" to a tag that looks like a bare semantic +// version (e.g. "1.2.3" -> "v1.2.3") so it can be validated and compared by +// golang.org/x/mod/semver, which requires the "v" prefix. Tags that already +// start with "v", or that are not semver-shaped, are returned unchanged. +func normalizeSemver(tag string) string { + if tag == "" { + return tag + } + if tag[0] == 'v' || tag[0] == 'V' { + return tag + } + if tag[0] >= '0' && tag[0] <= '9' { + return "v" + tag + } + return tag +} diff --git a/pkg/workloads/upgrade/version_test.go b/pkg/workloads/upgrade/version_test.go new file mode 100644 index 0000000000..d96e0859f0 --- /dev/null +++ b/pkg/workloads/upgrade/version_test.go @@ -0,0 +1,205 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package upgrade + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCompareImageTags(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + current string + candidate string + want tagComparison + }{ + { + name: "candidate newer semver", + current: "ghcr.io/example/server:1.0.0", + candidate: "ghcr.io/example/server:1.1.0", + want: comparisonNewer, + }, + { + name: "candidate newer semver with v prefix", + current: "ghcr.io/example/server:v1.0.0", + candidate: "ghcr.io/example/server:v2.0.0", + want: comparisonNewer, + }, + { + name: "double-digit minor orders numerically not lexically", + current: "ghcr.io/example/server:1.9.0", + candidate: "ghcr.io/example/server:1.10.0", + want: comparisonNewer, + }, + { + name: "prerelease is older than its release", + current: "ghcr.io/example/server:1.2.3-rc1", + candidate: "ghcr.io/example/server:1.2.3", + want: comparisonNewer, + }, + { + name: "candidate equal semver", + current: "ghcr.io/example/server:1.2.3", + candidate: "ghcr.io/example/server:1.2.3", + want: comparisonSameOrOlder, + }, + { + name: "candidate older semver", + current: "ghcr.io/example/server:2.0.0", + candidate: "ghcr.io/example/server:1.0.0", + want: comparisonSameOrOlder, + }, + { + name: "mixed v-prefix normalization", + current: "ghcr.io/example/server:1.0.0", + candidate: "ghcr.io/example/server:v1.5.0", + want: comparisonNewer, + }, + { + name: "equal non-semver literal tag", + current: "ghcr.io/example/server:stable", + candidate: "ghcr.io/example/server:stable", + want: comparisonSameOrOlder, + }, + { + name: "different non-semver tags undecidable", + current: "ghcr.io/example/server:stable", + candidate: "ghcr.io/example/server:edge", + want: comparisonUndecidable, + }, + { + name: "different repositories undecidable", + current: "ghcr.io/example/server:1.0.0", + candidate: "ghcr.io/other/server:1.1.0", + want: comparisonUndecidable, + }, + { + name: "latest on candidate undecidable", + current: "ghcr.io/example/server:1.0.0", + candidate: "ghcr.io/example/server:latest", + want: comparisonUndecidable, + }, + { + name: "latest on current undecidable", + current: "ghcr.io/example/server:latest", + candidate: "ghcr.io/example/server:1.0.0", + want: comparisonUndecidable, + }, + { + name: "implicit latest (no tag) undecidable", + current: "ghcr.io/example/server", + candidate: "ghcr.io/example/server:1.0.0", + want: comparisonUndecidable, + }, + { + name: "docker hub short name normalized to same repo", + current: "library/nginx:1.0.0", + candidate: "library/nginx:1.1.0", + want: comparisonNewer, + }, + { + name: "digest reference is undecidable", + current: "ghcr.io/example/server@sha256:" + "0000000000000000000000000000000000000000000000000000000000000000", + candidate: "ghcr.io/example/server:1.1.0", + want: comparisonUndecidable, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, reason := compareImageTags(tt.current, tt.candidate) + assert.Equal(t, tt.want, got) + if got == comparisonUndecidable { + assert.NotEmpty(t, reason, "undecidable result must carry a reason") + } else { + assert.Empty(t, reason, "decidable result must not carry a reason") + } + }) + } +} + +func TestSplitRepoTag(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ref string + wantRepo string + wantTag string + wantErr bool + }{ + { + name: "fully qualified ref", + ref: "ghcr.io/example/server:1.0.0", + wantRepo: "ghcr.io/example/server", + wantTag: "1.0.0", + }, + { + name: "docker hub short name normalized", + ref: "library/nginx:1.0.0", + wantRepo: "index.docker.io/library/nginx", + wantTag: "1.0.0", + }, + { + name: "no tag defaults to latest", + ref: "ghcr.io/example/server", + wantRepo: "ghcr.io/example/server", + wantTag: "latest", + }, + { + name: "digest reference rejected", + ref: "ghcr.io/example/server@sha256:" + "0000000000000000000000000000000000000000000000000000000000000000", + wantErr: true, + }, + { + name: "invalid reference", + ref: "::::not a ref::::", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + repo, tag, err := splitRepoTag(tt.ref) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.wantRepo, repo) + assert.Equal(t, tt.wantTag, tag) + }) + } +} + +func TestNormalizeSemver(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + in string + want string + }{ + {name: "bare version gets v prefix", in: "1.2.3", want: "v1.2.3"}, + {name: "already prefixed unchanged", in: "v1.2.3", want: "v1.2.3"}, + {name: "uppercase V unchanged", in: "V1.2.3", want: "V1.2.3"}, + {name: "non-semver tag unchanged", in: "stable", want: "stable"}, + {name: "empty unchanged", in: "", want: ""}, + {name: "numeric prefix gets v", in: "2024.01", want: "v2024.01"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, normalizeSemver(tt.in)) + }) + } +} From 6cb950658dee29adbce5886e1090e0447fb1730f Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Mon, 1 Jun 2026 13:07:02 +0000 Subject: [PATCH 2/5] Address review feedback on upgrade detection - Lowercase an uppercase "V" tag prefix so semver comparison works; "V1.2.0" vs "V1.3.0" no longer falls through to undecidable and hides a real upgrade. - Drop the raw provider error from CheckResult.Reason (it is serialized into the API response and can leak internal addressing); log it at DEBUG and return a fixed string. Same for the CheckAll path. - Add a defensive default to the comparison switch so an unexpected value yields StatusUnknown rather than the least-safe StatusUpToDate. - Stop reporting network-isolation drift: the registry has no network-isolation field, so it fired for every isolated workload regardless of the candidate version. Remove the ConfigDrift field and the now-unused BoolChange type. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/workloads/upgrade/checker.go | 25 +++++++++++++++---------- pkg/workloads/upgrade/checker_test.go | 15 +++++---------- pkg/workloads/upgrade/types.go | 15 ++------------- pkg/workloads/upgrade/version.go | 15 ++++++++++++--- pkg/workloads/upgrade/version_test.go | 8 +++++++- 5 files changed, 41 insertions(+), 37 deletions(-) diff --git a/pkg/workloads/upgrade/checker.go b/pkg/workloads/upgrade/checker.go index 5a7ce1f28c..ac62436cb4 100644 --- a/pkg/workloads/upgrade/checker.go +++ b/pkg/workloads/upgrade/checker.go @@ -7,6 +7,7 @@ import ( "context" "errors" "fmt" + "log/slog" regtypes "github.com/stacklok/toolhive-core/registry/types" "github.com/stacklok/toolhive/pkg/registry" @@ -61,8 +62,13 @@ func (c *Checker) Check(_ context.Context, cfg *runner.RunConfig) (*CheckResult, result.Status = StatusServerNotFound return result, nil } + // Keep the detailed provider error out of the result: Reason is + // serialized into the HTTP response, and for an unreachable or + // misconfigured registry the raw error can carry internal addressing + // (e.g. "dial tcp 10.x.x.x:443: ..."). Log it for operators instead. + slog.Debug("registry lookup failed", "server", cfg.RegistryServerName, "error", err) result.Status = StatusUnknown - result.Reason = fmt.Sprintf("registry lookup failed: %v", err) + result.Reason = "registry lookup failed" return result, nil } @@ -86,6 +92,11 @@ func (c *Checker) Check(_ context.Context, cfg *runner.RunConfig) (*CheckResult, case comparisonUndecidable: result.Status = StatusUnknown result.Reason = reason + default: + // Defensive: a future tagComparison value (or an unset zero value) must + // not fall through to the least-safe StatusUpToDate. Treat anything + // unexpected as unknown. + result.Status = StatusUnknown } return result, nil @@ -105,10 +116,11 @@ func (c *Checker) CheckAll(ctx context.Context, configs []*runner.RunConfig) []* // so the error here is unreachable; encode defensively rather than drop. res, err := c.Check(ctx, cfg) if err != nil { + slog.Debug("upgrade check failed", "workload", cfg.Name, "error", err) res = &CheckResult{ WorkloadName: cfg.Name, Status: StatusUnknown, - Reason: fmt.Sprintf("check failed: %v", err), + Reason: "check failed", } } results = append(results, res) @@ -176,13 +188,6 @@ func computeConfigDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) drift.Transport = &StringChange{From: currentTransport, To: candidateTransport} } - // Network isolation is a plain boolean on the config; the registry entry has - // no explicit network-isolation field, so the candidate posture is the - // default (false). Report drift only when the workload currently isolates. - if cfg.IsolateNetwork { - drift.NetworkIsolation = &BoolChange{From: true, To: false} - } - // Permission profile: compare names. The candidate name is only known when // the registry entry carries a profile with a non-empty Name. candidateProfile := "" @@ -194,7 +199,7 @@ func computeConfigDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) drift.PermissionProfile = &StringChange{From: currentProfile, To: candidateProfile} } - if drift.Transport == nil && drift.NetworkIsolation == nil && drift.PermissionProfile == nil { + if drift.Transport == nil && drift.PermissionProfile == nil { return nil } return drift diff --git a/pkg/workloads/upgrade/checker_test.go b/pkg/workloads/upgrade/checker_test.go index af23776b36..797b0a5053 100644 --- a/pkg/workloads/upgrade/checker_test.go +++ b/pkg/workloads/upgrade/checker_test.go @@ -148,7 +148,6 @@ func TestChecker_Check(t *testing.T) { Image: "ghcr.io/example/server:1.0.0", RegistryServerName: testServerName, Transport: transporttypes.TransportTypeStdio, - IsolateNetwork: true, PermissionProfileNameOrPath: "none", EnvVars: map[string]string{"LOG_LEVEL": "info"}, Secrets: []string{"mykey,target=API_KEY"}, @@ -182,9 +181,6 @@ func TestChecker_Check(t *testing.T) { require.NotNil(t, res.ConfigDrift.Transport) assert.Equal(t, "stdio", res.ConfigDrift.Transport.From) assert.Equal(t, "streamable-http", res.ConfigDrift.Transport.To) - require.NotNil(t, res.ConfigDrift.NetworkIsolation) - assert.True(t, res.ConfigDrift.NetworkIsolation.From) - assert.False(t, res.ConfigDrift.NetworkIsolation.To) require.NotNil(t, res.ConfigDrift.PermissionProfile) assert.Equal(t, "none", res.ConfigDrift.PermissionProfile.From) assert.Equal(t, "network", res.ConfigDrift.PermissionProfile.To) @@ -279,16 +275,15 @@ func TestComputeConfigDrift_GracefulDegradation(t *testing.T) { assert.Nil(t, computeConfigDrift(cfg, meta)) }) - t.Run("network isolation alone drifts", func(t *testing.T) { + t.Run("network isolation alone does not drift", func(t *testing.T) { t.Parallel() + // The registry has no network-isolation field, so a workload's own + // isolation choice is not registry-driven drift and must not be + // reported (it would otherwise fire for every isolated workload). cfg := &runner.RunConfig{IsolateNetwork: true} meta := imageMeta("ghcr.io/example/server:1.0.0") meta.Transport = "" - drift := computeConfigDrift(cfg, meta) - require.NotNil(t, drift) - require.NotNil(t, drift.NetworkIsolation) - assert.Nil(t, drift.Transport) - assert.Nil(t, drift.PermissionProfile) + assert.Nil(t, computeConfigDrift(cfg, meta)) }) } diff --git a/pkg/workloads/upgrade/types.go b/pkg/workloads/upgrade/types.go index 95a88bd705..9cc1ae44d8 100644 --- a/pkg/workloads/upgrade/types.go +++ b/pkg/workloads/upgrade/types.go @@ -63,8 +63,8 @@ type CheckResult struct { // declares that differ from the workload's current configuration. EnvVarDrift *EnvVarDrift `json:"env_var_drift,omitempty"` - // ConfigDrift describes posture differences (transport, network isolation, - // permission profile) between the workload and the candidate registry entry. + // ConfigDrift describes posture differences (transport, permission profile) + // between the workload and the candidate registry entry. ConfigDrift *ConfigDrift `json:"config_drift,omitempty"` } @@ -110,10 +110,6 @@ type ConfigDrift struct { // workload's current transport. Transport *StringChange `json:"transport,omitempty"` - // NetworkIsolation is set when the candidate's network isolation posture - // differs from the workload's current setting. - NetworkIsolation *BoolChange `json:"network_isolation,omitempty"` - // PermissionProfile is set when the candidate's permission profile differs // from the workload's current profile. PermissionProfile *StringChange `json:"permission_profile,omitempty"` @@ -126,13 +122,6 @@ type StringChange struct { To string `json:"to"` } -// BoolChange records a boolean-valued configuration change from the workload's -// current value to the candidate registry value. -type BoolChange struct { - From bool `json:"from"` - To bool `json:"to"` -} - // ApplyOptions controls how an upgrade is applied to a workload. // // It is defined here for use by the Applier (Phase D). No apply logic is diff --git a/pkg/workloads/upgrade/version.go b/pkg/workloads/upgrade/version.go index 64067cd906..280f62f78d 100644 --- a/pkg/workloads/upgrade/version.go +++ b/pkg/workloads/upgrade/version.go @@ -103,15 +103,24 @@ func splitRepoTag(ref string) (repo, tag string, err error) { // normalizeSemver prepends a "v" to a tag that looks like a bare semantic // version (e.g. "1.2.3" -> "v1.2.3") so it can be validated and compared by -// golang.org/x/mod/semver, which requires the "v" prefix. Tags that already -// start with "v", or that are not semver-shaped, are returned unchanged. +// golang.org/x/mod/semver, which requires a lowercase "v" prefix. Tags that +// already start with a lowercase "v" are returned unchanged; an uppercase "V" +// prefix is lowercased (semver.IsValid rejects "V1.2.3"). Tags that are not +// semver-shaped are returned unchanged. func normalizeSemver(tag string) string { if tag == "" { return tag } - if tag[0] == 'v' || tag[0] == 'V' { + if tag[0] == 'v' { return tag } + if tag[0] == 'V' { + // semver.IsValid only accepts a lowercase "v"; without this an + // uppercase-tagged pair (e.g. "V1.2.0" vs "V1.3.0") falls through to + // the non-semver path and reports an undecidable comparison, hiding a + // real upgrade. + return "v" + tag[1:] + } if tag[0] >= '0' && tag[0] <= '9' { return "v" + tag } diff --git a/pkg/workloads/upgrade/version_test.go b/pkg/workloads/upgrade/version_test.go index d96e0859f0..c22c645a1c 100644 --- a/pkg/workloads/upgrade/version_test.go +++ b/pkg/workloads/upgrade/version_test.go @@ -31,6 +31,12 @@ func TestCompareImageTags(t *testing.T) { candidate: "ghcr.io/example/server:v2.0.0", want: comparisonNewer, }, + { + name: "candidate newer semver with uppercase V prefix", + current: "ghcr.io/example/server:V1.2.0", + candidate: "ghcr.io/example/server:V1.3.0", + want: comparisonNewer, + }, { name: "double-digit minor orders numerically not lexically", current: "ghcr.io/example/server:1.9.0", @@ -190,7 +196,7 @@ func TestNormalizeSemver(t *testing.T) { }{ {name: "bare version gets v prefix", in: "1.2.3", want: "v1.2.3"}, {name: "already prefixed unchanged", in: "v1.2.3", want: "v1.2.3"}, - {name: "uppercase V unchanged", in: "V1.2.3", want: "V1.2.3"}, + {name: "uppercase V lowercased for semver validation", in: "V1.2.3", want: "v1.2.3"}, {name: "non-semver tag unchanged", in: "stable", want: "stable"}, {name: "empty unchanged", in: "", want: ""}, {name: "numeric prefix gets v", in: "2024.01", want: "v2024.01"}, From 0300063308de29f9d9cfa17f61b4bb5a4de355ac Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Mon, 1 Jun 2026 08:05:05 +0000 Subject: [PATCH 3/5] Add upgrade-check REST endpoints for workloads CLI, Studio, and automation need a single backend source of truth for upgrade availability instead of each client reimplementing registry drift detection. Expose the Phase A checker over the existing workloads API. Add GET /api/v1beta/workloads/upgrade-check (batch) and GET /api/v1beta/workloads/{name}/upgrade-check (single). The batch handler reuses the exact group/all authorization scoping of the list endpoint and intersects it with the enumerated run configs, so it can never report a workload outside the caller's scope. Responses carry only non-sensitive CheckResult metadata; secret env-var defaults are cleared. Both routes are read-only and skip image pulls, so they use the standard timeout. Regenerate the OpenAPI spec. The dedicated apply endpoint (POST .../upgrade) follows once the Applier lands (RFC THV-0068, phase B). Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/server/docs.go | 273 +++++++++++++++++ docs/server/swagger.json | 273 +++++++++++++++++ docs/server/swagger.yaml | 218 +++++++++++++ pkg/api/v1/workload_types.go | 20 ++ pkg/api/v1/workloads.go | 33 +- pkg/api/v1/workloads_upgrade.go | 183 +++++++++++ pkg/api/v1/workloads_upgrade_test.go | 439 +++++++++++++++++++++++++++ 7 files changed, 1434 insertions(+), 5 deletions(-) create mode 100644 pkg/api/v1/workloads_upgrade.go create mode 100644 pkg/api/v1/workloads_upgrade_test.go diff --git a/docs/server/docs.go b/docs/server/docs.go index 9ca280736f..cc932c0f79 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -1966,6 +1966,145 @@ const docTemplate = `{ }, "type": "object" }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange": { + "description": "NetworkIsolation is set when the candidate's network isolation posture\ndiffers from the workload's current setting.", + "properties": { + "from": { + "type": "boolean" + }, + "to": { + "type": "boolean" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult": { + "description": "Result is the upgrade-check outcome for the workload. It carries only\nmetadata (status, image references, drift) and never secret values.", + "properties": { + "candidate_image": { + "description": "CandidateImage is the image reference the registry currently reports.", + "type": "string" + }, + "config_drift": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift" + }, + "current_image": { + "description": "CurrentImage is the image reference the workload is currently running.", + "type": "string" + }, + "env_var_drift": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift" + }, + "reason": { + "description": "Reason provides additional context, primarily for StatusUnknown.", + "type": "string" + }, + "registry_server": { + "description": "RegistryServer is the registry entry name the workload was sourced from.\nEmpty when the workload is not registry-sourced.", + "type": "string" + }, + "status": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus" + }, + "workload_name": { + "description": "WorkloadName is the name of the workload that was checked.", + "type": "string" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift": { + "description": "ConfigDrift describes posture differences (transport, network isolation,\npermission profile) between the workload and the candidate registry entry.", + "properties": { + "network_isolation": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange" + }, + "permission_profile": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" + }, + "transport": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift": { + "description": "EnvVarDrift describes environment variables the candidate registry entry\ndeclares that differ from the workload's current configuration.", + "properties": { + "added": { + "description": "Added lists environment variables the candidate declares that the\nworkload does not currently supply (via plain env vars or secrets).", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo" + }, + "type": "array", + "uniqueItems": false + }, + "removed": { + "description": "Removed lists environment variables the workload supplies that the\ncandidate no longer declares. Populated on a best-effort basis; may be\nempty even when removals exist (forward-compatible field).", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo" + }, + "type": "array", + "uniqueItems": false + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo": { + "properties": { + "default": { + "description": "Default is the candidate's default value. It is cleared (left empty)\nwhenever Secret is true: a secret env var's default could carry sensitive\ndata, and surfacing it in a drift report (which may be logged or returned\nover the API) would leak it. Non-secret defaults are safe to display.", + "type": "string" + }, + "description": { + "description": "Description is the human-readable purpose of the variable.", + "type": "string" + }, + "name": { + "description": "Name is the environment variable name.", + "type": "string" + }, + "required": { + "description": "Required indicates whether the candidate marks the variable as required.", + "type": "boolean" + }, + "secret": { + "description": "Secret indicates whether the variable holds sensitive data.", + "type": "boolean" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange": { + "description": "PermissionProfile is set when the candidate's permission profile differs\nfrom the workload's current profile.", + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus": { + "description": "Status is the upgrade status for the workload.", + "enum": [ + "up-to-date", + "upgrade-available", + "not-registry-sourced", + "server-not-found", + "unknown" + ], + "type": "string", + "x-enum-varnames": [ + "StatusUpToDate", + "StatusUpgradeAvailable", + "StatusNotRegistrySourced", + "StatusServerNotFound", + "StatusUnknown" + ] + }, "model.Argument": { "properties": { "choices": { @@ -3386,6 +3525,29 @@ const docTemplate = `{ }, "type": "object" }, + "pkg_api_v1.upgradeCheckBulkResponse": { + "description": "Results of checking multiple workloads for available upgrades", + "properties": { + "results": { + "description": "Results holds one upgrade-check outcome per scoped workload, in the order\nthe workloads were enumerated. Each entry carries only metadata and never\nsecret values.", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult" + }, + "type": "array", + "uniqueItems": false + } + }, + "type": "object" + }, + "pkg_api_v1.upgradeCheckResponse": { + "description": "Result of checking a single workload for an available upgrade", + "properties": { + "result": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult" + } + }, + "type": "object" + }, "pkg_api_v1.validateSkillRequest": { "description": "Request to validate a skill definition", "properties": { @@ -6487,6 +6649,65 @@ const docTemplate = `{ ] } }, + "/api/v1beta/workloads/upgrade-check": { + "get": { + "description": "Check all workloads (optionally filtered by group) for newer\nimages available in their source registries. This is an offline\nmetadata comparison; it does not pull images. Secret values are\nnever returned.", + "parameters": [ + { + "description": "Include stopped workloads", + "in": "query", + "name": "all", + "schema": { + "type": "boolean" + } + }, + { + "description": "Filter workloads by group name", + "in": "query", + "name": "group", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/pkg_api_v1.upgradeCheckBulkResponse" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Bad Request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Group not found" + } + }, + "summary": "Check workloads for available upgrades", + "tags": [ + "workloads" + ] + } + }, "/api/v1beta/workloads/{name}": { "delete": { "description": "Delete a workload asynchronously. Returns 202 Accepted immediately.\nThe deletion happens in the background. Poll the workload list to confirm deletion.", @@ -6943,6 +7164,58 @@ const docTemplate = `{ ] } }, + "/api/v1beta/workloads/{name}/upgrade-check": { + "get": { + "description": "Check whether a single workload has a newer image available in\nits source registry. This is an offline metadata comparison; it\ndoes not pull images. Secret values are never returned.", + "parameters": [ + { + "description": "Workload name", + "in": "path", + "name": "name", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/pkg_api_v1.upgradeCheckResponse" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Bad Request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Not Found" + } + }, + "summary": "Check a workload for an available upgrade", + "tags": [ + "workloads" + ] + } + }, "/health": { "get": { "description": "Check if the API is healthy", diff --git a/docs/server/swagger.json b/docs/server/swagger.json index 8127a52c45..b34be6673d 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -1959,6 +1959,145 @@ }, "type": "object" }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange": { + "description": "NetworkIsolation is set when the candidate's network isolation posture\ndiffers from the workload's current setting.", + "properties": { + "from": { + "type": "boolean" + }, + "to": { + "type": "boolean" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult": { + "description": "Result is the upgrade-check outcome for the workload. It carries only\nmetadata (status, image references, drift) and never secret values.", + "properties": { + "candidate_image": { + "description": "CandidateImage is the image reference the registry currently reports.", + "type": "string" + }, + "config_drift": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift" + }, + "current_image": { + "description": "CurrentImage is the image reference the workload is currently running.", + "type": "string" + }, + "env_var_drift": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift" + }, + "reason": { + "description": "Reason provides additional context, primarily for StatusUnknown.", + "type": "string" + }, + "registry_server": { + "description": "RegistryServer is the registry entry name the workload was sourced from.\nEmpty when the workload is not registry-sourced.", + "type": "string" + }, + "status": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus" + }, + "workload_name": { + "description": "WorkloadName is the name of the workload that was checked.", + "type": "string" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift": { + "description": "ConfigDrift describes posture differences (transport, network isolation,\npermission profile) between the workload and the candidate registry entry.", + "properties": { + "network_isolation": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange" + }, + "permission_profile": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" + }, + "transport": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift": { + "description": "EnvVarDrift describes environment variables the candidate registry entry\ndeclares that differ from the workload's current configuration.", + "properties": { + "added": { + "description": "Added lists environment variables the candidate declares that the\nworkload does not currently supply (via plain env vars or secrets).", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo" + }, + "type": "array", + "uniqueItems": false + }, + "removed": { + "description": "Removed lists environment variables the workload supplies that the\ncandidate no longer declares. Populated on a best-effort basis; may be\nempty even when removals exist (forward-compatible field).", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo" + }, + "type": "array", + "uniqueItems": false + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo": { + "properties": { + "default": { + "description": "Default is the candidate's default value. It is cleared (left empty)\nwhenever Secret is true: a secret env var's default could carry sensitive\ndata, and surfacing it in a drift report (which may be logged or returned\nover the API) would leak it. Non-secret defaults are safe to display.", + "type": "string" + }, + "description": { + "description": "Description is the human-readable purpose of the variable.", + "type": "string" + }, + "name": { + "description": "Name is the environment variable name.", + "type": "string" + }, + "required": { + "description": "Required indicates whether the candidate marks the variable as required.", + "type": "boolean" + }, + "secret": { + "description": "Secret indicates whether the variable holds sensitive data.", + "type": "boolean" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange": { + "description": "PermissionProfile is set when the candidate's permission profile differs\nfrom the workload's current profile.", + "properties": { + "from": { + "type": "string" + }, + "to": { + "type": "string" + } + }, + "type": "object" + }, + "github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus": { + "description": "Status is the upgrade status for the workload.", + "enum": [ + "up-to-date", + "upgrade-available", + "not-registry-sourced", + "server-not-found", + "unknown" + ], + "type": "string", + "x-enum-varnames": [ + "StatusUpToDate", + "StatusUpgradeAvailable", + "StatusNotRegistrySourced", + "StatusServerNotFound", + "StatusUnknown" + ] + }, "model.Argument": { "properties": { "choices": { @@ -3379,6 +3518,29 @@ }, "type": "object" }, + "pkg_api_v1.upgradeCheckBulkResponse": { + "description": "Results of checking multiple workloads for available upgrades", + "properties": { + "results": { + "description": "Results holds one upgrade-check outcome per scoped workload, in the order\nthe workloads were enumerated. Each entry carries only metadata and never\nsecret values.", + "items": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult" + }, + "type": "array", + "uniqueItems": false + } + }, + "type": "object" + }, + "pkg_api_v1.upgradeCheckResponse": { + "description": "Result of checking a single workload for an available upgrade", + "properties": { + "result": { + "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult" + } + }, + "type": "object" + }, "pkg_api_v1.validateSkillRequest": { "description": "Request to validate a skill definition", "properties": { @@ -6480,6 +6642,65 @@ ] } }, + "/api/v1beta/workloads/upgrade-check": { + "get": { + "description": "Check all workloads (optionally filtered by group) for newer\nimages available in their source registries. This is an offline\nmetadata comparison; it does not pull images. Secret values are\nnever returned.", + "parameters": [ + { + "description": "Include stopped workloads", + "in": "query", + "name": "all", + "schema": { + "type": "boolean" + } + }, + { + "description": "Filter workloads by group name", + "in": "query", + "name": "group", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/pkg_api_v1.upgradeCheckBulkResponse" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Bad Request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Group not found" + } + }, + "summary": "Check workloads for available upgrades", + "tags": [ + "workloads" + ] + } + }, "/api/v1beta/workloads/{name}": { "delete": { "description": "Delete a workload asynchronously. Returns 202 Accepted immediately.\nThe deletion happens in the background. Poll the workload list to confirm deletion.", @@ -6936,6 +7157,58 @@ ] } }, + "/api/v1beta/workloads/{name}/upgrade-check": { + "get": { + "description": "Check whether a single workload has a newer image available in\nits source registry. This is an offline metadata comparison; it\ndoes not pull images. Secret values are never returned.", + "parameters": [ + { + "description": "Workload name", + "in": "path", + "name": "name", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/pkg_api_v1.upgradeCheckResponse" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Bad Request" + }, + "404": { + "content": { + "application/json": { + "schema": { + "type": "string" + } + } + }, + "description": "Not Found" + } + }, + "summary": "Check a workload for an available upgrade", + "tags": [ + "workloads" + ] + } + }, "/health": { "get": { "description": "Check if the API is healthy", diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index 6bbded4bce..fc8f69b3c8 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -1922,6 +1922,130 @@ components: WARNING: This should only be used for development/testing. type: boolean type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange: + description: |- + NetworkIsolation is set when the candidate's network isolation posture + differs from the workload's current setting. + properties: + from: + type: boolean + to: + type: boolean + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult: + description: |- + Result is the upgrade-check outcome for the workload. It carries only + metadata (status, image references, drift) and never secret values. + properties: + candidate_image: + description: CandidateImage is the image reference the registry currently + reports. + type: string + config_drift: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift' + current_image: + description: CurrentImage is the image reference the workload is currently + running. + type: string + env_var_drift: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift' + reason: + description: Reason provides additional context, primarily for StatusUnknown. + type: string + registry_server: + description: |- + RegistryServer is the registry entry name the workload was sourced from. + Empty when the workload is not registry-sourced. + type: string + status: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus' + workload_name: + description: WorkloadName is the name of the workload that was checked. + type: string + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift: + description: |- + ConfigDrift describes posture differences (transport, network isolation, + permission profile) between the workload and the candidate registry entry. + properties: + network_isolation: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange' + permission_profile: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange' + transport: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange' + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarDrift: + description: |- + EnvVarDrift describes environment variables the candidate registry entry + declares that differ from the workload's current configuration. + properties: + added: + description: |- + Added lists environment variables the candidate declares that the + workload does not currently supply (via plain env vars or secrets). + items: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo' + type: array + uniqueItems: false + removed: + description: |- + Removed lists environment variables the workload supplies that the + candidate no longer declares. Populated on a best-effort basis; may be + empty even when removals exist (forward-compatible field). + items: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo' + type: array + uniqueItems: false + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.EnvVarInfo: + properties: + default: + description: |- + Default is the candidate's default value. It is cleared (left empty) + whenever Secret is true: a secret env var's default could carry sensitive + data, and surfacing it in a drift report (which may be logged or returned + over the API) would leak it. Non-secret defaults are safe to display. + type: string + description: + description: Description is the human-readable purpose of the variable. + type: string + name: + description: Name is the environment variable name. + type: string + required: + description: Required indicates whether the candidate marks the variable + as required. + type: boolean + secret: + description: Secret indicates whether the variable holds sensitive data. + type: boolean + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange: + description: |- + PermissionProfile is set when the candidate's permission profile differs + from the workload's current profile. + properties: + from: + type: string + to: + type: string + type: object + github_com_stacklok_toolhive_pkg_workloads_upgrade.UpgradeStatus: + description: Status is the upgrade status for the workload. + enum: + - up-to-date + - upgrade-available + - not-registry-sourced + - server-not-found + - unknown + type: string + x-enum-varnames: + - StatusUpToDate + - StatusUpgradeAvailable + - StatusNotRegistrySourced + - StatusServerNotFound + - StatusUnknown model.Argument: properties: choices: @@ -2998,6 +3122,25 @@ components: description: Success message type: string type: object + pkg_api_v1.upgradeCheckBulkResponse: + description: Results of checking multiple workloads for available upgrades + properties: + results: + description: |- + Results holds one upgrade-check outcome per scoped workload, in the order + the workloads were enumerated. Each entry carries only metadata and never + secret values. + items: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult' + type: array + uniqueItems: false + type: object + pkg_api_v1.upgradeCheckResponse: + description: Result of checking a single workload for an available upgrade + properties: + result: + $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult' + type: object pkg_api_v1.validateSkillRequest: description: Request to validate a skill definition properties: @@ -5283,6 +5426,41 @@ paths: summary: Stop a workload tags: - workloads + /api/v1beta/workloads/{name}/upgrade-check: + get: + description: |- + Check whether a single workload has a newer image available in + its source registry. This is an offline metadata comparison; it + does not pull images. Secret values are never returned. + parameters: + - description: Workload name + in: path + name: name + required: true + schema: + type: string + responses: + "200": + content: + application/json: + schema: + $ref: '#/components/schemas/pkg_api_v1.upgradeCheckResponse' + description: OK + "400": + content: + application/json: + schema: + type: string + description: Bad Request + "404": + content: + application/json: + schema: + type: string + description: Not Found + summary: Check a workload for an available upgrade + tags: + - workloads /api/v1beta/workloads/delete: post: description: |- @@ -5375,6 +5553,46 @@ paths: summary: Stop workloads in bulk tags: - workloads + /api/v1beta/workloads/upgrade-check: + get: + description: |- + Check all workloads (optionally filtered by group) for newer + images available in their source registries. This is an offline + metadata comparison; it does not pull images. Secret values are + never returned. + parameters: + - description: Include stopped workloads + in: query + name: all + schema: + type: boolean + - description: Filter workloads by group name + in: query + name: group + schema: + type: string + responses: + "200": + content: + application/json: + schema: + $ref: '#/components/schemas/pkg_api_v1.upgradeCheckBulkResponse' + description: OK + "400": + content: + application/json: + schema: + type: string + description: Bad Request + "404": + content: + application/json: + schema: + type: string + description: Group not found + summary: Check workloads for available upgrades + tags: + - workloads /health: get: description: Check if the API is healthy diff --git a/pkg/api/v1/workload_types.go b/pkg/api/v1/workload_types.go index 180e4e5045..e2345a6625 100644 --- a/pkg/api/v1/workload_types.go +++ b/pkg/api/v1/workload_types.go @@ -16,6 +16,7 @@ import ( "github.com/stacklok/toolhive/pkg/runner" "github.com/stacklok/toolhive/pkg/secrets" "github.com/stacklok/toolhive/pkg/transport/middleware" + "github.com/stacklok/toolhive/pkg/workloads/upgrade" ) // workloadListResponse represents the response for listing workloads @@ -183,6 +184,25 @@ type oidcOptions struct { Scopes []string `json:"scopes,omitempty"` } +// upgradeCheckResponse is the response for a single-workload upgrade check. +// +// @Description Result of checking a single workload for an available upgrade +type upgradeCheckResponse struct { + // Result is the upgrade-check outcome for the workload. It carries only + // metadata (status, image references, drift) and never secret values. + Result *upgrade.CheckResult `json:"result"` +} + +// upgradeCheckBulkResponse is the response for a batch upgrade check. +// +// @Description Results of checking multiple workloads for available upgrades +type upgradeCheckBulkResponse struct { + // Results holds one upgrade-check outcome per scoped workload, in the order + // the workloads were enumerated. Each entry carries only metadata and never + // secret values. + Results []*upgrade.CheckResult `json:"results"` +} + // createWorkloadResponse represents the response for workload creation // // @Description Response after successfully creating a workload diff --git a/pkg/api/v1/workloads.go b/pkg/api/v1/workloads.go index f10c4eb9ec..f5d804934e 100644 --- a/pkg/api/v1/workloads.go +++ b/pkg/api/v1/workloads.go @@ -18,6 +18,7 @@ import ( apierrors "github.com/stacklok/toolhive/pkg/api/errors" "github.com/stacklok/toolhive/pkg/container/runtime" "github.com/stacklok/toolhive/pkg/groups" + "github.com/stacklok/toolhive/pkg/registry" "github.com/stacklok/toolhive/pkg/runner" "github.com/stacklok/toolhive/pkg/workloads" wt "github.com/stacklok/toolhive/pkg/workloads/types" @@ -41,6 +42,16 @@ type WorkloadRoutes struct { debugMode bool groupManager groups.Manager workloadService *WorkloadService + + // loadRunConfig loads a single workload's saved RunConfig. Injected so the + // upgrade-check handlers can be unit tested without the global state store. + loadRunConfig runConfigLoader + // listRunConfigNames lists all saved RunConfig names. Injected for the same + // testability reason as loadRunConfig. + listRunConfigNames runConfigLister + // registryProvider returns the registry provider used by upgrade checks. + // Injected so tests can supply a mock provider. + registryProvider registryProviderFunc } // @title ToolHive API @@ -64,11 +75,19 @@ func WorkloadRouter( ) routes := WorkloadRoutes{ - workloadManager: workloadManager, - containerRuntime: containerRuntime, - debugMode: debugMode, - groupManager: groupManager, - workloadService: workloadService, + workloadManager: workloadManager, + containerRuntime: containerRuntime, + debugMode: debugMode, + groupManager: groupManager, + workloadService: workloadService, + loadRunConfig: runner.LoadState, + listRunConfigNames: defaultRunConfigLister, + registryProvider: func() (registry.Provider, error) { + return registry.GetDefaultProviderWithConfig( + workloadService.configProvider, + registry.WithInteractive(false), + ) + }, } r := chi.NewRouter() @@ -80,6 +99,10 @@ func WorkloadRouter( r.With(stdTimeout).Post("/stop", apierrors.ErrorHandler(routes.stopWorkloadsBulk)) r.With(stdTimeout).Post("/restart", apierrors.ErrorHandler(routes.restartWorkloadsBulk)) r.With(stdTimeout).Post("/delete", apierrors.ErrorHandler(routes.deleteWorkloadsBulk)) + // Register the literal /upgrade-check before /{name} so chi routes it + // distinctly from the single-workload wildcard. + r.With(stdTimeout).Get("/upgrade-check", apierrors.ErrorHandler(routes.upgradeCheckBulk)) + r.With(stdTimeout).Get("/{name}/upgrade-check", apierrors.ErrorHandler(routes.upgradeCheckSingle)) r.With(stdTimeout).Get("/{name}", apierrors.ErrorHandler(routes.getWorkload)) r.With(longTimeout).Post("/{name}/edit", apierrors.ErrorHandler(routes.updateWorkload)) r.With(stdTimeout).Post("/{name}/stop", apierrors.ErrorHandler(routes.stopWorkload)) diff --git a/pkg/api/v1/workloads_upgrade.go b/pkg/api/v1/workloads_upgrade.go new file mode 100644 index 0000000000..ad27312a2b --- /dev/null +++ b/pkg/api/v1/workloads_upgrade.go @@ -0,0 +1,183 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package v1 + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "net/http" + + "github.com/go-chi/chi/v5" + + "github.com/stacklok/toolhive-core/httperr" + groupval "github.com/stacklok/toolhive-core/validation/group" + "github.com/stacklok/toolhive/pkg/registry" + "github.com/stacklok/toolhive/pkg/runner" + "github.com/stacklok/toolhive/pkg/state" + "github.com/stacklok/toolhive/pkg/workloads" + "github.com/stacklok/toolhive/pkg/workloads/upgrade" +) + +// runConfigLoader loads a single workload's saved RunConfig by name. +type runConfigLoader func(ctx context.Context, name string) (*runner.RunConfig, error) + +// runConfigLister lists the names of all saved RunConfigs. +type runConfigLister func(ctx context.Context) ([]string, error) + +// registryProviderFunc returns a registry provider for upgrade checks. +type registryProviderFunc func() (registry.Provider, error) + +// defaultRunConfigLister lists saved RunConfig names from the local state store. +// It mirrors the enumeration used by manager.ListWorkloadsUsingSecret. +func defaultRunConfigLister(ctx context.Context) ([]string, error) { + store, err := state.NewRunConfigStore(state.DefaultAppName) + if err != nil { + return nil, fmt.Errorf("failed to create state store: %w", err) + } + return store.List(ctx) +} + +// upgradeCheckSingle handles GET /workloads/{name}/upgrade-check. +// +// @Summary Check a workload for an available upgrade +// @Description Check whether a single workload has a newer image available in +// @Description its source registry. This is an offline metadata comparison; it +// @Description does not pull images. Secret values are never returned. +// @Tags workloads +// @Produce json +// @Param name path string true "Workload name" +// @Success 200 {object} upgradeCheckResponse +// @Failure 400 {string} string "Bad Request" +// @Failure 404 {string} string "Not Found" +// @Router /api/v1beta/workloads/{name}/upgrade-check [get] +func (s *WorkloadRoutes) upgradeCheckSingle(w http.ResponseWriter, r *http.Request) error { + ctx := r.Context() + name := chi.URLParam(r, "name") + + // Check if workload exists first (mirrors getWorkload's existence check). + if _, err := s.workloadManager.GetWorkload(ctx, name); err != nil { + return err // ErrWorkloadNotFound (404) or ErrInvalidWorkloadName (400) already have status codes + } + + runConfig, err := s.loadRunConfig(ctx, name) + if err != nil { + return httperr.WithCode( + fmt.Errorf("workload configuration not found: %w", err), + http.StatusNotFound, + ) + } + + checker, err := s.newUpgradeChecker() + if err != nil { + return err + } + + result, err := checker.Check(ctx, runConfig) + if err != nil { + return fmt.Errorf("failed to check workload for upgrade: %w", err) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(upgradeCheckResponse{Result: result}); err != nil { + return fmt.Errorf("failed to marshal upgrade check result: %w", err) + } + return nil +} + +// upgradeCheckBulk handles GET /workloads/upgrade-check. +// +// @Summary Check workloads for available upgrades +// @Description Check all workloads (optionally filtered by group) for newer +// @Description images available in their source registries. This is an offline +// @Description metadata comparison; it does not pull images. Secret values are +// @Description never returned. +// @Tags workloads +// @Produce json +// @Param all query bool false "Include stopped workloads" +// @Param group query string false "Filter workloads by group name" +// @Success 200 {object} upgradeCheckBulkResponse +// @Failure 400 {string} string "Bad Request" +// @Failure 404 {string} string "Group not found" +// @Router /api/v1beta/workloads/upgrade-check [get] +func (s *WorkloadRoutes) upgradeCheckBulk(w http.ResponseWriter, r *http.Request) error { + ctx := r.Context() + listAll := r.URL.Query().Get("all") == "true" + groupFilter := r.URL.Query().Get("group") + + // Reuse the exact group/authorization scoping of listWorkloads so the batch + // endpoint can never report on workloads the caller cannot otherwise list. + workloadList, err := s.workloadManager.ListWorkloads(ctx, listAll) + if err != nil { + return fmt.Errorf("failed to list workloads: %w", err) + } + if groupFilter != "" { + if err := groupval.ValidateName(groupFilter); err != nil { + return httperr.WithCode( + fmt.Errorf("invalid group name: %w", err), + http.StatusBadRequest, + ) + } + workloadList, err = workloads.FilterByGroup(workloadList, groupFilter) + if err != nil { + return err // groups.ErrGroupNotFound already has 404 status code + } + } + + // Restrict the set of RunConfigs to those in the scoped workload list. + inScope := make(map[string]struct{}, len(workloadList)) + for _, wl := range workloadList { + inScope[wl.Name] = struct{}{} + } + + configNames, err := s.listRunConfigNames(ctx) + if err != nil { + return fmt.Errorf("failed to list workload configurations: %w", err) + } + + configs := make([]*runner.RunConfig, 0, len(inScope)) + for _, cfgName := range configNames { + if _, ok := inScope[cfgName]; !ok { + continue + } + runConfig, err := s.loadRunConfig(ctx, cfgName) + if err != nil { + // Skip configs we can't load; they may be corrupted or from an older + // version. The workload simply won't appear in the results. + slog.Debug("failed to load run config for upgrade check", "workload", cfgName, "error", err) + continue + } + configs = append(configs, runConfig) + } + + checker, err := s.newUpgradeChecker() + if err != nil { + return err + } + + results := checker.CheckAll(ctx, configs) + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(upgradeCheckBulkResponse{Results: results}); err != nil { + return fmt.Errorf("failed to marshal upgrade check results: %w", err) + } + return nil +} + +// newUpgradeChecker builds an upgrade.Checker backed by the registry provider. +func (s *WorkloadRoutes) newUpgradeChecker() (*upgrade.Checker, error) { + provider, err := s.registryProvider() + if err != nil { + return nil, httperr.WithCode( + fmt.Errorf("failed to get registry provider: %w", err), + http.StatusServiceUnavailable, + ) + } + checker, err := upgrade.NewChecker(provider) + if err != nil { + return nil, fmt.Errorf("failed to create upgrade checker: %w", err) + } + return checker, nil +} diff --git a/pkg/api/v1/workloads_upgrade_test.go b/pkg/api/v1/workloads_upgrade_test.go new file mode 100644 index 0000000000..07640cc0aa --- /dev/null +++ b/pkg/api/v1/workloads_upgrade_test.go @@ -0,0 +1,439 @@ +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package v1 + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/go-chi/chi/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" + + regtypes "github.com/stacklok/toolhive-core/registry/types" + apierrors "github.com/stacklok/toolhive/pkg/api/errors" + "github.com/stacklok/toolhive/pkg/container/runtime" + "github.com/stacklok/toolhive/pkg/core" + "github.com/stacklok/toolhive/pkg/registry" + registrymocks "github.com/stacklok/toolhive/pkg/registry/mocks" + "github.com/stacklok/toolhive/pkg/runner" + "github.com/stacklok/toolhive/pkg/transport/types" + workloadsmocks "github.com/stacklok/toolhive/pkg/workloads/mocks" + wt "github.com/stacklok/toolhive/pkg/workloads/types" + "github.com/stacklok/toolhive/pkg/workloads/upgrade" +) + +// upgradeTestRoutes builds a WorkloadRoutes wired with the supplied workload +// manager and registry provider plus in-memory loaders, so the upgrade-check +// handlers can run without touching the global state store or a real registry. +func upgradeTestRoutes( + wm *workloadsmocks.MockManager, + provider registry.Provider, + configs map[string]*runner.RunConfig, +) *WorkloadRoutes { + return &WorkloadRoutes{ + workloadManager: wm, + loadRunConfig: func(_ context.Context, name string) (*runner.RunConfig, error) { + cfg, ok := configs[name] + if !ok { + return nil, runtime.ErrWorkloadNotFound + } + return cfg, nil + }, + listRunConfigNames: func(_ context.Context) ([]string, error) { + names := make([]string, 0, len(configs)) + for n := range configs { + names = append(names, n) + } + return names, nil + }, + registryProvider: func() (registry.Provider, error) { + return provider, nil + }, + } +} + +// imageServer is a registry image entry fixture. +func imageServer(image string) *regtypes.ImageMetadata { + return ®types.ImageMetadata{Image: image} +} + +func TestUpgradeCheckSingle(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + workloadName string + setupMock func(*workloadsmocks.MockManager, *registrymocks.MockProvider) + configs map[string]*runner.RunConfig + expectedStatus int + expectedStatusBody upgrade.UpgradeStatus + expectBody string + }{ + { + name: "up-to-date", + workloadName: "fetch", + setupMock: func(wm *workloadsmocks.MockManager, p *registrymocks.MockProvider) { + wm.EXPECT().GetWorkload(gomock.Any(), "fetch").Return(core.Workload{Name: "fetch"}, nil) + p.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.0.0"), nil) + }, + configs: map[string]*runner.RunConfig{ + "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, + }, + expectedStatus: http.StatusOK, + expectedStatusBody: upgrade.StatusUpToDate, + }, + { + name: "upgrade-available", + workloadName: "fetch", + setupMock: func(wm *workloadsmocks.MockManager, p *registrymocks.MockProvider) { + wm.EXPECT().GetWorkload(gomock.Any(), "fetch").Return(core.Workload{Name: "fetch"}, nil) + p.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.1.0"), nil) + }, + configs: map[string]*runner.RunConfig{ + "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, + }, + expectedStatus: http.StatusOK, + expectedStatusBody: upgrade.StatusUpgradeAvailable, + }, + { + name: "workload not found", + workloadName: "missing", + setupMock: func(wm *workloadsmocks.MockManager, _ *registrymocks.MockProvider) { + wm.EXPECT().GetWorkload(gomock.Any(), "missing"). + Return(core.Workload{}, runtime.ErrWorkloadNotFound) + }, + configs: map[string]*runner.RunConfig{}, + expectedStatus: http.StatusNotFound, + expectBody: "workload not found", + }, + { + name: "invalid workload name", + workloadName: "bad-name", + setupMock: func(wm *workloadsmocks.MockManager, _ *registrymocks.MockProvider) { + wm.EXPECT().GetWorkload(gomock.Any(), "bad-name"). + Return(core.Workload{}, wt.ErrInvalidWorkloadName) + }, + configs: map[string]*runner.RunConfig{}, + expectedStatus: http.StatusBadRequest, + expectBody: "invalid workload name", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + tt.setupMock(wm, provider) + + routes := upgradeTestRoutes(wm, provider, tt.configs) + + req := httptest.NewRequest("GET", "/"+tt.workloadName+"/upgrade-check", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("name", tt.workloadName) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckSingle).ServeHTTP(w, req) + + assert.Equal(t, tt.expectedStatus, w.Code) + if tt.expectBody != "" { + assert.Contains(t, w.Body.String(), tt.expectBody) + return + } + + var resp upgradeCheckResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.NotNil(t, resp.Result) + assert.Equal(t, tt.expectedStatusBody, resp.Result.Status) + assert.Equal(t, tt.workloadName, resp.Result.WorkloadName) + }) + } +} + +func TestUpgradeCheckBulk(t *testing.T) { + t.Parallel() + + t.Run("mixed results", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{ + {Name: "fetch"}, + {Name: "time"}, + {Name: "custom"}, + }, nil) + provider.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.0.0"), nil) + provider.EXPECT().GetServer("io.github/time").Return(imageServer("ghcr.io/example/time:2.0.0"), nil) + + configs := map[string]*runner.RunConfig{ + "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, + "time": {Name: "time", Image: "ghcr.io/example/time:1.0.0", RegistryServerName: "io.github/time"}, + "custom": {Name: "custom", Image: "ghcr.io/example/custom:1.0.0"}, + } + routes := upgradeTestRoutes(wm, provider, configs) + + req := httptest.NewRequest("GET", "/upgrade-check", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var resp upgradeCheckBulkResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Len(t, resp.Results, 3) + + byName := map[string]*upgrade.CheckResult{} + for _, r := range resp.Results { + byName[r.WorkloadName] = r + } + assert.Equal(t, upgrade.StatusUpToDate, byName["fetch"].Status) + assert.Equal(t, upgrade.StatusUpgradeAvailable, byName["time"].Status) + assert.Equal(t, upgrade.StatusNotRegistrySourced, byName["custom"].Status) + }) + + t.Run("group filter scopes results", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + // Listing returns two workloads in different groups; only the prod one + // survives FilterByGroup, so only its registry lookup occurs. + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{ + {Name: "fetch", Group: "prod"}, + {Name: "time", Group: "dev"}, + }, nil) + provider.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.1.0"), nil) + + configs := map[string]*runner.RunConfig{ + "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, + "time": {Name: "time", Image: "ghcr.io/example/time:1.0.0", RegistryServerName: "io.github/time"}, + } + routes := upgradeTestRoutes(wm, provider, configs) + + req := httptest.NewRequest("GET", "/upgrade-check?group=prod", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var resp upgradeCheckBulkResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Len(t, resp.Results, 1) + assert.Equal(t, "fetch", resp.Results[0].WorkloadName) + assert.Equal(t, upgrade.StatusUpgradeAvailable, resp.Results[0].Status) + }) + + t.Run("invalid group name", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{}, nil) + + routes := upgradeTestRoutes(wm, provider, map[string]*runner.RunConfig{}) + + req := httptest.NewRequest("GET", "/upgrade-check?group=Invalid%20Group", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + assert.Equal(t, http.StatusBadRequest, w.Code) + assert.Contains(t, w.Body.String(), "invalid group name") + }) + + t.Run("stale on-disk config absent from ListWorkloads is excluded", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + // ListWorkloads reports only "fetch". "orphan" is a stale on-disk config + // that the manager no longer lists. The inScope intersection must drop it, + // and the provider must never be queried for the orphan's registry server + // (mock strictness fails the test if GetServer("io.github/orphan") fires). + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{ + {Name: "fetch"}, + }, nil) + provider.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.0.0"), nil) + + configs := map[string]*runner.RunConfig{ + "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, + "orphan": {Name: "orphan", Image: "ghcr.io/example/orphan:1.0.0", RegistryServerName: "io.github/orphan"}, + } + routes := upgradeTestRoutes(wm, provider, configs) + + req := httptest.NewRequest("GET", "/upgrade-check", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var resp upgradeCheckBulkResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Len(t, resp.Results, 1) + assert.Equal(t, "fetch", resp.Results[0].WorkloadName) + for _, r := range resp.Results { + assert.NotEqual(t, "orphan", r.WorkloadName, "stale on-disk config must not appear in results") + } + }) + + t.Run("unloadable in-scope config is skipped, not fatal", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + // Both workloads are in scope, but "broken" fails to load. The batch must + // still succeed and return a result for the loadable workload. + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{ + {Name: "fetch"}, + {Name: "broken"}, + }, nil) + provider.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.1.0"), nil) + + fetchCfg := &runner.RunConfig{Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"} + routes := &WorkloadRoutes{ + workloadManager: wm, + loadRunConfig: func(_ context.Context, name string) (*runner.RunConfig, error) { + if name == "broken" { + return nil, errors.New("corrupted run config") + } + return fetchCfg, nil + }, + listRunConfigNames: func(_ context.Context) ([]string, error) { + return []string{"fetch", "broken"}, nil + }, + registryProvider: func() (registry.Provider, error) { return provider, nil }, + } + + req := httptest.NewRequest("GET", "/upgrade-check", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + var resp upgradeCheckBulkResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Len(t, resp.Results, 1) + assert.Equal(t, "fetch", resp.Results[0].WorkloadName) + assert.Equal(t, upgrade.StatusUpgradeAvailable, resp.Results[0].Status) + }) +} + +// TestUpgradeCheckNoSecretLeak asserts the upgrade-check response carries only +// metadata and never any secret values from the workload's RunConfig. +func TestUpgradeCheckNoSecretLeak(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + wm.EXPECT().GetWorkload(gomock.Any(), "fetch").Return(core.Workload{Name: "fetch"}, nil) + // Candidate declares a secret env var with a default; the drift report must + // surface the name but never the secret default value. + provider.EXPECT().GetServer("io.github/fetch").Return(®types.ImageMetadata{ + Image: "ghcr.io/example/fetch:1.1.0", + EnvVars: []*regtypes.EnvVar{ + {Name: "API_TOKEN", Secret: true, Required: true, Default: "super-secret-default"}, + }, + }, nil) + + configs := map[string]*runner.RunConfig{ + "fetch": { + Name: "fetch", + Image: "ghcr.io/example/fetch:1.0.0", + RegistryServerName: "io.github/fetch", + Transport: types.TransportTypeStdio, + EnvVars: map[string]string{"PLAIN": "value"}, + Secrets: []string{"my-vault-secret,target=OTHER_TOKEN"}, + }, + } + routes := upgradeTestRoutes(wm, provider, configs) + + req := httptest.NewRequest("GET", "/fetch/upgrade-check", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("name", "fetch") + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckSingle).ServeHTTP(w, req) + + require.Equal(t, http.StatusOK, w.Code) + + body := w.Body.String() + assert.NotContains(t, body, "super-secret-default", "secret env var default must not leak") + assert.NotContains(t, body, "my-vault-secret", "secret parameter name must not leak") + assert.NotContains(t, body, "OTHER_TOKEN", "secret target must not leak") + // The drift report should still name the missing candidate env var. + assert.Contains(t, body, "API_TOKEN") +} + +// TestUpgradeCheckRouting asserts /upgrade-check resolves to the batch handler +// and is not captured by the /{name} wildcard (chi static-before-wildcard +// ordering), while /{name}/upgrade-check resolves to the single handler. +func TestUpgradeCheckRouting(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + + // The batch handler lists workloads; getWorkload would instead call + // GetWorkload("upgrade-check"). Asserting ListWorkloads is hit proves the + // literal route won, not the wildcard. + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{}, nil) + + provider := registrymocks.NewMockProvider(ctrl) + routes := upgradeTestRoutes(wm, provider, map[string]*runner.RunConfig{}) + + router := newUpgradeTestRouter(routes) + + req := httptest.NewRequest("GET", "/upgrade-check", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + assert.Equal(t, http.StatusOK, w.Code) +} + +// newUpgradeTestRouter mounts only the routes relevant to ordering so the test +// exercises chi's real matching between /upgrade-check and /{name}. +func newUpgradeTestRouter(routes *WorkloadRoutes) chi.Router { + r := chi.NewRouter() + r.Get("/upgrade-check", apierrors.ErrorHandler(routes.upgradeCheckBulk)) + r.Get("/{name}/upgrade-check", apierrors.ErrorHandler(routes.upgradeCheckSingle)) + r.Get("/{name}", apierrors.ErrorHandler(routes.getWorkload)) + return r +} From 5819b6eed5facc535bf75a1b5b68418c3a4e3ee2 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Mon, 1 Jun 2026 13:09:36 +0000 Subject: [PATCH 4/5] Return 404 for unknown group in workload listing FilterByGroup returns an empty slice (nil error) for a group that does not exist, so a typo'd ?group= silently returned 200 with an empty list instead of the documented 404. Check group existence explicitly via the group manager before filtering, in both the upgrade-check and the listWorkloads handlers, so the advertised 404 is real. Add a bulk upgrade-check test covering the unknown-group path. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/api/v1/workloads.go | 11 +++++++++- pkg/api/v1/workloads_upgrade.go | 12 ++++++++++- pkg/api/v1/workloads_upgrade_test.go | 32 ++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/pkg/api/v1/workloads.go b/pkg/api/v1/workloads.go index f5d804934e..3aac701e9d 100644 --- a/pkg/api/v1/workloads.go +++ b/pkg/api/v1/workloads.go @@ -144,9 +144,18 @@ func (s *WorkloadRoutes) listWorkloads(w http.ResponseWriter, r *http.Request) e http.StatusBadRequest, ) } + // FilterByGroup silently returns an empty slice for an unknown group, + // so check existence explicitly to honor the documented 404. + exists, existsErr := s.groupManager.Exists(ctx, groupFilter) + if existsErr != nil { + return fmt.Errorf("failed to check group existence: %w", existsErr) + } + if !exists { + return fmt.Errorf("%w: %s", groups.ErrGroupNotFound, groupFilter) + } workloadList, err = workloads.FilterByGroup(workloadList, groupFilter) if err != nil { - return err // groups.ErrGroupNotFound already has 404 status code + return err } } diff --git a/pkg/api/v1/workloads_upgrade.go b/pkg/api/v1/workloads_upgrade.go index ad27312a2b..efa1022575 100644 --- a/pkg/api/v1/workloads_upgrade.go +++ b/pkg/api/v1/workloads_upgrade.go @@ -14,6 +14,7 @@ import ( "github.com/stacklok/toolhive-core/httperr" groupval "github.com/stacklok/toolhive-core/validation/group" + "github.com/stacklok/toolhive/pkg/groups" "github.com/stacklok/toolhive/pkg/registry" "github.com/stacklok/toolhive/pkg/runner" "github.com/stacklok/toolhive/pkg/state" @@ -120,9 +121,18 @@ func (s *WorkloadRoutes) upgradeCheckBulk(w http.ResponseWriter, r *http.Request http.StatusBadRequest, ) } + // FilterByGroup silently returns an empty slice for an unknown group, + // so check existence explicitly to honor the documented 404. + exists, existsErr := s.groupManager.Exists(ctx, groupFilter) + if existsErr != nil { + return fmt.Errorf("failed to check group existence: %w", existsErr) + } + if !exists { + return fmt.Errorf("%w: %s", groups.ErrGroupNotFound, groupFilter) + } workloadList, err = workloads.FilterByGroup(workloadList, groupFilter) if err != nil { - return err // groups.ErrGroupNotFound already has 404 status code + return err } } diff --git a/pkg/api/v1/workloads_upgrade_test.go b/pkg/api/v1/workloads_upgrade_test.go index 07640cc0aa..edb4c54001 100644 --- a/pkg/api/v1/workloads_upgrade_test.go +++ b/pkg/api/v1/workloads_upgrade_test.go @@ -20,6 +20,7 @@ import ( apierrors "github.com/stacklok/toolhive/pkg/api/errors" "github.com/stacklok/toolhive/pkg/container/runtime" "github.com/stacklok/toolhive/pkg/core" + groupsmocks "github.com/stacklok/toolhive/pkg/groups/mocks" "github.com/stacklok/toolhive/pkg/registry" registrymocks "github.com/stacklok/toolhive/pkg/registry/mocks" "github.com/stacklok/toolhive/pkg/runner" @@ -225,11 +226,15 @@ func TestUpgradeCheckBulk(t *testing.T) { }, nil) provider.EXPECT().GetServer("io.github/fetch").Return(imageServer("ghcr.io/example/fetch:1.1.0"), nil) + gm := groupsmocks.NewMockManager(ctrl) + gm.EXPECT().Exists(gomock.Any(), "prod").Return(true, nil) + configs := map[string]*runner.RunConfig{ "fetch": {Name: "fetch", Image: "ghcr.io/example/fetch:1.0.0", RegistryServerName: "io.github/fetch"}, "time": {Name: "time", Image: "ghcr.io/example/time:1.0.0", RegistryServerName: "io.github/time"}, } routes := upgradeTestRoutes(wm, provider, configs) + routes.groupManager = gm req := httptest.NewRequest("GET", "/upgrade-check?group=prod", nil) w := httptest.NewRecorder() @@ -244,6 +249,33 @@ func TestUpgradeCheckBulk(t *testing.T) { assert.Equal(t, upgrade.StatusUpgradeAvailable, resp.Results[0].Status) }) + t.Run("unknown group returns 404", func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + wm := workloadsmocks.NewMockManager(ctrl) + provider := registrymocks.NewMockProvider(ctrl) + + // The group does not exist, so no per-workload registry lookup happens. + wm.EXPECT().ListWorkloads(gomock.Any(), false).Return([]core.Workload{ + {Name: "fetch", Group: "prod"}, + }, nil) + + gm := groupsmocks.NewMockManager(ctrl) + gm.EXPECT().Exists(gomock.Any(), "ghost").Return(false, nil) + + routes := upgradeTestRoutes(wm, provider, map[string]*runner.RunConfig{}) + routes.groupManager = gm + + req := httptest.NewRequest("GET", "/upgrade-check?group=ghost", nil) + w := httptest.NewRecorder() + apierrors.ErrorHandler(routes.upgradeCheckBulk).ServeHTTP(w, req) + + require.Equal(t, http.StatusNotFound, w.Code) + }) + t.Run("invalid group name", func(t *testing.T) { t.Parallel() From 664e5941d1c0d111987271d01a84db4b073e3927 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Mon, 1 Jun 2026 13:24:34 +0000 Subject: [PATCH 5/5] Regenerate OpenAPI spec after dropping network-isolation drift The upgrade detection change removed the ConfigDrift.NetworkIsolation field and the BoolChange type, so regenerate the committed OpenAPI spec to drop the stale schema and property. Fixes the Verify Swagger Documentation CI check. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/server/docs.go | 17 +---------------- docs/server/swagger.json | 17 +---------------- docs/server/swagger.yaml | 16 ++-------------- 3 files changed, 4 insertions(+), 46 deletions(-) diff --git a/docs/server/docs.go b/docs/server/docs.go index cc932c0f79..9459ce1f7c 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -1966,18 +1966,6 @@ const docTemplate = `{ }, "type": "object" }, - "github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange": { - "description": "NetworkIsolation is set when the candidate's network isolation posture\ndiffers from the workload's current setting.", - "properties": { - "from": { - "type": "boolean" - }, - "to": { - "type": "boolean" - } - }, - "type": "object" - }, "github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult": { "description": "Result is the upgrade-check outcome for the workload. It carries only\nmetadata (status, image references, drift) and never secret values.", "properties": { @@ -2014,11 +2002,8 @@ const docTemplate = `{ "type": "object" }, "github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift": { - "description": "ConfigDrift describes posture differences (transport, network isolation,\npermission profile) between the workload and the candidate registry entry.", + "description": "ConfigDrift describes posture differences (transport, permission profile)\nbetween the workload and the candidate registry entry.", "properties": { - "network_isolation": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange" - }, "permission_profile": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" }, diff --git a/docs/server/swagger.json b/docs/server/swagger.json index b34be6673d..ca7beee46b 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -1959,18 +1959,6 @@ }, "type": "object" }, - "github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange": { - "description": "NetworkIsolation is set when the candidate's network isolation posture\ndiffers from the workload's current setting.", - "properties": { - "from": { - "type": "boolean" - }, - "to": { - "type": "boolean" - } - }, - "type": "object" - }, "github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult": { "description": "Result is the upgrade-check outcome for the workload. It carries only\nmetadata (status, image references, drift) and never secret values.", "properties": { @@ -2007,11 +1995,8 @@ "type": "object" }, "github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift": { - "description": "ConfigDrift describes posture differences (transport, network isolation,\npermission profile) between the workload and the candidate registry entry.", + "description": "ConfigDrift describes posture differences (transport, permission profile)\nbetween the workload and the candidate registry entry.", "properties": { - "network_isolation": { - "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange" - }, "permission_profile": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange" }, diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index fc8f69b3c8..301dc33004 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -1922,16 +1922,6 @@ components: WARNING: This should only be used for development/testing. type: boolean type: object - github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange: - description: |- - NetworkIsolation is set when the candidate's network isolation posture - differs from the workload's current setting. - properties: - from: - type: boolean - to: - type: boolean - type: object github_com_stacklok_toolhive_pkg_workloads_upgrade.CheckResult: description: |- Result is the upgrade-check outcome for the workload. It carries only @@ -1965,11 +1955,9 @@ components: type: object github_com_stacklok_toolhive_pkg_workloads_upgrade.ConfigDrift: description: |- - ConfigDrift describes posture differences (transport, network isolation, - permission profile) between the workload and the candidate registry entry. + ConfigDrift describes posture differences (transport, permission profile) + between the workload and the candidate registry entry. properties: - network_isolation: - $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.BoolChange' permission_profile: $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_workloads_upgrade.StringChange' transport: