Skip to content

Commit b1361f7

Browse files
JAORMX and claude committed
Add upgrade detection for registry workloads
CLI and API users have no way to discover when a newer version of a registry-sourced MCP server is available; only Studio implements drift detection, in its frontend. Introduce a backend package that all clients can consume. Add pkg/workloads/upgrade with a Checker that compares a running workload's image tag against its registry entry (semver-aware, with a string fallback) and reports environment-variable and configuration (transport / permission-profile / network-isolation) drift. Comparison degrades safely to "unknown" for :latest, digest refs, repository changes, and non-registry-sourced workloads, so only a strictly-newer tag on the same repository yields "upgrade-available". This is the read-only detection core (RFC THV-0068, phase A); the apply path, API endpoints, and CLI follow in later changes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 6f63ac0 commit b1361f7

7 files changed

Lines changed: 1158 additions & 0 deletions

File tree

pkg/workloads/upgrade/checker.go

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package upgrade
5+
6+
import (
7+
"context"
8+
"errors"
9+
"fmt"
10+
11+
regtypes "github.com/stacklok/toolhive-core/registry/types"
12+
"github.com/stacklok/toolhive/pkg/registry"
13+
"github.com/stacklok/toolhive/pkg/runner"
14+
"github.com/stacklok/toolhive/pkg/secrets"
15+
)
16+
17+
// Checker determines whether registry-sourced workloads have an available
18+
// upgrade by comparing their current image and configuration against the
19+
// metadata the injected registry provider reports.
20+
type Checker struct {
21+
provider registry.Provider
22+
}
23+
24+
// NewChecker creates a Checker backed by the given registry provider.
25+
//
26+
// The provider is the source of truth for candidate image metadata; callers
27+
// typically pass the shared singleton from registry.GetDefaultProvider so the
28+
// provider's response cache is reused across checks. It returns an error if the
29+
// provider is nil.
30+
func NewChecker(provider registry.Provider) (*Checker, error) {
31+
if provider == nil {
32+
return nil, fmt.Errorf("registry provider must not be nil")
33+
}
34+
return &Checker{provider: provider}, nil
35+
}
36+
37+
// Check evaluates a single workload's RunConfig against the registry and
38+
// returns the upgrade status. It never mutates the supplied config. Per-item
39+
// problems (missing server, unparseable tags, non-image entries) are encoded in
40+
// the returned CheckResult's Status/Reason rather than returned as an error;
41+
// an error is returned only for an invalid call (nil config).
42+
func (c *Checker) Check(_ context.Context, cfg *runner.RunConfig) (*CheckResult, error) {
43+
if cfg == nil {
44+
return nil, fmt.Errorf("run config must not be nil")
45+
}
46+
47+
result := &CheckResult{
48+
WorkloadName: cfg.Name,
49+
RegistryServer: cfg.RegistryServerName,
50+
CurrentImage: cfg.Image,
51+
}
52+
53+
if cfg.RegistryServerName == "" {
54+
result.Status = StatusNotRegistrySourced
55+
return result, nil
56+
}
57+
58+
server, err := c.provider.GetServer(cfg.RegistryServerName)
59+
if err != nil {
60+
if errors.Is(err, registry.ErrServerNotFound) {
61+
result.Status = StatusServerNotFound
62+
return result, nil
63+
}
64+
result.Status = StatusUnknown
65+
result.Reason = fmt.Sprintf("registry lookup failed: %v", err)
66+
return result, nil
67+
}
68+
69+
imgMeta, ok := server.(*regtypes.ImageMetadata)
70+
if !ok {
71+
result.Status = StatusUnknown
72+
result.Reason = fmt.Sprintf("registry entry %q is not a container image (cannot determine upgrade)", cfg.RegistryServerName)
73+
return result, nil
74+
}
75+
76+
result.CandidateImage = imgMeta.Image
77+
78+
comparison, reason := compareImageTags(cfg.Image, imgMeta.Image)
79+
switch comparison {
80+
case comparisonNewer:
81+
result.Status = StatusUpgradeAvailable
82+
result.EnvVarDrift = computeEnvDrift(cfg, imgMeta)
83+
result.ConfigDrift = computeConfigDrift(cfg, imgMeta)
84+
case comparisonSameOrOlder:
85+
result.Status = StatusUpToDate
86+
case comparisonUndecidable:
87+
result.Status = StatusUnknown
88+
result.Reason = reason
89+
}
90+
91+
return result, nil
92+
}
93+
94+
// CheckAll evaluates a batch of workloads. It never returns an error: each
95+
// workload's outcome (including per-item failures) is encoded in its own
96+
// CheckResult. The returned slice preserves the input order. Nil entries in the
97+
// input are skipped.
98+
func (c *Checker) CheckAll(ctx context.Context, configs []*runner.RunConfig) []*CheckResult {
99+
results := make([]*CheckResult, 0, len(configs))
100+
for _, cfg := range configs {
101+
if cfg == nil {
102+
continue
103+
}
104+
// Check only errors on a nil config, which we already guarded against,
105+
// so the error here is unreachable; encode defensively rather than drop.
106+
res, err := c.Check(ctx, cfg)
107+
if err != nil {
108+
res = &CheckResult{
109+
WorkloadName: cfg.Name,
110+
Status: StatusUnknown,
111+
Reason: fmt.Sprintf("check failed: %v", err),
112+
}
113+
}
114+
results = append(results, res)
115+
}
116+
return results
117+
}
118+
119+
// computeEnvDrift reports the candidate environment variables the workload does
120+
// not currently satisfy. A variable is considered satisfied if it appears as a
121+
// plain env var key in the config, or as the target of one of the config's
122+
// secret parameters. Removed is left unpopulated (best-effort, forward-compat).
123+
//
124+
// It treats the config as read-only. Returns nil when there is no drift.
125+
func computeEnvDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *EnvVarDrift {
126+
satisfied := make(map[string]struct{}, len(cfg.EnvVars)+len(cfg.Secrets))
127+
for k := range cfg.EnvVars {
128+
satisfied[k] = struct{}{}
129+
}
130+
for _, s := range cfg.Secrets {
131+
parsed, err := secrets.ParseSecretParameter(s)
132+
if err != nil {
133+
// Malformed secret parameters can't satisfy a variable; skip them.
134+
continue
135+
}
136+
if parsed.Target != "" {
137+
satisfied[parsed.Target] = struct{}{}
138+
}
139+
}
140+
141+
var added []EnvVarInfo
142+
for _, ev := range imgMeta.EnvVars {
143+
if ev == nil {
144+
continue
145+
}
146+
if _, ok := satisfied[ev.Name]; ok {
147+
continue
148+
}
149+
added = append(added, toEnvVarInfo(ev))
150+
}
151+
152+
if len(added) == 0 {
153+
return nil
154+
}
155+
return &EnvVarDrift{Added: added}
156+
}
157+
158+
// computeConfigDrift reports posture differences between the workload's current
159+
// configuration and the candidate registry entry. Each field is nil when that
160+
// aspect did not drift or could not be compared.
161+
//
162+
// The permission profile is compared against imgMeta.Permissions.Name (a
163+
// *permissions.Profile, not a string). Comparison degrades gracefully: when the
164+
// candidate has no profile, or the workload's profile is a custom name/path
165+
// that has no registry analogue, that dimension is not reported as drift unless
166+
// both sides are known and differ. It treats the config as read-only.
167+
func computeConfigDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *ConfigDrift {
168+
drift := &ConfigDrift{}
169+
170+
// Transport: compare the workload's transport string against the registry
171+
// entry's transport. GetTransport() may return an empty string when the
172+
// registry entry does not declare one; only report drift when both are set.
173+
currentTransport := cfg.Transport.String()
174+
candidateTransport := imgMeta.GetTransport()
175+
if candidateTransport != "" && currentTransport != "" && currentTransport != candidateTransport {
176+
drift.Transport = &StringChange{From: currentTransport, To: candidateTransport}
177+
}
178+
179+
// Network isolation is a plain boolean on the config; the registry entry has
180+
// no explicit network-isolation field, so the candidate posture is the
181+
// default (false). Report drift only when the workload currently isolates.
182+
if cfg.IsolateNetwork {
183+
drift.NetworkIsolation = &BoolChange{From: true, To: false}
184+
}
185+
186+
// Permission profile: compare names. The candidate name is only known when
187+
// the registry entry carries a profile with a non-empty Name.
188+
candidateProfile := ""
189+
if imgMeta.Permissions != nil {
190+
candidateProfile = imgMeta.Permissions.Name
191+
}
192+
currentProfile := cfg.PermissionProfileNameOrPath
193+
if candidateProfile != "" && currentProfile != "" && currentProfile != candidateProfile {
194+
drift.PermissionProfile = &StringChange{From: currentProfile, To: candidateProfile}
195+
}
196+
197+
if drift.Transport == nil && drift.NetworkIsolation == nil && drift.PermissionProfile == nil {
198+
return nil
199+
}
200+
return drift
201+
}
202+
203+
// toEnvVarInfo converts a registry EnvVar into the drift-report shape, clearing
204+
// the Default value when the variable is a secret to avoid leaking sensitive
205+
// data into reports that may be logged or returned over the API.
206+
func toEnvVarInfo(ev *regtypes.EnvVar) EnvVarInfo {
207+
info := EnvVarInfo{
208+
Name: ev.Name,
209+
Description: ev.Description,
210+
Required: ev.Required,
211+
Secret: ev.Secret,
212+
Default: ev.Default,
213+
}
214+
if info.Secret {
215+
info.Default = ""
216+
}
217+
return info
218+
}

0 commit comments

Comments
 (0)