|
| 1 | +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +package upgrade |
| 5 | + |
| 6 | +import ( |
| 7 | + "context" |
| 8 | + "errors" |
| 9 | + "fmt" |
| 10 | + "log/slog" |
| 11 | + |
| 12 | + regtypes "github.com/stacklok/toolhive-core/registry/types" |
| 13 | + "github.com/stacklok/toolhive/pkg/registry" |
| 14 | + "github.com/stacklok/toolhive/pkg/runner" |
| 15 | + "github.com/stacklok/toolhive/pkg/secrets" |
| 16 | +) |
| 17 | + |
| 18 | +// Checker determines whether registry-sourced workloads have an available |
| 19 | +// upgrade by comparing their current image and configuration against the |
| 20 | +// metadata the injected registry provider reports. |
| 21 | +type Checker struct { |
| 22 | + provider registry.Provider |
| 23 | +} |
| 24 | + |
| 25 | +// NewChecker creates a Checker backed by the given registry provider. |
| 26 | +// |
| 27 | +// The provider is the source of truth for candidate image metadata; callers |
| 28 | +// typically pass the shared singleton from registry.GetDefaultProvider so the |
| 29 | +// provider's response cache is reused across checks. It returns an error if the |
| 30 | +// provider is nil. |
| 31 | +func NewChecker(provider registry.Provider) (*Checker, error) { |
| 32 | + if provider == nil { |
| 33 | + return nil, fmt.Errorf("registry provider must not be nil") |
| 34 | + } |
| 35 | + return &Checker{provider: provider}, nil |
| 36 | +} |
| 37 | + |
| 38 | +// Check evaluates a single workload's RunConfig against the registry and |
| 39 | +// returns the upgrade status. It never mutates the supplied config. Per-item |
| 40 | +// problems (missing server, unparsable tags, non-image entries) are encoded in |
| 41 | +// the returned CheckResult's Status/Reason rather than returned as an error; |
| 42 | +// an error is returned only for an invalid call (nil config). |
| 43 | +func (c *Checker) Check(_ context.Context, cfg *runner.RunConfig) (*CheckResult, error) { |
| 44 | + if cfg == nil { |
| 45 | + return nil, fmt.Errorf("run config must not be nil") |
| 46 | + } |
| 47 | + |
| 48 | + result := &CheckResult{ |
| 49 | + WorkloadName: cfg.Name, |
| 50 | + RegistryServer: cfg.RegistryServerName, |
| 51 | + CurrentImage: cfg.Image, |
| 52 | + } |
| 53 | + |
| 54 | + if cfg.RegistryServerName == "" { |
| 55 | + result.Status = StatusNotRegistrySourced |
| 56 | + return result, nil |
| 57 | + } |
| 58 | + |
| 59 | + server, err := c.provider.GetServer(cfg.RegistryServerName) |
| 60 | + if err != nil { |
| 61 | + if errors.Is(err, registry.ErrServerNotFound) { |
| 62 | + result.Status = StatusServerNotFound |
| 63 | + return result, nil |
| 64 | + } |
| 65 | + // Keep the detailed provider error out of the result: Reason is |
| 66 | + // serialized into the HTTP response, and for an unreachable or |
| 67 | + // misconfigured registry the raw error can carry internal addressing |
| 68 | + // (e.g. "dial tcp 10.x.x.x:443: ..."). Log it for operators instead. |
| 69 | + slog.Debug("registry lookup failed", "server", cfg.RegistryServerName, "error", err) |
| 70 | + result.Status = StatusUnknown |
| 71 | + result.Reason = "registry lookup failed" |
| 72 | + return result, nil |
| 73 | + } |
| 74 | + |
| 75 | + imgMeta, ok := server.(*regtypes.ImageMetadata) |
| 76 | + if !ok { |
| 77 | + result.Status = StatusUnknown |
| 78 | + result.Reason = fmt.Sprintf("registry entry %q is not a container image (cannot determine upgrade)", cfg.RegistryServerName) |
| 79 | + return result, nil |
| 80 | + } |
| 81 | + |
| 82 | + result.CandidateImage = imgMeta.Image |
| 83 | + |
| 84 | + comparison, reason := compareImageTags(cfg.Image, imgMeta.Image) |
| 85 | + switch comparison { |
| 86 | + case comparisonNewer: |
| 87 | + result.Status = StatusUpgradeAvailable |
| 88 | + result.EnvVarDrift = computeEnvDrift(cfg, imgMeta) |
| 89 | + result.ConfigDrift = computeConfigDrift(cfg, imgMeta) |
| 90 | + case comparisonSameOrOlder: |
| 91 | + result.Status = StatusUpToDate |
| 92 | + case comparisonUndecidable: |
| 93 | + result.Status = StatusUnknown |
| 94 | + result.Reason = reason |
| 95 | + default: |
| 96 | + // Defensive: a future tagComparison value (or an unset zero value) must |
| 97 | + // not fall through to the least-safe StatusUpToDate. Treat anything |
| 98 | + // unexpected as unknown. |
| 99 | + result.Status = StatusUnknown |
| 100 | + } |
| 101 | + |
| 102 | + return result, nil |
| 103 | +} |
| 104 | + |
| 105 | +// CheckAll evaluates a batch of workloads. It never returns an error: each |
| 106 | +// workload's outcome (including per-item failures) is encoded in its own |
| 107 | +// CheckResult. The returned slice preserves the input order. Nil entries in the |
| 108 | +// input are skipped. |
| 109 | +func (c *Checker) CheckAll(ctx context.Context, configs []*runner.RunConfig) []*CheckResult { |
| 110 | + results := make([]*CheckResult, 0, len(configs)) |
| 111 | + for _, cfg := range configs { |
| 112 | + if cfg == nil { |
| 113 | + continue |
| 114 | + } |
| 115 | + // Check only errors on a nil config, which we already guarded against, |
| 116 | + // so the error here is unreachable; encode defensively rather than drop. |
| 117 | + res, err := c.Check(ctx, cfg) |
| 118 | + if err != nil { |
| 119 | + slog.Debug("upgrade check failed", "workload", cfg.Name, "error", err) |
| 120 | + res = &CheckResult{ |
| 121 | + WorkloadName: cfg.Name, |
| 122 | + Status: StatusUnknown, |
| 123 | + Reason: "check failed", |
| 124 | + } |
| 125 | + } |
| 126 | + results = append(results, res) |
| 127 | + } |
| 128 | + return results |
| 129 | +} |
| 130 | + |
| 131 | +// computeEnvDrift reports the candidate environment variables the workload does |
| 132 | +// not currently satisfy. A variable is considered satisfied if it appears as a |
| 133 | +// plain env var key in the config, or as the target of one of the config's |
| 134 | +// secret parameters. Removed is left unpopulated (best-effort, forward-compat). |
| 135 | +// |
| 136 | +// It treats the config as read-only. Returns nil when there is no drift. |
| 137 | +func computeEnvDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *EnvVarDrift { |
| 138 | + satisfied := make(map[string]struct{}, len(cfg.EnvVars)+len(cfg.Secrets)) |
| 139 | + for k := range cfg.EnvVars { |
| 140 | + satisfied[k] = struct{}{} |
| 141 | + } |
| 142 | + for _, s := range cfg.Secrets { |
| 143 | + parsed, err := secrets.ParseSecretParameter(s) |
| 144 | + if err != nil { |
| 145 | + // Malformed secret parameters can't satisfy a variable; skip them. |
| 146 | + continue |
| 147 | + } |
| 148 | + if parsed.Target != "" { |
| 149 | + satisfied[parsed.Target] = struct{}{} |
| 150 | + } |
| 151 | + } |
| 152 | + |
| 153 | + var added []EnvVarInfo |
| 154 | + for _, ev := range imgMeta.EnvVars { |
| 155 | + if ev == nil { |
| 156 | + continue |
| 157 | + } |
| 158 | + if _, ok := satisfied[ev.Name]; ok { |
| 159 | + continue |
| 160 | + } |
| 161 | + added = append(added, toEnvVarInfo(ev)) |
| 162 | + } |
| 163 | + |
| 164 | + if len(added) == 0 { |
| 165 | + return nil |
| 166 | + } |
| 167 | + return &EnvVarDrift{Added: added} |
| 168 | +} |
| 169 | + |
| 170 | +// computeConfigDrift reports posture differences between the workload's current |
| 171 | +// configuration and the candidate registry entry. Each field is nil when that |
| 172 | +// aspect did not drift or could not be compared. |
| 173 | +// |
| 174 | +// The permission profile is compared against imgMeta.Permissions.Name (a |
| 175 | +// *permissions.Profile, not a string). Comparison degrades gracefully: when the |
| 176 | +// candidate has no profile, or the workload's profile is a custom name/path |
| 177 | +// that has no registry analogue, that dimension is not reported as drift unless |
| 178 | +// both sides are known and differ. It treats the config as read-only. |
| 179 | +func computeConfigDrift(cfg *runner.RunConfig, imgMeta *regtypes.ImageMetadata) *ConfigDrift { |
| 180 | + drift := &ConfigDrift{} |
| 181 | + |
| 182 | + // Transport: compare the workload's transport string against the registry |
| 183 | + // entry's transport. GetTransport() may return an empty string when the |
| 184 | + // registry entry does not declare one; only report drift when both are set. |
| 185 | + currentTransport := cfg.Transport.String() |
| 186 | + candidateTransport := imgMeta.GetTransport() |
| 187 | + if candidateTransport != "" && currentTransport != "" && currentTransport != candidateTransport { |
| 188 | + drift.Transport = &StringChange{From: currentTransport, To: candidateTransport} |
| 189 | + } |
| 190 | + |
| 191 | + // Permission profile: compare names. The candidate name is only known when |
| 192 | + // the registry entry carries a profile with a non-empty Name. |
| 193 | + candidateProfile := "" |
| 194 | + if imgMeta.Permissions != nil { |
| 195 | + candidateProfile = imgMeta.Permissions.Name |
| 196 | + } |
| 197 | + currentProfile := cfg.PermissionProfileNameOrPath |
| 198 | + if candidateProfile != "" && currentProfile != "" && currentProfile != candidateProfile { |
| 199 | + drift.PermissionProfile = &StringChange{From: currentProfile, To: candidateProfile} |
| 200 | + } |
| 201 | + |
| 202 | + if drift.Transport == nil && drift.PermissionProfile == nil { |
| 203 | + return nil |
| 204 | + } |
| 205 | + return drift |
| 206 | +} |
| 207 | + |
| 208 | +// toEnvVarInfo converts a registry EnvVar into the drift-report shape, clearing |
| 209 | +// the Default value when the variable is a secret to avoid leaking sensitive |
| 210 | +// data into reports that may be logged or returned over the API. |
| 211 | +func toEnvVarInfo(ev *regtypes.EnvVar) EnvVarInfo { |
| 212 | + info := EnvVarInfo{ |
| 213 | + Name: ev.Name, |
| 214 | + Description: ev.Description, |
| 215 | + Required: ev.Required, |
| 216 | + Secret: ev.Secret, |
| 217 | + Default: ev.Default, |
| 218 | + } |
| 219 | + if info.Secret { |
| 220 | + info.Default = "" |
| 221 | + } |
| 222 | + return info |
| 223 | +} |
0 commit comments