Skip to content

Commit ee823d8

Browse files
reyortiz3claude
andauthored
Add policy_stopped workload status and enforce policy gate on restart (#4857)
* Add policy_stopped workload status and enforce policy gate on restart Introduces WorkloadStatusPolicyStopped as a first-class workload status to support enterprise policy enforcement. When non-registry servers are blocked by policy, workloads can be set to this status so the UI and CLI can surface the reason clearly, rather than showing an opaque "error". - Add WorkloadStatusPolicyStopped constant to pkg/container/runtime/types.go - Update enums tag in pkg/core/workload.go to include policy_stopped - Map policy_stopped to BackendUnhealthy in mapWorkloadStatusToVMCPHealth - Call EagerCheckCreateServer in restartSingleWorkload so the policy gate blocks restart of workloads that violate the active policy - Add 🚫 indicator for policy_stopped in CLI list and status output, following the existing ⚠️ pattern for unauthenticated workloads - Regenerate swagger docs to include the new status enum value Part of stacklok/stacklok-enterprise-platform#406 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Remove enterprise reference from WorkloadStatusPolicyStopped comment Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Fix exhaustive lint errors in status switch statements Add all WorkloadStatus cases to the switches in list and status commands to satisfy the exhaustive linter. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Address PR review comments: fix policy gate bypass and missing enum - Add policy_stopped to workloadStatusResponse enums tag so the swagger spec for the status endpoint includes the new value - Add EagerCheckCreateServer check in maybeSetupContainerWorkload after loadRunnerFromState to cover the path where the outer LoadState call fails on a partial name but succeeds after label-based name resolution - Regenerate swagger docs Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Set policy_stopped status on blocked restart and deduplicate status indicator - Call SetWorkloadStatus with WorkloadStatusPolicyStopped in both policy gate failure paths in restartSingleWorkload and maybeSetupContainerWorkload, so a blocked restart transitions the workload into the new status instead of leaving it as stopped - Extract the status indicator switch (⚠️ / 🚫) from list.go and status.go into a single workloadStatusIndicator helper in common.go Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 8504427 commit ee823d8

File tree

10 files changed

+63
-17
lines changed

10 files changed

+63
-17
lines changed

cmd/thv/app/common.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
groupval "github.com/stacklok/toolhive-core/validation/group"
1313
"github.com/stacklok/toolhive/pkg/config"
14+
"github.com/stacklok/toolhive/pkg/container/runtime"
1415
"github.com/stacklok/toolhive/pkg/secrets"
1516
"github.com/stacklok/toolhive/pkg/workloads"
1617
)
@@ -155,6 +156,23 @@ func completeLogsArgs(cmd *cobra.Command, args []string, _ string) ([]string, co
155156
return completions, cobra.ShellCompDirectiveNoFileComp
156157
}
157158

159+
// workloadStatusIndicator returns the status string with a visual indicator prepended
160+
// for statuses that warrant user attention (unauthenticated, policy_stopped).
161+
// All other statuses are returned as plain strings.
162+
func workloadStatusIndicator(status runtime.WorkloadStatus) string {
163+
switch status {
164+
case runtime.WorkloadStatusUnauthenticated:
165+
return "⚠️ " + string(status)
166+
case runtime.WorkloadStatusPolicyStopped:
167+
return "🚫 " + string(status)
168+
case runtime.WorkloadStatusRunning, runtime.WorkloadStatusStopped, runtime.WorkloadStatusError,
169+
runtime.WorkloadStatusStarting, runtime.WorkloadStatusStopping, runtime.WorkloadStatusUnhealthy,
170+
runtime.WorkloadStatusRemoving, runtime.WorkloadStatusUnknown:
171+
return string(status)
172+
}
173+
return string(status)
174+
}
175+
158176
// AddGroupFlag adds a --group flag to the provided command for filtering by group.
159177
// If withShorthand is true, adds the -g shorthand as well.
160178
func AddGroupFlag(cmd *cobra.Command, groupVar *string, withShorthand bool) {

cmd/thv/app/list.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212

1313
"github.com/spf13/cobra"
1414

15-
rt "github.com/stacklok/toolhive/pkg/container/runtime"
1615
"github.com/stacklok/toolhive/pkg/core"
1716
"github.com/stacklok/toolhive/pkg/workloads"
1817
)
@@ -165,11 +164,8 @@ func printTextOutput(workloadList []core.Workload) {
165164

166165
// Print workload information
167166
for _, c := range workloadList {
168-
// Highlight unauthenticated workloads with a warning indicator
169-
status := string(c.Status)
170-
if c.Status == rt.WorkloadStatusUnauthenticated {
171-
status = "⚠️ " + status
172-
}
167+
// Highlight unauthenticated and policy-stopped workloads with indicators
168+
status := workloadStatusIndicator(c.Status)
173169

174170
// Print workload information
175171
if _, err := fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%d\t%s\t%s\n",

cmd/thv/app/status.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313

1414
"github.com/spf13/cobra"
1515

16-
"github.com/stacklok/toolhive/pkg/container/runtime"
1716
"github.com/stacklok/toolhive/pkg/core"
1817
"github.com/stacklok/toolhive/pkg/workloads"
1918
)
@@ -100,10 +99,7 @@ func printStatusJSONOutput(workload core.Workload) error {
10099

101100
func printStatusTextOutput(workload core.Workload) {
102101
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
103-
status := string(workload.Status)
104-
if workload.Status == runtime.WorkloadStatusUnauthenticated {
105-
status = "⚠️ " + status
106-
}
102+
status := workloadStatusIndicator(workload.Status)
107103

108104
// Print workload information in key-value format
109105
_, _ = fmt.Fprintf(w, "Name:\t%s\n", workload.Name)

docs/server/docs.go

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/server/swagger.json

Lines changed: 6 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/server/swagger.yaml

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/api/v1/workload_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type workloadListResponse struct {
3232
type workloadStatusResponse struct {
3333
// Current status of the workload
3434
//nolint:lll // enums tag needed for swagger generation with --parseDependencyLevel
35-
Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated"`
35+
Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated,policy_stopped"`
3636
}
3737

3838
// updateRequest represents the request to update an existing workload

pkg/container/runtime/types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ const (
4444
// WorkloadStatusUnauthenticated indicates that the workload is running but
4545
// cannot authenticate with the remote MCP server (e.g., expired refresh token).
4646
WorkloadStatusUnauthenticated WorkloadStatus = "unauthenticated"
47+
// WorkloadStatusPolicyStopped indicates that the workload was stopped by
48+
// policy enforcement. The StatusContext field carries the human-readable reason.
49+
WorkloadStatusPolicyStopped WorkloadStatus = "policy_stopped"
4750
)
4851

4952
// ContainerInfo represents information about a container

pkg/core/workload.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ type Workload struct {
3333
ProxyMode string `json:"proxy_mode,omitempty"`
3434
// Status is the current status of the workload.
3535
//nolint:lll // enums tag needed for swagger generation with --parseDependencyLevel
36-
Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated"`
36+
Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated,policy_stopped"`
3737
// StatusContext provides additional context about the workload's status.
3838
// The exact meaning is determined by the status and the underlying runtime.
3939
StatusContext string `json:"status_context,omitempty"`

pkg/workloads/manager.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,8 @@ func mapWorkloadStatusToVMCPHealth(status rt.WorkloadStatus) vmcp.BackendHealthS
243243
return vmcp.BackendUnknown
244244
case rt.WorkloadStatusUnauthenticated:
245245
return vmcp.BackendUnauthenticated
246+
case rt.WorkloadStatusPolicyStopped:
247+
return vmcp.BackendUnhealthy
246248
default:
247249
return vmcp.BackendUnknown
248250
}
@@ -1086,6 +1088,15 @@ func (d *DefaultManager) restartSingleWorkload(ctx context.Context, name string,
10861088
return d.restartContainerWorkload(ctx, name, foreground)
10871089
}
10881090

1091+
// Check policy gates before restarting — the loaded RunConfig carries the same
1092+
// fields (RegistryAPIURL, RegistryURL, RemoteURL) that the gate evaluates on create.
1093+
if err := runner.EagerCheckCreateServer(ctx, runConfig); err != nil {
1094+
if statusErr := d.statuses.SetWorkloadStatus(ctx, name, rt.WorkloadStatusPolicyStopped, err.Error()); statusErr != nil {
1095+
slog.Warn("Failed to set workload status to policy_stopped", "workload", name, "error", statusErr)
1096+
}
1097+
return fmt.Errorf("server restart blocked by policy: %w", err)
1098+
}
1099+
10891100
// Check if this is a remote workload
10901101
if runConfig.RemoteURL != "" {
10911102
return d.restartRemoteWorkload(ctx, name, runConfig, foreground)
@@ -1295,6 +1306,16 @@ func (d *DefaultManager) maybeSetupContainerWorkload(ctx context.Context, name s
12951306
return "", nil, fmt.Errorf("failed to load state for %s: %w", workloadName, err)
12961307
}
12971308

1309+
// Check policy gates before restarting. This covers the case where the caller
1310+
// could not load state via the original name but we resolved the canonical name
1311+
// from container labels above, so the check must happen here.
1312+
if err := runner.EagerCheckCreateServer(ctx, mcpRunner.Config); err != nil {
1313+
if statusErr := d.statuses.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusPolicyStopped, err.Error()); statusErr != nil {
1314+
slog.Warn("Failed to set workload status to policy_stopped", "workload", workloadName, "error", statusErr)
1315+
}
1316+
return "", nil, fmt.Errorf("server restart blocked by policy: %w", err)
1317+
}
1318+
12981319
// Set workload status to starting - use the workload name for status operations
12991320
if err := d.statuses.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusStarting, ""); err != nil {
13001321
slog.Warn("Failed to set workload status to starting", "workload", workloadName, "error", err)

0 commit comments

Comments
 (0)