diff --git a/cmd/thv/app/run.go b/cmd/thv/app/run.go index 04bcc1598d..a10f12949a 100644 --- a/cmd/thv/app/run.go +++ b/cmd/thv/app/run.go @@ -250,6 +250,10 @@ func runSingleServer(ctx context.Context, runFlags *RunFlags, serverOrImage stri return fmt.Errorf("server creation blocked by policy: %w", err) } + // Record which runtime owns this workload so that reconciliation logic + // does not corrupt its status file when a different runtime is active. + runnerConfig.RuntimeName = rt.Name() + // Always save the run config to disk before starting (both foreground and detached modes) // NOTE: Save before secrets processing to avoid storing secrets in the state store if err := runnerConfig.SaveState(ctx); err != nil { @@ -469,6 +473,7 @@ func runFromConfigFile(ctx context.Context) error { // Set the runtime in the config runConfig.Deployer = rt + runConfig.RuntimeName = rt.Name() // Create workload manager workloadManager, err := workloads.NewManagerFromRuntime(rt) diff --git a/docs/server/docs.go b/docs/server/docs.go index 9ca280736f..5ae175529b 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -1404,6 +1404,10 @@ const docTemplate = `{ "runtime_config": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig" }, + "runtime_name": { + "description": "RuntimeName is the registered name of the container runtime that owns this\nworkload (e.g., \"docker\", \"kubernetes\"). Used during reconciliation to avoid\ncorrupting status files of workloads managed by a different runtime.", + "type": "string" + }, "scaling_config": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_runner.ScalingConfig" }, diff --git a/docs/server/swagger.json b/docs/server/swagger.json index 8127a52c45..a3967d0be1 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -1397,6 +1397,10 @@ "runtime_config": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig" }, + "runtime_name": { + "description": "RuntimeName is the registered name of the container runtime that owns this\nworkload (e.g., \"docker\", \"kubernetes\"). Used during reconciliation to avoid\ncorrupting status files of workloads managed by a different runtime.", + "type": "string" + }, "scaling_config": { "$ref": "#/components/schemas/github_com_stacklok_toolhive_pkg_runner.ScalingConfig" }, diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index 6bbded4bce..e166553ba9 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -1406,6 +1406,12 @@ components: type: string runtime_config: $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig' + runtime_name: + description: |- + RuntimeName is the registered name of the container runtime that owns this + workload (e.g., "docker", "kubernetes"). Used during reconciliation to avoid + corrupting status files of workloads managed by a different runtime. + type: string scaling_config: $ref: '#/components/schemas/github_com_stacklok_toolhive_pkg_runner.ScalingConfig' schema_version: diff --git a/pkg/api/v1/workload_service.go b/pkg/api/v1/workload_service.go index e0d899f8ec..6ef324c9f1 100644 --- a/pkg/api/v1/workload_service.go +++ b/pkg/api/v1/workload_service.go @@ -107,6 +107,9 @@ func (s *WorkloadService) CreateWorkloadFromRequest(ctx context.Context, req *cr return nil, fmt.Errorf("server creation blocked by policy: %w", err) } + // Record which runtime owns this workload for cross-runtime reconciliation. + runConfig.RuntimeName = s.containerRuntime.Name() + // Save the workload state if err := runConfig.SaveState(ctx); err != nil { slog.Error("failed to save workload config", "error", err) diff --git a/pkg/api/v1/workloads_test.go b/pkg/api/v1/workloads_test.go index 6091879908..b647218d7d 100644 --- a/pkg/api/v1/workloads_test.go +++ b/pkg/api/v1/workloads_test.go @@ -261,6 +261,7 @@ func TestCreateWorkload(t *testing.T) { mockRuntime := runtimemocks.NewMockRuntime(ctrl) mockGroupManager := groupsmocks.NewMockManager(ctrl) + mockRuntime.EXPECT().Name().Return("docker").AnyTimes() tt.setupMock(t, mockWorkloadManager, mockRuntime, mockGroupManager) expectedServerOrImage := tt.expectedServerOrImage if expectedServerOrImage == "" { @@ -281,6 +282,7 @@ func TestCreateWorkload(t *testing.T) { workloadService: &WorkloadService{ groupManager: mockGroupManager, workloadManager: mockWorkloadManager, + containerRuntime: mockRuntime, imageRetriever: mockRetriever, imagePuller: func(_ context.Context, _ string) error { return nil }, configProvider: config.NewDefaultProvider(), @@ -502,6 +504,7 @@ func TestUpdateWorkload(t *testing.T) { workloadService: &WorkloadService{ groupManager: mockGroupManager, workloadManager: mockWorkloadManager, + containerRuntime: mockRuntime, imageRetriever: mockRetriever, imagePuller: func(_ context.Context, _ string) error { return nil }, configProvider: config.NewDefaultProvider(), diff --git a/pkg/container/docker/client.go b/pkg/container/docker/client.go index ea5ef8fe88..087832bd60 100644 --- a/pkg/container/docker/client.go +++ b/pkg/container/docker/client.go @@ -651,6 +651,21 @@ func (c *Client) AttachToWorkload(ctx context.Context, workloadName string) (io. return resp.Conn, stdoutReader, nil } +// Name returns the name of the concrete runtime this client is connected to +// (e.g. "docker", "podman", "colima"), as detected at connection time. +// +// This intentionally returns the detected runtimeType rather than the static +// RuntimeName constant used for factory registration. RuntimeName collapses all +// Docker-API-compatible runtimes under a single "docker" registration, but for +// workload-ownership tracking each concrete runtime must identify itself +// distinctly: Docker and Podman are separate daemons with separate containers, +// so a workload created under one must not be claimed by the other during +// status reconciliation. Returning the constant would let Podman-owned and +// Docker-owned workloads share the identity "docker" and corrupt each other. +func (c *Client) Name() string { + return string(c.runtimeType) +} + // IsRunning checks the health of the container runtime. // This is used to verify that the runtime is operational and can manage workloads. func (c *Client) IsRunning(ctx context.Context) error { diff --git a/pkg/container/docker/client_name_test.go b/pkg/container/docker/client_name_test.go new file mode 100644 index 0000000000..a2cac57468 --- /dev/null +++ b/pkg/container/docker/client_name_test.go @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package docker + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + rt "github.com/stacklok/toolhive/pkg/container/runtime" +) + +// TestClientName verifies that Name reports the concrete detected runtime type +// rather than the static factory-registration constant. Distinct identities are +// required so that workload-ownership tracking does not let Docker- and +// Podman-owned workloads (separate daemons) corrupt each other's status. +func TestClientName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + runtimeType rt.Type + want string + }{ + {name: "docker", runtimeType: rt.TypeDocker, want: "docker"}, + {name: "podman", runtimeType: rt.TypePodman, want: "podman"}, + {name: "colima", runtimeType: rt.TypeColima, want: "colima"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + c := &Client{runtimeType: tt.runtimeType} + assert.Equal(t, tt.want, c.Name()) + }) + } +} + +// TestClientNameOwnershipRoundTrip asserts the property the cross-runtime +// protection relies on: the identity stamped at create time and the identity +// compared at reconcile time come from the same Name call, so a workload created +// under Podman is recognized as Podman-owned (and not as Docker-owned) later. +func TestClientNameOwnershipRoundTrip(t *testing.T) { + t.Parallel() + + podman := &Client{runtimeType: rt.TypePodman} + docker := &Client{runtimeType: rt.TypeDocker} + + // Identity recorded on the RunConfig when the workload is created. + stampedAtCreate := podman.Name() + + // Same Podman runtime later reconciling: must recognize ownership. + assert.Equal(t, stampedAtCreate, podman.Name(), + "podman must recognize its own workloads across calls") + + // A different runtime (Docker) reconciling the same workload: must NOT + // claim ownership, otherwise it would corrupt the Podman workload's status. + assert.NotEqual(t, stampedAtCreate, docker.Name(), + "docker must not claim ownership of a podman-created workload") +} diff --git a/pkg/container/kubernetes/client.go b/pkg/container/kubernetes/client.go index 0b6d72cd09..405e58493e 100644 --- a/pkg/container/kubernetes/client.go +++ b/pkg/container/kubernetes/client.go @@ -787,6 +787,11 @@ func (*Client) StopWorkload(_ context.Context, _ string) error { return nil } +// Name returns the registered name of this runtime. +func (*Client) Name() string { + return RuntimeName +} + // IsRunning checks the health of the container runtime. // This is used to verify that the runtime is operational and can manage workloads. func (c *Client) IsRunning(ctx context.Context) error { diff --git a/pkg/container/runtime/mocks/mock_runtime.go b/pkg/container/runtime/mocks/mock_runtime.go index b890ea1f3f..1141c4b467 100644 --- a/pkg/container/runtime/mocks/mock_runtime.go +++ b/pkg/container/runtime/mocks/mock_runtime.go @@ -232,6 +232,20 @@ func (mr *MockRuntimeMockRecorder) ListWorkloads(ctx any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListWorkloads", reflect.TypeOf((*MockRuntime)(nil).ListWorkloads), ctx) } +// Name mocks base method. +func (m *MockRuntime) Name() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Name") + ret0, _ := ret[0].(string) + return ret0 +} + +// Name indicates an expected call of Name. +func (mr *MockRuntimeMockRecorder) Name() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Name", reflect.TypeOf((*MockRuntime)(nil).Name)) +} + // RemoveWorkload mocks base method. func (m *MockRuntime) RemoveWorkload(ctx context.Context, workloadName string) error { m.ctrl.T.Helper() diff --git a/pkg/container/runtime/types.go b/pkg/container/runtime/types.go index b65c518820..1f96ec0fba 100644 --- a/pkg/container/runtime/types.go +++ b/pkg/container/runtime/types.go @@ -180,6 +180,11 @@ type Runtime interface { // IsRunning checks the health of the container runtime. // This is used to verify that the runtime is operational and can manage workloads. IsRunning(ctx context.Context) error + + // Name returns the registered name of this runtime (e.g., "docker", "kubernetes"). + // Used to track which runtime owns a workload so that reconciliation logic + // does not corrupt status files of workloads managed by a different runtime. + Name() string } // Monitor defines the interface for container monitoring diff --git a/pkg/mcp/server/run_server.go b/pkg/mcp/server/run_server.go index cc1f542fd8..2789cc1238 100644 --- a/pkg/mcp/server/run_server.go +++ b/pkg/mcp/server/run_server.go @@ -169,7 +169,14 @@ func buildServerConfig( // Build the configuration envVarValidator := &runner.DetachedEnvVarValidator{} - return runner.NewRunConfigBuilder(ctx, imageMetadata, envVars, envVarValidator, opts...) + cfg, err := runner.NewRunConfigBuilder(ctx, imageMetadata, envVars, envVarValidator, opts...) + if err != nil { + return nil, err + } + + // Record which runtime owns this workload for cross-runtime reconciliation. + cfg.RuntimeName = rt.Name() + return cfg, nil } // configureTransport sets up transport configuration from metadata diff --git a/pkg/runner/config.go b/pkg/runner/config.go index 399cff3729..0265282470 100644 --- a/pkg/runner/config.go +++ b/pkg/runner/config.go @@ -55,6 +55,11 @@ type RunConfig struct { // means unversioned (backward-compat with older operators, or non-operator callers). MCPServerGeneration int64 `json:"mcpserver_generation,omitempty" yaml:"mcpserver_generation,omitempty"` + // RuntimeName is the registered name of the container runtime that owns this + // workload (e.g., "docker", "kubernetes"). Used during reconciliation to avoid + // corrupting status files of workloads managed by a different runtime. + RuntimeName string `json:"runtime_name,omitempty" yaml:"runtime_name,omitempty"` + // Image is the Docker image to run Image string `json:"image" yaml:"image"` diff --git a/pkg/workloads/statuses/file_status.go b/pkg/workloads/statuses/file_status.go index 92350fd6a8..0c61d81996 100644 --- a/pkg/workloads/statuses/file_status.go +++ b/pkg/workloads/statuses/file_status.go @@ -74,6 +74,111 @@ type fileStatusManager struct { runConfigStore state.Store } +// isOwnedByActiveRuntime checks whether the named workload was created by the +// currently active runtime. It loads the runtime_name field from the persisted +// RunConfig and compares it against f.runtime.Name(). +// +// Ownership can only be denied when we positively identify a *different* owning +// runtime: the RunConfig must exist, decode cleanly, and carry a non-empty +// runtime_name that differs from the active runtime. Every other case -- +// missing/unreadable config, decode failure, or a legacy workload that predates +// the runtime_name field (empty string) -- is conservatively treated as owned by +// the active runtime, preserving the pre-feature validation behaviour. This bias +// matters: a false "not ours" would silently suppress reconciliation for a +// workload we are responsible for, leaving it stuck in a stale status. +func (f *fileStatusManager) isOwnedByActiveRuntime(ctx context.Context, workloadName string) bool { + // A workload with no RunConfig cannot have been tagged with another + // runtime's name, so it can only be ours (or a legacy workload). Checking + // existence explicitly lets us distinguish "no config" from "config present + // but cannot be parsed" and mirrors isRemoteWorkload's handling. + exists, err := f.runConfigStore.Exists(ctx, workloadName) + if err != nil || !exists { + return true + } + + reader, err := f.runConfigStore.GetReader(ctx, workloadName) + if err != nil { + // RunConfig unreadable -- assume ours so we don't silently skip + // validation for workloads we should be checking. + return true + } + defer func() { + if err := reader.Close(); err != nil { + slog.Warn("failed to close reader", "error", err) + } + }() + + var config struct { + RuntimeName string `json:"runtime_name"` + } + if err := json.NewDecoder(reader).Decode(&config); err != nil { + return true // can't determine ownership -- assume ours + } + + // Empty means legacy workload (predates runtime_name); treat as ours. + if strings.TrimSpace(config.RuntimeName) == "" { + return true + } + + // Only deny ownership when another runtime is positively identified. + return config.RuntimeName == f.runtime.Name() +} + +// migrateRuntimeName stamps a legacy workload (RuntimeName == "") with the +// active runtime's name. This is called only after the runtime has confirmed the +// workload is healthy, so we know with certainty it belongs to the active +// runtime. Save failures are non-fatal — the migration will be retried on the +// next reconciliation cycle. +func (f *fileStatusManager) migrateRuntimeName(ctx context.Context, workloadName string) { + reader, err := f.runConfigStore.GetReader(ctx, workloadName) + if err != nil { + slog.Debug("skipping runtime name migration: cannot read config", "workload", workloadName, "error", err) + return + } + + var raw map[string]json.RawMessage + decodeErr := json.NewDecoder(reader).Decode(&raw) + // Always close the reader before acting on the decode result. + if closeErr := reader.Close(); closeErr != nil { + slog.Warn("failed to close reader for runtime name migration", "workload", workloadName, "error", closeErr) + } + if decodeErr != nil { + return + } + + // Check if runtime_name is already set + if rn, ok := raw["runtime_name"]; ok { + var name string + if err := json.Unmarshal(rn, &name); err == nil && name != "" { + return // Already migrated + } + } + + // Stamp with the active runtime name + runtimeBytes, err := json.Marshal(f.runtime.Name()) + if err != nil { + return + } + raw["runtime_name"] = runtimeBytes + + writer, err := f.runConfigStore.GetWriter(ctx, workloadName) + if err != nil { + slog.Warn("failed to open writer for runtime name migration", "workload", workloadName, "error", err) + return + } + defer func() { + if err := writer.Close(); err != nil { + slog.Warn("failed to close writer for runtime name migration", "workload", workloadName, "error", err) + } + }() + + encoder := json.NewEncoder(writer) + encoder.SetIndent("", " ") + if err := encoder.Encode(raw); err != nil { + slog.Warn("failed to write migrated RunConfig", "workload", workloadName, "error", err) + } +} + // isRemoteWorkload checks if a workload is remote by attempting to load its run configuration // and checking if it has a RemoteURL field set. // TODO: This is a temporary solution to check if a workload is remote @@ -910,6 +1015,12 @@ func (f *fileStatusManager) validateRunningWorkload( return result, nil } + // Skip validation for workloads owned by a different runtime to avoid + // corrupting their status files (see #4432). + if !f.isOwnedByActiveRuntime(ctx, workloadName) { + return result, nil + } + // Get raw container info from runtime (before label filtering) containerInfo, err := f.runtime.GetWorkloadInfo(ctx, workloadName) if err != nil { @@ -926,7 +1037,12 @@ func (f *fileStatusManager) validateRunningWorkload( return unhealthyWorkload, nil } - // Runtime and proxy confirm workload is healthy - merge runtime data with file status + // Runtime and proxy confirm workload is healthy — opportunistically migrate + // legacy workloads that predate the runtime_name field so they are stamped + // with the owning runtime for future reconciliation cycles. + f.migrateRuntimeName(ctx, workloadName) + + // Merge runtime data with file status return f.mergeHealthyWorkloadData(containerInfo, result) } @@ -957,10 +1073,16 @@ func (f *fileStatusManager) handleRuntimeMismatch( func (f *fileStatusManager) handleRuntimeMissing( ctx context.Context, workloadName string, fileWorkload core.Workload, ) (core.Workload, error) { - // Check if this is a remote workload using the Remote field + // Remote workloads don't exist in the container runtime, so it's normal + // for them to be missing. This also means they bypass the runtime-ownership + // check below — remote workloads have no owning runtime. if fileWorkload.Remote { - // Remote workloads don't exist in the container runtime, so it's normal for them to be missing - // Don't mark them as unhealthy + return fileWorkload, nil + } + + // Skip reconciliation for workloads owned by a different runtime to avoid + // corrupting their status files (see #4432). + if !f.isOwnedByActiveRuntime(ctx, workloadName) { return fileWorkload, nil } diff --git a/pkg/workloads/statuses/file_status_test.go b/pkg/workloads/statuses/file_status_test.go index de2c85bc85..3823fdfc1b 100644 --- a/pkg/workloads/statuses/file_status_test.go +++ b/pkg/workloads/statuses/file_status_test.go @@ -4,6 +4,7 @@ package statuses import ( + "bytes" "context" "encoding/json" "errors" @@ -233,6 +234,14 @@ func TestFileStatusManager_GetWorkload_FileAndRuntimeCombination(t *testing.T) { return io.NopCloser(strings.NewReader(`{"name": "running-workload", "transport": "sse"}`)), nil }).AnyTimes() + // Mock runtime name for isOwnedByActiveRuntime and migrateRuntimeName + mockRuntime.EXPECT().Name().Return("docker").AnyTimes() + + // Mock GetWriter for migrateRuntimeName (legacy workload with no runtime_name gets migrated) + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), "running-workload").DoAndReturn(func(context.Context, string) (io.WriteCloser, error) { + return nopWriteCloser{&bytes.Buffer{}}, nil + }).AnyTimes() + // Create a workload status file and set it to running err := manager.SetWorkloadStatus(ctx, "running-workload", rt.WorkloadStatusStarting, "") require.NoError(t, err) @@ -915,6 +924,14 @@ func TestFileStatusManager_GetWorkload_HealthyRunningWorkload(t *testing.T) { return io.NopCloser(strings.NewReader(`{"name": "healthy-workload", "transport": "sse"}`)), nil }).AnyTimes() + // Mock runtime name for isOwnedByActiveRuntime and migrateRuntimeName + mockRuntime.EXPECT().Name().Return("docker").AnyTimes() + + // Mock GetWriter for migrateRuntimeName (legacy workload with no runtime_name gets migrated) + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), "healthy-workload").DoAndReturn(func(context.Context, string) (io.WriteCloser, error) { + return nopWriteCloser{&bytes.Buffer{}}, nil + }).AnyTimes() + // Set the workload status to running in the file err := manager.SetWorkloadStatus(ctx, "healthy-workload", rt.WorkloadStatusRunning, "container started") require.NoError(t, err) @@ -964,6 +981,9 @@ func TestFileStatusManager_GetWorkload_ProxyNotRunning(t *testing.T) { return io.NopCloser(strings.NewReader(`{"name": "proxy-down-workload", "transport": "sse"}`)), nil }).AnyTimes() + // Mock runtime name for isOwnedByActiveRuntime + mockRuntime.EXPECT().Name().Return("docker").AnyTimes() + // First, create a status file manually to ensure file is found statusFile := workloadStatusFile{ Status: rt.WorkloadStatusRunning, @@ -1041,6 +1061,14 @@ func TestFileStatusManager_GetWorkload_HealthyWithProxy(t *testing.T) { return io.NopCloser(strings.NewReader(`{"name": "healthy-with-proxy", "transport": "sse"}`)), nil }).AnyTimes() + // Mock runtime name for isOwnedByActiveRuntime and migrateRuntimeName + mockRuntime.EXPECT().Name().Return("docker").AnyTimes() + + // Mock GetWriter for migrateRuntimeName (legacy workload with no runtime_name gets migrated) + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), "healthy-with-proxy").DoAndReturn(func(context.Context, string) (io.WriteCloser, error) { + return nopWriteCloser{&bytes.Buffer{}}, nil + }).AnyTimes() + // Set the workload status to running in the file err := manager.SetWorkloadStatus(ctx, "healthy-with-proxy", rt.WorkloadStatusRunning, "container started") require.NoError(t, err) @@ -2229,3 +2257,361 @@ func TestJSONRecovery_MultipleExtraClosingBraces(t *testing.T) { assert.Equal(t, rt.WorkloadStatusRunning, statusFile.Status) assert.Equal(t, 12345, statusFile.ProcessID) } + +func TestFileStatusManager_CrossRuntimeProtection(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + // runtimeName is what the active runtime reports via Name() + runtimeName string + // runConfigJSON is the JSON returned by GetReader for the workload's RunConfig. + // It controls what runtime_name the workload was created with. + runConfigJSON string + // operation selects which code path to exercise: "list" or "get" + operation string + // expectUnhealthy indicates whether we expect the workload to be marked unhealthy + expectUnhealthy bool + // expectRuntimeGetWorkloadInfo indicates whether GetWorkloadInfo should be called (for "get" operation) + expectRuntimeGetWorkloadInfo bool + }{ + { + name: "workload owned by different runtime is not marked unhealthy during list", + runtimeName: "docker", + runConfigJSON: `{"runtime_name": "go-microvm"}`, + operation: "list", + expectUnhealthy: false, + }, + { + name: "workload owned by active runtime IS marked unhealthy when missing from runtime", + runtimeName: "docker", + runConfigJSON: `{"runtime_name": "docker"}`, + operation: "list", + expectUnhealthy: true, + }, + { + name: "legacy workload without runtime_name IS validated normally", + runtimeName: "docker", + runConfigJSON: `{"name": "test-workload"}`, + operation: "list", + expectUnhealthy: true, + }, + { + name: "workload owned by different runtime is not validated during GetWorkload", + runtimeName: "docker", + runConfigJSON: `{"runtime_name": "go-microvm"}`, + operation: "get", + expectUnhealthy: false, + expectRuntimeGetWorkloadInfo: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + manager, mockRuntime, mockRunConfigStore := newTestFileStatusManager(t, ctrl) + ctx := t.Context() + + workloadName := "test-workload" + + // Create a running workload status file + err := manager.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusRunning, "container started") + require.NoError(t, err) + + // Mock runtime name + mockRuntime.EXPECT().Name().Return(tt.runtimeName).AnyTimes() + + // Mock run config store -- returns the test-specific RunConfig JSON. + // Both isRemoteWorkload (via getWorkloadsFromFiles) and isOwnedByActiveRuntime + // read from GetReader, so we use AnyTimes with a fresh reader per call. + mockRunConfigStore.EXPECT().Exists(gomock.Any(), workloadName).Return(true, nil).AnyTimes() + mockRunConfigStore.EXPECT().GetReader(gomock.Any(), workloadName).DoAndReturn(func(context.Context, string) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader(tt.runConfigJSON)), nil + }).AnyTimes() + + switch tt.operation { + case "list": + // Runtime returns no containers -- workload exists only in file + mockRuntime.EXPECT().ListWorkloads(gomock.Any()).Return([]rt.ContainerInfo{}, nil) + + workloads, listErr := manager.ListWorkloads(ctx, true, nil) + require.NoError(t, listErr) + require.Len(t, workloads, 1, "expected exactly one workload in list") + + if tt.expectUnhealthy { + assert.Equal(t, rt.WorkloadStatusUnhealthy, workloads[0].Status, + "workload should be marked unhealthy") + } else { + assert.Equal(t, rt.WorkloadStatusRunning, workloads[0].Status, + "workload should remain running (cross-runtime protection)") + + // Verify the status file on disk was NOT modified + statusFilePath := filepath.Join(manager.baseDir, workloadName+".json") + data, readErr := os.ReadFile(statusFilePath) + require.NoError(t, readErr) + + var statusFile workloadStatusFile + require.NoError(t, json.Unmarshal(data, &statusFile)) + assert.Equal(t, rt.WorkloadStatusRunning, statusFile.Status, + "status file on disk should still show running") + } + + case "get": + if tt.expectRuntimeGetWorkloadInfo { + mockRuntime.EXPECT().GetWorkloadInfo(gomock.Any(), workloadName).Return(rt.ContainerInfo{}, errors.New("not found")) + } + // If we do NOT expect GetWorkloadInfo to be called, the mock controller + // will fail the test if it is called unexpectedly. + + workload, getErr := manager.GetWorkload(ctx, workloadName) + require.NoError(t, getErr) + + if tt.expectUnhealthy { + assert.Equal(t, rt.WorkloadStatusUnhealthy, workload.Status, + "workload should be marked unhealthy") + } else { + assert.Equal(t, rt.WorkloadStatusRunning, workload.Status, + "workload should remain running (cross-runtime protection)") + } + } + }) + } +} + +// nopWriteCloser wraps a bytes.Buffer with a no-op Close. +type nopWriteCloser struct { + *bytes.Buffer +} + +func (nopWriteCloser) Close() error { return nil } + +// TestIsOwnedByActiveRuntime exercises every branch of the ownership check. +// Ownership may only be denied when a *different* runtime is positively +// identified; every ambiguous case must default to "owned" so we never silently +// suppress reconciliation for a workload we are responsible for. +func TestIsOwnedByActiveRuntime(t *testing.T) { + t.Parallel() + + const workloadName = "test-workload" + const activeRuntime = "docker" + + tests := []struct { + name string + // existsResult/existsErr control the runConfigStore.Exists mock. + existsResult bool + existsErr error + // when the config exists, getReaderErr or runConfigJSON drive GetReader. + getReaderErr error + runConfigJSON string + expectOwned bool + }{ + { + name: "owned by active runtime", + existsResult: true, + runConfigJSON: `{"runtime_name": "docker"}`, + expectOwned: true, + }, + { + name: "owned by a different runtime is not ours", + existsResult: true, + runConfigJSON: `{"runtime_name": "go-microvm"}`, + expectOwned: false, + }, + { + name: "legacy workload with empty runtime_name is assumed ours", + existsResult: true, + runConfigJSON: `{"name": "test-workload"}`, + expectOwned: true, + }, + { + name: "whitespace-only runtime_name is treated as legacy", + existsResult: true, + runConfigJSON: `{"runtime_name": " "}`, + expectOwned: true, + }, + { + name: "missing run config is assumed ours", + existsResult: false, + expectOwned: true, + }, + { + name: "exists error is assumed ours", + existsErr: errors.New("store unavailable"), + expectOwned: true, + }, + { + name: "unreadable run config is assumed ours", + existsResult: true, + getReaderErr: errors.New("read error"), + expectOwned: true, + }, + { + name: "undecodable run config is assumed ours", + existsResult: true, + runConfigJSON: `{bad json`, + expectOwned: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + manager, mockRuntime, mockRunConfigStore := newTestFileStatusManager(t, ctrl) + ctx := t.Context() + + // Name() is only consulted when a non-empty runtime_name is compared. + mockRuntime.EXPECT().Name().Return(activeRuntime).AnyTimes() + + mockRunConfigStore.EXPECT().Exists(gomock.Any(), workloadName). + Return(tt.existsResult, tt.existsErr) + + // GetReader is only reached when the config exists and Exists succeeds. + if tt.existsErr == nil && tt.existsResult { + if tt.getReaderErr != nil { + mockRunConfigStore.EXPECT().GetReader(gomock.Any(), workloadName). + Return(nil, tt.getReaderErr) + } else { + mockRunConfigStore.EXPECT().GetReader(gomock.Any(), workloadName).DoAndReturn( + func(context.Context, string) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader(tt.runConfigJSON)), nil + }) + } + } + + assert.Equal(t, tt.expectOwned, manager.isOwnedByActiveRuntime(ctx, workloadName)) + }) + } +} + +func TestMigrateRuntimeName(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + runtimeName string + runConfigJSON string + expectWrite bool + expectedRuntime string + getReaderErr bool + getWriterErr bool + }{ + { + name: "migrates empty runtime_name", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload", "image": "test:latest"}`, + expectWrite: true, + expectedRuntime: "docker", + }, + { + name: "migrates explicit empty runtime_name", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload", "runtime_name": "", "image": "test:latest"}`, + expectWrite: true, + expectedRuntime: "docker", + }, + { + name: "no-op when runtime_name already set", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload", "runtime_name": "docker", "image": "test:latest"}`, + expectWrite: false, + }, + { + name: "no-op when runtime_name set to different runtime", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload", "runtime_name": "go-microvm", "image": "test:latest"}`, + expectWrite: false, + }, + { + name: "graceful on GetReader error", + runtimeName: "docker", + getReaderErr: true, + expectWrite: false, + }, + { + name: "graceful on GetWriter error", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload"}`, + getWriterErr: true, + expectWrite: false, + }, + { + name: "graceful on malformed JSON", + runtimeName: "docker", + runConfigJSON: `{bad json`, + expectWrite: false, + }, + { + name: "migrates non-string runtime_name", + runtimeName: "docker", + runConfigJSON: `{"name": "my-workload", "runtime_name": 42}`, + expectWrite: true, + expectedRuntime: "docker", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + manager, mockRuntime, mockRunConfigStore := newTestFileStatusManager(t, ctrl) + ctx := t.Context() + + workloadName := "test-workload" + mockRuntime.EXPECT().Name().Return(tt.runtimeName).AnyTimes() + + if tt.getReaderErr { + mockRunConfigStore.EXPECT().GetReader(gomock.Any(), workloadName). + Return(nil, errors.New("store error")) + } else { + mockRunConfigStore.EXPECT().GetReader(gomock.Any(), workloadName).DoAndReturn( + func(context.Context, string) (io.ReadCloser, error) { + return io.NopCloser(strings.NewReader(tt.runConfigJSON)), nil + }, + ) + } + + var writeBuf nopWriteCloser + if tt.expectWrite { + if tt.getWriterErr { + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), workloadName). + Return(nil, errors.New("write error")) + } else { + writeBuf = nopWriteCloser{&bytes.Buffer{}} + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), workloadName). + Return(writeBuf, nil) + } + } else if tt.getWriterErr { + mockRunConfigStore.EXPECT().GetWriter(gomock.Any(), workloadName). + Return(nil, errors.New("write error")) + } + + manager.migrateRuntimeName(ctx, workloadName) + + if tt.expectWrite && !tt.getWriterErr { + var written map[string]json.RawMessage + require.NoError(t, json.Unmarshal(writeBuf.Bytes(), &written)) + + var runtimeName string + require.NoError(t, json.Unmarshal(written["runtime_name"], &runtimeName)) + assert.Equal(t, tt.expectedRuntime, runtimeName, + "migrated runtime_name should match active runtime") + + // Verify other fields are preserved + var name string + require.NoError(t, json.Unmarshal(written["name"], &name)) + assert.Equal(t, "my-workload", name, "other fields should be preserved") + } + }) + } +}