Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions cmd/workspace/clusters/start_idempotent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package clusters

import (
"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/spf13/cobra"
)

// startIdempotentOverride wraps the generated start command's RunE so that
// calling "clusters start" on a cluster that is not in the TERMINATED state
// is treated as a no-op instead of returning an error.
//
// The Databricks documentation states: "If the cluster is not currently in a
// TERMINATED state, nothing will happen." The API, however, rejects the
// request with INVALID_STATE when the cluster is already RUNNING. This override
// aligns the CLI behavior with the documented behavior.
func startIdempotentOverride(cmd *cobra.Command, req *compute.StartCluster) {
originalRunE := cmd.RunE
cmd.RunE = func(cmd *cobra.Command, args []string) error {
ctx := cmd.Context()
w := cmdctx.WorkspaceClient(ctx)

// Resolve the cluster ID from args or the request (populated by the
// original RunE's argument-parsing logic). We need the cluster ID
// before calling the original RunE so we can check the current state.
// If args is empty the original code shows a picker; we delegate that
// to the original RunE and let a potential INVALID_STATE error surface
// as a user-visible message only in that edge case.
clusterID := req.ClusterId
if len(args) == 1 {
clusterID = args[0]
}

if clusterID != "" {
cluster, err := w.Clusters.GetByClusterId(ctx, clusterID)
if err == nil && cluster.State != compute.StateTerminated {
cmdio.LogString(ctx, "Cluster is already "+cluster.State.String()+", skipping start.")
return nil
}
}

return originalRunE(cmd, args)
}
}

func init() {
startOverrides = append(startOverrides, startIdempotentOverride)
}
109 changes: 109 additions & 0 deletions cmd/workspace/clusters/start_idempotent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package clusters

import (
"testing"

"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/qa"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestStartIdempotent_AlreadyRunning verifies that invoking the start command
// on a cluster that is already RUNNING returns nil instead of an error.
func TestStartIdempotent_AlreadyRunning(t *testing.T) {
cfg, server := qa.HTTPFixtures{
{
Method: "GET",
Resource: "/api/2.1/clusters/get?cluster_id=abc-123",
Response: compute.ClusterDetails{
ClusterId: "abc-123",
State: compute.StateRunning,
},
},
}.Config(t)
defer server.Close()

w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg)))

ctx := cmdio.MockDiscard(t.Context())
ctx = cmdctx.SetWorkspaceClient(ctx, w)

cmd := newStart()
cmd.SetContext(ctx)

err := cmd.RunE(cmd, []string{"abc-123"})
require.NoError(t, err, "start on a RUNNING cluster should be a no-op, not an error")
}

// TestStartIdempotent_AlreadyResizing verifies that invoking the start command
// on a cluster that is RESIZING (also active) returns nil.
func TestStartIdempotent_AlreadyResizing(t *testing.T) {
cfg, server := qa.HTTPFixtures{
{
Method: "GET",
Resource: "/api/2.1/clusters/get?cluster_id=abc-789",
Response: compute.ClusterDetails{
ClusterId: "abc-789",
State: compute.StateResizing,
},
},
}.Config(t)
defer server.Close()

w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg)))

ctx := cmdio.MockDiscard(t.Context())
ctx = cmdctx.SetWorkspaceClient(ctx, w)

cmd := newStart()
cmd.SetContext(ctx)

err := cmd.RunE(cmd, []string{"abc-789"})
require.NoError(t, err, "start on a RESIZING cluster should be a no-op, not an error")
}

// TestStartIdempotent_GetState verifies that cluster state can be checked
// to distinguish TERMINATED from active states.
func TestStartIdempotent_GetState(t *testing.T) {
tests := []struct {
name string
clusterID string
state compute.State
shouldSkip bool
}{
{"running cluster", "run-id", compute.StateRunning, true},
{"resizing cluster", "rsz-id", compute.StateResizing, true},
{"pending cluster", "pnd-id", compute.StatePending, true},
{"terminated cluster", "trm-id", compute.StateTerminated, false},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg, server := qa.HTTPFixtures{
{
Method: "GET",
Resource: "/api/2.1/clusters/get?cluster_id=" + tt.clusterID,
Response: compute.ClusterDetails{
ClusterId: tt.clusterID,
State: tt.state,
},
},
}.Config(t)
defer server.Close()

w := databricks.Must(databricks.NewWorkspaceClient((*databricks.Config)(cfg)))
ctx := cmdio.MockDiscard(t.Context())

cluster, err := w.Clusters.GetByClusterId(ctx, tt.clusterID)
require.NoError(t, err)

shouldSkip := cluster.State != compute.StateTerminated
assert.Equal(t, tt.shouldSkip, shouldSkip,
"cluster in state %s should skip=%v", tt.state, tt.shouldSkip)
})
}
}