|
| 1 | +// Copyright 2026 Redpanda Data, Inc. |
| 2 | +// |
| 3 | +// Use of this software is governed by the Business Source License |
| 4 | +// included in the file licenses/BSL.md |
| 5 | +// |
| 6 | +// As of the Change Date specified in that file, in accordance with |
| 7 | +// the Business Source License, use of this software will be governed |
| 8 | +// by the Apache License, Version 2.0 |
| 9 | + |
| 10 | +package brokers |
| 11 | + |
| 12 | +import ( |
| 13 | + "fmt" |
| 14 | + "strconv" |
| 15 | + |
| 16 | + "github.com/redpanda-data/common-go/rpadmin" |
| 17 | + |
| 18 | + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/adminapi" |
| 19 | + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/config" |
| 20 | + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/out" |
| 21 | + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/redpanda" |
| 22 | + "github.com/spf13/afero" |
| 23 | + "github.com/spf13/cobra" |
| 24 | +) |
| 25 | + |
| 26 | +// NewDecommissionBroker returns the cluster brokers decommission command. |
| 27 | +func NewDecommissionBroker(fs afero.Fs, p *config.Params) *cobra.Command { |
| 28 | + var skipLivenessCheck bool |
| 29 | + cmd := &cobra.Command{ |
| 30 | + Use: "decommission [BROKER ID]", |
| 31 | + Short: "Decommission the given broker", |
| 32 | + Long: `Decommission the given broker. |
| 33 | +
|
| 34 | +Decommissioning a broker removes it from the cluster. |
| 35 | +
|
| 36 | +A decommission request is sent to every broker in the cluster, only the cluster |
| 37 | +leader handles the request. |
| 38 | +
|
| 39 | +For safety on v22.x clusters, this command will not run if the requested |
| 40 | +broker is in maintenance mode. As of v23.x, Redpanda supports |
| 41 | +decommissioning a node that is currently in maintenance mode. If you are on |
| 42 | +a v22.x cluster and need to bypass the maintenance mode check (perhaps your |
| 43 | +cluster is unreachable), use the --skip-liveness-check flag. |
| 44 | +`, |
| 45 | + Args: cobra.ExactArgs(1), |
| 46 | + Run: func(cmd *cobra.Command, args []string) { |
| 47 | + broker, err := strconv.Atoi(args[0]) |
| 48 | + out.MaybeDie(err, "invalid broker %s: %v", args[0], err) |
| 49 | + if broker < 0 { |
| 50 | + out.Die("invalid negative broker id %v", broker) |
| 51 | + } |
| 52 | + |
| 53 | + p, err := p.LoadVirtualProfile(fs) |
| 54 | + out.MaybeDie(err, "rpk unable to load config: %v", err) |
| 55 | + config.CheckExitCloudAdmin(p) |
| 56 | + |
| 57 | + cl, err := adminapi.NewClient(cmd.Context(), fs, p) |
| 58 | + out.MaybeDie(err, "unable to initialize admin client: %v", err) |
| 59 | + |
| 60 | + if !skipLivenessCheck { |
| 61 | + brokers, err := cl.Brokers(cmd.Context()) |
| 62 | + out.MaybeDie(err, "unable to get broker list: %v; to bypass the node version check re-run this with --skip-liveness-check; see this command's help text for more details", err) |
| 63 | + |
| 64 | + var ( |
| 65 | + b rpadmin.Broker |
| 66 | + found, anyOld bool |
| 67 | + ) |
| 68 | + for _, br := range brokers { |
| 69 | + if br.NodeID == broker { |
| 70 | + if br.Version == "" { |
| 71 | + out.Exit("version for broker %d is unknown, is the node offline?\nto bypass the node version check re-run this with --skip-liveness-check; see this command's help text for more details", br.NodeID) |
| 72 | + } |
| 73 | + version, err := redpanda.VersionFromString(br.Version) |
| 74 | + out.MaybeDie(err, "unable to get broker %d version: %v; to bypass the node version check re-run this with --skip-liveness-check; see this command's help text for more details", br.NodeID, err) |
| 75 | + isOld := version.Less(redpanda.Version{Major: 23, Feature: 1, Patch: 1}) |
| 76 | + |
| 77 | + anyOld = anyOld || isOld |
| 78 | + b, found = br, true |
| 79 | + break |
| 80 | + } |
| 81 | + } |
| 82 | + if !found { |
| 83 | + out.Die("unable to find broker %v in the cluster; to bypass the node version check re-run this with --skip-liveness-check; see this command's help text for more details", broker) |
| 84 | + } |
| 85 | + |
| 86 | + // If any of the brokers is older than v23.1.1 we need to check |
| 87 | + // that the broker that is about to be decommissioned is not |
| 88 | + // in maintenance mode. |
| 89 | + if anyOld { |
| 90 | + // Old brokers (< v22.1) don't have maintenance mode, so we must |
| 91 | + // check if b.Maintenance is not nil. |
| 92 | + if b.Maintenance != nil && b.Maintenance.Draining { |
| 93 | + out.Die(`node cannot be decommissioned while it is in maintenance mode |
| 94 | +take the node out of maintenance mode first by running: |
| 95 | + rpk cluster maintenance disable %v |
| 96 | +to bypass the node version check re-run this with --skip-liveness-check; see this command's |
| 97 | +help text for more details on why`, broker) |
| 98 | + } |
| 99 | + } |
| 100 | + } |
| 101 | + |
| 102 | + err = cl.DecommissionBroker(cmd.Context(), broker) |
| 103 | + out.MaybeDie(err, "unable to decommission broker: %v", err) |
| 104 | + |
| 105 | + fmt.Printf("Success, broker %d decommission started. Use 'rpk cluster brokers decommission-status %d' to monitor data movement.\n", broker, broker) |
| 106 | + }, |
| 107 | + } |
| 108 | + |
| 109 | + // Before using the flag, make sure that the controller leader version is at |
| 110 | + // least 23.1.0, using it is the equivalent of manually issuing the request |
| 111 | + // using /v1/brokers/<broker-id>/decommission. |
| 112 | + cmd.Flags().BoolVar(&skipLivenessCheck, "skip-liveness-check", false, "If enabled, rpk will issue the decommission request without checking if the broker is in maintenance mode") |
| 113 | + |
| 114 | + // Old flag, renamed to skip-liveness-check. |
| 115 | + cmd.Flags().BoolVar(&skipLivenessCheck, "force", false, "If enabled, rpk will issue the decommission request without checking if the broker is in maintenance mode") |
| 116 | + cmd.Flags().MarkHidden("force") |
| 117 | + cmd.Flags().MarkDeprecated("force", "use --skip-liveness-check") |
| 118 | + |
| 119 | + return cmd |
| 120 | +} |
0 commit comments