-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path048-base-managed_clusters-ensure-cleanup.sh
More file actions
executable file
·287 lines (233 loc) · 10.6 KB
/
048-base-managed_clusters-ensure-cleanup.sh
File metadata and controls
executable file
·287 lines (233 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# Load the logging helpers from the utils/ directory located next to this
# script. NOTE(review): log.sh is not visible here; it presumably defines the
# `log <level> <message>` function that every function below calls.
source "$(dirname "${BASH_SOURCE[0]}")"/utils/log.sh
# Global variables
# Directory that holds the per-cluster data files. The path is relative, so it
# is resolved against the current working directory of the caller.
CLUSTER_DATA_DIR="data/clusters"
# Index file that precheck_clusters reads line by line, splitting each line on
# ':' into "<management cluster name>:<kubeconfig path>".
MC_KUBECONFIG_INDEX_FILE="$CLUSTER_DATA_DIR/mc-kubeconfig-index-file"
# Check that the management cluster kubeconfig index file is present.
# Globals:   MC_KUBECONFIG_INDEX_FILE (read)
# Arguments: none
# Returns:   0 when the index is a regular file; otherwise logs an error and
#            returns 1.
function validate_kubeconfig_index() {
  if [[ -f "$MC_KUBECONFIG_INDEX_FILE" ]]; then
    return 0
  fi

  log error "No management cluster kubeconfig index file found at $MC_KUBECONFIG_INDEX_FILE"
  return 1
}
# Print the path of the workload-cluster list file of a management cluster.
# Globals:   CLUSTER_DATA_DIR (read)
# Arguments: $1 - management cluster name
# Outputs:   "<CLUSTER_DATA_DIR>/wc_of_<name>.yaml" on stdout
function get_wc_file() {
  local management_cluster="$1"
  printf '%s\n' "${CLUSTER_DATA_DIR}/wc_of_${management_cluster}.yaml"
}
# Remove the TMC-related run.tanzu.vmware.com annotations from a Cluster
# object on a management cluster.
# Arguments: $1 - cluster name
#            $2 - namespace (provisioner) that holds the Cluster object
#            $3 - management cluster name (accepted for call-site
#                 compatibility; not used by this function)
#            $4 - path to the management cluster kubeconfig
# Returns:   0 when kubectl succeeds, 1 (after logging an error) otherwise
function process_cluster_annotations() {
  local name="$1"
  local prov="$2"
  local kubeconfig_path="$4"
  local annotation

  log info "Removing target annotations from cluster: $name in namespace: $prov"

  # Define specific annotations to remove
  local annotations_to_remove=(
    "run.tanzu.vmware.com/agent-uid"
    "run.tanzu.vmware.com/vmware-system-tmc-cluster-group"
    "run.tanzu.vmware.com/vmware-system-tmc-applied"
    "run.tanzu.vmware.com/vmware-system-tmc-managed"
    "run.tanzu.vmware.com/tmc-already-attached"
    "run.tanzu.vmware.com/proxy-name"
    "run.tanzu.vmware.com/image-registry"
    "run.tanzu.vmware.com/auto-scaling"
    "run.tanzu.vmware.com/auto-scaler-status"
    "run.tanzu.vmware.com/kcp-status"
    "run.tanzu.vmware.com/nodepool-status"
  )

  # Build the command as an argv array instead of a string passed to eval, so
  # names or paths containing quotes, spaces, '$' or backticks reach kubectl
  # verbatim and are never re-parsed by the shell.
  local annotate_cmd=(
    kubectl --kubeconfig="$kubeconfig_path" annotate cluster "$name" -n "$prov"
  )
  for annotation in "${annotations_to_remove[@]}"; do
    # A trailing '-' asks kubectl to remove the annotation.
    annotate_cmd+=("${annotation}-")
  done

  # Execute the command (kubectl will ignore non-existent annotations).
  # stderr is intentionally discarded, as before; only the status is reported.
  if "${annotate_cmd[@]}" 2>/dev/null; then
    log info "Successfully processed annotations for cluster $name"
  else
    log error "Failed to remove annotations from cluster $name."
    return 1
  fi
  return 0
}
# Fetch a workload cluster's kubeconfig from the "<cluster>-kubeconfig" secret
# on the management cluster and store it, decoded, under
# $CLUSTER_DATA_DIR/kubeconfigs.
# Globals:   CLUSTER_DATA_DIR (read)
# Arguments: $1 - workload cluster name
#            $2 - namespace (provisioner) that holds the secret
#            $3 - path to the management cluster kubeconfig
#            $4 - management cluster name (used in the output file name)
# Outputs:   path of the written kubeconfig on stdout (nothing on failure)
# Returns:   0 on success, 1 on any failure
function generate_cluster_kubeconfig() {
  local cluster_name="$1"
  local provisioner_namespace="$2"
  local mc_kubeconfig="$3"
  local mc_name="$4"
  local cluster_kubeconfig_b64

  # Get the cluster's kubeconfig from the secret (using $clusterName-kubeconfig format).
  # Declaration and assignment are separate so that kubectl's exit status is
  # what gets tested, not the always-zero status of `local`.
  if ! cluster_kubeconfig_b64=$(kubectl --kubeconfig="$mc_kubeconfig" get secret "${cluster_name}-kubeconfig" -n "$provisioner_namespace" -o jsonpath='{.data.value}' 2>/dev/null); then
    return 1
  fi
  if [[ -z "$cluster_kubeconfig_b64" ]]; then
    return 1
  fi

  # Create kubeconfigs directory if it doesn't exist
  local kubeconfig_dir="$CLUSTER_DATA_DIR/kubeconfigs"
  mkdir -p "$kubeconfig_dir" || return 1

  # Decode the kubeconfig and save to kubeconfigs directory with format: ${mc_name}_${prov}_${name}.kubeconfig
  local cluster_kubeconfig="$kubeconfig_dir/${mc_name}_${provisioner_namespace}_${cluster_name}.kubeconfig"

  # The file holds cluster credentials: create it under a restrictive umask
  # and also chmod it, which covers the case where the file already existed
  # with wider permissions.
  if ! (umask 077 && printf '%s\n' "$cluster_kubeconfig_b64" | base64 -d > "$cluster_kubeconfig") \
    || ! chmod 600 "$cluster_kubeconfig"; then
    rm -f "$cluster_kubeconfig"
    return 1
  fi

  # Return the kubeconfig path
  printf '%s\n' "$cluster_kubeconfig"
  return 0
}
# Print the phase of the vmware-system-tmc namespace for kubeconfig $1 (empty
# when the namespace does not exist) and return kubectl's exit status.
function _tmc_namespace_phase() {
  kubectl --kubeconfig="$1" get namespace vmware-system-tmc --ignore-not-found=true -o jsonpath='{.status.phase}' 2>/dev/null
}

# Remove the TMC agent footprint (namespace, CRDs, cluster roles, cluster role
# bindings and PSP) from a workload cluster.
# Arguments: $1 - cluster name (used in log messages only)
#            $2 - path to the workload cluster kubeconfig
# Returns:   0 when the namespace is absent or was removed; 1 when the cluster
#            cannot be queried, the namespace is in an unexpected phase, or
#            the forced finalization fails.
function process_tmc_agents() {
  local name="$1"
  local cluster_kubeconfig="$2"
  local namespace_exists phase entry
  local timeout=120
  local elapsed=0
  local interval=15

  log info "Checking TMC namespace for cluster: $name"

  # Check if vmware-system-tmc namespace exists. A failing query (for example
  # an unreachable API server) is reported as an error instead of being
  # mistaken for "namespace not present".
  if ! namespace_exists=$(kubectl --kubeconfig="$cluster_kubeconfig" get namespace vmware-system-tmc --ignore-not-found=true -o name 2>/dev/null); then
    log error "Failed to query vmware-system-tmc namespace in cluster $name"
    return 1
  fi
  if [[ -z "$namespace_exists" ]]; then
    log info "No vmware-system-tmc namespace found in cluster $name, skipping cleanup..."
    return 0
  fi

  log info "Found vmware-system-tmc namespace in cluster $name, performing cleanup..."
  # https://techdocs.broadcom.com/us/en/vmware-tanzu/standalone-components/tanzu-mission-control-self-managed/1-4/tmc-self-managed-documentation/using-tmc/managing-clusters/remove-a-cluster-from-your-organization.html

  # Delete namespace (best effort; the polling loop below decides the outcome)
  kubectl --kubeconfig="$cluster_kubeconfig" delete namespace vmware-system-tmc --ignore-not-found=true --timeout=1m 2>/dev/null || true

  # Cluster-scoped TMC objects to delete, as "<kind> <name>", in deletion order:
  # CRDs, cluster roles, cluster role bindings, then the PSP.
  local cluster_scoped_resources=(
    "crd extensions.clusters.tmc.cloud.vmware.com"
    "crd agents.clusters.tmc.cloud.vmware.com"
    "crd extensionresourceowners.clusters.tmc.cloud.vmware.com"
    "crd extensionintegrations.clusters.tmc.cloud.vmware.com"
    "crd extensionconfigs.intents.tmc.cloud.vmware.com"
    "clusterrole extension-updater-clusterrole"
    "clusterrole extension-manager-role"
    "clusterrole agent-updater-role"
    "clusterrole vmware-system-tmc-psp-agent-restricted"
    "clusterrolebinding extension-updater-clusterrolebinding"
    "clusterrolebinding extension-manager-rolebinding"
    "clusterrolebinding agent-updater-rolebinding"
    "clusterrolebinding vmware-system-tmc-psp-agent-restricted"
    "psp vmware-system-tmc-agent-restricted"
  )
  for entry in "${cluster_scoped_resources[@]}"; do
    # Deletions are deliberately best effort: a missing kind or object must
    # not abort the cleanup.
    kubectl --kubeconfig="$cluster_kubeconfig" delete "${entry%% *}" "${entry#* }" --ignore-not-found=true 2>/dev/null || true
  done

  # Wait up to 2 minutes for namespace to be deleted, then force delete if stuck
  while ((elapsed < timeout)); do
    if ! phase=$(_tmc_namespace_phase "$cluster_kubeconfig"); then
      log error "Failed to query vmware-system-tmc namespace status in cluster $name"
      return 1
    fi
    case "$phase" in
      "")
        log info "Successfully deleted vmware-system-tmc namespace from cluster $name"
        return 0
        ;;
      Terminating)
        log info "Namespace vmware-system-tmc is in Terminating state, waiting... (${elapsed}s/${timeout}s)"
        sleep "$interval"
        elapsed=$((elapsed + interval))
        ;;
      *)
        log error "Namespace vmware-system-tmc has unexpected status: $phase"
        return 1
        ;;
    esac
  done

  # Timeout reached: look once more before forcing anything.
  if ! phase=$(_tmc_namespace_phase "$cluster_kubeconfig"); then
    log error "Failed to query vmware-system-tmc namespace status in cluster $name"
    return 1
  fi
  if [[ -z "$phase" ]]; then
    log info "Successfully deleted vmware-system-tmc namespace from cluster $name"
    return 0
  fi

  # If namespace is still stuck in Terminating after timeout, force delete
  if [[ "$phase" == "Terminating" ]]; then
    log warn "Namespace vmware-system-tmc is stuck in Terminating state after ${timeout}s, force deleting..."
    # Get the namespace JSON, remove finalizers and submit it to the finalize
    # subresource. Success is only reported when that call succeeds.
    if kubectl --kubeconfig="$cluster_kubeconfig" get namespace vmware-system-tmc -o json 2>/dev/null \
      | jq '.spec.finalizers = []' \
      | kubectl --kubeconfig="$cluster_kubeconfig" replace --raw "/api/v1/namespaces/vmware-system-tmc/finalize" -f - 2>/dev/null; then
      log info "Force deleted vmware-system-tmc namespace from cluster $name"
    else
      log error "Failed to force delete vmware-system-tmc namespace from cluster $name"
      return 1
    fi
  fi
  return 0
}
# Combined precheck: for every workload cluster of every management cluster
# listed in the index file, remove the TMC annotations and clean up the TMC
# agents.
# Globals:   MC_KUBECONFIG_INDEX_FILE (read)
# Arguments: none
# Returns:   0 when every cluster was processed; 1 on the first failure
#            (missing file, unreadable cluster list, or a failing step)
function precheck_clusters() {
  local mc_name kubeconfig_path wc_file
  local cluster_count cluster_kubeconfig
  local name prov i

  log info "Starting cluster precheck (annotations and TMC agents)..."

  # Read management cluster names and kubeconfig paths from index file.
  # The file is read on fd 3 so commands in the loop body cannot consume it
  # through stdin; the `|| [[ -n … ]]` clause keeps a final line that has no
  # trailing newline.
  while IFS=':' read -r -u 3 mc_name kubeconfig_path || [[ -n "$mc_name" ]]; do
    # Trim leading and trailing spaces from kubeconfig_path
    kubeconfig_path="${kubeconfig_path#"${kubeconfig_path%%[![:space:]]*}"}"
    kubeconfig_path="${kubeconfig_path%"${kubeconfig_path##*[![:space:]]}"}"

    # Skip empty lines
    [[ -z "$mc_name" || -z "$kubeconfig_path" ]] && continue

    # Check if the kubeconfig file exists
    if [[ ! -f "$kubeconfig_path" ]]; then
      log error "Kubeconfig file for management cluster $mc_name: $kubeconfig_path not found."
      return 1
    fi

    wc_file=$(get_wc_file "$mc_name")
    if [[ ! -f "$wc_file" ]]; then
      log error "File of workload clusters on $mc_name: $wc_file not found."
      return 1
    fi

    log info "Using kubeconfig: $kubeconfig_path for management cluster: $mc_name"

    # Get cluster count. A yq failure or a non-numeric result is an error;
    # otherwise every cluster would be skipped silently.
    if ! cluster_count=$(yq eval '.clusters | length' "$wc_file") \
      || [[ ! "$cluster_count" =~ ^[0-9]+$ ]]; then
      log error "Failed to read workload cluster count from $wc_file"
      return 1
    fi

    # Process each cluster one by one
    for ((i = 0; i < cluster_count; i++)); do
      name=$(yq eval ".clusters[$i].fullName.name" "$wc_file") || name=""
      prov=$(yq eval ".clusters[$i].fullName.provisionerName" "$wc_file") || prov=""
      # yq prints "null" for a missing key; do not pass that on to kubectl.
      if [[ -z "$name" || "$name" == "null" || -z "$prov" || "$prov" == "null" ]]; then
        log error "Missing name or provisionerName for cluster entry $i in $wc_file"
        return 1
      fi

      log info "=========================================="
      log info "Processing cluster $((i + 1))/$cluster_count: $name (management cluster: $mc_name, namespace: $prov)"
      log info "=========================================="

      # Step 1: Process cluster annotations
      log info "Step 1/2: Checking and removing run.tanzu.vmware.com annotations..."
      if ! process_cluster_annotations "$name" "$prov" "$mc_name" "$kubeconfig_path"; then
        log error "Failed to process annotations for cluster $name"
        return 1
      fi

      # Step 2: Generate kubeconfig and process TMC agents
      log info "Step 2/2: Checking and cleaning up TMC agents..."
      if ! cluster_kubeconfig=$(generate_cluster_kubeconfig "$name" "$prov" "$kubeconfig_path" "$mc_name") \
        || [[ -z "$cluster_kubeconfig" ]]; then
        log error "Failed to generate kubeconfig for cluster $name."
        return 1
      fi

      # Process TMC agents with the generated kubeconfig
      if ! process_tmc_agents "$name" "$cluster_kubeconfig"; then
        log error "TMC agent cleanup encountered issues for cluster $name"
        return 1
      fi

      log info "Completed processing cluster: $name"
      echo ""
    done
  done 3< "$MC_KUBECONFIG_INDEX_FILE"

  log info "Cluster precheck completed."
  return 0
}
# Entry point: validate the kubeconfig index, then run the per-cluster
# precheck.
# Arguments: none
# Returns:   0 when both stages succeed; 1 (after logging which stage failed)
#            otherwise
function main() {
  # The index file is validated once, before any cluster is touched.
  if ! validate_kubeconfig_index; then
    log error "Validation failed. Exiting..."
    return 1
  fi

  # Annotations and TMC agents are handled cluster by cluster.
  if ! precheck_clusters; then
    log error "Cluster precheck failed. Exiting..."
    return 1
  fi

  log info "Precheck completed successfully."
  return 0
}
# Run the precheck. No arguments are passed; main takes none. As the last
# command visible in this script, main's return status is the status the
# script finishes with.
main