Skip to content

Commit ad4d81c

Browse files
author
Yuriy Bezsonov
committed
WIP: refactoring - cleanup
1 parent 1c4608e commit ad4d81c

6 files changed

Lines changed: 238 additions & 1382 deletions

File tree

infrastructure/scripts/cleanup/monitoring.sh

Lines changed: 31 additions & 227 deletions
Original file line number | Diff line number | Diff line change
@@ -6,244 +6,48 @@ log() {
66
echo "[$(date +'%H:%M:%S')] $*"
77
}
88

9-
# --- Configuration ---
109
NAMESPACE="monitoring"
11-
GRAFANA_SECRET_NAME="grafana-admin"
1210

13-
# Temporary files to clean up
14-
VALUES_FILE="prometheus-values.yaml"
15-
EXTRA_SCRAPE_FILE="extra-scrape-configs.yaml"
16-
DATASOURCE_FILE="grafana-datasource.yaml"
17-
DASHBOARD_JSON_FILE="jvm-dashboard.json"
18-
DASHBOARD_PROVISIONING_FILE="dashboard-provisioning.yaml"
19-
ALERT_RULE_FILE="grafana-alert-rules.yaml"
20-
GRAFANA_VALUES_FILE="grafana-values.yaml"
21-
LAMBDA_ALERT_RULE_FILE="lambda-alert-rule.json"
22-
NOTIFICATION_POLICY_CONFIGMAP_FILE="notification-policy.yaml"
11+
log "🧹 Starting monitoring cleanup..."
2312

24-
cleanup_temp_files() {
25-
log "🧹 Cleaning up temporary files..."
26-
rm -f "$VALUES_FILE" "$EXTRA_SCRAPE_FILE" "$DATASOURCE_FILE" "$NOTIFICATION_POLICY_CONFIGMAP_FILE" \
27-
"$DASHBOARD_JSON_FILE" "$DASHBOARD_PROVISIONING_FILE" \
28-
"$ALERT_RULE_FILE" "$GRAFANA_VALUES_FILE" "$LAMBDA_ALERT_RULE_FILE"
29-
}
30-
trap cleanup_temp_files EXIT
31-
32-
log "🚨 Starting monitoring stack cleanup..."
33-
34-
# --- Get Grafana credentials before cleanup ---
35-
GRAFANA_USER="admin"
36-
GRAFANA_PASSWORD=""
37-
38-
if kubectl get secret "$GRAFANA_SECRET_NAME" -n "$NAMESPACE" >/dev/null 2>&1; then
39-
GRAFANA_PASSWORD=$(kubectl get secret "$GRAFANA_SECRET_NAME" -n "$NAMESPACE" -o jsonpath="{.data.password}" | base64 --decode)
40-
log "📋 Retrieved Grafana password from existing secret"
41-
fi
42-
43-
# Get Grafana LoadBalancer hostname before cleanup
44-
GRAFANA_LB=$(kubectl get svc grafana -n "$NAMESPACE" -o jsonpath="{.status.loadBalancer.ingress[0].hostname}" 2>/dev/null || true)
45-
if [[ -n "$GRAFANA_LB" && "$GRAFANA_LB" != "<no value>" ]]; then
46-
GRAFANA_URL="http://$GRAFANA_LB"
47-
log "📋 Found Grafana URL: $GRAFANA_URL"
48-
fi
49-
50-
# Get Prometheus LoadBalancer hostname before cleanup
51-
PROM_LB_HOSTNAME=$(kubectl get svc prometheus-server -n "$NAMESPACE" -o jsonpath="{.status.loadBalancer.ingress[0].hostname}" 2>/dev/null || true)
52-
if [[ -n "$PROM_LB_HOSTNAME" && "$PROM_LB_HOSTNAME" != "<no value>" ]]; then
53-
log "📋 Found Prometheus hostname: $PROM_LB_HOSTNAME"
54-
fi
55-
56-
# --- Clean up Grafana alert rules (if Grafana is accessible) ---
57-
if [[ -n "$GRAFANA_LB" && -n "$GRAFANA_PASSWORD" ]]; then
58-
log "🔧 Cleaning up Grafana alert rules..."
59-
60-
# Wait briefly for Grafana to be accessible
61-
for i in {1..5}; do
62-
if curl -s -o /dev/null -w "%{http_code}" -u "$GRAFANA_USER:$GRAFANA_PASSWORD" "$GRAFANA_URL/api/health" | grep -q "200"; then
63-
log "✅ Grafana is accessible for cleanup"
64-
break
65-
fi
66-
log "⏳ Waiting for Grafana access... ($i/5)"
67-
sleep 2
68-
done
69-
70-
# Delete alert rules
71-
ALERT_RULES=$(curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" "$GRAFANA_URL/api/v1/provisioning/alert-rules" 2>/dev/null || echo "[]")
72-
if [[ "$ALERT_RULES" != "[]" ]]; then
73-
echo "$ALERT_RULES" | jq -r '.[].uid' | while read -r rule_uid; do
74-
if [[ -n "$rule_uid" && "$rule_uid" != "null" ]]; then
75-
log "🗑️ Deleting alert rule: $rule_uid"
76-
curl -s -X DELETE -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
77-
"$GRAFANA_URL/api/v1/provisioning/alert-rules/$rule_uid" >/dev/null || true
78-
fi
79-
done
80-
fi
81-
82-
# Delete contact points
83-
CONTACT_POINTS=$(curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" "$GRAFANA_URL/api/v1/provisioning/contact-points" 2>/dev/null || echo "[]")
84-
if [[ "$CONTACT_POINTS" != "[]" ]]; then
85-
echo "$CONTACT_POINTS" | jq -r '.[] | select(.name=="lambda-webhook") | .uid' | while read -r cp_uid; do
86-
if [[ -n "$cp_uid" && "$cp_uid" != "null" ]]; then
87-
log "🗑️ Deleting contact point: $cp_uid"
88-
curl -s -X DELETE -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
89-
"$GRAFANA_URL/api/v1/provisioning/contact-points/$cp_uid" >/dev/null || true
90-
fi
91-
done
92-
fi
93-
94-
# Delete folders
95-
FOLDERS=$(curl -s -u "$GRAFANA_USER:$GRAFANA_PASSWORD" "$GRAFANA_URL/api/folders" 2>/dev/null || echo "[]")
96-
if [[ "$FOLDERS" != "[]" ]]; then
97-
echo "$FOLDERS" | jq -r '.[] | select(.title=="Unicorn Store Dashboards") | .uid' | while read -r folder_uid; do
98-
if [[ -n "$folder_uid" && "$folder_uid" != "null" ]]; then
99-
log "🗑️ Deleting folder: $folder_uid"
100-
curl -s -X DELETE -u "$GRAFANA_USER:$GRAFANA_PASSWORD" \
101-
"$GRAFANA_URL/api/folders/$folder_uid" >/dev/null || true
102-
fi
103-
done
104-
fi
105-
fi
106-
107-
# --- Clean up Prometheus LoadBalancer Security Group rules ---
108-
if [[ -n "$PROM_LB_HOSTNAME" ]]; then
109-
log "🔐 Cleaning up Prometheus LoadBalancer Security Group rules..."
110-
111-
VPC_ID=$(aws ec2 describe-vpcs --filters "Name=tag:Name,Values=unicornstore-vpc" --query "Vpcs[0].VpcId" --output text 2>/dev/null || true)
112-
if [[ -n "$VPC_ID" && "$VPC_ID" != "None" ]]; then
113-
VPC_CIDR=$(aws ec2 describe-vpcs --vpc-ids "$VPC_ID" --query "Vpcs[0].CidrBlock" --output text 2>/dev/null || true)
114-
115-
LB_ARN=$(aws elbv2 describe-load-balancers --output json 2>/dev/null | jq -r \
116-
--arg dns "$PROM_LB_HOSTNAME" '
117-
.LoadBalancers[] | select(.DNSName == $dns) | .LoadBalancerArn' || true)
118-
119-
if [[ -n "$LB_ARN" ]]; then
120-
ILB_SG_ID=$(aws elbv2 describe-load-balancers \
121-
--load-balancer-arns "$LB_ARN" \
122-
--query "LoadBalancers[0].SecurityGroups[0]" \
123-
--output text 2>/dev/null || true)
124-
125-
if [[ -n "$ILB_SG_ID" && "$ILB_SG_ID" != "None" ]]; then
126-
log "🗑️ Removing security group rule from $ILB_SG_ID"
127-
aws ec2 revoke-security-group-ingress \
128-
--group-id "$ILB_SG_ID" \
129-
--protocol tcp \
130-
--port 9090 \
131-
--cidr "$VPC_CIDR" \
132-
--output text 2>/dev/null || log "ℹ️ Security group rule may not exist"
133-
fi
134-
fi
135-
fi
136-
fi
13+
# Get Grafana URL before cleanup
14+
GRAFANA_LB=$(kubectl get svc grafana -n "$NAMESPACE" -o jsonpath="{.status.loadBalancer.ingress[0].hostname}" 2>/dev/null || echo "")
13715

138-
# --- Uninstall Helm releases ---
139-
log "🗑️ Uninstalling Helm releases..."
16+
log "🔧 Uninstalling Helm releases..."
17+
helm uninstall grafana -n "$NAMESPACE" 2>/dev/null || true
18+
helm uninstall prometheus -n "$NAMESPACE" 2>/dev/null || true
14019

141-
if helm list -n "$NAMESPACE" | grep -q "grafana"; then
142-
log "🗑️ Uninstalling Grafana..."
143-
helm uninstall grafana --namespace "$NAMESPACE" || log "⚠️ Failed to uninstall Grafana"
144-
fi
145-
146-
if helm list -n "$NAMESPACE" | grep -q "prometheus"; then
147-
log "🗑️ Uninstalling Prometheus..."
148-
helm uninstall prometheus --namespace "$NAMESPACE" || log "⚠️ Failed to uninstall Prometheus"
149-
fi
150-
151-
# --- Clean up Kubernetes resources ---
152-
log "🗑️ Cleaning up Kubernetes resources..."
153-
154-
# Delete ConfigMaps
155-
kubectl delete configmap unicornstore-datasource -n "$NAMESPACE" 2>/dev/null || log "ℹ️ ConfigMap unicornstore-datasource not found"
156-
kubectl delete configmap unicornstore-dashboard -n "$NAMESPACE" 2>/dev/null || log "ℹ️ ConfigMap unicornstore-dashboard not found"
157-
kubectl delete configmap prometheus-extra-scrape -n "$NAMESPACE" 2>/dev/null || log "ℹ️ ConfigMap prometheus-extra-scrape not found"
158-
kubectl delete configmap unicornstore-notification-policy -n "$NAMESPACE" 2>/dev/null || log "ℹ️ ConfigMap unicornstore-notification-policy not found"
159-
160-
# Delete Secrets
161-
kubectl delete secret "$GRAFANA_SECRET_NAME" -n "$NAMESPACE" 2>/dev/null || log "ℹ️ Secret $GRAFANA_SECRET_NAME not found"
20+
log "🗑️ Deleting ConfigMaps..."
21+
kubectl delete configmap prometheus-datasource -n "$NAMESPACE" 2>/dev/null || true
16222

163-
# Delete PVCs (Persistent Volume Claims)
164-
log "🗑️ Cleaning up Persistent Volume Claims..."
165-
kubectl get pvc -n "$NAMESPACE" -o name 2>/dev/null | while read -r pvc; do
166-
if [[ -n "$pvc" ]]; then
167-
log "🗑️ Deleting $pvc"
168-
kubectl delete "$pvc" -n "$NAMESPACE" || log "⚠️ Failed to delete $pvc"
169-
fi
170-
done
171-
172-
# Wait for PVCs to be deleted
173-
log "⏳ Waiting for PVCs to be fully deleted..."
174-
for i in {1..30}; do
175-
PVC_COUNT=$(kubectl get pvc -n "$NAMESPACE" --no-headers 2>/dev/null | wc -l || echo "0")
176-
if [[ "$PVC_COUNT" -eq 0 ]]; then
177-
log "✅ All PVCs deleted"
178-
break
179-
fi
180-
log "⏳ Waiting for $PVC_COUNT PVCs to be deleted... ($i/30)"
181-
sleep 5
182-
done
23+
log "🔐 Deleting secrets and PVCs..."
24+
kubectl delete secret grafana-admin -n "$NAMESPACE" 2>/dev/null || true
25+
kubectl delete pvc --all -n "$NAMESPACE" 2>/dev/null || true
18326

184-
# --- Delete namespace ---
185-
log "🗑️ Deleting namespace $NAMESPACE..."
186-
kubectl delete namespace "$NAMESPACE" --timeout=60s 2>/dev/null || log "⚠️ Failed to delete namespace or namespace not found"
27+
log "🧹 Deleting remaining resources..."
28+
kubectl delete all --all -n "$NAMESPACE" 2>/dev/null || true
29+
kubectl delete namespace "$NAMESPACE" 2>/dev/null || true
18730

188-
# Wait for namespace deletion
18931
log "⏳ Waiting for namespace deletion..."
190-
for i in {1..30}; do
191-
if ! kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
192-
log "✅ Namespace $NAMESPACE deleted"
193-
break
194-
fi
195-
log "⏳ Waiting for namespace deletion... ($i/30)"
32+
while kubectl get namespace "$NAMESPACE" 2>/dev/null; do
19633
sleep 5
19734
done
19835

199-
# --- Clean up AWS resources ---
200-
log "🗑️ Cleaning up AWS resources..."
201-
202-
# --- Remove Helm repositories (optional) ---
203-
log "🗑️ Cleaning up Helm repositories..."
204-
helm repo remove prometheus-community 2>/dev/null || log "ℹ️ prometheus-community repo not found"
205-
helm repo remove grafana 2>/dev/null || log "ℹ️ grafana repo not found"
206-
207-
# --- Final validation ---
208-
log "🔍 Validating cleanup..."
209-
210-
# Check if namespace still exists
211-
if kubectl get namespace "$NAMESPACE" >/dev/null 2>&1; then
212-
log "⚠️ Warning: Namespace $NAMESPACE still exists"
213-
else
214-
log "✅ Namespace $NAMESPACE successfully deleted"
215-
fi
216-
217-
# Check if Helm releases still exist
218-
REMAINING_RELEASES=$(helm list -A | grep -E "(prometheus|grafana)" || true)
219-
if [[ -n "$REMAINING_RELEASES" ]]; then
220-
log "⚠️ Warning: Some Helm releases may still exist:"
221-
echo "$REMAINING_RELEASES"
222-
else
223-
log "✅ All monitoring Helm releases cleaned up"
224-
fi
225-
226-
# Check AWS resources - no secrets to check since we use IDE password
227-
228-
log "✅ Monitoring stack cleanup completed!"
229-
log "ℹ️ Note: Some AWS resources (like Load Balancers) may take additional time to fully terminate"
230-
log "ℹ️ Note: Persistent Volumes may need manual cleanup if they were not automatically deleted"
231-
232-
# --- Optional: List remaining resources for manual cleanup ---
233-
log "📋 Checking for any remaining resources that may need manual cleanup..."
234-
235-
# Check for remaining PVs
236-
REMAINING_PVS=$(kubectl get pv | grep "$NAMESPACE" || true)
237-
if [[ -n "$REMAINING_PVS" ]]; then
238-
log "⚠️ Warning: Found Persistent Volumes that may need manual cleanup:"
239-
echo "$REMAINING_PVS"
240-
fi
241-
242-
# Check for remaining Load Balancers
243-
REMAINING_LBS=$(aws elbv2 describe-load-balancers --output table | grep -E "(prometheus|grafana)" || true)
244-
if [[ -n "$REMAINING_LBS" ]]; then
245-
log "⚠️ Warning: Found Load Balancers that may need manual cleanup:"
246-
echo "$REMAINING_LBS"
36+
# Clean up LoadBalancer
37+
if [[ -n "$GRAFANA_LB" ]]; then
38+
log "🔧 Cleaning up LoadBalancer..."
39+
LB_ARNS=$(aws elbv2 describe-load-balancers --query "LoadBalancers[?DNSName=='$GRAFANA_LB'].LoadBalancerArn" --output text 2>/dev/null || echo "")
40+
41+
for LB_ARN in $LB_ARNS; do
42+
if [[ -n "$LB_ARN" && "$LB_ARN" != "None" ]]; then
43+
LISTENER_ARNS=$(aws elbv2 describe-listeners --load-balancer-arn "$LB_ARN" --query 'Listeners[].ListenerArn' --output text 2>/dev/null || echo "")
44+
for LISTENER_ARN in $LISTENER_ARNS; do
45+
aws elbv2 delete-listener --listener-arn "$LISTENER_ARN" 2>/dev/null || true
46+
done
47+
aws elbv2 delete-load-balancer --load-balancer-arn "$LB_ARN" 2>/dev/null || true
48+
log "✅ LoadBalancer cleaned up"
49+
fi
50+
done
24751
fi
24852

249-
log "🎉 Cleanup script execution completed!"
53+
log "✅ Monitoring cleanup completed"

infrastructure/scripts/deploy/ecs.sh

Lines changed: 0 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -124,10 +124,6 @@ aws elbv2 create-listener --no-cli-pager \
124124
VPC_ID=$(aws ec2 describe-vpcs --filters "Name=tag:Name,Values=unicornstore-vpc" \
125125
--query 'Vpcs[0].VpcId' --output text) && echo $VPC_ID
126126

127-
EKS_VPC_CIDR=$(aws ec2 describe-vpcs \
128-
--vpc-ids "$VPC_ID" \
129-
--query "Vpcs[0].CidrBlock" --output text)
130-
131127
LAMBDA_SG_ID=$(aws ec2 describe-security-groups --filters "Name=vpc-id,Values='$VPC_ID'" \
132128
--query 'SecurityGroups[?GroupName==`'unicornstore-thread-dump-lambda-sg'`].GroupId' --output text)
133129

@@ -144,16 +140,6 @@ aws ec2 authorize-security-group-ingress \
144140
--protocol tcp \
145141
--port 8080 \
146142
--source-group $SECURITY_GROUP_ALB_ID
147-
aws ec2 authorize-security-group-ingress \
148-
--group-id "$SECURITY_GROUP_ECS_ID" \
149-
--protocol tcp \
150-
--port 9090 \
151-
--cidr "$EKS_VPC_CIDR"
152-
aws ec2 authorize-security-group-ingress \
153-
--group-id "$SECURITY_GROUP_ECS_ID" \
154-
--protocol tcp \
155-
--port 9404 \
156-
--cidr "$EKS_VPC_CIDR"
157143
aws ec2 authorize-security-group-ingress \
158144
--group-id $SECURITY_GROUP_ECS_ID \
159145
--protocol tcp \

0 commit comments

Comments
 (0)