Skip to content

Commit bbffaf5

Browse files
committed
Separate snapshot cleanup and improve snapshot creation for better observability
1 parent 98e67f2 commit bbffaf5

5 files changed

Lines changed: 75 additions & 14 deletions

File tree

configuration-sample/ods-core.env.sample

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,23 +81,29 @@ NEXUS_MEMORY_LIMIT=5Gi
8181
NEXUS_DATA_CAPACITY=60Gi
8282

8383
# Nexus storage name
84-
NEXUS_STORAGE_NAME="storage"
84+
NEXUS_STORAGE_NAME="nexus-storage"
8585

8686
# Storage class provisioner, for AWS this should be "kubernetes.io/aws-ebs"
8787
NEXUS_STORAGE_PROVISIONER=""
8888

8989
# Storage class for Nexus data, for AWS this should be "gp3"
90-
NEXUS_STORAGE_CLASS_DATA=""
90+
NEXUS_STORAGE_CLASS_NAME=""
9191

9292
# Storage class for Nexus backup, for AWS this should be "gp2-encrypted"
9393
NEXUS_STORAGE_CLASS_BACKUP=""
9494

9595
# Nexus snapshot schedule (cron syntax); defaults to daily at 2 AM
9696
NEXUS_SNAPSHOT_SCHEDULE="0 2 * * *"
9797

98+
# Nexus snapshot cleanup schedule (cron syntax); defaults to daily at 3 AM
99+
NEXUS_SNAPSHOT_CLEANUP_SCHEDULE="0 3 * * *"
100+
98101
# Nexus snapshot TTL in seconds (default: 30 days)
99102
NEXUS_SNAPSHOT_TTL=2592000
100103

104+
# Timeout in seconds to wait for a VolumeSnapshot to become ready
105+
NEXUS_SNAPSHOT_CHECK_TIMEOUT=600
106+
101107
#############
102108
# SonarQube #
103109
#############

nexus/chart/templates/nexus-snapshot-cronjob.yaml

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ spec:
1212
ttlSecondsAfterFinished: {{ .Values.global.nexusSnapshotTTL }}
1313
template:
1414
spec:
15+
backoffLimit: 0
1516
serviceAccountName: ods-edit
1617
containers:
1718
- name: snapshot-creator
@@ -20,27 +21,42 @@ spec:
2021
- /bin/sh
2122
- -c
2223
- |
24+
# compute snapshot name so we can check it later
25+
SNAP_NAME="{{ .Values.global.appName }}-snapshot.$(date +%Y-%m-%d.%H-%M-%S)"
2326
cat <<EOF | oc apply -f -
2427
apiVersion: snapshot.storage.k8s.io/v1
2528
kind: VolumeSnapshot
2629
metadata:
27-
name: {{ .Values.global.appName }}-snapshot.$(date +%Y-%m-%d.%H-%M-%S)
30+
name: $SNAP_NAME
2831
namespace: {{ .Values.global.odsNamespace }}
2932
spec:
3033
volumeSnapshotClassName: {{ .Values.global.nexusSnapshotClass }}
3134
source:
3235
persistentVolumeClaimName: {{ .Values.global.nexusStorageName }}
3336
EOF
34-
# Cleanup snapshots older than the TTL
35-
oc get volumesnapshots --namespace {{ .Values.global.odsNamespace }} \
36-
--no-headers -o custom-columns=NAME:.metadata.name,CREATED:.metadata.creationTimestamp | \
37-
while read name created; do
38-
if [[ $(date -d "$created" +%s) -lt $(date -d "-{{ .Values.global.nexusSnapshotTTL }} seconds" +%s) ]]; then
39-
oc delete volumesnapshot "$name" --namespace {{ .Values.global.odsNamespace }}
40-
fi
41-
done
37+
38+
# Wait for the VolumeSnapshot to become Ready (configurable timeout)
39+
TIMEOUT={{ .Values.global.nexusSnapshotCheckTimeout }}
40+
INTERVAL=30
41+
elapsed=0
42+
TIMED_OUT=0
43+
echo "Waiting for VolumeSnapshot $SNAP_NAME to be ready (timeout: $TIMEOUT seconds)..."
44+
until [ $elapsed -ge $TIMEOUT ]; do
45+
ready=$(oc get volumesnapshot "$SNAP_NAME" -n {{ .Values.global.odsNamespace }} -o jsonpath='{.status.readyToUse}' 2>/dev/null || echo "false")
46+
if [ "$ready" = "true" ]; then
47+
echo "VolumeSnapshot $SNAP_NAME is ready"
48+
break
49+
fi
50+
sleep $INTERVAL
51+
elapsed=$((elapsed + INTERVAL))
52+
echo " ... waited $elapsed seconds out of $TIMEOUT seconds"
53+
done
54+
if [ $elapsed -ge $TIMEOUT ]; then
55+
echo "Timeout waiting for VolumeSnapshot $SNAP_NAME to be ready" >&2
56+
exit 1
57+
fi
4258
resources: {}
4359
imagePullPolicy: IfNotPresent
44-
restartPolicy: OnFailure
60+
restartPolicy: Never
4561
successfulJobsHistoryLimit: 30
4662
failedJobsHistoryLimit: 30

nexus/chart/templates/pvc-data.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ spec:
1515
resources:
1616
requests:
1717
storage: {{ .Values.nexus.pvcDataCapacity }}
18-
storageClassName: {{ .Values.global.storageClassData }}
18+
storageClassName: {{ .Values.global.storageClassName }}
1919
volumeMode: Filesystem
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# CronJob that periodically deletes VolumeSnapshots in the ODS namespace whose
# creation timestamp is older than the configured TTL
# (.Values.global.nexusSnapshotTTL, in seconds).
apiVersion: batch/v1
kind: CronJob
metadata:
  name: volume-snapshot-cleanup
  labels:
    app: nexus
spec:
  schedule: "{{ .Values.global.nexusSnapshotCleanupSchedule }}"
  # Never run two cleanups at once; a still-running job blocks the next one.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      ttlSecondsAfterFinished: {{ int .Values.global.nexusSnapshotTTL }}
      template:
        spec:
          serviceAccountName: ods-edit
          containers:
            - name: snapshot-cleaner
              image: image-registry.openshift-image-registry.svc:5000/openshift/ose-cli:latest
              command:
                - /bin/sh
                - -c
                - |
                  # Delete VolumeSnapshots older than the configured TTL (in seconds)
                  NAMESPACE="{{ .Values.global.odsNamespace }}"

                  # Compute the cutoff once so every snapshot is compared
                  # against the same instant.
                  cutoff=$(date -d "-{{ int .Values.global.nexusSnapshotTTL }} seconds" +%s) || exit 1

                  # List first and fail loudly: inside a pipeline a failing
                  # "oc get" would be masked and the job would report success.
                  snapshots=$(oc get volumesnapshots --namespace "$NAMESPACE" \
                    --no-headers -o custom-columns=NAME:.metadata.name,CREATED:.metadata.creationTimestamp) || exit 1

                  echo "$snapshots" | while read -r name created; do
                    # An empty listing still yields one blank line; ignore it.
                    [ -n "$name" ] || continue

                    # Never delete when the timestamp cannot be parsed: an
                    # empty value would otherwise compare as 0, i.e. "expired".
                    if ! created_epoch=$(date -d "$created" +%s 2>/dev/null); then
                      echo "Skipping VolumeSnapshot $name: cannot parse creation time '$created'" >&2
                      continue
                    fi

                    if [ "$created_epoch" -lt "$cutoff" ]; then
                      oc delete volumesnapshot "$name" --namespace "$NAMESPACE"
                    else
                      echo "Keeping VolumeSnapshot $name created at $created"
                    fi
                  done
              resources: {}
              imagePullPolicy: IfNotPresent
          restartPolicy: OnFailure
  successfulJobsHistoryLimit: 30
  failedJobsHistoryLimit: 30

nexus/chart/values.yaml.template

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ global:
77
nexusImageTag: $NEXUS_IMAGE_TAG
88
appName: 'nexus'
99
storageProvisioner: $NEXUS_STORAGE_PROVISIONER
10-
storageClassData: $NEXUS_STORAGE_CLASS_DATA
10+
storageClassName: $NEXUS_STORAGE_CLASS_NAME
1111
nexusHost: $NEXUS_HOST
1212
nexusAdminPasswordB64: $NEXUS_ADMIN_PASSWORD_B64
1313
registry: $DOCKER_REGISTRY
@@ -18,6 +18,8 @@ global:
1818
nexusSnapshotClass: $NEXUS_STORAGE_CLASS_BACKUP
1919
nexusSnapshotTTL: $NEXUS_SNAPSHOT_TTL
2020
nexusStorageName: $NEXUS_STORAGE_NAME
21+
nexusSnapshotCheckTimeout: $NEXUS_SNAPSHOT_CHECK_TIMEOUT
22+
nexusSnapshotCleanupSchedule: $NEXUS_SNAPSHOT_CLEANUP_SCHEDULE
2123
nexus:
2224
cpuRequest: $NEXUS_CPU_REQUEST
2325
cpuLimit: $NEXUS_CPU_LIMIT

0 commit comments

Comments
 (0)