diff --git a/CHANGELOG.md b/CHANGELOG.md
index a50764962..0e6c2ea82 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 ### Changed
 - Nexus storage change ([#1341](https://github.com/opendevstack/ods-core/issues/1341))
 - Update PVC migration script, adding threads to rsync execution ([#1345](https://github.com/opendevstack/ods-core/pull/1345))
+- Improve Nexus cronjobs for snapshots ([#1349](https://github.com/opendevstack/ods-core/pull/1349))
 - Update Aqua cli to 760 ([#1344](https://github.com/opendevstack/ods-core/pull/1344))
 - Adapted Sonarqube server configuration to make projects private and have custom gate ([#1347](https://github.com/opendevstack/ods-core/pull/1347))
 
diff --git a/configuration-sample/ods-core.env.sample b/configuration-sample/ods-core.env.sample
index 493762e4b..13d8c3581 100644
--- a/configuration-sample/ods-core.env.sample
+++ b/configuration-sample/ods-core.env.sample
@@ -81,23 +81,29 @@ NEXUS_MEMORY_LIMIT=5Gi
 NEXUS_DATA_CAPACITY=60Gi
 
 # Nexus storage name
-NEXUS_STORAGE_NAME="storage"
+NEXUS_STORAGE_NAME="nexus-storage"
 
-# Storage class provisioner, for AWS this should be "kubernetes.io/aws-ebs"
-NEXUS_STORAGE_PROVISIONER=""
+# Storage class provisioner, for AWS this should be "ebs.csi.aws.com"
+NEXUS_STORAGE_PROVISIONER="ebs.csi.aws.com"
 
-# Storage class for Nexus data, for AWS this should be "gp3"
-NEXUS_STORAGE_CLASS_DATA=""
+# Storage class for Nexus data, for AWS this should be "gp3-csi"
+NEXUS_STORAGE_CLASS_DATA="gp3-csi"
 
-# Storage class for Nexus backup, for AWS this should be "gp2-encrypted"
-NEXUS_STORAGE_CLASS_BACKUP=""
+# Volume snapshot class for Nexus backup, for AWS this should be "csi-aws-vsc"
+NEXUS_STORAGE_CLASS_BACKUP="csi-aws-vsc"
 
 # Nexus snapshot configuration, default to run daily at 2 AM
 NEXUS_SNAPSHOT_SCHEDULE="0 2 * * *"
 
+# Nexus snapshot cleanup configuration, default to run daily at 3 AM
+NEXUS_SNAPSHOT_CLEANUP_SCHEDULE="0 3 * * *"
+
 # Nexus snapshot TTL in seconds (default: 30 days)
 NEXUS_SNAPSHOT_TTL=2592000
 
+# Timeout in seconds to wait for a VolumeSnapshot to become ready
+NEXUS_SNAPSHOT_CHECK_TIMEOUT=600
+
 #############
 # SonarQube #
 #############
diff --git a/nexus/chart/templates/nexus-snapshot-cronjob.yaml b/nexus/chart/templates/nexus-snapshot-cronjob.yaml index e80d0f3a6..632129f69 100644 --- a/nexus/chart/templates/nexus-snapshot-cronjob.yaml +++ b/nexus/chart/templates/nexus-snapshot-cronjob.yaml @@ -7,11 +7,13 @@ metadata: spec: schedule: "{{ .Values.global.nexusSnapshotSchedule }}" concurrencyPolicy: Forbid + suspend: false jobTemplate: spec: ttlSecondsAfterFinished: {{ .Values.global.nexusSnapshotTTL }} template: spec: + backoffLimit: 0 serviceAccountName: ods-edit containers: - name: snapshot-creator @@ -20,27 +22,48 @@ spec: - /bin/sh - -c - | + # compute snapshot name so we can check it later + SNAP_NAME="{{ .Values.global.appName }}-snapshot.$(date +%Y-%m-%d.%H-%M-%S)" cat </dev/null || echo "false") + if [ "$ready" = "true" ]; then + echo "VolumeSnapshot $SNAP_NAME is ready" + break + fi + sleep $INTERVAL + elapsed=$((elapsed + INTERVAL)) + echo " ... waited $elapsed seconds out of $TIMEOUT seconds" + done + if [ $elapsed -ge $TIMEOUT ]; then + echo "Timeout waiting for VolumeSnapshot $SNAP_NAME to be ready" >&2 + exit 1 + fi + resources: + limits: + cpu: '1' + memory: 512Mi + requests: + cpu: 100m + memory: 256Mi imagePullPolicy: IfNotPresent - restartPolicy: OnFailure + restartPolicy: Never successfulJobsHistoryLimit: 30 failedJobsHistoryLimit: 30 diff --git a/nexus/chart/templates/pvc-data.yml b/nexus/chart/templates/pvc-data.yml index 839d99f34..d0c0ec9e4 100644 --- a/nexus/chart/templates/pvc-data.yml +++ b/nexus/chart/templates/pvc-data.yml @@ -15,5 +15,5 @@ spec: resources: requests: storage: {{ .Values.nexus.pvcDataCapacity }} - storageClassName: {{ .Values.global.storageClassData }} + storageClassName: {{ .Values.global.storageClassName }} volumeMode: Filesystem diff --git a/nexus/chart/templates/snapshot-cleanup-cronjob.yaml b/nexus/chart/templates/snapshot-cleanup-cronjob.yaml new file mode 100644 index 000000000..ea79d3ee5 --- /dev/null +++ 
b/nexus/chart/templates/snapshot-cleanup-cronjob.yaml
@@ -0,0 +1,54 @@
+# CronJob that deletes VolumeSnapshots older than the configured TTL.
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: volume-snapshot-cleanup
+  labels:
+    app: nexus
+spec:
+  schedule: "{{ .Values.global.nexusSnapshotCleanupSchedule }}"
+  concurrencyPolicy: Forbid
+  suspend: false
+  jobTemplate:
+    spec:
+      ttlSecondsAfterFinished: {{ int .Values.global.nexusSnapshotTTL }}
+      # backoffLimit is a Job spec field (not a Pod spec field); 0 disables retries
+      backoffLimit: 0
+      template:
+        spec:
+          serviceAccountName: ods-edit
+          containers:
+          - name: snapshot-cleaner
+            image: image-registry.openshift-image-registry.svc:5000/openshift/ose-cli:latest
+            command:
+            - /bin/sh
+            - -c
+            - |
+              # Delete VolumeSnapshots older than the configured TTL (in seconds)
+              # NOTE(review): this considers every VolumeSnapshot in the namespace,
+              # not only the Nexus ones - confirm that is intended.
+              # Compute the cutoff once so all snapshots are compared to the same instant.
+              cutoff=$(date -d "-{{ int .Values.global.nexusSnapshotTTL }} seconds" +%s)
+              oc get volumesnapshots --namespace {{ .Values.global.odsNamespace }} \
+                --no-headers -o custom-columns=NAME:.metadata.name,CREATED:.metadata.creationTimestamp | \
+              while read -r name created; do
+                # POSIX test: /bin/sh is not guaranteed to support [[ ]]. If the
+                # timestamp cannot be parsed the test fails and the snapshot is kept.
+                if [ "$(date -d "$created" +%s)" -lt "$cutoff" ]; then
+                  oc delete volumesnapshot "$name" --namespace {{ .Values.global.odsNamespace }}
+                else
+                  echo "Keeping VolumeSnapshot $name created at $created"
+                fi
+              done
+            resources:
+              limits:
+                cpu: '1'
+                memory: 512Mi
+              requests:
+                cpu: 100m
+                memory: 256Mi
+            imagePullPolicy: IfNotPresent
+          # Never, as in the Nexus snapshot CronJob: a failed pod is not restarted in place
+          restartPolicy: Never
+  successfulJobsHistoryLimit: 30
+  failedJobsHistoryLimit: 30
diff --git a/nexus/chart/values.yaml.template b/nexus/chart/values.yaml.template
index c7b2ba695..d63c8e371 100644
--- a/nexus/chart/values.yaml.template
+++ b/nexus/chart/values.yaml.template
@@ -7,7 +7,7 @@ global:
   nexusImageTag: $NEXUS_IMAGE_TAG
   appName: 'nexus'
   storageProvisioner: $NEXUS_STORAGE_PROVISIONER
-  storageClassData: $NEXUS_STORAGE_CLASS_DATA
+  storageClassName: $NEXUS_STORAGE_CLASS_DATA
   nexusHost: $NEXUS_HOST
   nexusAdminPasswordB64: $NEXUS_ADMIN_PASSWORD_B64
   registry: $DOCKER_REGISTRY
@@ -18,6 +18,8 @@ global:
   nexusSnapshotClass: $NEXUS_STORAGE_CLASS_BACKUP
   nexusSnapshotTTL: $NEXUS_SNAPSHOT_TTL
   nexusStorageName: $NEXUS_STORAGE_NAME
+  nexusSnapshotCheckTimeout: $NEXUS_SNAPSHOT_CHECK_TIMEOUT
+  nexusSnapshotCleanupSchedule: $NEXUS_SNAPSHOT_CLEANUP_SCHEDULE
 nexus:
   cpuRequest: $NEXUS_CPU_REQUEST
   cpuLimit: $NEXUS_CPU_LIMIT