Skip to content

Commit 27794a5

Browse files
stuggi and claude committed
Fix automated database restore pod name and add wait for pods
Fix two critical issues in automated database restore:

1. Wrong pod name construction:
   - Before: Used backup name (openstack) → openstack-restore-openstack
   - After: Use restore CR name (openstackrestore) → openstack-restore-openstackrestore
   - GaleraRestore CR naming: <backupName>restore

2. Missing wait for pods to be ready:
   - GaleraRestore CRs were created but pods not ready yet
   - Added wait task that:
     - Waits up to 5 minutes for pod to be created
     - Waits for pod Ready condition (timeout 5 minutes)
     - Ensures pods are running before attempting restore

Changes:
- Add BACKUP_NAME and RESTORE_NAME variables for clarity
- Construct pod name using RESTORE_NAME (not BACKUP_NAME)
- Add wait task after creating GaleraRestore CRs
- Wait for each pod individually before proceeding to restore

Example:
  Backup name: "openstack"
  Restore CR name: "openstackrestore"
  Pod name: "openstack-restore-openstackrestore"

This fixes the "Restore pod not found" error in automated mode.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 4eb8a76 commit 27794a5

1 file changed

Lines changed: 38 additions & 4 deletions

File tree

docs/dev/playbooks/restore-openstack-ctlplane.yaml

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,38 @@
10321032
msg: "{{ galerarestore_list.stdout_lines }}"
10331033
when: galerabackup_backup_file.stat.exists and galerabackup_names.stdout_lines | length > 0 and galerarestore_list.rc == 0
10341034

1035+
# Block until the restore pod for each backup name exists and reports Ready.
# Naming: GaleraRestore CR = "<backup name>restore",
#         pod              = "openstack-restore-<CR name>".
# Runs once per entry in galerabackup_names.stdout_lines; read-only, so it
# never reports "changed".
- name: Wait for GaleraRestore pods to be ready
  ansible.builtin.shell: |
    # Abort on the first failing command. Without this the script's exit
    # status is that of the final echo, so a failed or timed-out `oc wait`
    # would be silently reported as success.
    set -euo pipefail

    RESTORE_NAME="{{ item }}restore"
    POD_NAME="openstack-restore-${RESTORE_NAME}"
    NAMESPACE="{{ openstack_namespace }}"

    echo "Waiting for pod: ${POD_NAME}"

    # Wait for pod to exist (60 attempts x 5s = up to 5 minutes)
    pod_found=false
    for i in {1..60}; do
      if oc get pod "${POD_NAME}" -n "${NAMESPACE}" &>/dev/null; then
        echo "Pod found"
        pod_found=true
        break
      fi
      echo "Waiting for pod to be created... ($i/60)"
      sleep 5
    done

    # Fail with a clear message instead of falling through to `oc wait`
    # when the pod was never created.
    if [ "${pod_found}" != "true" ]; then
      echo "ERROR: Restore pod not created within 5 minutes: ${POD_NAME}" >&2
      exit 1
    fi

    # Wait for pod to be running
    oc wait --for=condition=Ready pod/"${POD_NAME}" -n "${NAMESPACE}" --timeout=300s

    echo "✓ Pod ${POD_NAME} is ready"
  args:
    # {1..60} brace expansion and &> redirection are bash features.
    executable: /bin/bash
  loop: "{{ galerabackup_names.stdout_lines }}"
  changed_when: false
  when: galerabackup_backup_file.stat.exists and galerabackup_names.stdout_lines | length > 0
1061+
1062+
# Confirmation message shown after the readiness wait; emitted only when the
# backup file exists and at least one backup name was found.
- name: Print wait completion
  when:
    # List items are AND-ed together by Ansible.
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
  ansible.builtin.debug:
    msg: "✓ All GaleraRestore pods are ready"
1066+
10351067
# Step 12b: Restore Galera/MariaDB
10361068
- name: Print Step 12b header (automated mode)
10371069
ansible.builtin.debug:
@@ -1086,13 +1118,15 @@
10861118
- name: Automated database restore
10871119
ansible.builtin.shell: |
10881120
set -e
1089-
RESTORE_NAME="{{ item }}"
1121+
BACKUP_NAME="{{ item }}"
1122+
RESTORE_NAME="${BACKUP_NAME}restore"
10901123
POD_NAME="openstack-restore-${RESTORE_NAME}"
10911124
1092-
echo "Processing restore: ${RESTORE_NAME}"
1125+
echo "Processing restore for backup: ${BACKUP_NAME}"
1126+
echo "Restore CR: ${RESTORE_NAME}"
10931127
echo "Pod name: ${POD_NAME}"
10941128
1095-
# Check if pod exists and is running
1129+
# Verify pod exists and is running
10961130
if ! oc get pod "${POD_NAME}" -n {{ openstack_namespace }} &>/dev/null; then
10971131
echo "ERROR: Restore pod not found: ${POD_NAME}"
10981132
exit 1
@@ -1148,7 +1182,7 @@
11481182
oc exec -n {{ openstack_namespace }} "${POD_NAME}" -- \
11491183
/var/lib/backup-scripts/restore_galera --yes "${RESTORE_PATTERN}"
11501184
1151-
echo "✓ Database restore completed for: ${RESTORE_NAME}"
1185+
echo "✓ Database restore completed for: ${BACKUP_NAME} (restore CR: ${RESTORE_NAME})"
11521186
args:
11531187
executable: /bin/bash
11541188
loop: "{{ galerabackup_names.stdout_lines }}"

0 commit comments

Comments
 (0)