|
40 | 40 | # ansible-playbook restore-openstack-ctlplane.yaml -e backup_file=openstack-ctlplane-backup-20260119-120000.tar.gz |
41 | 41 | # ansible-playbook restore-openstack-ctlplane.yaml -e backup_file=backup.tar.gz -e openstack_namespace=my-openstack |
42 | 42 | # ansible-playbook restore-openstack-ctlplane.yaml -e backup_file=backup.tar.gz -e skip_rabbitmq_restore=true |
| 43 | +# ansible-playbook restore-openstack-ctlplane.yaml -e backup_file=backup.tar.gz -e automated_db_restore=false # Manual DB restore |
43 | 44 |
|
44 | 45 | - name: Restore OpenStack Control Plane |
45 | 46 | hosts: localhost |
|
52 | 53 | ansible.builtin.set_fact: |
53 | 54 | openstack_namespace: "{{ openstack_namespace | default('openstack') }}" |
54 | 55 | skip_rabbitmq_restore: "{{ skip_rabbitmq_restore | default(false) | bool }}" |
| 56 | + automated_db_restore: "{{ automated_db_restore | default(true) | bool }}" |
55 | 57 |
|
56 | 58 | - name: Check if backup_file is provided |
57 | 59 | ansible.builtin.fail: |
|
82 | 84 | - "Target Namespace: {{ openstack_namespace }}" |
83 | 85 | - "Backup File: {{ backup_file_abs }}" |
84 | 86 | - "Skip RabbitMQ Restore: {{ skip_rabbitmq_restore }}" |
| 87 | + - "Automated Database Restore: {{ automated_db_restore }}" |
85 | 88 | - "" |
86 | 89 | - "NOTE: This playbook assumes cleanup was already done." |
87 | 90 | - "If not, run cleanup-openstack-ctlplane.yaml first." |
|
996 | 999 | when: galerabackup_backup_file.stat.exists and galerabackup_names.stdout_lines | length > 0 and galerarestore_list.rc == 0 |
997 | 1000 |
|
# Step 12b: Restore Galera/MariaDB
#
# Banner for the automated path.
# - `| bool` is applied at the point of use: a value passed as
#   `-e automated_db_restore=false` is a string, and extra vars take
#   precedence over facts, so the raw variable cannot be trusted as a boolean.
# - The banner is shown only when there is something to restore (same guards
#   as the restore task), so the "CRs have been automatically created" claim
#   is not printed on a run where the restore is skipped.
- name: Print Step 12b header (automated mode)
  ansible.builtin.debug:
    msg:
      - "----------------------------------------"
      - "Step 12b: Restore Galera/MariaDB Database Contents (Automated)"
      - "----------------------------------------"
      - ""
      - "CRITICAL: Restore database contents while services are NOT running."
      - "This is only possible because of the staged deployment pause."
      - ""
      - "GaleraRestore CRs have been automatically created."
      - "Now executing automated database restore (uses latest backup from each instance)."
      - ""
      - "To disable automation, run with: -e automated_db_restore=false"
  when:
    - automated_db_restore | bool
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
| 1017 | + |
# Banner and operator instructions for the manual path.
# - `not (… | bool)`: a value passed as `-e automated_db_restore=false` is the
#   string "false"; negating a non-empty string is always false, which would
#   keep this task from ever running. Converting first makes the switch work.
# - Shown only when there is something to restore, matching the guards on the
#   confirmation prompt that follows these instructions.
- name: Print Step 12b header (manual mode)
  ansible.builtin.debug:
    msg:
      - "----------------------------------------"
      - "Step 12b: Restore Galera/MariaDB Database Contents (Manual)"
      - "----------------------------------------"
      - ""
      - "CRITICAL: Restore database contents while services are NOT running."
      - "This is only possible because of the staged deployment pause."
      - ""
      - "GaleraRestore CRs have been automatically created."
      - "For each Galera instance, you must now execute the database restore."
      - ""
      - "AUTOMATED APPROACH:"
      - " Use the helper script to automatically restore from latest backup:"
      - " ../scripts/restore-galera-latest.sh openstackrestore"
      - " ../scripts/restore-galera-latest.sh openstackrestorecell1"
      - ""
      - "MANUAL APPROACH:"
      - " If you need to restore from a specific timestamp (not the latest):"
      - ""
      - " 1. List dump files to find the closest timestamp to backup:"
      - " oc exec -n {{ openstack_namespace }} openstack-restore-openstackrestore -- ls -la /backup/data/"
      - ""
      - " 2. Execute restore using the matching timestamp file:"
      - " oc exec -n {{ openstack_namespace }} openstack-restore-openstackrestore -- \\"
      - " /var/lib/backup-scripts/restore_galera --yes /backup/data/*_YYYY-MM-DD_HH-MM-SS.sql.gz"
      - ""
      - " 3. Repeat for cell1 and any additional galeras."
      - ""
      - "LIMITATION: Dump file timestamps may not exactly match the control plane backup timestamp."
      - "The dump is created when the backup job runs, slightly later than when triggered."
  when:
    - not (automated_db_restore | bool)
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
| 1051 | + |
# Restore every Galera instance from its most recent dump.
#
# Runs once per entry in galerabackup_names.stdout_lines. For each entry it:
#   1. checks that pod "openstack-restore-<entry>" exists and is Running,
#   2. lists /backup/data/*_backup_*.sql.gz inside the pod (grants files excluded),
#   3. takes the lexicographically last file and extracts its timestamp,
#   4. verifies that files matching /backup/data/*_<timestamp>.sql.gz exist,
#   5. calls /var/lib/backup-scripts/restore_galera --yes with that pattern.
# Any failed step exits non-zero, which fails the task for that item.
#
# `| bool` in `when`: a value passed as `-e automated_db_restore=false` is a
# string and extra vars take precedence over facts, so convert at use.
- name: Automated database restore
  ansible.builtin.shell: |
    set -euo pipefail
    RESTORE_NAME="{{ item }}"
    NAMESPACE="{{ openstack_namespace }}"
    POD_NAME="openstack-restore-${RESTORE_NAME}"

    echo "Processing restore: ${RESTORE_NAME}"
    echo "Pod name: ${POD_NAME}"

    # Check if pod exists and is running
    if ! oc get pod "${POD_NAME}" -n "${NAMESPACE}" &>/dev/null; then
      echo "ERROR: Restore pod not found: ${POD_NAME}"
      exit 1
    fi

    POD_PHASE=$(oc get pod "${POD_NAME}" -n "${NAMESPACE}" -o jsonpath='{.status.phase}')
    if [ "$POD_PHASE" != "Running" ]; then
      echo "ERROR: Restore pod is not running (phase: ${POD_PHASE})"
      exit 1
    fi

    echo "Pod is running, finding latest backup..."

    # List backup files (excluding grants).
    # `oc exec` starts the command directly, without a shell in the pod, so a
    # wildcard handed to `ls` is never expanded there (and an unquoted one is
    # expanded against the local filesystem instead). Run the listing through
    # `sh -c` so the glob is resolved inside the pod.
    # NOTE(review): assumes the restore pod image provides `sh` - confirm.
    BACKUP_FILES=$(oc exec -n "${NAMESPACE}" "${POD_NAME}" -- \
      sh -c 'ls -1 /backup/data/*_backup_*.sql.gz' 2>/dev/null | grep -v grants || true)

    if [ -z "${BACKUP_FILES}" ]; then
      echo "ERROR: No backup files found in /backup/data/"
      exit 1
    fi

    # Get latest backup (file names sort chronologically by their timestamp suffix)
    LATEST_BACKUP=$(echo "${BACKUP_FILES}" | sort | tail -n 1)
    echo "Latest backup: ${LATEST_BACKUP}"

    # Extract timestamp from filename.
    # -n together with the trailing /p prints only when the pattern matched;
    # a plain substitution would echo the whole name on a mismatch and the
    # empty check below could never trigger.
    TIMESTAMP=$(basename "${LATEST_BACKUP}" | sed -nE 's/.*_backup_(.*)\.sql\.gz$/\1/p')

    if [ -z "${TIMESTAMP}" ]; then
      echo "ERROR: Could not extract timestamp from filename: ${LATEST_BACKUP}"
      exit 1
    fi

    echo "Extracted timestamp: ${TIMESTAMP}"

    # Construct restore pattern
    RESTORE_PATTERN="/backup/data/*_${TIMESTAMP}.sql.gz"
    echo "Restore pattern: ${RESTORE_PATTERN}"

    # Verify files exist. The pattern is passed as a positional argument and
    # left unquoted inside the pod's shell so that the wildcard expands there.
    MATCHED_FILES=$(oc exec -n "${NAMESPACE}" "${POD_NAME}" -- \
      sh -c 'ls -1 $1' sh "${RESTORE_PATTERN}" 2>/dev/null || true)

    if [ -z "${MATCHED_FILES}" ]; then
      echo "ERROR: No files match pattern: ${RESTORE_PATTERN}"
      exit 1
    fi

    FILE_COUNT=$(echo "${MATCHED_FILES}" | wc -l)
    echo "Found ${FILE_COUNT} file(s) matching pattern"

    # Execute restore. The pattern is handed over unexpanded, exactly as before.
    # NOTE(review): presumably restore_galera resolves the wildcard itself - confirm.
    echo "Executing database restore..."
    oc exec -n "${NAMESPACE}" "${POD_NAME}" -- \
      /var/lib/backup-scripts/restore_galera --yes "${RESTORE_PATTERN}"

    echo "✓ Database restore completed for: ${RESTORE_NAME}"
  args:
    executable: /bin/bash
  loop: "{{ galerabackup_names.stdout_lines }}"
  changed_when: true
  when:
    - automated_db_restore | bool
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
| 1123 | + |
# Summary line for the automated path; reports how many instances were looped over.
# `| bool`: a value passed as `-e automated_db_restore=false` is a string and
# extra vars take precedence over facts, so convert at the point of use.
- name: Print Step 12b completion (automated)
  ansible.builtin.debug:
    msg: "✓ All database restores completed ({{ galerabackup_names.stdout_lines | length }} instance(s))"
  when:
    - automated_db_restore | bool
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
| 1128 | + |
# Manual path: block until the operator answers; the answer is stored in
# db_restore_confirm.user_input for the tasks that follow.
# `not (… | bool)`: a value passed as `-e automated_db_restore=false` is the
# string "false"; negating a non-empty string is always false, so without the
# conversion this prompt would never be shown.
- name: Confirm database restore completion (manual mode)
  ansible.builtin.pause:
    prompt: |

      Have you completed database restore? (yes/no)
  register: db_restore_confirm
  when:
    - not (automated_db_restore | bool)
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
1034 | 1136 |
|
1035 | | - - name: Warn about missing database restore |
| 1137 | + - name: Warn about missing database restore (manual mode) |
1036 | 1138 | ansible.builtin.pause: |
1037 | 1139 | prompt: | |
1038 | 1140 |
|
|
1041 | 1143 |
|
1042 | 1144 | Continue anyway without database restore? (yes/no) |
1043 | 1145 | register: skip_db_confirm |
1044 | | - when: db_restore_confirm.user_input != "yes" |
| 1146 | + when: not automated_db_restore and galerabackup_backup_file.stat.exists and galerabackup_names.stdout_lines | length > 0 and db_restore_confirm.user_input != "yes" |
1045 | 1147 |
|
# Manual path: abort when the operator neither confirmed the restore nor chose
# to continue without it. The message shows how to resume the paused deployment.
# - `not (… | bool)`: a string "false" from `-e automated_db_restore=false`
#   must be converted before negation.
# - The two user_input comparisons stay in a single expression so that
#   skip_db_confirm.user_input is only read when the first comparison is true.
- name: Fail if database restore not completed (manual mode)
  ansible.builtin.fail:
    msg: |
      Aborting. Restore databases and then resume with:
      oc annotate openstackcontrolplane {{ ctlplane_name }} -n {{ openstack_namespace }} core.openstack.org/deployment-stage-
  when:
    - not (automated_db_restore | bool)
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
    - db_restore_confirm.user_input != "yes" and skip_db_confirm.user_input != "yes"
1052 | 1154 |
|
# Manual path: acknowledge the operator's "yes" answer to the confirmation prompt.
# `not (… | bool)`: a string "false" from `-e automated_db_restore=false`
# must be converted before negation, otherwise this never runs.
- name: Print Step 12b completion (manual)
  ansible.builtin.debug:
    msg: "✓ Database restore completed"
  when:
    - not (automated_db_restore | bool)
    - galerabackup_backup_file.stat.exists
    - galerabackup_names.stdout_lines | length > 0
    - db_restore_confirm.user_input == "yes"
| 1159 | + |
# Printed when there is nothing to restore: either the GaleraBackup file is
# absent or it yielded no names. The condition is the negation of the guard
# used by the restore tasks.
- name: Print Step 12b skip message
  ansible.builtin.debug:
    msg: "No GaleraBackup CRs found, skipping database restore..."
  when: >-
    not (galerabackup_backup_file.stat.exists
    and galerabackup_names.stdout_lines | length > 0)
1057 | 1164 |
|
1058 | 1165 | # Step 13: Restore OVN Database Contents |
1059 | 1166 | - name: Print Step 13 header |
|
0 commit comments