Skip to content

Commit 43e2f75

Browse files
committed
feat(backup): on-demand bitmap recreation for incremental NAS backup
CloudStack rebuilds the libvirt domain XML on every VM start, which means persistent QEMU dirty bitmaps don't survive a stop/start cycle. Rather than hooking into the VM start lifecycle (intrusive across the orchestration layer), this commit handles the missing bitmap *lazily* at the next backup attempt:

nasbackup.sh
- When `-M incremental` is requested, the script first checks `virsh checkpoint-list` for the parent bitmap. If absent, it recreates the checkpoint on the running domain so libvirt accepts the `<incremental>` reference. The next incremental will be larger than usual (it captures all writes since the recreate, not since the previous incremental) but is correct; subsequent ones return to normal size.
- On recreation, emits `BITMAP_RECREATED=<name>` on stdout for the orchestrator to record.

BackupAnswer
+ `bitmapRecreated` field surfaced from the agent.

LibvirtTakeBackupCommandWrapper
- Strips the `BITMAP_RECREATED=` line from stdout before size parsing.
- Sets `answer.setBitmapRecreated(...)`.

NASBackupChainKeys
+ `BITMAP_RECREATED` key for backup_details.

NASBackupProvider
- When the agent reports a recreated bitmap, persists it under backup_details and logs an info-level message so operators can correlate larger-than-usual incrementals with VM restarts.

This satisfies the bitmap-loss-on-VM-restart concern from the RFC review without touching VirtualMachineManager / StartCommand / agent lifecycle.

Refs: #12899
1 parent 1f2aebc commit 43e2f75

5 files changed

Lines changed: 51 additions & 0 deletions

File tree

core/src/main/java/org/apache/cloudstack/backup/BackupAnswer.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ public class BackupAnswer extends Answer {
3535
// Set when an incremental was requested but the agent had to fall back to a full
3636
// (e.g. VM was stopped). Provider should record this backup as type=full.
3737
private Boolean incrementalFallback;
38+
// Set when the agent had to recreate the parent bitmap before this incremental
39+
// (e.g. CloudStack rebuilt the domain XML on the previous VM start, losing bitmaps).
40+
// The first incremental after a recreate is larger than usual; subsequent
41+
// incrementals return to normal size. Informational — recorded in backup_details.
42+
private String bitmapRecreated;
3843

3944
public BackupAnswer(final Command command, final boolean success, final String details) {
4045
super(command, success, details);
@@ -90,4 +95,12 @@ public Boolean getIncrementalFallback() {
9095
public void setIncrementalFallback(Boolean incrementalFallback) {
9196
this.incrementalFallback = incrementalFallback;
9297
}
98+
99+
public String getBitmapRecreated() {
100+
return bitmapRecreated;
101+
}
102+
103+
public void setBitmapRecreated(String bitmapRecreated) {
104+
this.bitmapRecreated = bitmapRecreated;
105+
}
93106
}

plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupChainKeys.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ public final class NASBackupChainKeys {
4242
public static final String TYPE_FULL = "full";
4343
public static final String TYPE_INCREMENTAL = "incremental";
4444

45+
/** Set to the bitmap name when this incremental had to recreate its parent bitmap on the host (informational; this incremental is larger than usual). */
46+
public static final String BITMAP_RECREATED = "nas.bitmap_recreated";
47+
4548
private NASBackupChainKeys() {
4649
}
4750
}

plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,12 @@ public Pair<Boolean, Backup> takeBackup(final VirtualMachine vm, Boolean quiesce
434434
backupVO.setBackedUpVolumes(backupManager.createVolumeInfoFromVolumes(volumes));
435435
if (backupDao.update(backupVO.getId(), backupVO)) {
436436
persistChainMetadata(backupVO, effective, answer.getBitmapCreated());
437+
if (answer.getBitmapRecreated() != null) {
438+
backupDetailsDao.persist(new BackupDetailVO(backupVO.getId(),
439+
NASBackupChainKeys.BITMAP_RECREATED, answer.getBitmapRecreated(), true));
440+
logger.info("NAS incremental for VM {} recreated parent bitmap {} (likely VM was restarted since last backup)",
441+
vm.getInstanceName(), answer.getBitmapRecreated());
442+
}
437443
return new Pair<>(true, backupVO);
438444
} else {
439445
throw new CloudRuntimeException("Failed to update backup");

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtTakeBackupCommandWrapper.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir
117117
// numeric-suffix parser keeps working.
118118
String stdout = result.second().trim();
119119
String bitmapCreated = null;
120+
String bitmapRecreated = null;
120121
boolean incrementalFallback = false;
121122
StringBuilder filtered = new StringBuilder();
122123
for (String line : stdout.split("\n")) {
@@ -125,6 +126,10 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir
125126
bitmapCreated = trimmed.substring("BITMAP_CREATED=".length());
126127
continue;
127128
}
129+
if (trimmed.startsWith("BITMAP_RECREATED=")) {
130+
bitmapRecreated = trimmed.substring("BITMAP_RECREATED=".length());
131+
continue;
132+
}
128133
if (trimmed.startsWith("INCREMENTAL_FALLBACK=")) {
129134
incrementalFallback = true;
130135
continue;
@@ -152,6 +157,7 @@ public Answer execute(TakeBackupCommand command, LibvirtComputingResource libvir
152157
BackupAnswer answer = new BackupAnswer(command, true, stdout);
153158
answer.setSize(backupSize);
154159
answer.setBitmapCreated(bitmapCreated);
160+
answer.setBitmapRecreated(bitmapRecreated);
155161
answer.setIncrementalFallback(incrementalFallback);
156162
return answer;
157163
}

scripts/vm/hypervisor/kvm/nasbackup.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,29 @@ backup_running_vm() {
150150
;;
151151
esac
152152

153+
# When incremental, verify the parent bitmap still exists on the running domain.
154+
# CloudStack rebuilds the libvirt domain XML on every VM start, so persistent bitmaps
155+
# are lost across stop/start. If the parent is missing, recreate it as a fresh bitmap
156+
# so libvirt accepts the <incremental> reference. The first backup after a recreate
157+
# captures all writes since the recreate point — slightly larger than ideal, but correct.
158+
if [[ "$effective_mode" == "incremental" ]]; then
159+
if ! virsh -c qemu:///system checkpoint-list "$VM" --name 2>/dev/null | grep -qx "$BITMAP_PARENT"; then
160+
cat > $dest/recreate-checkpoint.xml <<XML
161+
<domaincheckpoint><name>$BITMAP_PARENT</name><disks>
162+
$(virsh -c qemu:///system domblklist "$VM" --details 2>/dev/null | awk '$2=="disk"{printf "<disk name=\"%s\"/>\n", $3}')
163+
</disks></domaincheckpoint>
164+
XML
165+
if ! virsh -c qemu:///system checkpoint-create "$VM" --xmlfile $dest/recreate-checkpoint.xml > /dev/null 2>&1; then
166+
echo "Failed to recreate parent bitmap $BITMAP_PARENT for $VM"
167+
cleanup
168+
exit 1
169+
fi
170+
# Marker for the orchestrator: this incremental is larger because the bitmap was rebuilt.
171+
echo "BITMAP_RECREATED=$BITMAP_PARENT"
172+
rm -f $dest/recreate-checkpoint.xml
173+
fi
174+
fi
175+
153176
# Build backup XML (and matching checkpoint XML when applicable).
154177
name="root"
155178
echo "<domainbackup mode='push'>" > $dest/backup.xml

0 commit comments

Comments
 (0)