Skip to content

Commit 173adb2

Browse files
committed
ci(snapshot): restore per-source depends_on instead of global wait
Previously every restore step waited for the entire snapshot-create group to finish via a pipeline-wide wait step. Each restore only needs its own source snapshot, so key each create step by instance/kv and give each restore a `depends_on` entry for the specific source snapshot it consumes. Restores now start as soon as their source snapshot is ready. Signed-off-by: Jack Thomson <jackabt@amazon.com>
1 parent 1f585d6 commit 173adb2

1 file changed

Lines changed: 17 additions & 3 deletions

File tree

.buildkite/pipeline_cross.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,18 @@
3838
"mkdir -pv snapshots",
3939
"tar cSvf snapshots/{instance}_{kv}.tar snapshot_artifacts",
4040
]
41-
pipeline.build_group(
41+
42+
# Buildkite step keys may only contain [A-Za-z0-9_\-:], so dots in
43+
# instance names (m5n.metal) and kernel versions (linux_5.10) must be
44+
# sanitized. Tarball paths stay unchanged.
45+
def create_step_key(instance, kv):
    """Return the Buildkite step key for a snapshot-create step.

    Buildkite step keys may only contain characters from
    ``[A-Za-z0-9_\\-:]``. Every character outside that set (not only the
    dots found in names such as ``m5n.metal`` or ``linux_5.10``) is
    replaced with an underscore, so the key stays valid if a new instance
    or kernel name brings in another separator.

    Args:
        instance: Instance name of the snapshot source, e.g. ``m5n.metal``.
        kv: Kernel version of the snapshot source, e.g. ``linux_5.10``.

    Returns:
        A key of the form ``snap-create-<instance>-<kv>`` with all
        disallowed characters replaced by ``_``.
    """
    # Local import keeps this helper self-contained; the module's import
    # block is not visible from here.
    import re

    raw_key = f"snap-create-{instance}-{kv}"
    # Dots map to "_" exactly as before; other invalid characters now do too.
    return re.sub(r"[^A-Za-z0-9_:-]", "_", raw_key)
47+
48+
# Key each snapshot-create step so restore steps can depend on the
49+
# specific source snapshot they need, rather than waiting for every
50+
# snapshot-create step to finish. `build_group` doesn't sanitize
51+
# substituted key values, so we set the final key after it fans out.
52+
x86_create = pipeline.build_group(
4253
"snapshot-create",
4354
commands,
4455
timeout=30,
@@ -49,15 +60,17 @@
4960

5061
# https://github.com/firecracker-microvm/firecracker/blob/main/docs/kernel-policy.md#experimental-snapshot-compatibility-across-kernel-versions
5162
aarch64_platforms = [("al2023", "linux_6.1")]
52-
pipeline.build_group(
63+
aarch64_create = pipeline.build_group(
5364
"snapshot-create-aarch64",
5465
commands,
5566
timeout=30,
5667
artifact_paths="snapshots/**/*",
5768
instances=instances_aarch64,
5869
platforms=aarch64_platforms,
5970
)
60-
pipeline.add_step("wait")
71+
for grp in (x86_create, aarch64_create):
72+
for s in grp["steps"]:
73+
s["key"] = create_step_key(s["agents"]["instance"], s["agents"]["kv"])
6174

6275
# allow-list of what instances can be restored on what other instances (in
6376
# addition to itself). aarch64 is restricted to same-instance restores.
@@ -111,6 +124,7 @@
111124
"label": f"snapshot-restore-src-{src_instance}-{src_kv}-dst-{dst_instance}-{dst_kv}",
112125
"timeout": 30,
113126
"agents": {"instance": dst_instance, "kv": dst_kv, "os": dst_os},
127+
"depends_on": [create_step_key(src_instance, src_kv)],
114128
**per_instance,
115129
}
116130
steps.append(step)

0 commit comments

Comments
 (0)