diff --git a/.buildkite/pipeline_cross.py b/.buildkite/pipeline_cross.py
index bbe3a98b7cf..4563736e312 100755
--- a/.buildkite/pipeline_cross.py
+++ b/.buildkite/pipeline_cross.py
@@ -23,10 +23,13 @@
     "m6i.metal",
     "m7i.metal-24xl",
     "m7i.metal-48xl",
+    "m8i.metal-48xl",
     "m6a.metal",
     "m7a.metal-48xl",
 ]
-instances_aarch64 = ["m7g.metal"]
+instances_aarch64 = ["m6g.metal", "m7g.metal", "m8g.metal-24xl"]
+restore_only_platforms = [("al2023", "linux_6.18")]
+x86_64_platforms = DEFAULT_PLATFORMS + restore_only_platforms
 commands = [
     "./tools/devtool -y test --no-build --no-archive -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py",
     # punch holes in mem snapshot tiles and tar them so they are preserved in S3
@@ -35,7 +38,21 @@
     "mkdir -pv snapshots",
     "tar cSvf snapshots/{instance}_{kv}.tar snapshot_artifacts",
 ]
-pipeline.build_group(
+
+def create_step_key(instance, kv):
+    """Buildkite key for a snapshot-create step.
+
+    Keys may only contain [A-Za-z0-9_\\-:], so dots in instance names
+    (m5n.metal) and kernel versions (linux_5.10) are sanitized to
+    underscores. Tarball paths stay unchanged.
+    """
+    return f"snap-create-{instance}-{kv}".replace(".", "_")
+
+# Key each snapshot-create step so restore steps can depend on the
+# specific source snapshot they need, rather than waiting for every
+# snapshot-create step to finish. `build_group` doesn't sanitize
+# substituted key values, so we set the final key after it fans out.
+x86_create = pipeline.build_group(
     "snapshot-create",
     commands,
     timeout=30,
@@ -43,23 +60,34 @@
     instances=instances_x86_64,
     platforms=DEFAULT_PLATFORMS,
 )
-pipeline.add_step("wait")
-# allow-list of what instances can be restores on what other instances (in
-# addition to itself)
+# https://github.com/firecracker-microvm/firecracker/blob/main/docs/snapshotting/snapshot-support.md#where-can-i-resume-my-snapshots
+aarch64_platforms = [("al2023", "linux_6.1")]
+aarch64_create = pipeline.build_group(
+    "snapshot-create-aarch64",
+    commands,
+    timeout=30,
+    artifact_paths="snapshots/**/*",
+    instances=instances_aarch64,
+    platforms=aarch64_platforms,
+)
+for grp in (x86_create, aarch64_create):
+    for s in grp["steps"]:
+        s["key"] = create_step_key(s["agents"]["instance"], s["agents"]["kv"])
+
+# allow-list of what instances can be restored on what other instances (in
+# addition to itself). aarch64 is restricted to same-instance restores.
 supported = {
     "m5n.metal": ["m6i.metal"],
     "m6i.metal": ["m5n.metal"],
 }
-
-# https://github.com/firecracker-microvm/firecracker/blob/main/docs/kernel-policy.md#experimental-snapshot-compatibility-across-kernel-versions
-aarch64_platforms = [("al2023", "linux_6.1")]
+aarch64_all_platforms = aarch64_platforms + restore_only_platforms
 perms_aarch64 = itertools.product(
-    instances_aarch64, aarch64_platforms, instances_aarch64, aarch64_platforms
+    instances_aarch64, aarch64_platforms, instances_aarch64, aarch64_all_platforms
 )
 perms_x86_64 = itertools.product(
-    instances_x86_64, DEFAULT_PLATFORMS, instances_x86_64, DEFAULT_PLATFORMS
+    instances_x86_64, DEFAULT_PLATFORMS, instances_x86_64, x86_64_platforms
 )
 steps = []
 for (
@@ -74,6 +102,9 @@
     # newer -> older is not supported, and does not work
     if src_kv > dst_kv:
         continue
+    # only test cross-kernel restore between adjacent kernel versions
+    if src_kv == "linux_5.10" and dst_kv == "linux_6.18":
+        continue
     if src_instance != dst_instance and dst_instance not in supported.get(
         src_instance, []
     ):
@@ -96,6 +127,7 @@
         "label": f"snapshot-restore-src-{src_instance}-{src_kv}-dst-{dst_instance}-{dst_kv}",
         "timeout": 30,
         "agents": {"instance": dst_instance, "kv": dst_kv, "os": dst_os},
+        "depends_on": [create_step_key(src_instance, src_kv)],
         **per_instance,
     }
     steps.append(step)
diff --git a/tests/framework/utils.py b/tests/framework/utils.py
index 0f09bac5a47..bf5c1b3d715 100644
--- a/tests/framework/utils.py
+++ b/tests/framework/utils.py
@@ -2,7 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 """Generic utility functions that are used in the framework."""
 
+import base64
 import errno
+import hashlib
 import json
 import logging
 import os
@@ -555,13 +557,15 @@ def start_screen_process(screen_log, session_name, binary_path, binary_params):
 
 
 def guest_run_fio_iteration(ssh_connection, iteration):
-    """Start FIO workload into a microVM."""
-    fio = """fio --filename=/dev/vda --direct=1 --rw=randread --bs=4k \
-        --ioengine=libaio --iodepth=16 --runtime=10 --numjobs=4 --time_based \
-        --group_reporting --name=iops-test-job --eta-newline=1 --readonly \
-        --output /tmp/fio{} > /dev/null &""".format(iteration)
-    exit_code, _, stderr = ssh_connection.run(fio)
-    assert exit_code == 0, stderr
+    """Run FIO workload on a microVM and verify IO completed successfully."""
+    fio = (
+        "fio --filename=/dev/vda --direct=1 --rw=randread --bs=4k "
+        "--ioengine=libaio --iodepth=16 --runtime=10 --numjobs=4 --time_based "
+        "--group_reporting --name=iops-test-job --readonly --output-format=json"
+    )
+    _, stdout, _ = ssh_connection.check_output(fio)
+    total_read = json.loads(stdout)["jobs"][0]["read"]["io_bytes"]
+    assert total_read > 0, f"fio iteration {iteration}: no bytes read from block device"
 
 
 def check_filesystem(ssh_connection, disk_fmt, disk):
@@ -576,6 +580,19 @@ def check_entropy(ssh_connection):
     ssh_connection.check_output("dd if=/dev/hwrng of=/dev/null bs=4096 count=1")
 
 
+def check_network_data_integrity(ssh_connection, size_bytes=64 * 1024):
+    """Push random bytes to the guest over SSH and verify the guest-side sha256
+    matches the host-side hash. Exercises the virtio-net RX path end-to-end."""
+    payload = os.urandom(size_bytes)
+    host_hash = hashlib.sha256(payload).hexdigest()
+    b64 = base64.b64encode(payload).decode("ascii")
+    _, stdout, _ = ssh_connection.check_output(f"echo {b64} | base64 -d | sha256sum")
+    guest_hash = stdout.strip().split()[0]
+    assert (
+        guest_hash == host_hash
+    ), f"Guest hash {guest_hash} does not match host hash {host_hash}"
+
+
 @retry(wait=wait_fixed(0.5), stop=stop_after_attempt(5), reraise=True)
 def wait_process_running(process):
     """Wait for a process to run.
diff --git a/tests/integration_tests/functional/test_snapshot_phase1.py b/tests/integration_tests/functional/test_snapshot_phase1.py
index 9bdfc9d0ce4..89bed92ee8d 100644
--- a/tests/integration_tests/functional/test_snapshot_phase1.py
+++ b/tests/integration_tests/functional/test_snapshot_phase1.py
@@ -6,7 +6,6 @@
 """
 
 import json
-import platform
 import re
 
 import pytest
@@ -18,9 +17,6 @@
 )
 from framework.utils_cpu_templates import get_cpu_template_name
 
-if platform.machine() != "x86_64":
-    pytestmark = pytest.mark.skip("only x86_64 architecture supported")
-
 # Default IPv4 address to route MMDS requests.
 IPV4_ADDRESS = "169.254.169.254"
 NET_IFACE_FOR_MMDS = "eth3"
@@ -58,6 +54,8 @@ def test_snapshot_phase1(
     configure_mmds(vm, ["eth3"], version="V2")
     # Add a memory balloon.
     vm.api.balloon.put(amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1)
+    # Add an entropy device.
+    vm.api.entropy.put()
 
     vm.start()
 
@@ -95,6 +93,14 @@
     _, stdout, _ = vm.ssh.run(cmd)
     assert json.loads(stdout) == data_store
 
+    # Record guest CLOCK_MONOTONIC just before snapshotting. The cross-kernel
+    # restore test reads this back and asserts the clock didn't jump forward
+    # by the pipeline-elapsed time, which would indicate a kvm-clock regression
+    # (see a1fd537f9 "fix(kvm-clock): do not jump monotonic clock on restore").
+    vm.ssh.check_output(
+        "python3 -c 'import time; print(time.monotonic())' > /tmp/monotonic-before"
+    )
+
     # Copy snapshot files to be published to S3 for the 2nd part of the test
     # Create snapshot artifacts directory specific for the kernel version used.
     snapshot = vm.snapshot_full()
diff --git a/tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py b/tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py
index 253502a2d1f..80e93c2931b 100644
--- a/tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py
+++ b/tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py
@@ -5,13 +5,14 @@
 
 import json
 import logging
-import platform
 from pathlib import Path
 
 import pytest
 
 from framework.defs import FC_WORKSPACE_DIR
 from framework.utils import (
+    check_entropy,
+    check_network_data_integrity,
     generate_mmds_get_request,
     generate_mmds_session_token,
     guest_run_fio_iteration,
@@ -27,6 +28,23 @@
 pytestmark = pytest.mark.nonci
 
 
+def _check_guest_monotonic_did_not_jump(ssh_connection, max_delta_sec=10):
+    # Phase1 recorded CLOCK_MONOTONIC to /tmp/monotonic-before just before
+    # snapshotting. Firecracker is supposed to resume MONOTONIC from capture
+    # time, so the delta here should be near zero regardless of how long
+    # phase1 and restore are apart in the pipeline. A large delta indicates
+    # MONOTONIC jumped forward across the snapshot - a kvm-clock regression
+    # that could surface only on some host-kernel combinations.
+    _, before_str, _ = ssh_connection.check_output("cat /tmp/monotonic-before")
+    _, after_str, _ = ssh_connection.check_output(
+        "python3 -c 'import time; print(time.monotonic())'"
+    )
+    delta = float(after_str.strip()) - float(before_str.strip())
+    assert (
+        0 <= delta <= max_delta_sec
+    ), f"Guest MONOTONIC jumped {delta:.3f}s across snapshot (max {max_delta_sec}s)"
+
+
 def _test_balloon(microvm):
     # Check memory usage.
     first_reading = get_stable_rss_mem(microvm)
@@ -71,12 +89,11 @@ def get_snapshot_dirs():
     """Get all the snapshot directories"""
     snapshot_root_name = "snapshot_artifacts"
     snapshot_root_dir = Path(FC_WORKSPACE_DIR) / snapshot_root_name
-    cpu_templates = []
-    if platform.machine() == "x86_64":
-        cpu_templates = ["None"]
-    cpu_templates += get_supported_cpu_templates()
+    cpu_templates = ["None"] + get_supported_cpu_templates()
 
     for cpu_template in cpu_templates:
-        for snapshot_dir in snapshot_root_dir.glob(f"*_{cpu_template}_guest_snapshot"):
+        for snapshot_dir in snapshot_root_dir.glob(
+            f"**/*_{cpu_template}_guest_snapshot"
+        ):
             assert snapshot_dir.is_dir()
             yield pytest.param(snapshot_dir, id=snapshot_dir.name)
@@ -100,7 +117,11 @@
     # in the snapshot root dir.
     logger.info("Working with snapshot artifacts in %s.", snapshot_dir)
 
-    vm = microvm_factory.build()
+    # Skip memory monitor: the balloon inflation below fragments the guest
+    # VMA via discard_range's MAP_FIXED anonymous mmap workaround (used only
+    # for private file-backed mappings from snapshot restore), defeating
+    # MemoryMonitor.is_guest_mem. Cross-kernel test, not overhead.
+    vm = microvm_factory.build(monitor_memory=False)
     vm.time_api_requests = False
     vm.spawn()
     logger.info("Loading microVM from snapshot...")
@@ -115,6 +136,15 @@
         logger.info("Testing net device %s...", iface["iface"].dev_name)
         vm.ssh_iface(idx).check_output("true")
 
+    # Check MONOTONIC before any other post-restore activity, so the delta
+    # is bounded by the few seconds of post-resume setup rather than the
+    # full test runtime.
+    logger.info("Testing guest MONOTONIC did not jump across snapshot...")
+    _check_guest_monotonic_did_not_jump(vm.ssh)
+
+    logger.info("Testing network data integrity...")
+    check_network_data_integrity(vm.ssh)
+
     logger.info("Testing data store behavior...")
     _test_mmds(vm, vm.iface["eth3"]["iface"])
 
@@ -124,9 +154,10 @@
     logger.info("Testing vsock device...")
     check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh)
 
-    # Run fio on the guest.
-    # TODO: check the result of FIO or use fsck to check that the root device is
-    # not corrupted. No obvious errors will be returned here.
+    logger.info("Testing block device via fio...")
     guest_run_fio_iteration(vm.ssh, 0)
 
+    logger.info("Testing entropy...")
+    check_entropy(vm.ssh)
+
     vm.kill()
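
Reviewer note (not part of the patch): a minimal standalone sketch of how the sanitized snapshot-create keys and the restore steps' depends_on line up. The instance and kernel values come from the pipeline above; the bare step dicts are a simplification of what build_group actually emits.

def create_step_key(instance, kv):
    # Same sanitization as in pipeline_cross.py: Buildkite keys may only
    # contain [A-Za-z0-9_\-:], so the dots in instance names and kernel
    # versions become underscores.
    return f"snap-create-{instance}-{kv}".replace(".", "_")

# A snapshot-create step fanned out for (m5n.metal, linux_5.10) gets its key
# set after build_group runs, as the patch does.
create_step = {"agents": {"instance": "m5n.metal", "kv": "linux_5.10"}}
create_step["key"] = create_step_key(
    create_step["agents"]["instance"], create_step["agents"]["kv"]
)
assert create_step["key"] == "snap-create-m5n_metal-linux_5_10"

# A restore step that consumes that snapshot depends only on that one key,
# so it is not blocked behind unrelated snapshot-create steps.
restore_step = {
    "label": "snapshot-restore-src-m5n.metal-linux_5.10-dst-m6i.metal-linux_6.1",
    "agents": {"instance": "m6i.metal", "kv": "linux_6.1", "os": "al2023"},
    "depends_on": [create_step_key("m5n.metal", "linux_5.10")],
}
assert restore_step["depends_on"] == [create_step["key"]]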