From 644a353f61a7f8a0ebb1cc44982e1c24355a73e0 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Fri, 29 May 2026 18:36:51 -0700 Subject: [PATCH 1/8] Add Hyper-V passthrough validation and Windows NTTTCP support Extend device passthrough functional and network performance suites to run on Hyper-V alongside Cloud Hypervisor. Add Windows ntttcp.exe installation, execution, parsing, firewall setup, and no-sync support for passthrough host-guest tests. Improve passthrough device context resolution, Hyper-V NIC eligibility handling, and reboot readiness waits. --- .../device_passthrough/functional_tests.py | 64 +++-- .../performance/networkperf_passthrough.py | 265 ++++++++++++++---- .../libvirt/libvirt_device_pool.py | 12 +- lisa/tools/ntttcp.py | 171 ++++++++++- lisa/tools/reboot.py | 55 +++- 5 files changed, 490 insertions(+), 77 deletions(-) diff --git a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py index a67ab31be9..47f21dc733 100644 --- a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py +++ b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py @@ -1,18 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from typing import TYPE_CHECKING, Dict, Tuple, cast +import re +from typing import Any, Dict, Tuple from lisa import Environment, Node, TestCaseMetadata, TestSuite, TestSuiteMetadata from lisa.base_tools import Cat from lisa.operating_system import Windows -from lisa.sut_orchestrator import CLOUD_HYPERVISOR +from lisa.sut_orchestrator import CLOUD_HYPERVISOR, HYPERV from lisa.testsuite import TestResult, simple_requirement from lisa.tools import Lspci from lisa.util import LisaException, SkippedException -if TYPE_CHECKING: - from lisa.sut_orchestrator.libvirt.ch_platform import CloudHypervisorPlatform - from lisa.sut_orchestrator.libvirt.schema import DeviceAddressSchema +SUPPORTED_PASSTHROUGH_PLATFORMS = [CLOUD_HYPERVISOR, HYPERV] @TestSuiteMetadata( @@ -22,7 +21,7 @@ This test suite is for testing device passthrough functional tests. """, requirement=simple_requirement( - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -30,8 +29,8 @@ class DevicePassthroughFunctionalTests(TestSuite): @TestCaseMetadata( description=""" Check if passthrough device is visible to guest. - This testcase support only on CLOUD_HYPERVISOR - platform of LISA. Please refer below runbook snippet. + This testcase supports the CLOUD_HYPERVISOR and HYPERV platforms + of LISA. Please refer below runbook snippet. platform: - type: cloud-hypervisor @@ -61,7 +60,7 @@ class DevicePassthroughFunctionalTests(TestSuite): """, priority=4, requirement=simple_requirement( - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, ), ) def verify_device_passthrough_on_guest( @@ -71,9 +70,18 @@ def verify_device_passthrough_on_guest( result: TestResult, ) -> None: lspci = node.tools[Lspci] - platform = cast("CloudHypervisorPlatform", environment.platform) - # Import at runtime to avoid libvirt dependency on other platforms. 
- from lisa.sut_orchestrator.libvirt.context import get_node_context + platform = environment.platform + platform_name = platform.type_name() + + if platform_name == CLOUD_HYPERVISOR: + # Import at runtime to avoid libvirt dependency on other platforms. + from lisa.sut_orchestrator.libvirt.context import get_node_context + elif platform_name == HYPERV: + from lisa.sut_orchestrator.hyperv.context import get_node_context + else: + raise SkippedException( + f"Device passthrough validation is not supported on '{platform_name}'" + ) node_context = get_node_context(node) if not node_context.passthrough_devices: @@ -88,7 +96,7 @@ def verify_device_passthrough_on_guest( ) for host_device in passthrough_context.device_list: vendor_device_id = self._vendor_device_from_host_device( - platform, host_device + platform_name, platform, host_device ) key = ( pool_type, @@ -113,12 +121,32 @@ def verify_device_passthrough_on_guest( @staticmethod def _vendor_device_from_host_device( - platform: "CloudHypervisorPlatform", - device: "DeviceAddressSchema", + platform_name: str, + platform: Any, + device: Any, ) -> Dict[str, str]: - """Read vendor_id and device_id for an assigned host PCI device.""" - bdf = (f"{device.domain}:{device.bus}:{device.slot}.{device.function}").lower() - cat = platform.host_node.tools[Cat] + if platform_name == HYPERV: + instance_id = str(getattr(device, "instance_id", "")) + match = re.search( + r"VEN_(?P<vendor_id>[0-9A-Fa-f]{4})&" + r"DEV_(?P<device_id>[0-9A-Fa-f]{4})", + instance_id, + ) + if not match: + raise LisaException( + f"Cannot resolve vendor/device id from Hyper-V host device " + f"instance id: {instance_id}" + ) + return { + "vendor_id": match.group("vendor_id").lower(), + "device_id": match.group("device_id").lower(), + } + + cloud_hypervisor = platform + bdf = ( + f"{device.domain}:{device.bus}:{device.slot}.{device.function}" + ).lower() + cat = cloud_hypervisor.host_node.tools[Cat] vendor_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/vendor", sudo=True).strip() 
device_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/device", sudo=True).strip() # Normalize to 4-digit lowercase hex used by lspci identifiers. diff --git a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index 68c2b5980d..96463b84a3 100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import re +from decimal import Decimal from functools import partial from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, cast @@ -18,13 +19,14 @@ TestSuite, TestSuiteMetadata, node_requirement, + notifier, schema, search_space, simple_requirement, ) from lisa.environment import Environment, Node from lisa.operating_system import Windows -from lisa.sut_orchestrator import CLOUD_HYPERVISOR +from lisa.sut_orchestrator import CLOUD_HYPERVISOR, HYPERV from lisa.testsuite import TestResult from lisa.tools import Dhclient, Kill, PowerShell, Sysctl from lisa.tools.iperf3 import ( @@ -33,6 +35,8 @@ IPERF_UDP_BUFFER_LENGTHS, IPERF_UDP_CONCURRENCY, ) +from lisa.tools.ip import Ip +from lisa.tools.ntttcp import NTTTCP_TCP_CONCURRENCY, NTTTCP_UDP_CONCURRENCY, Ntttcp from lisa.util import ( LisaException, SkippedException, @@ -43,6 +47,8 @@ from lisa.util.logger import get_logger from lisa.util.parallel import run_in_parallel +SUPPORTED_PASSTHROUGH_PLATFORMS = [CLOUD_HYPERVISOR, HYPERV] + @TestSuiteMetadata( area="network passthrough", @@ -52,7 +58,7 @@ for various NIC passthrough scenarios. 
""", requirement=simple_requirement( - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -76,7 +82,7 @@ class NetworkPerformance(TestSuite): timeout=TIMEOUT, requirement=simple_requirement( min_count=1, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -88,6 +94,7 @@ def perf_tcp_iperf_passthrough_host_guest( variables: Dict[str, Any], ) -> None: server = self._get_host_as_server(variables) + self._skip_if_windows_server(server, "iperf3") # Reboot guest into fresh state; never reboot the baremetal host. cast(RemoteNode, node).reboot() @@ -114,7 +121,7 @@ def perf_tcp_iperf_passthrough_host_guest( timeout=TIMEOUT, requirement=simple_requirement( min_count=1, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -126,6 +133,7 @@ def perf_udp_iperf_passthrough_host_guest( variables: Dict[str, Any], ) -> None: server = self._get_host_as_server(variables) + self._skip_if_windows_server(server, "iperf3") # Reboot guest into fresh state; never reboot the baremetal host. cast(RemoteNode, node).reboot() @@ -154,7 +162,7 @@ def perf_udp_iperf_passthrough_host_guest( timeout=PPS_TIMEOUT, requirement=simple_requirement( min_count=1, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -166,6 +174,7 @@ def perf_tcp_single_pps_passthrough_host_guest( variables: Dict[str, Any], ) -> None: server = self._get_host_as_server(variables) + self._skip_if_windows_server(server, "netperf/sar") # Reboot guest into fresh state; never reboot the baremetal host. 
cast(RemoteNode, node).reboot() @@ -192,7 +201,7 @@ def perf_tcp_single_pps_passthrough_host_guest( timeout=PPS_TIMEOUT, requirement=simple_requirement( min_count=1, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -204,6 +213,7 @@ def perf_tcp_max_pps_passthrough_host_guest( variables: Dict[str, Any], ) -> None: server = self._get_host_as_server(variables) + self._skip_if_windows_server(server, "netperf/sar") # Reboot guest into fresh state; never reboot the baremetal host. cast(RemoteNode, node).reboot() @@ -232,7 +242,7 @@ def perf_tcp_max_pps_passthrough_host_guest( node_count=1, memory_mb=search_space.IntRange(min=8192), ), - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, ), ) def perf_tcp_ntttcp_passthrough_host_guest( @@ -251,14 +261,25 @@ def perf_tcp_ntttcp_passthrough_host_guest( node, log_path, host_node=server ) - perf_ntttcp( - test_result=result, - client=client, - server=server, - server_nic_name=self._get_host_nic_name(server), - client_nic_name=client_nic_name, - skip_server_task_max=True, # host: TasksMax reboot clears NIC DHCP state - ) + if isinstance(server.os, Windows): + self._perf_ntttcp_with_windows_server( + test_result=result, + client=client, + server=server, + client_nic_name=client_nic_name, + udp_mode=False, + test_case_name="perf_tcp_ntttcp_passthrough_host_guest", + ) + else: + perf_ntttcp( + test_result=result, + client=client, + server=server, + server_nic_name=self._get_host_nic_name(server), + client_nic_name=client_nic_name, + # host: TasksMax reboot clears NIC DHCP state + skip_server_task_max=True, + ) @TestCaseMetadata( description=""" @@ -271,7 +292,7 @@ def perf_tcp_ntttcp_passthrough_host_guest( node_count=1, memory_mb=search_space.IntRange(min=8192), ), - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, ), ) def 
perf_udp_1k_ntttcp_passthrough_host_guest( @@ -290,15 +311,26 @@ def perf_udp_1k_ntttcp_passthrough_host_guest( node, log_path, host_node=server ) - perf_ntttcp( - test_result=result, - client=client, - server=server, - server_nic_name=self._get_host_nic_name(server), - client_nic_name=client_nic_name, - udp_mode=True, - skip_server_task_max=True, # host: TasksMax reboot clears NIC DHCP state - ) + if isinstance(server.os, Windows): + self._perf_ntttcp_with_windows_server( + test_result=result, + client=client, + server=server, + client_nic_name=client_nic_name, + udp_mode=True, + test_case_name="perf_udp_1k_ntttcp_passthrough_host_guest", + ) + else: + perf_ntttcp( + test_result=result, + client=client, + server=server, + server_nic_name=self._get_host_nic_name(server), + client_nic_name=client_nic_name, + udp_mode=True, + # host: TasksMax reboot clears NIC DHCP state + skip_server_task_max=True, + ) # Network device passthrough tests between 2 guests @TestCaseMetadata( @@ -309,7 +341,7 @@ def perf_udp_1k_ntttcp_passthrough_host_guest( timeout=TIMEOUT, requirement=simple_requirement( min_count=2, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -347,7 +379,7 @@ def perf_tcp_iperf_passthrough_two_guest( timeout=TIMEOUT, requirement=simple_requirement( min_count=2, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -388,7 +420,7 @@ def perf_udp_iperf_passthrough_two_guest( timeout=PPS_TIMEOUT, requirement=simple_requirement( min_count=2, - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -427,7 +459,7 @@ def perf_tcp_single_pps_passthrough_two_guest( timeout=PPS_TIMEOUT, requirement=simple_requirement( min_count=2, - supported_platform_type=[CLOUD_HYPERVISOR], + 
supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, unsupported_os=[Windows], ), ) @@ -468,7 +500,7 @@ def perf_tcp_max_pps_passthrough_two_guest( node_count=2, memory_mb=search_space.IntRange(min=8192), ), - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, ), ) def perf_tcp_ntttcp_passthrough_two_guest( @@ -511,7 +543,7 @@ def perf_tcp_ntttcp_passthrough_two_guest( node_count=2, memory_mb=search_space.IntRange(min=8192), ), - supported_platform_type=[CLOUD_HYPERVISOR], + supported_platform_type=SUPPORTED_PASSTHROUGH_PLATFORMS, ), ) def perf_udp_1k_ntttcp_passthrough_two_guest( @@ -554,9 +586,7 @@ def _configure_passthrough_nic_for_node( log_path: Path, host_node: Optional[RemoteNode] = None, ) -> Tuple[RemoteNode, str]: - from lisa.sut_orchestrator.libvirt.context import get_node_context - - ctx = get_node_context(node) + ctx = self._get_passthrough_node_context(node) if not ctx.passthrough_devices: raise SkippedException("No passthrough devices found for node") @@ -564,14 +594,10 @@ def _configure_passthrough_nic_for_node( if not passthrough_dev.device_list: raise LisaException("passthrough_devices[0].device_list is empty") device_addr_obj = passthrough_dev.device_list[0] - domain = self._norm_hex(device_addr_obj.domain or "0000", 4) - bus = self._norm_hex(device_addr_obj.bus, 2) - slot = self._norm_hex(device_addr_obj.slot, 2) - function = self._norm_hex(device_addr_obj.function, 1) - device_bdf = f"{domain}:{bus}:{slot}.{function}" + device_bdf = self._get_device_bdf(device_addr_obj) host_nic_name = "" - if host_node is not None: + if host_node is not None and device_bdf: _h = host_node.execute( f"ls /sys/bus/pci/devices/{device_bdf}/net/ 2>/dev/null" " | head -1 || true", @@ -687,6 +713,29 @@ def _configure_passthrough_nic_for_node( return test_node, interface_name + def _get_passthrough_node_context(self, node: Node) -> Any: + try: + from lisa.sut_orchestrator.libvirt.context import get_node_context + 
+ return get_node_context(node) + except AssertionError: + from lisa.sut_orchestrator.hyperv.context import get_node_context + + return get_node_context(node) + + def _get_device_bdf(self, device_addr_obj: Any) -> str: + bus = getattr(device_addr_obj, "bus", "") + slot = getattr(device_addr_obj, "slot", "") + function = getattr(device_addr_obj, "function", "") + if not (bus and slot and function): + return "" + + domain = self._norm_hex(getattr(device_addr_obj, "domain", "") or "0000", 4) + return ( + f"{domain}:{self._norm_hex(bus, 2)}:" + f"{self._norm_hex(slot, 2)}.{self._norm_hex(function, 1)}" + ) + def _find_guest_passthrough_iface( self, node: Node, @@ -1019,19 +1068,139 @@ def _get_host_as_server(self, variables: Dict[str, Any]) -> RemoteNode: server.initialize() - if isinstance(server.os, Windows): - server.close() - server.cleanup() - raise SkippedException( - "Host/guest passthrough performance tests require a Linux " - "baremetal host; Windows baremetal hosts are not supported." - ) - # Track baremetal host for cleanup. if server not in self._baremetal_hosts: self._baremetal_hosts.append(server) return server + def _skip_if_windows_server(self, server: RemoteNode, tool_name: str) -> None: + if isinstance(server.os, Windows): + raise SkippedException( + f"Host/guest passthrough performance with {tool_name} requires " + "Linux server tooling. Use the NTTTCP passthrough cases for " + "Windows baremetal hosts." 
+ ) + + def _perf_ntttcp_with_windows_server( + self, + test_result: TestResult, + client: RemoteNode, + server: RemoteNode, + client_nic_name: str, + udp_mode: bool, + test_case_name: str, + ) -> None: + client_ntttcp = client.tools[Ntttcp] + server_ntttcp = server.tools[Ntttcp] + client_ntttcp.setup_system(udp_mode) + server_ntttcp.setup_system(udp_mode, set_task_max=False) + + client_ip = client.tools[Ip] + client_mtu = client_ip.get_mtu(client_nic_name) + connections = NTTTCP_UDP_CONCURRENCY if udp_mode else NTTTCP_TCP_CONCURRENCY + max_server_threads = 64 + + for test_thread in connections: + if test_thread < max_server_threads: + num_threads_p = test_thread + num_threads_n = 1 + else: + num_threads_p = max_server_threads + num_threads_n = int(test_thread / num_threads_p) + buffer_size = int(1024 / 1024) if udp_mode else int(65536 / 1024) + if not udp_mode and num_threads_p == 1 and num_threads_n == 1: + buffer_size = int(1048576 / 1024) + use_no_sync = True + + receiver_process = ( + client_ntttcp.run_as_server_async( + client_nic_name, + ports_count=num_threads_p, + buffer_size=buffer_size, + udp_mode=True, + dev_differentiator="", + no_sync=use_no_sync, + ) + if udp_mode + else server_ntttcp.run_as_server_async( + "", + ports_count=num_threads_p, + buffer_size=buffer_size, + server_ip=server.internal_address, + dev_differentiator="", + no_sync=use_no_sync, + ) + ) + try: + if udp_mode: + sender_result = server_ntttcp.run_as_client( + "", + client.internal_address, + threads_count=num_threads_n, + ports_count=num_threads_p, + buffer_size=buffer_size, + udp_mode=True, + no_sync=use_no_sync, + ) + else: + sender_result = client_ntttcp.run_as_client( + client_nic_name, + server.internal_address, + threads_count=num_threads_n, + ports_count=num_threads_p, + buffer_size=buffer_size, + dev_differentiator="", + no_sync=use_no_sync, + ) + receiver_result = receiver_process.wait_result(timeout=90) + finally: + server.tools[PowerShell].run_cmdlet( + "Stop-Process 
-Name ntttcp -Force -ErrorAction SilentlyContinue", + force_run=True, + fail_on_error=False, + timeout=30, + ) + + parsed_client_result = ( + server_ntttcp.create_ntttcp_result(sender_result, role="client") + if udp_mode + else client_ntttcp.create_ntttcp_result(sender_result, role="client") + ) + try: + parsed_server_result = ( + client_ntttcp.create_ntttcp_result(receiver_result) + if udp_mode + else server_ntttcp.create_ntttcp_result(receiver_result) + ) + except (AssertionError, LisaException): + client.log.debug( + "NTTTCP receiver output was not parseable; using sender " + "totals for receiver-side metrics." + ) + parsed_server_result = parsed_client_result + if udp_mode: + perf_message = client_ntttcp.create_ntttcp_udp_performance_message( + parsed_server_result, + parsed_client_result, + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) + else: + perf_message = client_ntttcp.create_ntttcp_tcp_performance_message( + parsed_server_result, + parsed_client_result, + Decimal(0), + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) + notifier.notify(perf_message) + def _get_host_nic_name(self, node: RemoteNode) -> str: ip = node.connection_info[constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS] command = "ip route show" diff --git a/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py b/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py index b15878ae39..79d26ab389 100644 --- a/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py +++ b/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py @@ -477,7 +477,9 @@ def _create_pool( devices.append(dev) def _is_nic_cable_connected(self, bdf: str) -> bool: - # True if any iface bound to this NIC reports carrier=1. + # True if any iface bound to this NIC reports carrier=1. Some + # Hyper-V-backed PCI NICs expose a carrier file but return EINVAL before + # assignment; keep them eligible because link state is unknown, not down. 
ls = self.host_node.tools[Ls] net_dir = f"/sys/bus/pci/devices/{bdf}/net" if not ls.path_exists(net_dir, sudo=True): @@ -497,6 +499,14 @@ def _is_nic_cable_connected(self, bdf: str) -> bool: ) if result.exit_code == 0 and result.stdout.strip() == "1": return True + if result.exit_code != 0 and "Invalid argument" in ( + result.stdout + result.stderr + ): + self.host_node.log.debug( + f"NIC {bdf} interface {iface} does not expose carrier state; " + "treating link as eligible for passthrough." + ) + return True return False def _get_pci_address_instance( diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index 8203060816..28d1ed49d1 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -17,6 +17,7 @@ ) from lisa.operating_system import BSD, CBLMariner, Ubuntu from lisa.tools import Firewall, Gcc, Git, Lscpu, Make, Sed +from lisa.tools.powershell import PowerShell from lisa.tools.taskset import TaskSet from lisa.util import LisaException, constants from lisa.util.process import ExecutableResult, Process @@ -166,6 +167,10 @@ def can_install(self) -> bool: def _freebsd_tool(cls) -> Optional[Type[Tool]]: return BSDNtttcp + @classmethod + def _windows_tool(cls) -> Optional[Type[Tool]]: + return WindowsNtttcp + def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: sysctl = self.node.tools[Sysctl] sys_list = self.sys_list_tcp @@ -214,6 +219,7 @@ def run_as_server_async( dev_differentiator: str = "Hypervisor callback interrupts", run_as_daemon: bool = False, udp_mode: bool = False, + no_sync: bool = False, ) -> Process: cmd = "" if server_ip: @@ -231,6 +237,8 @@ def run_as_server_async( cmd += f" --show-dev-interrupts {dev_differentiator} " if run_as_daemon: cmd += " -D " + if no_sync: + cmd += " -N " process = self.node.execute_async( f"ulimit -n 204800 && {self.pre_command}{self.command} {cmd}", @@ -241,8 +249,12 @@ def run_as_server_async( # --------------------------------------------------------- # 01:16:35 INFO: no role 
specified. use receiver role # 01:16:35 INFO: 65 threads created - # above output means ntttcp server is ready - process.wait_output("threads created") + # above output means ntttcp server is ready. In no-sync mode, the + # receiver may not print the same readiness line before clients start. + if no_sync: + time.sleep(5) + else: + process.wait_output("threads created") return process def run_as_server( @@ -258,6 +270,7 @@ def run_as_server( dev_differentiator: str = "Hypervisor callback interrupts", run_as_daemon: bool = False, udp_mode: bool = False, + no_sync: bool = False, ) -> ExecutableResult: # -rserver_ip: run as a receiver with specified server ip address # -P: Number of ports listening on receiver side [default: 16] [max: 512] @@ -286,6 +299,7 @@ def run_as_server( dev_differentiator, run_as_daemon, udp_mode, + no_sync, ) return self.wait_server_result(process) @@ -309,6 +323,7 @@ def run_as_client_async( dev_differentiator: str = "Hypervisor callback interrupts", run_as_daemon: bool = False, udp_mode: bool = False, + no_sync: bool = False, ) -> Process: cmd = ( f" -s{server_ip} -P {ports_count} -n {threads_count} -t {run_time_seconds} " @@ -321,6 +336,8 @@ def run_as_client_async( cmd += f" --show-dev-interrupts {dev_differentiator} " if run_as_daemon: cmd += " -D " + if no_sync: + cmd += " -N " process = self.node.execute_async( f"ulimit -n 204800 && {self.pre_command}{self.command} {cmd}", shell=True, @@ -342,6 +359,7 @@ def run_as_client( run_as_daemon: bool = False, udp_mode: bool = False, tolerance_seconds: int = 60, + no_sync: bool = False, ) -> ExecutableResult: # -sserver_ip: run as a sender with server ip address # -P: Number of ports listening on receiver side [default: 16] [max: 512] @@ -373,6 +391,7 @@ def run_as_client( dev_differentiator, run_as_daemon, udp_mode, + no_sync, ) return process.wait_result( expected_exit_code=0, @@ -868,6 +887,7 @@ def run_as_server_async( dev_differentiator: str = "Hypervisor callback interrupts", 
run_as_daemon: bool = False, udp_mode: bool = False, + no_sync: bool = False, ) -> Process: assert server_ip, "server ip is required for ntttcp server" self._log.debug( @@ -883,6 +903,8 @@ def run_as_server_async( cmd += " -D " if udp_mode: raise LisaException("UDP mode is not supported in FreeBSD") + if no_sync: + cmd += " -N " # Start the server and wait for the threads to be created process = self.node.execute_async( @@ -908,6 +930,7 @@ def run_as_client( run_as_daemon: bool = False, udp_mode: bool = False, tolerance_seconds: int = 60, + no_sync: bool = False, ) -> ExecutableResult: self._log.debug( "Paramers nic_name, cool_down_time_seconds, warm_up_time_seconds, " @@ -921,6 +944,8 @@ def run_as_client( raise LisaException("UDP mode is not supported in FreeBSD") if run_as_daemon: cmd += " -D " + if no_sync: + cmd += " -N " result = self.node.execute( f"ulimit -n 204800 && {self.pre_command}{self.command} {cmd}", shell=True, @@ -950,3 +975,145 @@ def create_ntttcp_result( matched_results.group("cycles_per_byte") ) return ntttcp_result + + +class WindowsNtttcp(Ntttcp): + _download_url = ( + "https://github.com/microsoft/ntttcp/releases/latest/download/ntttcp.exe" + ) + _total_mbps_pattern = re.compile( + r"(?im)^\s*TOTAL\s+(?P<throughput>[0-9]+(?:\.[0-9]+)?)\s*$" + ) + _throughput_mbps_pattern = re.compile( + r"(?is)Bytes\(MEG\).*?Throughput\(MB/s\).*?" 
+ r"=+\s+=+\s+=+\s+=+\s*\n" + r"\s*[0-9]+(?:\.[0-9]+)?\s+" + r"[0-9]+(?:\.[0-9]+)?\s+" + r"[0-9]+(?:\.[0-9]+)?\s+" + r"(?P<throughput>[0-9]+(?:\.[0-9]+)?)" + ) + + @property + def command(self) -> str: + return "ntttcp.exe" + + @property + def dependencies(self) -> List[Type[Tool]]: + return [] + + def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: + self.node.tools[PowerShell].run_cmdlet( + "Set-NetFirewallProfile -Profile Domain,Public,Private " + "-Enabled False", + fail_on_error=False, + ) + + def restore_system(self, udp_mode: bool = False) -> None: + return + + def run_as_server_async( + self, + nic_name: str, + run_time_seconds: int = 10, + ports_count: int = 64, + buffer_size: int = 64, + cool_down_time_seconds: int = 1, + warm_up_time_seconds: int = 1, + use_epoll: bool = True, + server_ip: str = "", + dev_differentiator: str = "Hypervisor callback interrupts", + run_as_daemon: bool = False, + udp_mode: bool = False, + no_sync: bool = False, + ) -> Process: + receiver_name = server_ip if server_ip else "*" + cmd = f"-r -m {ports_count},*,{receiver_name} -p 5001 -t {run_time_seconds}" + if udp_mode: + cmd += " -u" + if no_sync: + cmd += " -ns" + process = self.node.execute_async( + f"{self.command} {cmd}", + shell=True, + sudo=True, + ) + if udp_mode: + time.sleep(5) + else: + self.node.tools[PowerShell].run_cmdlet( + "for ($i = 0; $i -lt 10; $i++) { " + "if (Get-NetTCPConnection -State Listen -LocalPort 5001 " + "-ErrorAction SilentlyContinue) { exit 0 }; " + "Start-Sleep -Seconds 1 }; exit 1", + force_run=True, + timeout=15, + ) + return process + + def run_as_client( + self, + nic_name: str, + server_ip: str, + threads_count: int, + run_time_seconds: int = 10, + ports_count: int = 64, + buffer_size: int = 64, + cool_down_time_seconds: int = 1, + warm_up_time_seconds: int = 1, + dev_differentiator: str = "Hypervisor callback interrupts", + run_as_daemon: bool = False, + udp_mode: bool = False, + tolerance_seconds: int = 60, + no_sync: 
bool = False, + ) -> ExecutableResult: + cmd = f"-s -m {ports_count},*,{server_ip} -t {run_time_seconds}" + if udp_mode: + cmd += " -u" + if no_sync: + cmd += " -ns" + return self.node.execute( + f"{self.command} {cmd}", + shell=True, + sudo=True, + expected_exit_code=0, + expected_exit_code_failure_message=f"fail to run {self.command} {cmd}", + timeout=run_time_seconds + tolerance_seconds, + ) + + def create_ntttcp_result( + self, result: ExecutableResult, role: str = "server" + ) -> NtttcpResult: + matched_results = self._total_mbps_pattern.search(result.stdout) + throughput_multiplier = Decimal(1) / Decimal(1000) + if not matched_results: + matched_results = self._throughput_mbps_pattern.search(result.stdout) + throughput_multiplier = Decimal(8) / Decimal(1000) + if not matched_results: + try: + return super().create_ntttcp_result(result, role) + except AssertionError as identifier_error: + raise LisaException( + f"not found matched Windows ntttcp results: {result.stdout}" + ) from identifier_error + + ntttcp_result = NtttcpResult() + ntttcp_result.role = role + ntttcp_result.throughput_in_gbps = ( + Decimal(matched_results.group("throughput")) * throughput_multiplier + ) + return ntttcp_result + + def _initialize(self, *args: Any, **kwargs: Any) -> None: + self.pre_command = "" + self.setup_system() + + def _install(self) -> bool: + cmdlet = f""" +$destination = Join-Path $env:SystemRoot 'System32\\ntttcp.exe' +if (-not (Test-Path $destination)) {{ + [Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 + Invoke-WebRequest -Uri '{self._download_url}' -OutFile $destination +}} +""" + self.node.tools[PowerShell].run_cmdlet(cmdlet, timeout=300) + return self._check_exists() diff --git a/lisa/tools/reboot.py b/lisa/tools/reboot.py index 32246e1186..7463a19dc8 100644 --- a/lisa/tools/reboot.py +++ b/lisa/tools/reboot.py @@ -58,6 +58,33 @@ def _get_last_boot_time(self) -> datetime: last_boot_time = self.node.tools[Uptime].since_time() return 
last_boot_time + def _wait_ssh_session_stable(self, time_out: int) -> None: + timer = create_timer() + while timer.elapsed(False) < time_out: + try: + self.node.close() + self.node.execute( + "echo lisa reboot ready", + shell=True, + timeout=10, + no_info_log=True, + ).assert_exit_code() + sleep(10) + self.node.close() + self.node.execute( + "echo lisa reboot ready", + shell=True, + timeout=10, + no_info_log=True, + ).assert_exit_code() + return + except Exception as e: + self._log.debug(f"waiting for stable ssh session after reboot: {e}") + sleep(5) + raise LisaException( + f"cannot get stable ssh session after reboot in {time_out} seconds" + ) + def reboot_and_check_panic(self, log_path: Path) -> None: try: self.reboot() @@ -96,19 +123,28 @@ def reboot(self, time_out: int = 300) -> None: sleep(wait_seconds) current_delta = date.current().replace(tzinfo=None) - current_boot_time - # Get reboot execution path - # Not all distros have the same reboot execution path - command_result = self.node.execute( - "command -v reboot", shell=True, sudo=True, no_info_log=True - ) - if command_result.exit_code == 0: - self._command = command_result.stdout self._log.debug(f"rebooting with boot time: {last_boot_time}") try: # Reboot is not reliable, and sometime stuck, # like SUSE sles-15-sp1-sapcal gen1 2020.10.23. # In this case, use timeout to prevent hanging. - self.run(force_run=True, sudo=True, timeout=10) + systemctl_result = self.node.execute( + "command -v systemctl", shell=True, sudo=True, no_info_log=True + ) + if systemctl_result.exit_code == 0: + self.node.execute( + "systemctl reboot -i", + shell=True, + sudo=True, + timeout=10, + ) + else: + command_result = self.node.execute( + "command -v reboot", shell=True, sudo=True, no_info_log=True + ) + if command_result.exit_code == 0: + self._command = command_result.stdout + self.run(force_run=True, sudo=True, timeout=10) except Exception as e: # it doesn't matter to exceptions here. 
The system may reboot fast self._log.debug(f"ignorable exception on rebooting: {e}") @@ -132,6 +168,9 @@ def reboot(self, time_out: int = 300) -> None: self._log.debug(f"ignorable ssh exception: {e}") self._log.debug(f"reconnected with uptime: {current_boot_time}") if last_boot_time < current_boot_time: + self._wait_ssh_session_stable( + max(30, time_out - int(timer.elapsed(False))) + ) break if last_boot_time == current_boot_time: if connected: From c41a69dd915b335eeee03ffcdcdf549c8648bf01 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Fri, 29 May 2026 18:57:06 -0700 Subject: [PATCH 2/8] Fix Windows passthrough NTTTCP readiness Fail Windows-host passthrough NTTTCP tests when receiver output cannot be parsed instead of reusing sender totals. Add bounded listener readiness checks for Linux and Windows NTTTCP receivers, and fall back to reboot when systemctl reboot exits unsuccessfully. --- .../device_passthrough/functional_tests.py | 4 +- .../performance/networkperf_passthrough.py | 19 +++-- lisa/tools/ntttcp.py | 85 ++++++++++++++----- lisa/tools/reboot.py | 46 ++++++---- 4 files changed, 104 insertions(+), 50 deletions(-) diff --git a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py index 47f21dc733..c61df1ff13 100644 --- a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py +++ b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py @@ -143,9 +143,7 @@ def _vendor_device_from_host_device( } cloud_hypervisor = platform - bdf = ( - f"{device.domain}:{device.bus}:{device.slot}.{device.function}" - ).lower() + bdf = f"{device.domain}:{device.bus}:{device.slot}.{device.function}".lower() cat = cloud_hypervisor.host_node.tools[Cat] vendor_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/vendor", sudo=True).strip() device_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/device", sudo=True).strip() diff --git 
a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index 96463b84a3..060c65814d 100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -29,13 +29,13 @@ from lisa.sut_orchestrator import CLOUD_HYPERVISOR, HYPERV from lisa.testsuite import TestResult from lisa.tools import Dhclient, Kill, PowerShell, Sysctl +from lisa.tools.ip import Ip from lisa.tools.iperf3 import ( IPERF_TCP_BUFFER_LENGTHS, IPERF_TCP_CONCURRENCY, IPERF_UDP_BUFFER_LENGTHS, IPERF_UDP_CONCURRENCY, ) -from lisa.tools.ip import Ip from lisa.tools.ntttcp import NTTTCP_TCP_CONCURRENCY, NTTTCP_UDP_CONCURRENCY, Ntttcp from lisa.util import ( LisaException, @@ -1172,12 +1172,17 @@ def _perf_ntttcp_with_windows_server( if udp_mode else server_ntttcp.create_ntttcp_result(receiver_result) ) - except (AssertionError, LisaException): - client.log.debug( - "NTTTCP receiver output was not parseable; using sender " - "totals for receiver-side metrics." - ) - parsed_server_result = parsed_client_result + except (AssertionError, LisaException) as parse_error: + receiver_node = "Linux guest" if udp_mode else "Windows host" + raise LisaException( + f"Failed to parse NTTTCP receiver output from {receiver_node} " + f"for {test_case_name} with {test_thread} connections. " + "Verify that the receiver completed and emitted NTTTCP " + "totals before publishing performance data. " + f"Exit code: {receiver_result.exit_code}. " + f"Stdout: {receiver_result.stdout[:2000]}. 
" + f"Stderr: {receiver_result.stderr[:2000]}" + ) from parse_error if udp_mode: perf_message = client_ntttcp.create_ntttcp_udp_performance_message( parsed_server_result, diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index 28d1ed49d1..b72f09d6aa 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -19,7 +19,7 @@ from lisa.tools import Firewall, Gcc, Git, Lscpu, Make, Sed from lisa.tools.powershell import PowerShell from lisa.tools.taskset import TaskSet -from lisa.util import LisaException, constants +from lisa.util import LisaException, check_till_timeout, constants from lisa.util.process import ExecutableResult, Process from .sysctl import Sysctl @@ -245,18 +245,48 @@ def run_as_server_async( shell=True, sudo=True, ) - # NTTTCP for Linux 1.4.0 - # --------------------------------------------------------- - # 01:16:35 INFO: no role specified. use receiver role - # 01:16:35 INFO: 65 threads created - # above output means ntttcp server is ready. In no-sync mode, the - # receiver may not print the same readiness line before clients start. 
if no_sync: - time.sleep(5) + self._wait_server_port_ready(process, udp_mode) else: process.wait_output("threads created") return process + def _wait_server_port_ready( + self, + process: Process, + udp_mode: bool, + port: int = 5001, + ) -> None: + def is_ready() -> bool: + if not process.is_running(): + raise LisaException( + "ntttcp receiver exited before the listen port became ready" + ) + return self._is_server_port_open(udp_mode, port) + + protocol = "UDP" if udp_mode else "TCP" + check_till_timeout( + is_ready, + timeout_message=f"wait for ntttcp {protocol} receiver port {port} open", + timeout=30, + ) + + def _is_server_port_open(self, udp_mode: bool, port: int) -> bool: + ss_options = "-lun" if udp_mode else "-ltn" + netstat_options = "-lnu" if udp_mode else "-ltn" + result = self.node.execute( + cmd=( + f"(ss {ss_options} 2>/dev/null || " + f"netstat {netstat_options} 2>/dev/null || true) " + f"| grep -E '[:.]?{port}[[:space:]]'" + ), + shell=True, + sudo=True, + no_info_log=True, + no_error_log=True, + ) + return result.exit_code == 0 + def run_as_server( self, nic_name: str, @@ -979,7 +1009,7 @@ def create_ntttcp_result( class WindowsNtttcp(Ntttcp): _download_url = ( - "https://github.com/microsoft/ntttcp/releases/latest/download/ntttcp.exe" + "https://github.com/microsoft/ntttcp/releases/latest/download/" "ntttcp.exe" ) _total_mbps_pattern = re.compile( r"(?im)^\s*TOTAL\s+(?P[0-9]+(?:\.[0-9]+)?)\s*$" @@ -1003,8 +1033,7 @@ def dependencies(self) -> List[Type[Tool]]: def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: self.node.tools[PowerShell].run_cmdlet( - "Set-NetFirewallProfile -Profile Domain,Public,Private " - "-Enabled False", + "Set-NetFirewallProfile -Profile Domain,Public,Private " "-Enabled False", fail_on_error=False, ) @@ -1037,19 +1066,31 @@ def run_as_server_async( shell=True, sudo=True, ) - if udp_mode: - time.sleep(5) - else: - self.node.tools[PowerShell].run_cmdlet( - "for ($i = 0; $i -lt 10; $i++) { " - 
"if (Get-NetTCPConnection -State Listen -LocalPort 5001 " - "-ErrorAction SilentlyContinue) { exit 0 }; " - "Start-Sleep -Seconds 1 }; exit 1", - force_run=True, - timeout=15, - ) + self._wait_receiver_port_ready(udp_mode) return process + def _wait_receiver_port_ready( + self, + udp_mode: bool, + port: int = 5001, + timeout: int = 30, + ) -> None: + endpoint_cmdlet = ( + f"Get-NetUDPEndpoint -LocalPort {port} -ErrorAction SilentlyContinue" + if udp_mode + else f"Get-NetTCPConnection -State Listen -LocalPort {port} " + "-ErrorAction SilentlyContinue" + ) + self.node.tools[PowerShell].run_cmdlet( + "$deadline = (Get-Date).AddSeconds(" + str(timeout) + "); " + "do { " + f"if ({endpoint_cmdlet}) {{ exit 0 }}; " + "Start-Sleep -Milliseconds 500 " + "} while ((Get-Date) -lt $deadline); exit 1", + force_run=True, + timeout=timeout + 5, + ) + def run_as_client( self, nic_name: str, diff --git a/lisa/tools/reboot.py b/lisa/tools/reboot.py index 7463a19dc8..b1d105743a 100644 --- a/lisa/tools/reboot.py +++ b/lisa/tools/reboot.py @@ -60,6 +60,8 @@ def _get_last_boot_time(self) -> datetime: def _wait_ssh_session_stable(self, time_out: int) -> None: timer = create_timer() + consecutive_successes = 0 + last_error = "" while timer.elapsed(False) < time_out: try: self.node.close() @@ -69,21 +71,26 @@ def _wait_ssh_session_stable(self, time_out: int) -> None: timeout=10, no_info_log=True, ).assert_exit_code() - sleep(10) - self.node.close() - self.node.execute( - "echo lisa reboot ready", - shell=True, - timeout=10, - no_info_log=True, - ).assert_exit_code() - return + consecutive_successes += 1 + if consecutive_successes >= 2: + return except Exception as e: + consecutive_successes = 0 + last_error = str(e) self._log.debug(f"waiting for stable ssh session after reboot: {e}") - sleep(5) + sleep(2) raise LisaException( - f"cannot get stable ssh session after reboot in {time_out} seconds" + f"cannot get stable ssh session after reboot in {time_out} seconds. 
" + f"Last error: {last_error}" + ) + + def _run_reboot_command(self) -> None: + command_result = self.node.execute( + "command -v reboot", shell=True, sudo=True, no_info_log=True ) + if command_result.exit_code == 0: + self._command = command_result.stdout.strip() + self.run(force_run=True, sudo=True, timeout=10) def reboot_and_check_panic(self, log_path: Path) -> None: try: @@ -132,19 +139,22 @@ def reboot(self, time_out: int = 300) -> None: "command -v systemctl", shell=True, sudo=True, no_info_log=True ) if systemctl_result.exit_code == 0: - self.node.execute( + reboot_result = self.node.execute( "systemctl reboot -i", shell=True, sudo=True, timeout=10, ) + if reboot_result.exit_code != 0: + self._log.debug( + "systemctl reboot failed with exit code " + f"{reboot_result.exit_code}; falling back to reboot. " + f"stdout: {reboot_result.stdout}, " + f"stderr: {reboot_result.stderr}" + ) + self._run_reboot_command() else: - command_result = self.node.execute( - "command -v reboot", shell=True, sudo=True, no_info_log=True - ) - if command_result.exit_code == 0: - self._command = command_result.stdout - self.run(force_run=True, sudo=True, timeout=10) + self._run_reboot_command() except Exception as e: # it doesn't matter to exceptions here. The system may reboot fast self._log.debug(f"ignorable exception on rebooting: {e}") From 5ab1f7cc5ff375bf9cb20b55adc92682c3a2fd3f Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Sat, 30 May 2026 19:43:11 -0700 Subject: [PATCH 3/8] Add rollback for Hyper-V DDA assignment failures Track each stage of Hyper-V DDA assignment so a later failure can unwind earlier host-side changes. The assignment path now records disabled PnP devices, successfully dismounted devices, and devices already attached to the VM. On failure, rollback removes assigned devices from the VM, mounts dismounted devices back to the host, re-enables the PnP devices that were disabled, and reports rollback errors alongside the original assignment error. 
Add a focused selftest that simulates a second-device dismount failure and verifies rollback removes, mounts, and re-enables devices in the expected order. --- .../device_passthrough/functional_tests.py | 19 ++- .../performance/networkperf_passthrough.py | 51 +++--- .../hyperv/hyperv_device_pool.py | 150 ++++++++++++++++-- lisa/tools/ntttcp.py | 4 +- selftests/test_hyperv_device_pool.py | 85 ++++++++++ 5 files changed, 267 insertions(+), 42 deletions(-) create mode 100644 selftests/test_hyperv_device_pool.py diff --git a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py index c61df1ff13..6e1cdbdbf8 100644 --- a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py +++ b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py @@ -71,19 +71,32 @@ def verify_device_passthrough_on_guest( ) -> None: lspci = node.tools[Lspci] platform = environment.platform + if platform is None: + raise SkippedException( + "Device passthrough validation requires a LISA platform context. " + "Verify the runbook uses cloud-hypervisor or hyperv." + ) platform_name = platform.type_name() + node_context: Any if platform_name == CLOUD_HYPERVISOR: # Import at runtime to avoid libvirt dependency on other platforms. 
- from lisa.sut_orchestrator.libvirt.context import get_node_context + from lisa.sut_orchestrator.libvirt.context import ( + get_node_context as get_libvirt_node_context, + ) + + node_context = get_libvirt_node_context(node) elif platform_name == HYPERV: - from lisa.sut_orchestrator.hyperv.context import get_node_context + from lisa.sut_orchestrator.hyperv.context import ( + get_node_context as get_hyperv_node_context, + ) + + node_context = get_hyperv_node_context(node) else: raise SkippedException( f"Device passthrough validation is not supported on '{platform_name}'" ) - node_context = get_node_context(node) if not node_context.passthrough_devices: raise SkippedException("No passthrough devices are assigned to node") diff --git a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index 060c65814d..7b26df1aa6 100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -715,13 +715,17 @@ def _configure_passthrough_nic_for_node( def _get_passthrough_node_context(self, node: Node) -> Any: try: - from lisa.sut_orchestrator.libvirt.context import get_node_context + from lisa.sut_orchestrator.libvirt.context import ( + get_node_context as get_libvirt_node_context, + ) - return get_node_context(node) + return get_libvirt_node_context(node) except AssertionError: - from lisa.sut_orchestrator.hyperv.context import get_node_context + from lisa.sut_orchestrator.hyperv.context import ( + get_node_context as get_hyperv_node_context, + ) - return get_node_context(node) + return get_hyperv_node_context(node) def _get_device_bdf(self, device_addr_obj: Any) -> str: bus = getattr(device_addr_obj, "bus", "") @@ -1184,27 +1188,30 @@ def _perf_ntttcp_with_windows_server( f"Stderr: {receiver_result.stderr[:2000]}" ) from parse_error if udp_mode: - perf_message = client_ntttcp.create_ntttcp_udp_performance_message( 
- parsed_server_result, - parsed_client_result, - str(test_thread), - buffer_size, - test_case_name, - test_result, - client_mtu, + notifier.notify( + client_ntttcp.create_ntttcp_udp_performance_message( + parsed_server_result, + parsed_client_result, + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) ) else: - perf_message = client_ntttcp.create_ntttcp_tcp_performance_message( - parsed_server_result, - parsed_client_result, - Decimal(0), - str(test_thread), - buffer_size, - test_case_name, - test_result, - client_mtu, + notifier.notify( + client_ntttcp.create_ntttcp_tcp_performance_message( + parsed_server_result, + parsed_client_result, + Decimal(0), + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) ) - notifier.notify(perf_message) def _get_host_nic_name(self, node: RemoteNode) -> str: ip = node.connection_info[constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS] diff --git a/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py b/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py index 584e95350e..99358b40b9 100644 --- a/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py +++ b/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py @@ -408,29 +408,149 @@ def _assign_devices_to_vm( ) -> None: # Assign the devices to the VM escaped_vm_name = vm_name.replace("'", "''") - config_commands: List[str] = [] - for device in devices: - escaped_instance_id = device.instance_id.replace("'", "''") + disabled_devices: List[DeviceAddressSchema] = [] + dismounted_devices: List[DeviceAddressSchema] = [] + assigned_devices: List[DeviceAddressSchema] = [] + powershell = self._server.tools[PowerShell] + + try: + for device in devices: + escaped_instance_id = device.instance_id.replace("'", "''") + escaped_location_path = device.location_path.replace("'", "''") + powershell.run_cmdlet( + cmdlet=( + f"Disable-PnpDevice -InstanceId '{escaped_instance_id}' " + "-Confirm:$false" + ), + force_run=True, + ) + 
disabled_devices.append(device) + + powershell.run_cmdlet( + cmdlet=( + f"Dismount-VMHostAssignableDevice -Force " + f"-LocationPath '{escaped_location_path}'" + ), + force_run=True, + ) + dismounted_devices.append(device) + + powershell.run_cmdlet( + cmdlet=( + f"Add-VMAssignableDevice " + f"-LocationPath '{escaped_location_path}' " + f"-VMName '{escaped_vm_name}'" + ), + force_run=True, + ) + assigned_devices.append(device) + except LisaException as err: + rollback_errors = self._rollback_dda_assignment( + vm_name=vm_name, + disabled_devices=disabled_devices, + dismounted_devices=dismounted_devices, + assigned_devices=assigned_devices, + ) + if rollback_errors: + raise LisaException( + "Failed to assign Hyper-V DDA device(s) to VM " + f"'{vm_name}': {err}. Rollback also failed: " + f"{'; '.join(rollback_errors)}" + ) from err + raise + + def _rollback_dda_assignment( + self, + vm_name: str, + disabled_devices: List[DeviceAddressSchema], + dismounted_devices: List[DeviceAddressSchema], + assigned_devices: List[DeviceAddressSchema], + ) -> List[str]: + powershell = self._server.tools[PowerShell] + escaped_vm_name = vm_name.replace("'", "''") + rollback_errors: List[str] = [] + + self.log.info(f"Rolling back Hyper-V DDA assignment for VM '{vm_name}'") + + for device in reversed(assigned_devices): escaped_location_path = device.location_path.replace("'", "''") - config_commands.append( - f"Disable-PnpDevice -InstanceId '{escaped_instance_id}' " - "-Confirm:$false" + error = self._run_dda_rollback_cmdlet( + powershell=powershell, + description=( + f"Remove DDA device '{device.location_path}' from VM '{vm_name}'" + ), + cmdlet=( + f"Remove-VMAssignableDevice " + f"-LocationPath '{escaped_location_path}' " + f"-VMName '{escaped_vm_name}'" + ), ) - config_commands.append( - f"Dismount-VMHostAssignableDevice -Force " - f"-LocationPath '{escaped_location_path}'" + if error: + rollback_errors.append(error) + + for device in reversed(dismounted_devices): + 
escaped_location_path = device.location_path.replace("'", "''") + error = self._run_dda_rollback_cmdlet( + powershell=powershell, + description=f"Mount DDA device '{device.location_path}' on host", + cmdlet=( + f"Mount-VMHostAssignableDevice " + f"-LocationPath '{escaped_location_path}'" + ), ) - config_commands.append( - f"Add-VMAssignableDevice -LocationPath '{escaped_location_path}' " - f"-VMName '{escaped_vm_name}'" + if error: + rollback_errors.append(error) + + for device in reversed(disabled_devices): + escaped_instance_id = device.instance_id.replace("'", "''") + error = self._run_dda_rollback_cmdlet( + powershell=powershell, + description=( + f"Enable PnP device '{device.instance_id}' for location path " + f"'{device.location_path}'" + ), + cmdlet=( + f"Enable-PnpDevice -InstanceId '{escaped_instance_id}' " + "-Confirm:$false" + ), ) + if error: + rollback_errors.append(error) + continue - powershell = self._server.tools[PowerShell] - for cmd in config_commands: + try: + self._wait_for_pnp_device_enabled( + device.instance_id, + device.location_path, + ) + except LisaException as err: + rollback_errors.append( + f"PnP device '{device.instance_id}' for location path " + f"'{device.location_path}' was not enabled during rollback: " + f"{err}" + ) + + for error in rollback_errors: + self.log.warning(error) + + return rollback_errors + + def _run_dda_rollback_cmdlet( + self, + powershell: PowerShell, + description: str, + cmdlet: str, + ) -> Optional[str]: + self.log.info(description) + try: powershell.run_cmdlet( - cmdlet=cmd, + cmdlet=cmdlet, force_run=True, ) + except LisaException as err: + return f"{description} failed: {err}" + + return None def _set_device_passthrough_node_context( self, diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index b72f09d6aa..28d8abd2e4 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -1009,7 +1009,7 @@ def create_ntttcp_result( class WindowsNtttcp(Ntttcp): _download_url = ( - 
"https://github.com/microsoft/ntttcp/releases/latest/download/" "ntttcp.exe" + "https://github.com/microsoft/ntttcp/releases/latest/download/ntttcp.exe" ) _total_mbps_pattern = re.compile( r"(?im)^\s*TOTAL\s+(?P[0-9]+(?:\.[0-9]+)?)\s*$" @@ -1033,7 +1033,7 @@ def dependencies(self) -> List[Type[Tool]]: def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: self.node.tools[PowerShell].run_cmdlet( - "Set-NetFirewallProfile -Profile Domain,Public,Private " "-Enabled False", + "Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False", fail_on_error=False, ) diff --git a/selftests/test_hyperv_device_pool.py b/selftests/test_hyperv_device_pool.py new file mode 100644 index 0000000000..228be9b14d --- /dev/null +++ b/selftests/test_hyperv_device_pool.py @@ -0,0 +1,85 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from types import SimpleNamespace +from typing import Any, List, cast +from unittest import TestCase +from unittest.mock import MagicMock, patch + +from lisa.sut_orchestrator.hyperv.hyperv_device_pool import HyperVDevicePool +from lisa.sut_orchestrator.hyperv.schema import ( + DeviceAddressSchema, + HypervPlatformSchema, +) +from lisa.tools import PowerShell +from lisa.util import LisaException + + +class HyperVDevicePoolTestCase(TestCase): + def test_assign_devices_rolls_back_on_dismount_failure(self) -> None: + first_device = DeviceAddressSchema( + instance_id="PCI\\FIRST", + location_path="PCIROOT(1)#PCI(0000)", + ) + second_device = DeviceAddressSchema( + instance_id="PCI\\SECOND", + location_path="PCIROOT(1)#PCI(0001)", + ) + commands: List[str] = [] + + def run_cmdlet(cmdlet: str, **_: Any) -> str: + commands.append(cmdlet) + if ( + "Dismount-VMHostAssignableDevice" in cmdlet + and second_device.location_path in cmdlet + ): + raise LisaException("pcip failed") + return "" + + powershell = SimpleNamespace(run_cmdlet=MagicMock(side_effect=run_cmdlet)) + node = 
SimpleNamespace(tools={PowerShell: powershell}) + pool = HyperVDevicePool( + node=cast(Any, node), + runbook=HypervPlatformSchema(), + log=MagicMock(), + ) + + with patch.object(pool, "_wait_for_pnp_device_enabled") as wait_enabled: + with self.assertRaises(LisaException): + pool._assign_devices_to_vm( + vm_name="vm1", + devices=[first_device, second_device], + ) + + remove_index = next( + index + for index, command in enumerate(commands) + if "Remove-VMAssignableDevice" in command + and first_device.location_path in command + ) + mount_index = next( + index + for index, command in enumerate(commands) + if "Mount-VMHostAssignableDevice" in command + and first_device.location_path in command + ) + enable_second_index = next( + index + for index, command in enumerate(commands) + if "Enable-PnpDevice" in command and second_device.instance_id in command + ) + enable_first_index = next( + index + for index, command in enumerate(commands) + if "Enable-PnpDevice" in command and first_device.instance_id in command + ) + + self.assertLess(remove_index, mount_index) + self.assertLess(mount_index, enable_second_index) + self.assertLess(enable_second_index, enable_first_index) + wait_enabled.assert_any_call( + second_device.instance_id, second_device.location_path + ) + wait_enabled.assert_any_call( + first_device.instance_id, first_device.location_path + ) From 24753daa5fcdceff0893b1a4e4e4c6667ba97eaf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 1 Jun 2026 18:06:55 +0000 Subject: [PATCH 4/8] Fix PR review thread findings in passthrough and reboot logic --- .../performance/networkperf_passthrough.py | 12 +++++++-- lisa/tools/ntttcp.py | 26 ++++++++++++++++++- lisa/tools/reboot.py | 6 +++-- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index 7b26df1aa6..ea38b7b92e 
100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -48,6 +48,8 @@ from lisa.util.parallel import run_in_parallel SUPPORTED_PASSTHROUGH_PLATFORMS = [CLOUD_HYPERVISOR, HYPERV] +WINDOWS_NTTTCP_MAX_SERVER_THREADS = 64 +WINDOWS_NTTTCP_RECEIVER_WAIT_TIMEOUT = 90 @TestSuiteMetadata( @@ -1079,6 +1081,10 @@ def _get_host_as_server(self, variables: Dict[str, Any]) -> RemoteNode: def _skip_if_windows_server(self, server: RemoteNode, tool_name: str) -> None: if isinstance(server.os, Windows): + if server in self._baremetal_hosts: + server.close() + server.cleanup() + self._baremetal_hosts.remove(server) raise SkippedException( f"Host/guest passthrough performance with {tool_name} requires " "Linux server tooling. Use the NTTTCP passthrough cases for " @@ -1102,7 +1108,7 @@ def _perf_ntttcp_with_windows_server( client_ip = client.tools[Ip] client_mtu = client_ip.get_mtu(client_nic_name) connections = NTTTCP_UDP_CONCURRENCY if udp_mode else NTTTCP_TCP_CONCURRENCY - max_server_threads = 64 + max_server_threads = WINDOWS_NTTTCP_MAX_SERVER_THREADS for test_thread in connections: if test_thread < max_server_threads: @@ -1156,7 +1162,9 @@ def _perf_ntttcp_with_windows_server( dev_differentiator="", no_sync=use_no_sync, ) - receiver_result = receiver_process.wait_result(timeout=90) + receiver_result = receiver_process.wait_result( + timeout=WINDOWS_NTTTCP_RECEIVER_WAIT_TIMEOUT + ) finally: server.tools[PowerShell].run_cmdlet( "Stop-Process -Name ntttcp -Force -ErrorAction SilentlyContinue", diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index 28d8abd2e4..271a99c2b7 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -1032,13 +1032,36 @@ def dependencies(self) -> List[Type[Tool]]: return [] def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: + firewall_profiles = self.node.tools[PowerShell].run_cmdlet( + ( + "Get-NetFirewallProfile 
-Profile Domain,Public,Private " + "| Select-Object Name,Enabled" + ), + output_json=True, + fail_on_error=False, + ) + if isinstance(firewall_profiles, dict): + firewall_profiles = [firewall_profiles] + self._firewall_profile_states = { + profile["Name"]: bool(profile["Enabled"]) + for profile in firewall_profiles or [] + if "Name" in profile and "Enabled" in profile + } + self.node.tools[PowerShell].run_cmdlet( "Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False", fail_on_error=False, ) def restore_system(self, udp_mode: bool = False) -> None: - return + if self._firewall_profile_states: + for profile_name, enabled in self._firewall_profile_states.items(): + enabled_value = "True" if enabled else "False" + self.node.tools[PowerShell].run_cmdlet( + f"Set-NetFirewallProfile -Profile {profile_name} " + f"-Enabled {enabled_value}", + fail_on_error=False, + ) def run_as_server_async( self, @@ -1145,6 +1168,7 @@ def create_ntttcp_result( return ntttcp_result def _initialize(self, *args: Any, **kwargs: Any) -> None: + self._firewall_profile_states: Dict[str, bool] = {} self.pre_command = "" self.setup_system() diff --git a/lisa/tools/reboot.py b/lisa/tools/reboot.py index b1d105743a..34bfd2c324 100644 --- a/lisa/tools/reboot.py +++ b/lisa/tools/reboot.py @@ -178,9 +178,11 @@ def reboot(self, time_out: int = 300) -> None: self._log.debug(f"ignorable ssh exception: {e}") self._log.debug(f"reconnected with uptime: {current_boot_time}") if last_boot_time < current_boot_time: - self._wait_ssh_session_stable( - max(30, time_out - int(timer.elapsed(False))) + remaining_stability_wait = max( + 0, time_out - int(timer.elapsed(False)) ) + if remaining_stability_wait > 0: + self._wait_ssh_session_stable(remaining_stability_wait) break if last_boot_time == current_boot_time: if connected: From 352bd770984a0b712eda01d259c43a1f2c5f79f4 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Sun, 7 Jun 2026 21:56:55 -0700 Subject: [PATCH 5/8] Address passthrough review type 
comments Type the device passthrough host-device helper with the common Platform base and the existing Hyper-V/libvirt DeviceAddressSchema classes instead of Any. Derive the platform name inside the helper from the platform object. Also resolve the branch-local mypy no-redef issue in WindowsNtttcp by moving the firewall state annotation to the class level. --- .../device_passthrough/functional_tests.py | 42 +++++++++++++++---- lisa/tools/ntttcp.py | 3 +- lisa/tools/reboot.py | 4 +- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py index 6e1cdbdbf8..6a3c536a5f 100644 --- a/lisa/microsoft/testsuites/device_passthrough/functional_tests.py +++ b/lisa/microsoft/testsuites/device_passthrough/functional_tests.py @@ -1,16 +1,31 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import re -from typing import Any, Dict, Tuple +from typing import TYPE_CHECKING, Any, Dict, Tuple, Union, cast from lisa import Environment, Node, TestCaseMetadata, TestSuite, TestSuiteMetadata from lisa.base_tools import Cat from lisa.operating_system import Windows +from lisa.platform_ import Platform from lisa.sut_orchestrator import CLOUD_HYPERVISOR, HYPERV from lisa.testsuite import TestResult, simple_requirement from lisa.tools import Lspci from lisa.util import LisaException, SkippedException +if TYPE_CHECKING: + from lisa.sut_orchestrator.hyperv.schema import ( + DeviceAddressSchema as HypervDeviceAddressSchema, + ) + from lisa.sut_orchestrator.libvirt.ch_platform import CloudHypervisorPlatform + from lisa.sut_orchestrator.libvirt.schema import ( + DeviceAddressSchema as LibvirtDeviceAddressSchema, + ) + + HostDeviceAddressSchema = Union[ + HypervDeviceAddressSchema, + LibvirtDeviceAddressSchema, + ] + SUPPORTED_PASSTHROUGH_PLATFORMS = [CLOUD_HYPERVISOR, HYPERV] @@ -109,7 +124,7 @@ def verify_device_passthrough_on_guest( ) 
for host_device in passthrough_context.device_list: vendor_device_id = self._vendor_device_from_host_device( - platform_name, platform, host_device + platform, host_device ) key = ( pool_type, @@ -134,12 +149,13 @@ def verify_device_passthrough_on_guest( @staticmethod def _vendor_device_from_host_device( - platform_name: str, - platform: Any, - device: Any, + platform: Platform, + device: "HostDeviceAddressSchema", ) -> Dict[str, str]: + platform_name = platform.type_name() if platform_name == HYPERV: - instance_id = str(getattr(device, "instance_id", "")) + hyperv_device = cast("HypervDeviceAddressSchema", device) + instance_id = hyperv_device.instance_id match = re.search( r"VEN_(?P<vendor_id>[0-9A-Fa-f]{4})&" r"DEV_(?P<device_id>[0-9A-Fa-f]{4})", @@ -155,8 +171,18 @@ def _vendor_device_from_host_device( "device_id": match.group("device_id").lower(), } - cloud_hypervisor = platform - bdf = f"{device.domain}:{device.bus}:{device.slot}.{device.function}".lower() + if platform_name != CLOUD_HYPERVISOR: + raise LisaException( + f"Device passthrough host device lookup is not supported on " + f"'{platform_name}'. Use a cloud-hypervisor or hyperv platform." 
+ ) + + cloud_hypervisor = cast("CloudHypervisorPlatform", platform) + libvirt_device = cast("LibvirtDeviceAddressSchema", device) + bdf = ( + f"{libvirt_device.domain}:{libvirt_device.bus}:" + f"{libvirt_device.slot}.{libvirt_device.function}" + ).lower() cat = cloud_hypervisor.host_node.tools[Cat] vendor_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/vendor", sudo=True).strip() device_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/device", sudo=True).strip() diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index 271a99c2b7..66830b53d6 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -1008,6 +1008,7 @@ def create_ntttcp_result( class WindowsNtttcp(Ntttcp): + _firewall_profile_states: Dict[str, bool] _download_url = ( "https://github.com/microsoft/ntttcp/releases/latest/download/ntttcp.exe" ) @@ -1168,7 +1169,7 @@ def create_ntttcp_result( return ntttcp_result def _initialize(self, *args: Any, **kwargs: Any) -> None: - self._firewall_profile_states: Dict[str, bool] = {} + self._firewall_profile_states = {} self.pre_command = "" self.setup_system() diff --git a/lisa/tools/reboot.py b/lisa/tools/reboot.py index 34bfd2c324..3cc0efcc0c 100644 --- a/lisa/tools/reboot.py +++ b/lisa/tools/reboot.py @@ -178,9 +178,7 @@ def reboot(self, time_out: int = 300) -> None: self._log.debug(f"ignorable ssh exception: {e}") self._log.debug(f"reconnected with uptime: {current_boot_time}") if last_boot_time < current_boot_time: - remaining_stability_wait = max( - 0, time_out - int(timer.elapsed(False)) - ) + remaining_stability_wait = max(0, time_out - int(timer.elapsed(False))) if remaining_stability_wait > 0: self._wait_ssh_session_stable(remaining_stability_wait) break From 5c6ab555e6e969543378d16fa8a0ce3d1b2acca6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Jun 2026 05:17:33 +0000 Subject: [PATCH 6/8] Fix review comments: TCP port wait, firewall state guard, buffer_size in command, restore_system, 
clear buffer constants --- .../performance/networkperf_passthrough.py | 204 +++++++++--------- lisa/tools/ntttcp.py | 54 +++-- lisa/tools/reboot.py | 16 +- 3 files changed, 157 insertions(+), 117 deletions(-) diff --git a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index ea38b7b92e..5b28cd07dc 100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -1105,121 +1105,127 @@ def _perf_ntttcp_with_windows_server( client_ntttcp.setup_system(udp_mode) server_ntttcp.setup_system(udp_mode, set_task_max=False) - client_ip = client.tools[Ip] - client_mtu = client_ip.get_mtu(client_nic_name) - connections = NTTTCP_UDP_CONCURRENCY if udp_mode else NTTTCP_TCP_CONCURRENCY - max_server_threads = WINDOWS_NTTTCP_MAX_SERVER_THREADS - - for test_thread in connections: - if test_thread < max_server_threads: - num_threads_p = test_thread - num_threads_n = 1 - else: - num_threads_p = max_server_threads - num_threads_n = int(test_thread / num_threads_p) - buffer_size = int(1024 / 1024) if udp_mode else int(65536 / 1024) - if not udp_mode and num_threads_p == 1 and num_threads_n == 1: - buffer_size = int(1048576 / 1024) - use_no_sync = True - - receiver_process = ( - client_ntttcp.run_as_server_async( - client_nic_name, - ports_count=num_threads_p, - buffer_size=buffer_size, - udp_mode=True, - dev_differentiator="", - no_sync=use_no_sync, - ) - if udp_mode - else server_ntttcp.run_as_server_async( - "", - ports_count=num_threads_p, - buffer_size=buffer_size, - server_ip=server.internal_address, - dev_differentiator="", - no_sync=use_no_sync, - ) - ) - try: - if udp_mode: - sender_result = server_ntttcp.run_as_client( - "", - client.internal_address, - threads_count=num_threads_n, + try: + client_ip = client.tools[Ip] + client_mtu = client_ip.get_mtu(client_nic_name) + connections = NTTTCP_UDP_CONCURRENCY if 
udp_mode else NTTTCP_TCP_CONCURRENCY + max_server_threads = WINDOWS_NTTTCP_MAX_SERVER_THREADS + + for test_thread in connections: + if test_thread < max_server_threads: + num_threads_p = test_thread + num_threads_n = 1 + else: + num_threads_p = max_server_threads + num_threads_n = int(test_thread / num_threads_p) + # UDP uses a 1 KB buffer; TCP uses 64 KB, except for single-stream + # which uses 1 MB to maximize per-connection throughput. + buffer_size = 1 if udp_mode else 64 # KB + if not udp_mode and num_threads_p == 1 and num_threads_n == 1: + buffer_size = 1024 # 1 MB for single-stream TCP + use_no_sync = True + + receiver_process = ( + client_ntttcp.run_as_server_async( + client_nic_name, ports_count=num_threads_p, buffer_size=buffer_size, udp_mode=True, + dev_differentiator="", no_sync=use_no_sync, ) - else: - sender_result = client_ntttcp.run_as_client( - client_nic_name, - server.internal_address, - threads_count=num_threads_n, + if udp_mode + else server_ntttcp.run_as_server_async( + "", ports_count=num_threads_p, buffer_size=buffer_size, + server_ip=server.internal_address, dev_differentiator="", no_sync=use_no_sync, ) - receiver_result = receiver_process.wait_result( - timeout=WINDOWS_NTTTCP_RECEIVER_WAIT_TIMEOUT - ) - finally: - server.tools[PowerShell].run_cmdlet( - "Stop-Process -Name ntttcp -Force -ErrorAction SilentlyContinue", - force_run=True, - fail_on_error=False, - timeout=30, ) + try: + if udp_mode: + sender_result = server_ntttcp.run_as_client( + "", + client.internal_address, + threads_count=num_threads_n, + ports_count=num_threads_p, + buffer_size=buffer_size, + udp_mode=True, + no_sync=use_no_sync, + ) + else: + sender_result = client_ntttcp.run_as_client( + client_nic_name, + server.internal_address, + threads_count=num_threads_n, + ports_count=num_threads_p, + buffer_size=buffer_size, + dev_differentiator="", + no_sync=use_no_sync, + ) + receiver_result = receiver_process.wait_result( + timeout=WINDOWS_NTTTCP_RECEIVER_WAIT_TIMEOUT + ) + 
finally: + server.tools[PowerShell].run_cmdlet( + "Stop-Process -Name ntttcp -Force -ErrorAction SilentlyContinue", + force_run=True, + fail_on_error=False, + timeout=30, + ) - parsed_client_result = ( - server_ntttcp.create_ntttcp_result(sender_result, role="client") - if udp_mode - else client_ntttcp.create_ntttcp_result(sender_result, role="client") - ) - try: - parsed_server_result = ( - client_ntttcp.create_ntttcp_result(receiver_result) + parsed_client_result = ( + server_ntttcp.create_ntttcp_result(sender_result, role="client") if udp_mode - else server_ntttcp.create_ntttcp_result(receiver_result) + else client_ntttcp.create_ntttcp_result(sender_result, role="client") ) - except (AssertionError, LisaException) as parse_error: - receiver_node = "Linux guest" if udp_mode else "Windows host" - raise LisaException( - f"Failed to parse NTTTCP receiver output from {receiver_node} " - f"for {test_case_name} with {test_thread} connections. " - "Verify that the receiver completed and emitted NTTTCP " - "totals before publishing performance data. " - f"Exit code: {receiver_result.exit_code}. " - f"Stdout: {receiver_result.stdout[:2000]}. 
" - f"Stderr: {receiver_result.stderr[:2000]}" - ) from parse_error - if udp_mode: - notifier.notify( - client_ntttcp.create_ntttcp_udp_performance_message( - parsed_server_result, - parsed_client_result, - str(test_thread), - buffer_size, - test_case_name, - test_result, - client_mtu, + try: + parsed_server_result = ( + client_ntttcp.create_ntttcp_result(receiver_result) + if udp_mode + else server_ntttcp.create_ntttcp_result(receiver_result) ) - ) - else: - notifier.notify( - client_ntttcp.create_ntttcp_tcp_performance_message( - parsed_server_result, - parsed_client_result, - Decimal(0), - str(test_thread), - buffer_size, - test_case_name, - test_result, - client_mtu, + except (AssertionError, LisaException) as parse_error: + receiver_node = "Linux guest" if udp_mode else "Windows host" + raise LisaException( + f"Failed to parse NTTTCP receiver output from {receiver_node} " + f"for {test_case_name} with {test_thread} connections. " + "Verify that the receiver completed and emitted NTTTCP " + "totals before publishing performance data. " + f"Exit code: {receiver_result.exit_code}. " + f"Stdout: {receiver_result.stdout[:2000]}. 
" + f"Stderr: {receiver_result.stderr[:2000]}" + ) from parse_error + if udp_mode: + notifier.notify( + client_ntttcp.create_ntttcp_udp_performance_message( + parsed_server_result, + parsed_client_result, + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) ) - ) + else: + notifier.notify( + client_ntttcp.create_ntttcp_tcp_performance_message( + parsed_server_result, + parsed_client_result, + Decimal(0), + str(test_thread), + buffer_size, + test_case_name, + test_result, + client_mtu, + ) + ) + finally: + client_ntttcp.restore_system(udp_mode) + server_ntttcp.restore_system(udp_mode) def _get_host_nic_name(self, node: RemoteNode) -> str: ip = node.connection_info[constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS] diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index 66830b53d6..038e6a52e3 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -1033,21 +1033,25 @@ def dependencies(self) -> List[Type[Tool]]: return [] def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None: - firewall_profiles = self.node.tools[PowerShell].run_cmdlet( - ( - "Get-NetFirewallProfile -Profile Domain,Public,Private " - "| Select-Object Name,Enabled" - ), - output_json=True, - fail_on_error=False, - ) - if isinstance(firewall_profiles, dict): - firewall_profiles = [firewall_profiles] - self._firewall_profile_states = { - profile["Name"]: bool(profile["Enabled"]) - for profile in firewall_profiles or [] - if "Name" in profile and "Enabled" in profile - } + # Only snapshot the original firewall state on the first call so that + # subsequent calls (e.g. explicit caller + _initialize) do not overwrite + # the snapshot with already-disabled profiles. 
+ if not self._firewall_profile_states: + firewall_profiles = self.node.tools[PowerShell].run_cmdlet( + ( + "Get-NetFirewallProfile -Profile Domain,Public,Private " + "| Select-Object Name,Enabled" + ), + output_json=True, + fail_on_error=False, + ) + if isinstance(firewall_profiles, dict): + firewall_profiles = [firewall_profiles] + self._firewall_profile_states = { + profile["Name"]: bool(profile["Enabled"]) + for profile in firewall_profiles or [] + if "Name" in profile and "Enabled" in profile + } self.node.tools[PowerShell].run_cmdlet( "Set-NetFirewallProfile -Profile Domain,Public,Private -Enabled False", @@ -1079,8 +1083,16 @@ def run_as_server_async( udp_mode: bool = False, no_sync: bool = False, ) -> Process: + self._log.debug( + "Parameters nic_name, cool_down_time_seconds, warm_up_time_seconds, " + "use_epoll and dev_differentiator are not supported in Windows ntttcp" + ) receiver_name = server_ip if server_ip else "*" - cmd = f"-r -m {ports_count},*,{receiver_name} -p 5001 -t {run_time_seconds}" + # buffer_size is in KB; Windows ntttcp -b expects bytes + cmd = ( + f"-r -m {ports_count},*,{receiver_name} -p 5001" + f" -t {run_time_seconds} -b {buffer_size * 1024}" + ) if udp_mode: cmd += " -u" if no_sync: @@ -1131,7 +1143,15 @@ def run_as_client( tolerance_seconds: int = 60, no_sync: bool = False, ) -> ExecutableResult: - cmd = f"-s -m {ports_count},*,{server_ip} -t {run_time_seconds}" + self._log.debug( + "Parameters nic_name, cool_down_time_seconds, warm_up_time_seconds, " + "use_epoll and dev_differentiator are not supported in Windows ntttcp" + ) + # buffer_size is in KB; Windows ntttcp -b expects bytes + cmd = ( + f"-s -m {ports_count},*,{server_ip}" + f" -t {run_time_seconds} -b {buffer_size * 1024}" + ) if udp_mode: cmd += " -u" if no_sync: diff --git a/lisa/tools/reboot.py b/lisa/tools/reboot.py index 3cc0efcc0c..621a0cd07c 100644 --- a/lisa/tools/reboot.py +++ b/lisa/tools/reboot.py @@ -59,9 +59,12 @@ def _get_last_boot_time(self) -> datetime: 
return last_boot_time def _wait_ssh_session_stable(self, time_out: int) -> None: + from lisa.node import RemoteNode + timer = create_timer() consecutive_successes = 0 last_error = "" + remote_node = cast(RemoteNode, self.node) while timer.elapsed(False) < time_out: try: self.node.close() @@ -78,7 +81,18 @@ def _wait_ssh_session_stable(self, time_out: int) -> None: consecutive_successes = 0 last_error = str(e) self._log.debug(f"waiting for stable ssh session after reboot: {e}") - sleep(2) + remaining = max(0, int(time_out - timer.elapsed(False))) + if remaining > 0: + wait_tcp_port_ready( + address=remote_node.connection_info[ + constants.ENVIRONMENTS_NODES_REMOTE_ADDRESS + ], + port=remote_node.connection_info[ + constants.ENVIRONMENTS_NODES_REMOTE_PORT + ], + log=self._log, + timeout=min(2, remaining), + ) raise LisaException( f"cannot get stable ssh session after reboot in {time_out} seconds. " f"Last error: {last_error}" From 786657a10fa646918833f05a67d7e297418f3a4c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Jun 2026 04:13:01 +0000 Subject: [PATCH 7/8] Fix flake8 E501 and BLK100 errors in networkperf_passthrough.py --- .../testsuites/performance/networkperf_passthrough.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py index 5b28cd07dc..b4e1ef841b 100644 --- a/lisa/microsoft/testsuites/performance/networkperf_passthrough.py +++ b/lisa/microsoft/testsuites/performance/networkperf_passthrough.py @@ -1170,7 +1170,8 @@ def _perf_ntttcp_with_windows_server( ) finally: server.tools[PowerShell].run_cmdlet( - "Stop-Process -Name ntttcp -Force -ErrorAction SilentlyContinue", + "Stop-Process -Name ntttcp -Force" + " -ErrorAction SilentlyContinue", force_run=True, fail_on_error=False, timeout=30, @@ -1179,7 +1180,9 @@ def 
_perf_ntttcp_with_windows_server( parsed_client_result = ( server_ntttcp.create_ntttcp_result(sender_result, role="client") if udp_mode - else client_ntttcp.create_ntttcp_result(sender_result, role="client") + else client_ntttcp.create_ntttcp_result( + sender_result, role="client" + ) ) try: parsed_server_result = ( From e683d33ddeb2a0edee14eb25357bd4f600088ac9 Mon Sep 17 00:00:00 2001 From: Vivek Yadav Date: Wed, 10 Jun 2026 20:04:59 -0700 Subject: [PATCH 8/8] Fix Windows NTTTCP no_debug_log override --- lisa/tools/ntttcp.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lisa/tools/ntttcp.py b/lisa/tools/ntttcp.py index edc8a4ee94..3274eaae6e 100644 --- a/lisa/tools/ntttcp.py +++ b/lisa/tools/ntttcp.py @@ -1094,6 +1094,7 @@ def run_as_server_async( run_as_daemon: bool = False, udp_mode: bool = False, no_sync: bool = False, + no_debug_log: bool = False, ) -> Process: self._log.debug( "Parameters nic_name, cool_down_time_seconds, warm_up_time_seconds, " @@ -1113,6 +1114,7 @@ def run_as_server_async( f"{self.command} {cmd}", shell=True, sudo=True, + no_debug_log=no_debug_log, ) self._wait_receiver_port_ready(udp_mode) return process @@ -1154,6 +1156,7 @@ def run_as_client( udp_mode: bool = False, tolerance_seconds: int = 60, no_sync: bool = False, + no_debug_log: bool = False, ) -> ExecutableResult: self._log.debug( "Parameters nic_name, cool_down_time_seconds, warm_up_time_seconds, " @@ -1175,6 +1178,7 @@ def run_as_client( expected_exit_code=0, expected_exit_code_failure_message=f"fail to run {self.command} {cmd}", timeout=run_time_seconds + tolerance_seconds, + no_debug_log=no_debug_log, ) def create_ntttcp_result(