Skip to content

Commit c41a69d

Browse files
committed
Fix Windows passthrough NTTTCP readiness
Fail Windows-host passthrough NTTTCP tests when the receiver output cannot be parsed, instead of reusing the sender totals for receiver-side metrics. Add bounded listener readiness checks for Linux and Windows NTTTCP receivers, and fall back to the plain `reboot` command when `systemctl reboot` exits unsuccessfully.
1 parent 644a353 commit c41a69d

4 files changed

Lines changed: 104 additions & 50 deletions

File tree

lisa/microsoft/testsuites/device_passthrough/functional_tests.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -143,9 +143,7 @@ def _vendor_device_from_host_device(
143143
}
144144

145145
cloud_hypervisor = platform
146-
bdf = (
147-
f"{device.domain}:{device.bus}:{device.slot}.{device.function}"
148-
).lower()
146+
bdf = f"{device.domain}:{device.bus}:{device.slot}.{device.function}".lower()
149147
cat = cloud_hypervisor.host_node.tools[Cat]
150148
vendor_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/vendor", sudo=True).strip()
151149
device_raw = cat.read(f"/sys/bus/pci/devices/{bdf}/device", sudo=True).strip()

lisa/microsoft/testsuites/performance/networkperf_passthrough.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,13 @@
2929
from lisa.sut_orchestrator import CLOUD_HYPERVISOR, HYPERV
3030
from lisa.testsuite import TestResult
3131
from lisa.tools import Dhclient, Kill, PowerShell, Sysctl
32+
from lisa.tools.ip import Ip
3233
from lisa.tools.iperf3 import (
3334
IPERF_TCP_BUFFER_LENGTHS,
3435
IPERF_TCP_CONCURRENCY,
3536
IPERF_UDP_BUFFER_LENGTHS,
3637
IPERF_UDP_CONCURRENCY,
3738
)
38-
from lisa.tools.ip import Ip
3939
from lisa.tools.ntttcp import NTTTCP_TCP_CONCURRENCY, NTTTCP_UDP_CONCURRENCY, Ntttcp
4040
from lisa.util import (
4141
LisaException,
@@ -1172,12 +1172,17 @@ def _perf_ntttcp_with_windows_server(
11721172
if udp_mode
11731173
else server_ntttcp.create_ntttcp_result(receiver_result)
11741174
)
1175-
except (AssertionError, LisaException):
1176-
client.log.debug(
1177-
"NTTTCP receiver output was not parseable; using sender "
1178-
"totals for receiver-side metrics."
1179-
)
1180-
parsed_server_result = parsed_client_result
1175+
except (AssertionError, LisaException) as parse_error:
1176+
receiver_node = "Linux guest" if udp_mode else "Windows host"
1177+
raise LisaException(
1178+
f"Failed to parse NTTTCP receiver output from {receiver_node} "
1179+
f"for {test_case_name} with {test_thread} connections. "
1180+
"Verify that the receiver completed and emitted NTTTCP "
1181+
"totals before publishing performance data. "
1182+
f"Exit code: {receiver_result.exit_code}. "
1183+
f"Stdout: {receiver_result.stdout[:2000]}. "
1184+
f"Stderr: {receiver_result.stderr[:2000]}"
1185+
) from parse_error
11811186
if udp_mode:
11821187
perf_message = client_ntttcp.create_ntttcp_udp_performance_message(
11831188
parsed_server_result,

lisa/tools/ntttcp.py

Lines changed: 63 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
from lisa.tools import Firewall, Gcc, Git, Lscpu, Make, Sed
2020
from lisa.tools.powershell import PowerShell
2121
from lisa.tools.taskset import TaskSet
22-
from lisa.util import LisaException, constants
22+
from lisa.util import LisaException, check_till_timeout, constants
2323
from lisa.util.process import ExecutableResult, Process
2424

2525
from .sysctl import Sysctl
@@ -245,18 +245,48 @@ def run_as_server_async(
245245
shell=True,
246246
sudo=True,
247247
)
248-
# NTTTCP for Linux 1.4.0
249-
# ---------------------------------------------------------
250-
# 01:16:35 INFO: no role specified. use receiver role
251-
# 01:16:35 INFO: 65 threads created
252-
# above output means ntttcp server is ready. In no-sync mode, the
253-
# receiver may not print the same readiness line before clients start.
254248
if no_sync:
255-
time.sleep(5)
249+
self._wait_server_port_ready(process, udp_mode)
256250
else:
257251
process.wait_output("threads created")
258252
return process
259253

254+
def _wait_server_port_ready(
    self,
    process: Process,
    udp_mode: bool,
    port: int = 5001,
    timeout: int = 30,
) -> None:
    """Block until the ntttcp receiver's listen port is open.

    The port is polled with ``_is_server_port_open`` through
    ``check_till_timeout``. ``run_as_server_async`` calls this in no-sync
    mode instead of waiting for the "threads created" output line.

    Args:
        process: the receiver process whose liveness is checked on
            every poll.
        udp_mode: probe UDP sockets when True, TCP listeners otherwise.
        port: local port the receiver is expected to bind.
        timeout: seconds handed to ``check_till_timeout`` as the polling
            budget. Defaults to the previously hard-coded 30.

    Raises:
        LisaException: raised by the probe when the receiver process is
            no longer running. NOTE(review): how ``check_till_timeout``
            reports an expired timeout is defined by that helper.
    """
    protocol = "UDP" if udp_mode else "TCP"

    def is_ready() -> bool:
        # A receiver that already exited will never open the port, so
        # stop polling right away instead of burning the whole timeout.
        if not process.is_running():
            raise LisaException(
                "ntttcp receiver exited before the listen port became ready"
            )
        return self._is_server_port_open(udp_mode, port)

    check_till_timeout(
        is_ready,
        timeout_message=f"wait for ntttcp {protocol} receiver port {port} open",
        timeout=timeout,
    )
273+
274+
def _is_server_port_open(self, udp_mode: bool, port: int) -> bool:
    """Report whether a local socket is listening on ``port``.

    Lists listening sockets with ``ss`` and falls back to ``netstat``
    when ``ss`` fails, then greps the listing for the port.

    Args:
        udp_mode: list UDP sockets when True, TCP listeners otherwise.
        port: local port number to look for.

    Returns:
        True when grep found a matching line (exit code 0), else False.
    """
    ss_options = "-lun" if udp_mode else "-ltn"
    netstat_options = "-lnu" if udp_mode else "-ltn"
    # The ':' or '.' separator in front of the port must be mandatory.
    # With an optional separator, probing 5001 also matched a listener
    # on 15001 or any numeric column that merely ends in 5001.
    port_pattern = f"[:.]{port}[[:space:]]"
    result = self.node.execute(
        cmd=(
            # "|| true" keeps the left side of the pipe from failing when
            # neither tool is available; grep then decides the exit code.
            f"(ss {ss_options} 2>/dev/null || "
            f"netstat {netstat_options} 2>/dev/null || true) "
            f"| grep -E '{port_pattern}'"
        ),
        shell=True,
        sudo=True,
        no_info_log=True,
        no_error_log=True,
    )
    return result.exit_code == 0
289+
260290
def run_as_server(
261291
self,
262292
nic_name: str,
@@ -979,7 +1009,7 @@ def create_ntttcp_result(
9791009

9801010
class WindowsNtttcp(Ntttcp):
9811011
_download_url = (
982-
"https://github.com/microsoft/ntttcp/releases/latest/download/ntttcp.exe"
1012+
"https://github.com/microsoft/ntttcp/releases/latest/download/" "ntttcp.exe"
9831013
)
9841014
_total_mbps_pattern = re.compile(
9851015
r"(?im)^\s*TOTAL\s+(?P<throughput>[0-9]+(?:\.[0-9]+)?)\s*$"
@@ -1003,8 +1033,7 @@ def dependencies(self) -> List[Type[Tool]]:
10031033

10041034
def setup_system(self, udp_mode: bool = False, set_task_max: bool = True) -> None:
10051035
self.node.tools[PowerShell].run_cmdlet(
1006-
"Set-NetFirewallProfile -Profile Domain,Public,Private "
1007-
"-Enabled False",
1036+
"Set-NetFirewallProfile -Profile Domain,Public,Private " "-Enabled False",
10081037
fail_on_error=False,
10091038
)
10101039

@@ -1037,19 +1066,31 @@ def run_as_server_async(
10371066
shell=True,
10381067
sudo=True,
10391068
)
1040-
if udp_mode:
1041-
time.sleep(5)
1042-
else:
1043-
self.node.tools[PowerShell].run_cmdlet(
1044-
"for ($i = 0; $i -lt 10; $i++) { "
1045-
"if (Get-NetTCPConnection -State Listen -LocalPort 5001 "
1046-
"-ErrorAction SilentlyContinue) { exit 0 }; "
1047-
"Start-Sleep -Seconds 1 }; exit 1",
1048-
force_run=True,
1049-
timeout=15,
1050-
)
1069+
self._wait_receiver_port_ready(udp_mode)
10511070
return process
10521071

1072+
def _wait_receiver_port_ready(
    self,
    udp_mode: bool,
    port: int = 5001,
    timeout: int = 30,
) -> None:
    """Wait on the Windows node until the ntttcp receiver port is bound.

    Runs one PowerShell script that polls every 500 ms until a matching
    endpoint exists (``exit 0``) or the deadline passes (``exit 1``).

    Args:
        udp_mode: query ``Get-NetUDPEndpoint`` when True, otherwise
            ``Get-NetTCPConnection -State Listen``.
        port: local port the receiver is expected to bind.
        timeout: seconds the script keeps polling. The cmdlet call
            itself is given five extra seconds.
    """
    if udp_mode:
        probe = f"Get-NetUDPEndpoint -LocalPort {port} -ErrorAction SilentlyContinue"
    else:
        probe = (
            f"Get-NetTCPConnection -State Listen -LocalPort {port} "
            "-ErrorAction SilentlyContinue"
        )

    # The deadline is computed on the node so the loop is bounded by the
    # node's own clock.
    script = (
        f"$deadline = (Get-Date).AddSeconds({timeout}); "
        "do { "
        f"if ({probe}) {{ exit 0 }}; "
        "Start-Sleep -Milliseconds 500 "
        "} while ((Get-Date) -lt $deadline); exit 1"
    )
    powershell = self.node.tools[PowerShell]
    powershell.run_cmdlet(script, force_run=True, timeout=timeout + 5)
1093+
10531094
def run_as_client(
10541095
self,
10551096
nic_name: str,

lisa/tools/reboot.py

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@ def _get_last_boot_time(self) -> datetime:
6060

6161
def _wait_ssh_session_stable(self, time_out: int) -> None:
6262
timer = create_timer()
63+
consecutive_successes = 0
64+
last_error = ""
6365
while timer.elapsed(False) < time_out:
6466
try:
6567
self.node.close()
@@ -69,21 +71,26 @@ def _wait_ssh_session_stable(self, time_out: int) -> None:
6971
timeout=10,
7072
no_info_log=True,
7173
).assert_exit_code()
72-
sleep(10)
73-
self.node.close()
74-
self.node.execute(
75-
"echo lisa reboot ready",
76-
shell=True,
77-
timeout=10,
78-
no_info_log=True,
79-
).assert_exit_code()
80-
return
74+
consecutive_successes += 1
75+
if consecutive_successes >= 2:
76+
return
8177
except Exception as e:
78+
consecutive_successes = 0
79+
last_error = str(e)
8280
self._log.debug(f"waiting for stable ssh session after reboot: {e}")
83-
sleep(5)
81+
sleep(2)
8482
raise LisaException(
85-
f"cannot get stable ssh session after reboot in {time_out} seconds"
83+
f"cannot get stable ssh session after reboot in {time_out} seconds. "
84+
f"Last error: {last_error}"
85+
)
86+
87+
def _run_reboot_command(self) -> None:
    """Reboot the node through the ``reboot`` binary, if it exists.

    ``reboot`` uses this when ``systemctl`` is unavailable or when
    ``systemctl reboot -i`` exits with a non-zero code. The binary is
    located with ``command -v reboot``; its path, stripped of the
    trailing newline, is stored in ``self._command`` before ``self.run``
    is called with a 10 second timeout.

    When the binary cannot be located, nothing is run and a debug line
    is logged so the skipped fallback is visible in the log.
    """
    lookup = self.node.execute(
        "command -v reboot", shell=True, sudo=True, no_info_log=True
    )
    if lookup.exit_code != 0:
        # Previously this path returned silently, leaving no hint that
        # the fallback reboot never happened.
        self._log.debug(
            "reboot command was not found on the node; "
            "skipping the reboot fallback"
        )
        return
    self._command = lookup.stdout.strip()
    self.run(force_run=True, sudo=True, timeout=10)
8794

8895
def reboot_and_check_panic(self, log_path: Path) -> None:
8996
try:
@@ -132,19 +139,22 @@ def reboot(self, time_out: int = 300) -> None:
132139
"command -v systemctl", shell=True, sudo=True, no_info_log=True
133140
)
134141
if systemctl_result.exit_code == 0:
135-
self.node.execute(
142+
reboot_result = self.node.execute(
136143
"systemctl reboot -i",
137144
shell=True,
138145
sudo=True,
139146
timeout=10,
140147
)
148+
if reboot_result.exit_code != 0:
149+
self._log.debug(
150+
"systemctl reboot failed with exit code "
151+
f"{reboot_result.exit_code}; falling back to reboot. "
152+
f"stdout: {reboot_result.stdout}, "
153+
f"stderr: {reboot_result.stderr}"
154+
)
155+
self._run_reboot_command()
141156
else:
142-
command_result = self.node.execute(
143-
"command -v reboot", shell=True, sudo=True, no_info_log=True
144-
)
145-
if command_result.exit_code == 0:
146-
self._command = command_result.stdout
147-
self.run(force_run=True, sudo=True, timeout=10)
157+
self._run_reboot_command()
148158
except Exception as e:
149159
# it doesn't matter to exceptions here. The system may reboot fast
150160
self._log.debug(f"ignorable exception on rebooting: {e}")

0 commit comments

Comments
 (0)