Skip to content

Commit 6dfaadd

Browse files
authored
[autest] thread_config: add startup polling and skip test on non-Linux (#12940)
* thread_config: add startup polling for thread checks and skip on non-Linux check_threads.py now uses a short bounded poll/retry window so thread-count validation does not fail on startup races; the test is also skipped on non-Linux platforms because per-thread introspection used by this check is not reliably available there. * thread_config: stop retrying when Process.threads() access is denied
1 parent 801e5be commit 6dfaadd

2 files changed

Lines changed: 124 additions & 83 deletions

File tree

tests/gold_tests/thread_config/check_threads.py

Lines changed: 123 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -19,92 +19,132 @@
1919

2020
import psutil
2121
import argparse
22+
import os
2223
import sys
23-
24-
25-
def count_threads(ts_path, etnet_threads, accept_threads, task_threads, aio_threads):
26-
27-
for p in psutil.process_iter(['name', 'cwd', 'threads']):
28-
29-
# Find the pid corresponding to the ats process we started in autest.
30-
# It needs to match the process name and the binary path.
31-
# If autest can expose the pid of the process this is not needed anymore.
32-
if p.name() == '[TS_MAIN]' and p.cwd() == ts_path:
33-
34-
etnet_check = set()
35-
accept_check = set()
36-
task_check = set()
37-
aio_check = set()
38-
39-
for t in p.threads():
40-
24+
import time
25+
26+
COUNT_THREAD_WAIT_SECONDS = 10.0
27+
COUNT_THREAD_POLL_SECONDS = 0.1
28+
29+
30+
def _count_threads_once(ts_path, etnet_threads, accept_threads, task_threads, aio_threads):
31+
"""
32+
Return (code, message) for a single snapshot of ATS thread state.
33+
"""
34+
for p in psutil.process_iter():
35+
try:
36+
# Find the pid corresponding to the ats process we started in autest.
37+
# It needs to match the process name and the binary path.
38+
# If autest can expose the pid of the process this is not needed anymore.
39+
process_name = p.name()
40+
process_cwd = p.cwd()
41+
process_exe = p.exe()
42+
if process_cwd != ts_path:
43+
continue
44+
if process_name != '[TS_MAIN]' and process_name != 'traffic_server' and os.path.basename(
45+
process_exe) != 'traffic_server':
46+
continue
47+
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
48+
continue
49+
50+
etnet_check = set()
51+
accept_check = set()
52+
task_check = set()
53+
aio_check = set()
54+
55+
try:
56+
threads = p.threads()
57+
except psutil.AccessDenied:
58+
return 12, 'Could not inspect ATS process threads.'
59+
except (psutil.NoSuchProcess, psutil.ZombieProcess):
60+
return 1, 'ATS process disappeared before thread inspection completed.'
61+
62+
for t in threads:
63+
try:
4164
# Get the name of the thread.
4265
thread_name = psutil.Process(t.id).name()
43-
44-
if thread_name.startswith('[ET_NET'):
45-
46-
# Get the id of this thread and check if it's in range.
47-
etnet_id = int(thread_name.split(' ')[1][:-1])
48-
if etnet_id >= etnet_threads:
49-
sys.stderr.write('Too many ET_NET threads created.\n')
50-
return 2
51-
elif etnet_id in etnet_check:
52-
sys.stderr.write('ET_NET thread with duplicate thread id created.\n')
53-
return 3
54-
else:
55-
etnet_check.add(etnet_id)
56-
57-
elif thread_name.startswith('[ACCEPT'):
58-
59-
# Get the id of this thread and check if it's in range.
60-
accept_id = int(thread_name.split(' ')[1].split(':')[0])
61-
if accept_id >= accept_threads:
62-
sys.stderr.write('Too many ACCEPT threads created.\n')
63-
return 5
64-
else:
65-
accept_check.add(accept_id)
66-
67-
elif thread_name.startswith('[ET_TASK'):
68-
69-
# Get the id of this thread and check if it's in range.
70-
task_id = int(thread_name.split(' ')[1][:-1])
71-
if task_id >= task_threads:
72-
sys.stderr.write('Too many ET_TASK threads created.\n')
73-
return 7
74-
elif task_id in task_check:
75-
sys.stderr.write('ET_TASK thread with duplicate thread id created.\n')
76-
return 8
77-
else:
78-
task_check.add(task_id)
79-
80-
elif thread_name.startswith('[ET_AIO'):
81-
82-
# Get the id of this thread and check if it's in range.
83-
aio_id = int(thread_name.split(' ')[1].split(':')[0])
84-
if aio_id >= aio_threads:
85-
sys.stderr.write('Too many ET_AIO threads created.\n')
86-
return 10
87-
else:
88-
aio_check.add(aio_id)
89-
90-
# Check the size of the sets, must be equal to the expected size.
91-
if len(etnet_check) != etnet_threads:
92-
sys.stderr.write('Expected ET_NET threads: {0}, found: {1}.\n'.format(etnet_threads, len(etnet_check)))
93-
return 4
94-
elif len(accept_check) != accept_threads:
95-
sys.stderr.write('Expected ACCEPT threads: {0}, found: {1}.\n'.format(accept_threads, len(accept_check)))
96-
return 6
97-
elif len(task_check) != task_threads:
98-
sys.stderr.write('Expected ET_TASK threads: {0}, found: {1}.\n'.format(task_threads, len(task_check)))
99-
return 9
100-
elif len(aio_check) != aio_threads:
101-
sys.stderr.write('Expected ET_AIO threads: {0}, found: {1}.\n'.format(aio_threads, len(aio_check)))
102-
return 11
103-
else:
104-
return 0
105-
106-
# Return 1 if no pid is found to match the ats process.
107-
return 1
66+
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
67+
# A thread can disappear while we inspect; treat as transient.
68+
continue
69+
70+
if thread_name.startswith('[ET_NET'):
71+
72+
# Get the id of this thread and check if it's in range.
73+
etnet_id = int(thread_name.split(' ')[1][:-1])
74+
if etnet_id >= etnet_threads:
75+
return 2, 'Too many ET_NET threads created.'
76+
elif etnet_id in etnet_check:
77+
return 3, 'ET_NET thread with duplicate thread id created.'
78+
else:
79+
etnet_check.add(etnet_id)
80+
81+
elif thread_name.startswith('[ACCEPT'):
82+
83+
# Get the id of this thread and check if it's in range.
84+
accept_id = int(thread_name.split(' ')[1].split(':')[0])
85+
if accept_id >= accept_threads:
86+
return 5, 'Too many ACCEPT threads created.'
87+
else:
88+
accept_check.add(accept_id)
89+
90+
elif thread_name.startswith('[ET_TASK'):
91+
92+
# Get the id of this thread and check if it's in range.
93+
task_id = int(thread_name.split(' ')[1][:-1])
94+
if task_id >= task_threads:
95+
return 7, 'Too many ET_TASK threads created.'
96+
elif task_id in task_check:
97+
return 8, 'ET_TASK thread with duplicate thread id created.'
98+
else:
99+
task_check.add(task_id)
100+
101+
elif thread_name.startswith('[ET_AIO'):
102+
103+
# Get the id of this thread and check if it's in range.
104+
aio_id = int(thread_name.split(' ')[1].split(':')[0])
105+
if aio_id >= aio_threads:
106+
return 10, 'Too many ET_AIO threads created.'
107+
else:
108+
aio_check.add(aio_id)
109+
110+
# Check the size of the sets, must be equal to the expected size.
111+
if len(etnet_check) != etnet_threads:
112+
return 4, 'Expected ET_NET threads: {0}, found: {1}.'.format(etnet_threads, len(etnet_check))
113+
elif len(accept_check) != accept_threads:
114+
return 6, 'Expected ACCEPT threads: {0}, found: {1}.'.format(accept_threads, len(accept_check))
115+
elif len(task_check) != task_threads:
116+
return 9, 'Expected ET_TASK threads: {0}, found: {1}.'.format(task_threads, len(task_check))
117+
elif len(aio_check) != aio_threads:
118+
return 11, 'Expected ET_AIO threads: {0}, found: {1}.'.format(aio_threads, len(aio_check))
119+
else:
120+
return 0, ''
121+
122+
return 1, 'Expected ATS process [TS_MAIN] with cwd {0}, but it was not found.'.format(ts_path)
123+
124+
125+
def count_threads(
126+
ts_path,
127+
etnet_threads,
128+
accept_threads,
129+
task_threads,
130+
aio_threads,
131+
wait_seconds=COUNT_THREAD_WAIT_SECONDS,
132+
poll_seconds=COUNT_THREAD_POLL_SECONDS):
133+
deadline = time.monotonic() + wait_seconds
134+
135+
# Retry on startup/transient states:
136+
# 1 : ATS process not found yet
137+
# 4/6/9/11: expected thread count not reached yet
138+
retry_codes = {1, 4, 6, 9, 11}
139+
140+
while True:
141+
code, message = _count_threads_once(ts_path, etnet_threads, accept_threads, task_threads, aio_threads)
142+
if code == 0:
143+
return 0
144+
if code not in retry_codes or time.monotonic() >= deadline:
145+
sys.stderr.write(message + '\n')
146+
return code
147+
time.sleep(poll_seconds)
108148

109149

110150
def main():

tests/gold_tests/thread_config/thread_config.test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
Test.Summary = 'Test that Trafficserver starts with different thread configurations.'
2222
Test.ContinueOnFail = True
23+
Test.SkipUnless(Condition.IsPlatform("linux"))
2324

2425
ts = Test.MakeATSProcess('ts-1_exec-0_accept-1_task-1_aio')
2526
ts.Disk.records_config.update(

0 commit comments

Comments
 (0)