Skip to content

Commit 5e9d90b

Browse files
authored
gh-144503: Pass sys.argv to forkserver as real argv elements (GH-148194)
Avoid embedding the parent's sys.argv into the forkserver -c command string via repr(). When sys.argv is large (e.g. thousands of file paths from a pre-commit hook), the resulting single argument could exceed the OS per-argument length limit (MAX_ARG_STRLEN on Linux, typically 128 KiB), causing posix_spawn to fail and the parent to observe a BrokenPipeError. Instead, append the argv entries as separate command-line arguments after -c; the forkserver child reads them back as sys.argv[1:]. This cannot exceed any limit the parent itself did not already satisfy. Regression introduced by gh-143706 / 298d544.
1 parent 6bb7b33 commit 5e9d90b

File tree

4 files changed

+118
-4
lines changed

4 files changed

+118
-4
lines changed

Lib/multiprocessing/forkserver.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,18 +162,25 @@ def ensure_running(self):
162162
self._forkserver_alive_fd = None
163163
self._forkserver_pid = None
164164

165-
cmd = ('from multiprocessing.forkserver import main; ' +
166-
'main(%d, %d, %r, **%r)')
165+
# gh-144503: sys_argv is passed as real argv elements after the
166+
# ``-c cmd`` rather than repr'd into main_kws so that a large
167+
# parent sys.argv cannot push the single ``-c`` command string
168+
# over the OS per-argument length limit (MAX_ARG_STRLEN on Linux).
169+
# The child sees them as sys.argv[1:].
170+
cmd = ('import sys; '
171+
'from multiprocessing.forkserver import main; '
172+
'main(%d, %d, %r, sys_argv=sys.argv[1:], **%r)')
167173

168174
main_kws = {}
175+
sys_argv = None
169176
if self._preload_modules:
170177
data = spawn.get_preparation_data('ignore')
171178
if 'sys_path' in data:
172179
main_kws['sys_path'] = data['sys_path']
173180
if 'init_main_from_path' in data:
174181
main_kws['main_path'] = data['init_main_from_path']
175182
if 'sys_argv' in data:
176-
main_kws['sys_argv'] = data['sys_argv']
183+
sys_argv = data['sys_argv']
177184
if self._preload_on_error != 'ignore':
178185
main_kws['on_error'] = self._preload_on_error
179186

@@ -197,6 +204,8 @@ def ensure_running(self):
197204
exe = spawn.get_executable()
198205
args = [exe] + util._args_from_interpreter_flags()
199206
args += ['-c', cmd]
207+
if sys_argv is not None:
208+
args += sys_argv
200209
pid = util.spawnv_passfds(exe, args, fds_to_pass)
201210
except:
202211
os.close(alive_w)

Lib/test/_test_multiprocessing.py

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,38 @@ def spawn_check_wrapper(*args, **kwargs):
207207
return decorator
208208

209209

210+
def only_run_in_forkserver_testsuite(reason):
211+
"""Returns a decorator: raises SkipTest unless fork is supported
212+
and the current start method is forkserver.
213+
214+
Combines @support.requires_fork() with the single-run semantics of
215+
only_run_in_spawn_testsuite(), but uses the forkserver testsuite as
216+
the single-run target. Appropriate for tests that exercise
217+
os.fork() directly (raw fork or mp.set_start_method("fork") in a
218+
subprocess) and don't vary by start method, since forkserver is
219+
only available on platforms that support fork.
220+
"""
221+
222+
def decorator(test_item):
223+
224+
@functools.wraps(test_item)
225+
def forkserver_check_wrapper(*args, **kwargs):
226+
if not support.has_fork_support:
227+
raise unittest.SkipTest("requires working os.fork()")
228+
if (start_method := multiprocessing.get_start_method()) != "forkserver":
229+
raise unittest.SkipTest(
230+
f"{start_method=}, not 'forkserver'; {reason}")
231+
return test_item(*args, **kwargs)
232+
233+
return forkserver_check_wrapper
234+
235+
return decorator
236+
237+
210238
class TestInternalDecorators(unittest.TestCase):
211239
"""Logic within a test suite that could errantly skip tests? Test it!"""
212240

213-
@unittest.skipIf(sys.platform == "win32", "test requires that fork exists.")
241+
@support.requires_fork()
214242
def test_only_run_in_spawn_testsuite(self):
215243
if multiprocessing.get_start_method() != "spawn":
216244
raise unittest.SkipTest("only run in test_multiprocessing_spawn.")
@@ -234,6 +262,30 @@ def return_four_if_spawn():
234262
finally:
235263
multiprocessing.set_start_method(orig_start_method, force=True)
236264

265+
@support.requires_fork()
266+
def test_only_run_in_forkserver_testsuite(self):
267+
if multiprocessing.get_start_method() != "forkserver":
268+
raise unittest.SkipTest("only run in test_multiprocessing_forkserver.")
269+
270+
try:
271+
@only_run_in_forkserver_testsuite("testing this decorator")
272+
def return_four_if_forkserver():
273+
return 4
274+
except Exception as err:
275+
self.fail(f"expected decorated `def` not to raise; caught {err}")
276+
277+
orig_start_method = multiprocessing.get_start_method(allow_none=True)
278+
try:
279+
multiprocessing.set_start_method("forkserver", force=True)
280+
self.assertEqual(return_four_if_forkserver(), 4)
281+
multiprocessing.set_start_method("spawn", force=True)
282+
with self.assertRaises(unittest.SkipTest) as ctx:
283+
return_four_if_forkserver()
284+
self.assertIn("testing this decorator", str(ctx.exception))
285+
self.assertIn("start_method=", str(ctx.exception))
286+
finally:
287+
multiprocessing.set_start_method(orig_start_method, force=True)
288+
237289

238290
#
239291
# Creates a wrapper for a function which records the time it takes to finish
@@ -7133,6 +7185,23 @@ def test_preload_main_sys_argv(self):
71337185
'',
71347186
])
71357187

7188+
@only_run_in_forkserver_testsuite("forkserver specific test.")
7189+
def test_preload_main_large_sys_argv(self):
7190+
# gh-144503: a very large parent sys.argv must not prevent the
7191+
# forkserver from starting (it previously overflowed the OS
7192+
# per-argument length limit when repr'd into the -c command string).
7193+
name = os.path.join(os.path.dirname(__file__),
7194+
'mp_preload_large_sysargv.py')
7195+
_, out, err = test.support.script_helper.assert_python_ok(name)
7196+
self.assertEqual(err, b'')
7197+
7198+
out = out.decode().split("\n")
7199+
self.assertEqual(out, [
7200+
'preload:5002:sentinel',
7201+
'worker:5002:sentinel',
7202+
'',
7203+
])
7204+
71367205
#
71377206
# Mixins
71387207
#
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# gh-144503: Test that the forkserver can start when the parent process has
2+
# a very large sys.argv. Prior to the fix, sys.argv was repr'd into the
3+
# forkserver ``-c`` command string which could exceed the OS limit on the
4+
# length of a single argv element (MAX_ARG_STRLEN on Linux, ~128 KiB),
5+
# causing posix_spawn to fail and the parent to see a BrokenPipeError.
6+
7+
import multiprocessing
8+
import sys
9+
10+
EXPECTED_LEN = 5002 # argv[0] + 5000 padding entries + sentinel
11+
12+
13+
def fun():
14+
print(f"worker:{len(sys.argv)}:{sys.argv[-1]}")
15+
16+
17+
if __name__ == "__main__":
18+
# Inflate sys.argv well past 128 KiB before the forkserver is started.
19+
sys.argv[1:] = ["x" * 50] * 5000 + ["sentinel"]
20+
assert len(sys.argv) == EXPECTED_LEN
21+
22+
ctx = multiprocessing.get_context("forkserver")
23+
p = ctx.Process(target=fun)
24+
p.start()
25+
p.join()
26+
sys.exit(p.exitcode)
27+
else:
28+
# This branch runs when the forkserver preloads this module as
29+
# __mp_main__; confirm the large argv was propagated intact.
30+
print(f"preload:{len(sys.argv)}:{sys.argv[-1]}")
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fix a regression introduced in 3.14.3 and 3.13.12 where the
2+
:mod:`multiprocessing` ``forkserver`` start method would fail with
3+
:exc:`BrokenPipeError` when the parent process had a very large
4+
:data:`sys.argv`. The argv is now passed to the forkserver as separate
5+
command-line arguments rather than being embedded in the ``-c`` command
6+
string, avoiding the operating system's per-argument length limit.

0 commit comments

Comments
 (0)