diff --git a/changelog/1314.feature.rst b/changelog/1314.feature.rst new file mode 100644 index 00000000..440fce39 --- /dev/null +++ b/changelog/1314.feature.rst @@ -0,0 +1,2 @@ +Workers that finish all their tests are now torn down immediately, freeing +their subprocess memory instead of keeping them alive until the session ends. diff --git a/src/xdist/dsession.py b/src/xdist/dsession.py index 47e1de7d..45a78f4a 100644 --- a/src/xdist/dsession.py +++ b/src/xdist/dsession.py @@ -216,6 +216,7 @@ def worker_workerfinished(self, node: WorkerController) -> None: crashitem = self.sched.remove_node(node) assert not crashitem, (crashitem, node) self._active_nodes.remove(node) + node.ensure_teardown() def worker_internal_error( self, node: WorkerController, formatted_error: str @@ -228,6 +229,7 @@ def worker_internal_error( here ourselves using the formatted message. """ self._active_nodes.remove(node) + node.ensure_teardown() try: assert False, formatted_error except AssertionError: diff --git a/src/xdist/workermanage.py b/src/xdist/workermanage.py index 201c8e71..afdb43f2 100644 --- a/src/xdist/workermanage.py +++ b/src/xdist/workermanage.py @@ -114,7 +114,10 @@ def setup_node( return node def teardown_nodes(self) -> None: - self.group.terminate(self.EXIT_TIMEOUT) + try: + self.group.terminate(self.EXIT_TIMEOUT) + except OSError: + pass def _gettxspecs(self) -> list[execnet.XSpec]: return [execnet.XSpec(x) for x in parse_tx_spec_config(self.config)] @@ -357,11 +360,20 @@ def ensure_teardown(self) -> None: if not self.channel.isclosed(): self.log("closing", self.channel) self.channel.close() - # del self.channel if hasattr(self, "gateway"): self.log("exiting", self.gateway) - self.gateway.exit() - # del self.gateway + try: + self.gateway.exit() + except OSError: + pass + try: + self.gateway.join(timeout=1) + except Exception: + pass + try: + self.gateway._io.wait() + except Exception: + pass def send_runtest_some(self, indices: Sequence[int]) -> None: self.sendcommand("runtests", indices=indices) diff --git a/testing/acceptance_test.py b/testing/acceptance_test.py index 1b44985d..f47cba46 100644 --- a/testing/acceptance_test.py +++ b/testing/acceptance_test.py @@ -344,6 +344,44 @@ def pytest_testnodedown(node, error): assert "s2call" in s assert "Interrupted" in s + def test_idle_worker_freed_early(self, pytester: pytest.Pytester) -> None: + """Workers that finish all their tests should have their gateway + process terminated early, before the session ends (#1314).""" + pytester.makepyfile( + test_fast=""" + import os + import pathlib + + def test_fast(request): + pid_file = request.config.rootpath / "fast_worker_pid.txt" + pid_file.write_text(str(os.getpid())) + """, + test_slow=""" + import os + import time + import pathlib + + def test_slow(request): + pid_file = request.config.rootpath / "fast_worker_pid.txt" + # Wait for the fast worker to write its PID and be torn down. + for _ in range(40): + time.sleep(0.1) + if pid_file.exists(): + pid = int(pid_file.read_text()) + # Check if the process is gone (killed by ensure_teardown). + try: + os.kill(pid, 0) + except OSError: + # Process is dead - early teardown worked. + return + raise AssertionError( + "Fast worker process was not terminated during slow test" + ) + """, + ) + result = pytester.runpytest("-n2", "--dist=load") + result.assert_outcomes(passed=2) + def test_keyboard_interrupt_dist(self, pytester: pytest.Pytester) -> None: # xxx could be refined to check for return code pytester.makepyfile( diff --git a/testing/test_dsession.py b/testing/test_dsession.py index 680b7ae0..2e258480 100644 --- a/testing/test_dsession.py +++ b/testing/test_dsession.py @@ -53,6 +53,9 @@ def send_steal(self, indices: Sequence[int]) -> None: def shutdown(self) -> None: self._shutdown = True + def ensure_teardown(self) -> None: + pass + @property def shutting_down(self) -> bool: return self._shutdown