Skip to content

Commit 5ef3a61

Browse files
committed
Add random-walk controller state to FTP parallel GC (tracking only)
Phase 6 step 2 of bringing the free-threaded parallel GC (FTP) to feature parity with the GIL parallel GC. The GIL build adopted the random-walk adaptive worker controller in Phase 5; this commit adds the same controller state and the same shared update call to the FTP build, so both run identical worker-allocation logic.

Changes:
- _PyGCThreadPool gains adaptive_workers, prev_cost_per_obj_ns and explore_rng fields (mirroring _PyParallelGCState on the GIL side).
- _PyGC_ThreadPoolInit initialises them: adaptive_workers = min(4, num_workers), prev_cost_per_obj_ns = 0, explore_rng = _PyGC_RandomWalkSeed().
- gc_free_threading.c::gc_collect_internal calls _PyGC_RandomWalkUpdate after cleanup_end_ns is recorded — same algorithm as the GIL build, via the shared helper introduced in 77c1247.

What this commit does NOT yet do (and why): adaptive_workers is tracked but NOT yet used to skip work on idle workers. The natural place — an early return in thread_pool_do_work for worker_id >= adaptive_workers — deadlocks, because FTP's work functions (update_refs, mark_heap, scan_heap, propagate) use internal phase_barriers that require ALL num_workers participants to arrive. Skipping the work skips the phase_barriers, and the active workers wait forever for the missing ones. This barrier-vs-condvar dispatch mismatch is the blocker; the next commit replaces FTP's barrier dispatch with per-worker condvars (mirroring Phase 5.3 on the GIL side), at which point adaptive_workers naturally takes effect because idle workers don't wake at all.

So: this commit is "controller state and call wiring", not "dispatch behaviour change". The state evolves identically to the GIL build; the observed worker count is still num_workers until the dispatch refactor lands.

Verified: FTP build, all 5 parallel-GC test files pass (177 tests, 7 platform skips). Smoke test (20 collections with 4 workers) completes without hang.
1 parent 12a3440 commit 5ef3a61

3 files changed

Lines changed: 47 additions & 0 deletions

File tree

Include/internal/pycore_gc_ft_parallel.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,14 @@ typedef struct _PyGCThreadPool {
336336
// Worker control
337337
int shutdown; // 1 = pool is shutting down (use atomics)
338338

339+
// Adaptive worker count — see Include/internal/pycore_gc_random_walk.h.
340+
// Same controller as the GIL parallel GC for identical behaviour.
341+
// adaptive_workers ∈ [2, num_workers]; workers with worker_id >=
342+
// adaptive_workers are meant to no-op for a collection (tracked only for now; dispatch does not consult it yet, so all num_workers still run).
343+
size_t adaptive_workers;
344+
double prev_cost_per_obj_ns;
345+
uint32_t explore_rng;
346+
339347
// Debug/testing counters (for assertions)
340348
size_t threads_created; // Total threads ever created (should equal num_workers-1)
341349
size_t collections_completed; // Number of GC collections processed

Python/gc_free_threading.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "pycore_freelist.h" // _PyObject_ClearFreeLists()
88
#ifdef Py_PARALLEL_GC
99
#include "pycore_gc_ft_parallel.h" // Parallel GC support
10+
#include "pycore_gc_random_walk.h" // _PyGC_RandomWalkUpdate()
1011
#endif
1112
#include "pycore_genobject.h" // _PyGen_GetGeneratorFromFrame()
1213
#include "pycore_initconfig.h" // _PyStatus_NO_MEMORY()
@@ -2726,6 +2727,22 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
27262727
(void)PyTime_PerfCounterRaw(&cleanup_end);
27272728
interp->gc.cleanup_end_ns = cleanup_end;
27282729

2730+
// Update adaptive worker count via shared random-walk controller.
2731+
// Mirrors the GIL parallel GC body in Python/gc.c — both builds use the
2732+
// same logic to converge on a worker count for this workload.
2733+
{
2734+
_PyGCThreadPool *_pool = interp->gc.thread_pool;
2735+
if (_pool != NULL) {
2736+
_PyGC_RandomWalkUpdate(
2737+
interp->gc.cleanup_end_ns - interp->gc.gc_start_ns,
2738+
state->candidates,
2739+
&_pool->prev_cost_per_obj_ns,
2740+
&_pool->explore_rng,
2741+
&_pool->adaptive_workers,
2742+
(size_t)_pool->num_workers);
2743+
}
2744+
}
2745+
27292746
// Store the current memory usage, can be smaller now if breaking cycles
27302747
// freed some memory.
27312748
Py_ssize_t last_mem = get_process_mem_usage();

Python/gc_free_threading_parallel.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "pycore_gc.h"
1313
#include "pycore_gc_ft_parallel.h"
14+
#include "pycore_gc_random_walk.h" // _PyGC_RandomWalkUpdate, _PyGC_RandomWalkSeed
1415
#include "pycore_interp.h"
1516
#include "pycore_lock.h" // PyMutex_Lock/Unlock
1617
#include "pycore_pystate.h"
@@ -1657,6 +1658,21 @@ thread_pool_do_work(_PyGCThreadPool *pool, int worker_id)
16571658
return; // No work to do
16581659
}
16591660

1661+
// NOTE: pool->adaptive_workers is tracked (see _PyGC_RandomWalkUpdate
1662+
// call after each collection) but is NOT YET used here to skip work
1663+
// for idle workers. Reason: FTP work functions (update_refs, mark_heap,
1664+
// scan_heap, propagate) use internal phase_barriers that require ALL
1665+
// num_workers participants to arrive. Skipping work in workers >=
1666+
// adaptive_workers would leave them not participating in those
1667+
// phase_barriers, causing deadlock.
1668+
//
1669+
// The next commit replaces barrier dispatch with per-worker condvars,
1670+
// at which point idle workers stay asleep and don't enter the work
1671+
// functions at all. Then adaptive_workers will actually skip work.
1672+
// For now, the controller state is updated so both builds use the same
1673+
// logic; the dispatch refactor enables it to take effect on FTP.
1674+
(void)pool->adaptive_workers; // intentionally unused this commit
1675+
16601676
#if GC_DEBUG_ATOMICS
16611677
// Reset TLS stats at start of work
16621678
_PyGC_ATOMIC_RESET_STATS();
@@ -1791,6 +1807,12 @@ _PyGC_ThreadPoolInit(PyInterpreterState *interp, int num_workers)
17911807
pool->threads_created = 0;
17921808
pool->collections_completed = 0;
17931809

1810+
// Adaptive worker controller — same as GIL build, see
1811+
// Include/internal/pycore_gc_random_walk.h.
1812+
pool->adaptive_workers = (num_workers < 4) ? (size_t)num_workers : 4;
1813+
pool->prev_cost_per_obj_ns = 0.0;
1814+
pool->explore_rng = _PyGC_RandomWalkSeed();
1815+
17941816
// Initialize barriers for synchronization
17951817
// All barriers include all workers (main thread as worker 0)
17961818
_PyGCBarrier_Init(&pool->mark_barrier, num_workers);

0 commit comments

Comments
 (0)