@@ -377,7 +377,7 @@ static Word* push_single_word_cuarg(LaunchHelper& helper, Word word) {
377377 return ptr;
378378}
379379
380- static LaunchHelper* g_helper_freelist; // protected by the GIL
380+ static LaunchHelper* g_helper_freelist; // protected by the GIL or g_launch_mutex
381381
382382namespace { struct LaunchHelperDeleter {
383383 void operator () (LaunchHelper* helper) const {
@@ -386,6 +386,10 @@ namespace { struct LaunchHelperDeleter {
386386 }
387387}; }
388388
// Process-wide lock that exists only in free-threaded builds (Py_GIL_DISABLED).
// In this file it is taken at the top of launch_impl, cuda_tile_benchmark and
// get_parameter_constraints_from_pyargs via PyCriticalSectionGuard.
// NOTE(review): PyCriticalSectionGuard is defined outside this view; if it wraps
// CPython critical sections, the lock may be temporarily released while the
// thread blocks -- confirm that is acceptable for the state it guards.
389+ #ifdef Py_GIL_DISABLED
390+ static PyMutex g_launch_mutex = {0 };
391+ #endif
392+
389393using LaunchHelperPtr = std::unique_ptr<LaunchHelper, LaunchHelperDeleter>;
390394
391395
@@ -1489,6 +1493,9 @@ struct CompareKey <Vec<PyTypeObject*>, Vec<PyPtr>> {
// State held by a TileContext object (obtained elsewhere via
// py_unwrap<TileContext>): a config object and an autotune cache, plus the
// static Python type descriptor.
14891493namespace { struct TileContext {
14901494 PyPtr config;
14911495 PyPtr autotune_cache;
1496+ #ifdef Py_GIL_DISABLED
// Per-context lock, present only in free-threaded builds. The config and
// autotune_cache accessors take it before touching the fields above.
1497+ PyMutex accessor_mutex = {0 };
1498+ #endif
14921499
14931500 static PyTypeObject pytype;
14941501}; }
@@ -1913,7 +1920,7 @@ static Result<CUstream> parse_stream(PyObject* py_stream) {
19131920
19141921using StreamBufferPoolMap = HashMap<unsigned long long , StreamBufferPool*>;
19151922
1916- // Protected by GIL.
1923+ // Protected by the GIL, or by g_launch_mutex in free-threaded builds (Py_GIL_DISABLED).
19171924// We have no reliable way to detect when a context is destroyed, so we never clean these up.
19181925static StreamBufferPoolMap* g_stream_buffer_pool_by_ctx_id;
19191926
@@ -2413,16 +2420,27 @@ static int TileContext_init(PyObject* self, PyObject* args, PyObject* kwargs) {
24132420
24142421
// Getter for the context's `config` field: returns a new reference to the
// stored object. `closure` is unused.
24152422static PyObject * TileContext_get_config (PyObject* self, void *closure) {
2416- return Py_NewRef (py_unwrap<TileContext>(self).config .get ());
2423+ TileContext& context = py_unwrap<TileContext>(self);
2424+ #ifdef Py_GIL_DISABLED
// Free-threaded builds only: take the per-context accessor_mutex before reading
// the field. PyCriticalSectionGuard is defined outside this view; presumably a
// scoped guard that unlocks when `guard` goes out of scope -- confirm.
2425+ PyCriticalSectionGuard guard (&context.accessor_mutex );
2426+ #endif
2427+ return Py_NewRef (context.config .get ());
24172428}
24182429
24192430
// Getter for the context's `autotune_cache` field: returns a new reference to
// the stored object. `closure` is unused.
24202431static PyObject * TileContext_get_autotune_cache (PyObject* self, void *closure) {
2421- return Py_NewRef (py_unwrap<TileContext>(self).autotune_cache .get ());
2432+ TileContext& context = py_unwrap<TileContext>(self);
2433+ #ifdef Py_GIL_DISABLED
// Free-threaded builds only: take the same accessor_mutex that
// TileContext_set_autotune_cache takes, so the read of the field and the
// setter's update are serialized against each other.
2434+ PyCriticalSectionGuard guard (&context.accessor_mutex );
2435+ #endif
2436+ return Py_NewRef (context.autotune_cache .get ());
24222437}
24232438
24242439static int TileContext_set_autotune_cache (PyObject* self, PyObject* value, void * closure) {
24252440 TileContext& context = py_unwrap<TileContext>(self);
2441+ #ifdef Py_GIL_DISABLED
2442+ PyCriticalSectionGuard guard (&context.accessor_mutex );
2443+ #endif
24262444
24272445 // `del ctx.autotune_cache` → set back to None
24282446 if (value == nullptr ) {
@@ -2490,6 +2508,9 @@ PyTypeObject TileDispatcher::pytype = {
24902508};
24912509
24922510static PyObject* get_parameter_constraints_from_pyargs (PyObject* self, PyObject* args) {
2511+ #ifdef Py_GIL_DISABLED
2512+ PyCriticalSectionGuard guard (&g_launch_mutex);
2513+ #endif
24932514 PyObject* dispatcher_pyobj = nullptr ;
24942515 PyObject* pyargs = nullptr ;
24952516 PyObject* cconv = nullptr ;
@@ -2683,6 +2704,9 @@ static Status parse_launch_args(PyObject* const* args, Py_ssize_t nargs, const c
26832704static PyObject* launch_impl (PyObject* const * args, Py_ssize_t nargs,
26842705 PyObject* kwargs, const char * signature, bool with_block
26852706 ) {
2707+ #ifdef Py_GIL_DISABLED
2708+ PyCriticalSectionGuard guard (&g_launch_mutex);
2709+ #endif
26862710 LaunchArgs launch_args;
26872711 if (!parse_launch_args (args, nargs, signature, with_block, &launch_args))
26882712 return nullptr ;
@@ -2723,6 +2747,9 @@ static PyObject *launch_extended(PyObject *, PyObject *const *args,
27232747#define BENCHMARK_SIGNATURE " _benchmark(stream, grid, kernel, pyargs_tuples, /)"
27242748
27252749static PyObject* cuda_tile_benchmark (PyObject* mod, PyObject* const * args, Py_ssize_t nargs) {
2750+ #ifdef Py_GIL_DISABLED
2751+ PyCriticalSectionGuard guard (&g_launch_mutex);
2752+ #endif
27262753 LaunchArgs launch_args;
27272754 if (!parse_launch_args (args, nargs, BENCHMARK_SIGNATURE , false , &launch_args))
27282755 return nullptr ;
0 commit comments