1414#include " gil.h"
1515
1616#include < stdexcept>
17+ #include < unordered_map>
1718
1819#ifndef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
1920# error "This platform does not support subinterpreters, do not include this file."
2021#endif
2122
2223PYBIND11_NAMESPACE_BEGIN (PYBIND11_NAMESPACE)
2324
25+ PYBIND11_NAMESPACE_BEGIN(detail)
26+
27+ // / OS-thread-local cache mapping a target interpreter to the PyThreadState that was created for
28+ // / it *on the current OS thread*. Used by subinterpreter_scoped_activate when
29+ // / subinterpreter_thread_state::cached is requested, so that repeatedly entering the same
30+ // / interpreter from the same OS thread reuses one PyThreadState (swapped in/out) instead of
31+ // / allocating and destroying a fresh one each time.
32+ // /
33+ // / The values are raw, non-owning pointers. At thread exit the map's destructor only frees its
34+ // / own nodes; it deliberately does NOT touch the Python C API (which may be unusable at that
35+ // / point), so a cached PyThreadState is leaked unless the owning thread first calls
36+ // / subinterpreter::release_cached_thread_state() or
37+ // / subinterpreter::release_all_cached_thread_states().
38+ inline std::unordered_map<PyInterpreterState *, PyThreadState *> &
39+ subinterpreter_thread_state_cache() {
40+ thread_local std::unordered_map<PyInterpreterState *, PyThreadState *> cache;
41+ return cache;
42+ }
43+
44+ PYBIND11_NAMESPACE_END (detail)
45+
/// Policy that tells subinterpreter_scoped_activate where to get a PyThreadState from when the
/// calling OS thread is not already running the target interpreter.
enum class subinterpreter_thread_state {
    /// Default / legacy behavior: the scope creates a fresh PyThreadState on activation and
    /// destroys it again on exit.
    transient = 0,
    /// The scope reuses the PyThreadState kept in OS-thread-local storage for the target
    /// interpreter, creating and caching one on first use. The scope only swaps that state in
    /// and out; it never destroys it. Destroying it is the owning thread's job, through
    /// subinterpreter::release_cached_thread_state() or
    /// subinterpreter::release_all_cached_thread_states(); see those functions for the
    /// preconditions.
    cached = 1
};
60+
2461class subinterpreter ;
2562
/// Activate the subinterpreter and acquire its GIL, while also releasing any GIL and interpreter
/// currently held. Upon exiting the scope, the previous subinterpreter (if any) and its
/// associated GIL are restored to the state they were in before the scope was entered.
2966class subinterpreter_scoped_activate {
3067public:
31- explicit subinterpreter_scoped_activate (subinterpreter const &si);
68+ explicit subinterpreter_scoped_activate (
69+ subinterpreter const &si,
70+ subinterpreter_thread_state ts_policy = subinterpreter_thread_state::transient);
3271 ~subinterpreter_scoped_activate ();
3372
3473 subinterpreter_scoped_activate (subinterpreter_scoped_activate &&) = delete ;
@@ -41,6 +80,9 @@ class subinterpreter_scoped_activate {
4180 PyThreadState *tstate_ = nullptr ;
4281 PyGILState_STATE gil_state_;
4382 bool simple_gil_ = false ;
83+ // When true, tstate_ is owned by the OS-thread-local cache and must NOT be destroyed when
84+ // this scope exits (only swapped out).
85+ bool cached_ = false ;
4486};
4587
4688// / Holds a Python subinterpreter instance
@@ -216,6 +258,26 @@ class subinterpreter {
216258 // / Get the interpreter's state dict. This interpreter's GIL must be held before calling!
217259 dict state_dict () { return reinterpret_borrow<dict>(PyInterpreterState_GetDict (istate_)); }
218260
261+ // / Destroy the PyThreadState (if any) that subinterpreter_thread_state::cached created for
262+ // / THIS interpreter on the CURRENT OS thread, and drop it from that thread's cache.
263+ // /
264+ // / Call this on the same OS thread that activated the interpreter, while this subinterpreter
265+ // / is still alive, and while no subinterpreter_scoped_activate scope for it is active on this
266+ // / thread. It is a no-op if this thread has no cached state for this interpreter. The caller
267+ // / need not hold any GIL: the cached state is briefly swapped in (acquiring this interpreter's
268+ // / GIL) to be cleared and deleted, then whatever was active before is restored.
269+ void release_cached_thread_state () const ;
270+
271+ // / Destroy every cached PyThreadState that was created on the CURRENT OS thread (for any
272+ // / interpreter) and clear this thread's cache. Intended as an end-of-thread cleanup hook for
273+ // / embedder worker threads.
274+ // /
275+ // / Every interpreter that still has a cached state on this thread MUST still be alive when
276+ // / this is called (deleting a PyThreadState whose interpreter was already finalized is
277+ // / undefined behavior). Must be called on the OS thread that owns the cache, with no
278+ // / subinterpreter_scoped_activate scope using a cached state active on this thread.
279+ static void release_all_cached_thread_states ();
280+
219281 // / abandon cleanup of this subinterpreter (leak it). this might be needed during
220282 // / finalization...
221283 void disarm () { creation_tstate_ = nullptr ; }
@@ -244,7 +306,8 @@ class scoped_subinterpreter {
244306 subinterpreter_scoped_activate scope_;
245307};
246308
247- inline subinterpreter_scoped_activate::subinterpreter_scoped_activate (subinterpreter const &si) {
309+ inline subinterpreter_scoped_activate::subinterpreter_scoped_activate (
310+ subinterpreter const &si, subinterpreter_thread_state ts_policy) {
248311 if (!si.istate_ ) {
249312 pybind11_fail (" null subinterpreter" );
250313 }
@@ -256,9 +319,25 @@ inline subinterpreter_scoped_activate::subinterpreter_scoped_activate(subinterpr
256319 return ;
257320 }
258321
259- // we can't really interact with the interpreter at all until we switch to it
260- // not even to, for example, look in its state dict or touch its internals
261- tstate_ = PyThreadState_New (si.istate_ );
322+ if (ts_policy == subinterpreter_thread_state::cached) {
323+ // Reuse a PyThreadState held in this OS thread's cache, or create one and cache it.
324+ // This preserves PyThreadState identity (and its per-thread interpreter state) across
325+ // repeated activations of the same interpreter from the same OS thread, instead of
326+ // creating and destroying a fresh state every time.
327+ auto &cache = detail::subinterpreter_thread_state_cache ();
328+ auto it = cache.find (si.istate_ );
329+ if (it != cache.end ()) {
330+ tstate_ = it->second ;
331+ } else {
332+ tstate_ = PyThreadState_New (si.istate_ );
333+ cache.emplace (si.istate_ , tstate_);
334+ }
335+ cached_ = true ;
336+ } else {
337+ // we can't really interact with the interpreter at all until we switch to it
338+ // not even to, for example, look in its state dict or touch its internals
339+ tstate_ = PyThreadState_New (si.istate_ );
340+ }
262341
263342 // make the interpreter active and acquire the GIL
264343 old_tstate_ = PyThreadState_Swap (tstate_);
@@ -279,13 +358,51 @@ inline subinterpreter_scoped_activate::~subinterpreter_scoped_activate() {
279358 }
280359#endif
281360 detail::get_internals ().tstate .reset ();
282- PyThreadState_Clear (tstate_);
283- PyThreadState_DeleteCurrent ();
361+ if (!cached_) {
362+ PyThreadState_Clear (tstate_);
363+ PyThreadState_DeleteCurrent ();
364+ }
365+ // When cached_, tstate_ stays alive in the OS-thread-local cache for reuse; the
366+ // PyThreadState_Swap below merely detaches it from this thread.
284367 }
285368
286369 // Go back the previous interpreter (if any) and acquire THAT gil
287370 PyThreadState_Swap (old_tstate_);
288371 }
289372}
290373
374+ inline void subinterpreter::release_cached_thread_state () const {
375+ if (istate_ == nullptr ) {
376+ return ;
377+ }
378+ auto &cache = detail::subinterpreter_thread_state_cache ();
379+ auto it = cache.find (istate_);
380+ if (it == cache.end ()) {
381+ return ;
382+ }
383+ PyThreadState *cached = it->second ;
384+ cache.erase (it);
385+
386+ // Make the cached state current (acquiring this interpreter's GIL) so it can be cleared and
387+ // destroyed on the OS thread that created it, then restore whatever was active before.
388+ PyThreadState *prev = PyThreadState_Swap (cached);
389+ PyThreadState_Clear (cached);
390+ PyThreadState_DeleteCurrent ();
391+ PyThreadState_Swap (prev);
392+ }
393+
394+ inline void subinterpreter::release_all_cached_thread_states () {
395+ auto &cache = detail::subinterpreter_thread_state_cache ();
396+ for (auto const &entry : cache) {
397+ PyThreadState *cached = entry.second ;
398+ // prev is the state active before this swap; it is restored after each deletion, so it is
399+ // never one of the cached states being destroyed here.
400+ PyThreadState *prev = PyThreadState_Swap (cached);
401+ PyThreadState_Clear (cached);
402+ PyThreadState_DeleteCurrent ();
403+ PyThreadState_Swap (prev);
404+ }
405+ cache.clear ();
406+ }
407+
291408PYBIND11_NAMESPACE_END (PYBIND11_NAMESPACE)
0 commit comments