@@ -337,19 +337,7 @@ struct internals {
337337 internals (internals &&other) = delete;
338338 internals &operator =(const internals &other) = delete ;
339339 internals &operator =(internals &&other) = delete ;
340- ~internals () {
341- // Normally this destructor runs during interpreter finalization and it may DECREF things.
342- // In odd finalization scenarios it might end up running after the interpreter has
343- // completely shut down, In that case, we should not decref these objects because pymalloc
344- // is gone. This also applies across sub-interpreters, we should only DECREF when the
345- // original owning interpreter is active.
346- auto *cur_istate = get_interpreter_state_unchecked ();
347- if (cur_istate && cur_istate == istate) {
348- Py_CLEAR (instance_base);
349- Py_CLEAR (default_metaclass);
350- Py_CLEAR (static_property_type);
351- }
352- }
340+ ~internals () = default ;
353341};
354342
355343// the internals struct (above) is shared between all the modules. local_internals are only
@@ -359,28 +347,13 @@ struct internals {
359347// impact any other modules, because the only things accessing the local internals is the
360348// module that contains them.
361349struct local_internals {
362- local_internals () : istate(get_interpreter_state_unchecked()) {}
363-
364350 // It should be safe to use fast_type_map here because this entire
365351 // data structure is scoped to our single module, and thus a single
366352 // DSO and single instance of type_info for any particular type.
367353 fast_type_map<type_info *> registered_types_cpp;
368354
369355 std::forward_list<ExceptionTranslator> registered_exception_translators;
370356 PyTypeObject *function_record_py_type = nullptr ;
371- PyInterpreterState *istate = nullptr ;
372-
373- ~local_internals () {
374- // Normally this destructor runs during interpreter finalization and it may DECREF things.
375- // In odd finalization scenarios it might end up running after the interpreter has
376- // completely shut down, In that case, we should not decref these objects because pymalloc
377- // is gone. This also applies across sub-interpreters, we should only DECREF when the
378- // original owning interpreter is active.
379- auto *cur_istate = get_interpreter_state_unchecked ();
380- if (cur_istate && cur_istate == istate) {
381- Py_CLEAR (function_record_py_type);
382- }
383- }
384357};
385358
386359enum class holder_enum_t : uint8_t {
@@ -576,6 +549,10 @@ inline void translate_local_exception(std::exception_ptr p) {
576549}
577550#endif
578551
552+ // Sentinel value for the `dtor` parameter of `atomic_get_or_create_in_state_dict`.
553+ // Indicates no destructor was explicitly provided (distinct from nullptr, which means "leak").
554+ #define PYBIND11_DTOR_USE_DELETE (reinterpret_cast <void (*)(PyObject *)>(1 ))
555+
579556// Get or create per-storage capsule in the current interpreter's state dict.
580557// - The storage is interpreter-dependent: different interpreters will have different storage.
581558// This is important when using multiple-interpreters, to avoid sharing unshareable objects
@@ -592,9 +569,14 @@ inline void translate_local_exception(std::exception_ptr p) {
592569//
593570// Returns: pair of (pointer to storage, bool indicating if newly created).
594571// The bool follows std::map::insert convention: true = created, false = existed.
572+ // `dtor`: optional destructor called when the interpreter shuts down.
573+ // - If not provided: the storage will be deleted using `delete`.
574+ // - If nullptr: the storage will be leaked (useful for singletons that outlive the interpreter).
575+ // - If a function: that function will be called with the capsule object.
595576template <typename Payload>
596577std::pair<Payload *, bool > atomic_get_or_create_in_state_dict (const char *key,
597- void (*dtor)(PyObject *) = nullptr) {
578+ void (*dtor)(PyObject *)
579+ = PYBIND11_DTOR_USE_DELETE) {
598580 error_scope err_scope; // preserve any existing Python error states
599581
600582 auto state_dict = reinterpret_borrow<dict>(get_python_state_dict ());
@@ -640,7 +622,7 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
640622 // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state
641623 // dict will incref it. We need to set the caller's destructor on it, which will be
642624 // called when the interpreter shuts down.
643- if (created && dtor) {
625+ if (created && dtor != PYBIND11_DTOR_USE_DELETE ) {
644626 if (PyCapsule_SetDestructor (capsule_obj, dtor) < 0 ) {
645627 throw error_already_set ();
646628 }
@@ -657,6 +639,8 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
657639 return std::pair<Payload *, bool >(static_cast <Payload *>(raw_ptr), created);
658640}
659641
642+ #undef PYBIND11_DTOR_USE_DELETE
643+
660644template <typename InternalsType>
661645class internals_pp_manager {
662646public:
@@ -713,42 +697,75 @@ class internals_pp_manager {
713697 // this could be called without an active interpreter, just use what was cached
714698 if (!tstate || tstate->interp == last_istate_tls ()) {
715699 auto tpp = internals_p_tls ();
716-
717- delete tpp;
700+ {
701+ std::lock_guard<std::mutex> lock (pp_set_mutex_);
702+ pps_have_created_content_.erase (tpp); // untrack deleted pp
703+ }
704+ delete tpp; // may call back into Python
718705 }
719706 unref ();
720707 return ;
721708 }
722709#endif
723- delete internals_singleton_pp_;
710+ {
711+ std::lock_guard<std::mutex> lock (pp_set_mutex_);
712+ pps_have_created_content_.erase (internals_singleton_pp_); // untrack deleted pp
713+ }
714+ delete internals_singleton_pp_; // may call back into Python
724715 unref ();
725716 }
726717
727- private:
728- internals_pp_manager (char const *id, on_fetch_function *on_fetch)
729- : holder_id_(id), on_fetch_(on_fetch) {}
718+ void create_pp_content_once (std::unique_ptr<InternalsType> *const pp) {
719+ // Assume the GIL is held here. May call back into Python. We cannot hold the lock with our
720+ // mutex here. So there may be multiple threads creating the content at the same time. Only
721+ // one will install its content to pp below. Others will be freed when going out of scope.
722+ auto tmp = std::unique_ptr<InternalsType>(new InternalsType ());
730723
731- static void internals_shutdown (PyObject *capsule) {
732- auto *pp = static_cast <std::unique_ptr<InternalsType> *>(
733- PyCapsule_GetPointer (capsule, nullptr ));
734- if (pp) {
735- pp->reset ();
724+ {
725+ // Lock scope must not include Python calls, which may require the GIL and cause
726+ // deadlocks.
727+ std::lock_guard<std::mutex> lock (pp_set_mutex_);
728+
729+ if (*pp) {
730+ // Already created in another thread.
731+ return ;
732+ }
733+
734+ // At this point, pp->get() is nullptr.
735+ // The content is either not yet created, or was previously destroyed via pp->reset().
736+
737+ // Detect re-creation of internals after destruction during interpreter shutdown.
738+ // If pybind11 code (e.g., tp_traverse/tp_clear calling py::cast) runs after internals
739+ // have been destroyed, a new empty internals would be created, causing type lookup
740+ // failures. See also get_or_create_pp_in_state_dict() comments.
741+ if (pps_have_created_content_.find (pp) != pps_have_created_content_.end ()) {
742+ pybind11_fail (
743+ " pybind11::detail::internals_pp_manager::create_pp_content_once() "
744+ " FAILED: reentrant call detected while fetching pybind11 internals!" );
745+ }
746+
747+ // Each interpreter can only create its internals once.
748+ pps_have_created_content_.insert (pp);
749+ // Install the created content.
750+ pp->swap (tmp);
736751 }
737- // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here.
738- // The pp_manager in this module (and possibly other modules sharing internals) holds
739- // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now.
740- //
741- // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is
742- // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the
743- // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension
744- // loaded into an external interpreter), destroy() is never called and the unique_ptr
745- // shell (8 bytes, not its contents) is leaked.
746- // (See PR #5958 for ideas to eliminate this leak.)
747752 }
748753
754+ private:
755+ internals_pp_manager (char const *id, on_fetch_function *on_fetch)
756+ : holder_id_(id), on_fetch_(on_fetch) {}
757+
749758 std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict () {
759+ // The `unique_ptr<InternalsType>` is intentionally leaked on interpreter shutdown.
760+ // Once an instance is created, it will never be deleted until the process exits (compare
761+ // to interpreter shutdown in multiple-interpreter scenarios).
762+ // We cannot guarantee the destruction order of capsules in the interpreter state dict on
763+ // interpreter shutdown, so deleting internals too early could cause undefined behavior
764+ // when other pybind11 objects access `get_internals()` during finalization (which would
765+ // recreate empty internals). See also create_pp_content_once() above.
766+ // See https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230.
750767 auto result = atomic_get_or_create_in_state_dict<std::unique_ptr<InternalsType>>(
751- holder_id_, &internals_shutdown );
768+ holder_id_, /* dtor= */ nullptr /* leak the capsule content */ );
752769 auto *pp = result.first ;
753770 bool created = result.second ;
754771 // Only call on_fetch_ when fetching existing internals, not when creating new ones.
@@ -774,7 +791,12 @@ class internals_pp_manager {
774791 on_fetch_function *on_fetch_ = nullptr ;
775792 // Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the
776793 // main interpreter)
777- std::unique_ptr<InternalsType> *internals_singleton_pp_;
794+ std::unique_ptr<InternalsType> *internals_singleton_pp_ = nullptr ;
795+
796+ // Track pointer-to-pointers whose internals have been created, to detect re-entrancy.
797+ // Use instance member over static due to singleton pattern of this class.
798+ std::unordered_set<std::unique_ptr<InternalsType> *> pps_have_created_content_;
799+ std::mutex pp_set_mutex_;
778800};
779801
780802// If We loaded the internals through `state_dict`, our `error_already_set`
@@ -815,7 +837,8 @@ PYBIND11_NOINLINE internals &get_internals() {
815837 // Slow path, something needs fetched from the state dict or created
816838 gil_scoped_acquire_simple gil;
817839 error_scope err_scope;
818- internals_ptr.reset (new internals ());
840+
841+ ppmgr.create_pp_content_once (&internals_ptr);
819842
820843 if (!internals_ptr->instance_base ) {
821844 // This calls get_internals, so cannot be called from within the internals constructor
@@ -826,6 +849,31 @@ PYBIND11_NOINLINE internals &get_internals() {
826849 return *internals_ptr;
827850}
828851
852+ // / Return the PyObject* for the internals capsule (borrowed reference).
853+ // / Returns nullptr if the capsule doesn't exist yet.
854+ inline PyObject *get_internals_capsule () {
855+ auto state_dict = reinterpret_borrow<dict>(get_python_state_dict ());
856+ return dict_getitemstring (state_dict.ptr (), PYBIND11_INTERNALS_ID);
857+ }
858+
859+ // / Return the key used for local_internals in the state dict.
860+ // / This function ensures a consistent key is used across all call sites within the same
861+ // / compilation unit. The key includes the address of a static variable to make it unique per
862+ // / module (DSO), matching the behavior of get_local_internals_pp_manager().
863+ inline const std::string &get_local_internals_key () {
864+ static const std::string key
865+ = PYBIND11_MODULE_LOCAL_ID + std::to_string (reinterpret_cast <uintptr_t >(&key));
866+ return key;
867+ }
868+
869+ // / Return the PyObject* for the local_internals capsule (borrowed reference).
870+ // / Returns nullptr if the capsule doesn't exist yet.
871+ inline PyObject *get_local_internals_capsule () {
872+ const auto &key = get_local_internals_key ();
873+ auto state_dict = reinterpret_borrow<dict>(get_python_state_dict ());
874+ return dict_getitemstring (state_dict.ptr (), key.c_str ());
875+ }
876+
829877inline void ensure_internals () {
830878 pybind11::detail::get_internals_pp_manager ().unref ();
831879#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
@@ -837,20 +885,21 @@ inline void ensure_internals() {
837885}
838886
839887inline internals_pp_manager<local_internals> &get_local_internals_pp_manager () {
840- // Use the address of this static itself as part of the key, so that the value is uniquely tied
888+ // Use the address of a static variable as part of the key, so that the value is uniquely tied
841889 // to where the module is loaded in memory
842- static const std::string this_module_idstr
843- = PYBIND11_MODULE_LOCAL_ID
844- + std::to_string (reinterpret_cast <uintptr_t >(&this_module_idstr));
845- return internals_pp_manager<local_internals>::get_instance (this_module_idstr.c_str (), nullptr );
890+ return internals_pp_manager<local_internals>::get_instance (get_local_internals_key ().c_str (),
891+ nullptr );
846892}
847893
848894// / Works like `get_internals`, but for things which are locally registered.
849895inline local_internals &get_local_internals () {
850896 auto &ppmgr = get_local_internals_pp_manager ();
851897 auto &internals_ptr = *ppmgr.get_pp ();
852898 if (!internals_ptr) {
853- internals_ptr.reset (new local_internals ());
899+ gil_scoped_acquire_simple gil;
900+ error_scope err_scope;
901+
902+ ppmgr.create_pp_content_once (&internals_ptr);
854903 }
855904 return *internals_ptr;
856905}
0 commit comments