Skip to content

Commit e7754de

Browse files
XuehaiPanclaude
andauthored
Revert internals destruction and add test for internals recreation (#5972)
* Bump internals version * Prevent internals destruction before all pybind11 types are destroyed * Use Py_XINCREF and Py_XDECREF * Hold GIL before decref * Use weakrefs * Remove unused code * Move code location * Move code location * Move code location * Try add tests * Fix PYTHONPATH * Fix PYTHONPATH * Skip tests for subprocess * Revert to leak internals * Revert to leak internals * Revert "Revert to leak internals" This reverts commit c5ec1cf. This reverts commit 72c2e0a. * Revert internals version bump * Reapply to leak internals This reverts commit 8f25a25. * Add re-entrancy detection for internals creation Prevent re-creation of internals after destruction during interpreter shutdown. If pybind11 code runs after internals have been destroyed, fail early with a clear error message instead of silently creating new empty internals that would cause type lookup failures. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * Fix C++11/C++14 support * Add lock under multiple interpreters * Try fix tests * Try fix tests * Try fix tests * Update comments and assertion messages * Update comments and assertion messages * Update comments * Update lock scope * Use original pointer type for Windows * Change hard error to warning * Update lock scope * Update lock scope to resolve deadlock * Remove scope release of GIL * Update comments * Lock pp on reset * Mark content created after assignment * Update comments * Simplify implementation * Update lock scope when delete unique_ptr --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 0080cae commit e7754de

File tree

6 files changed

+276
-114
lines changed

6 files changed

+276
-114
lines changed

docs/advanced/classes.rst

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,11 +1381,22 @@ You can do that using ``py::custom_type_setup``:
13811381

13821382
.. code-block:: cpp
13831383
1384-
struct OwnsPythonObjects {
1385-
py::object value = py::none();
1384+
struct ContainerOwnsPythonObjects {
1385+
std::vector<py::object> list;
1386+
1387+
void append(const py::object &obj) { list.emplace_back(obj); }
1388+
py::object at(py::ssize_t index) const {
1389+
if (index >= size() || index < 0) {
1390+
throw py::index_error("Index out of range");
1391+
}
1392+
return list.at(py::size_t(index));
1393+
}
1394+
py::ssize_t size() const { return py::ssize_t_cast(list.size()); }
1395+
void clear() { list.clear(); }
13861396
};
1387-
py::class_<OwnsPythonObjects> cls(
1388-
m, "OwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
1397+
1398+
py::class_<ContainerOwnsPythonObjects> cls(
1399+
m, "ContainerOwnsPythonObjects", py::custom_type_setup([](PyHeapTypeObject *heap_type) {
13891400
auto *type = &heap_type->ht_type;
13901401
type->tp_flags |= Py_TPFLAGS_HAVE_GC;
13911402
type->tp_traverse = [](PyObject *self_base, visitproc visit, void *arg) {
@@ -1394,20 +1405,28 @@ You can do that using ``py::custom_type_setup``:
13941405
Py_VISIT(Py_TYPE(self_base));
13951406
#endif
13961407
if (py::detail::is_holder_constructed(self_base)) {
1397-
auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
1398-
Py_VISIT(self.value.ptr());
1408+
auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
1409+
for (auto &item : self.list) {
1410+
Py_VISIT(item.ptr());
1411+
}
13991412
}
14001413
return 0;
14011414
};
14021415
type->tp_clear = [](PyObject *self_base) {
14031416
if (py::detail::is_holder_constructed(self_base)) {
1404-
auto &self = py::cast<OwnsPythonObjects&>(py::handle(self_base));
1405-
self.value = py::none();
1417+
auto &self = py::cast<ContainerOwnsPythonObjects &>(py::handle(self_base));
1418+
for (auto &item : self.list) {
1419+
Py_CLEAR(item.ptr());
1420+
}
1421+
self.list.clear();
14061422
}
14071423
return 0;
14081424
};
14091425
}));
14101426
cls.def(py::init<>());
1411-
cls.def_readwrite("value", &OwnsPythonObjects::value);
1427+
cls.def("append", &ContainerOwnsPythonObjects::append);
1428+
cls.def("at", &ContainerOwnsPythonObjects::at);
1429+
cls.def("size", &ContainerOwnsPythonObjects::size);
1430+
cls.def("clear", &ContainerOwnsPythonObjects::clear);
14121431
14131432
.. versionadded:: 2.8

include/pybind11/detail/internals.h

Lines changed: 109 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -337,19 +337,7 @@ struct internals {
337337
internals(internals &&other) = delete;
338338
internals &operator=(const internals &other) = delete;
339339
internals &operator=(internals &&other) = delete;
340-
~internals() {
341-
// Normally this destructor runs during interpreter finalization and it may DECREF things.
342-
// In odd finalization scenarios it might end up running after the interpreter has
343-
// completely shut down, In that case, we should not decref these objects because pymalloc
344-
// is gone. This also applies across sub-interpreters, we should only DECREF when the
345-
// original owning interpreter is active.
346-
auto *cur_istate = get_interpreter_state_unchecked();
347-
if (cur_istate && cur_istate == istate) {
348-
Py_CLEAR(instance_base);
349-
Py_CLEAR(default_metaclass);
350-
Py_CLEAR(static_property_type);
351-
}
352-
}
340+
~internals() = default;
353341
};
354342

355343
// the internals struct (above) is shared between all the modules. local_internals are only
@@ -359,28 +347,13 @@ struct internals {
359347
// impact any other modules, because the only things accessing the local internals is the
360348
// module that contains them.
361349
struct local_internals {
362-
local_internals() : istate(get_interpreter_state_unchecked()) {}
363-
364350
// It should be safe to use fast_type_map here because this entire
365351
// data structure is scoped to our single module, and thus a single
366352
// DSO and single instance of type_info for any particular type.
367353
fast_type_map<type_info *> registered_types_cpp;
368354

369355
std::forward_list<ExceptionTranslator> registered_exception_translators;
370356
PyTypeObject *function_record_py_type = nullptr;
371-
PyInterpreterState *istate = nullptr;
372-
373-
~local_internals() {
374-
// Normally this destructor runs during interpreter finalization and it may DECREF things.
375-
// In odd finalization scenarios it might end up running after the interpreter has
376-
// completely shut down, In that case, we should not decref these objects because pymalloc
377-
// is gone. This also applies across sub-interpreters, we should only DECREF when the
378-
// original owning interpreter is active.
379-
auto *cur_istate = get_interpreter_state_unchecked();
380-
if (cur_istate && cur_istate == istate) {
381-
Py_CLEAR(function_record_py_type);
382-
}
383-
}
384357
};
385358

386359
enum class holder_enum_t : uint8_t {
@@ -576,6 +549,10 @@ inline void translate_local_exception(std::exception_ptr p) {
576549
}
577550
#endif
578551

552+
// Sentinel value for the `dtor` parameter of `atomic_get_or_create_in_state_dict`.
553+
// Indicates no destructor was explicitly provided (distinct from nullptr, which means "leak").
554+
#define PYBIND11_DTOR_USE_DELETE (reinterpret_cast<void (*)(PyObject *)>(1))
555+
579556
// Get or create per-storage capsule in the current interpreter's state dict.
580557
// - The storage is interpreter-dependent: different interpreters will have different storage.
581558
// This is important when using multiple-interpreters, to avoid sharing unshareable objects
@@ -592,9 +569,14 @@ inline void translate_local_exception(std::exception_ptr p) {
592569
//
593570
// Returns: pair of (pointer to storage, bool indicating if newly created).
594571
// The bool follows std::map::insert convention: true = created, false = existed.
572+
// `dtor`: optional destructor called when the interpreter shuts down.
573+
// - If not provided: the storage will be deleted using `delete`.
574+
// - If nullptr: the storage will be leaked (useful for singletons that outlive the interpreter).
575+
// - If a function: that function will be called with the capsule object.
595576
template <typename Payload>
596577
std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
597-
void (*dtor)(PyObject *) = nullptr) {
578+
void (*dtor)(PyObject *)
579+
= PYBIND11_DTOR_USE_DELETE) {
598580
error_scope err_scope; // preserve any existing Python error states
599581

600582
auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
@@ -640,7 +622,7 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
640622
// - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state
641623
// dict will incref it. We need to set the caller's destructor on it, which will be
642624
// called when the interpreter shuts down.
643-
if (created && dtor) {
625+
if (created && dtor != PYBIND11_DTOR_USE_DELETE) {
644626
if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) {
645627
throw error_already_set();
646628
}
@@ -657,6 +639,8 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
657639
return std::pair<Payload *, bool>(static_cast<Payload *>(raw_ptr), created);
658640
}
659641

642+
#undef PYBIND11_DTOR_USE_DELETE
643+
660644
template <typename InternalsType>
661645
class internals_pp_manager {
662646
public:
@@ -713,42 +697,75 @@ class internals_pp_manager {
713697
// this could be called without an active interpreter, just use what was cached
714698
if (!tstate || tstate->interp == last_istate_tls()) {
715699
auto tpp = internals_p_tls();
716-
717-
delete tpp;
700+
{
701+
std::lock_guard<std::mutex> lock(pp_set_mutex_);
702+
pps_have_created_content_.erase(tpp); // untrack deleted pp
703+
}
704+
delete tpp; // may call back into Python
718705
}
719706
unref();
720707
return;
721708
}
722709
#endif
723-
delete internals_singleton_pp_;
710+
{
711+
std::lock_guard<std::mutex> lock(pp_set_mutex_);
712+
pps_have_created_content_.erase(internals_singleton_pp_); // untrack deleted pp
713+
}
714+
delete internals_singleton_pp_; // may call back into Python
724715
unref();
725716
}
726717

727-
private:
728-
internals_pp_manager(char const *id, on_fetch_function *on_fetch)
729-
: holder_id_(id), on_fetch_(on_fetch) {}
718+
void create_pp_content_once(std::unique_ptr<InternalsType> *const pp) {
719+
// Assume the GIL is held here. May call back into Python. We cannot hold the lock with our
720+
// mutex here. So there may be multiple threads creating the content at the same time. Only
721+
// one will install its content to pp below. Others will be freed when going out of scope.
722+
auto tmp = std::unique_ptr<InternalsType>(new InternalsType());
730723

731-
static void internals_shutdown(PyObject *capsule) {
732-
auto *pp = static_cast<std::unique_ptr<InternalsType> *>(
733-
PyCapsule_GetPointer(capsule, nullptr));
734-
if (pp) {
735-
pp->reset();
724+
{
725+
// Lock scope must not include Python calls, which may require the GIL and cause
726+
// deadlocks.
727+
std::lock_guard<std::mutex> lock(pp_set_mutex_);
728+
729+
if (*pp) {
730+
// Already created in another thread.
731+
return;
732+
}
733+
734+
// At this point, pp->get() is nullptr.
735+
// The content is either not yet created, or was previously destroyed via pp->reset().
736+
737+
// Detect re-creation of internals after destruction during interpreter shutdown.
738+
// If pybind11 code (e.g., tp_traverse/tp_clear calling py::cast) runs after internals
739+
// have been destroyed, a new empty internals would be created, causing type lookup
740+
// failures. See also get_or_create_pp_in_state_dict() comments.
741+
if (pps_have_created_content_.find(pp) != pps_have_created_content_.end()) {
742+
pybind11_fail(
743+
"pybind11::detail::internals_pp_manager::create_pp_content_once() "
744+
"FAILED: reentrant call detected while fetching pybind11 internals!");
745+
}
746+
747+
// Each interpreter can only create its internals once.
748+
pps_have_created_content_.insert(pp);
749+
// Install the created content.
750+
pp->swap(tmp);
736751
}
737-
// We reset the unique_ptr's contents but cannot delete the unique_ptr itself here.
738-
// The pp_manager in this module (and possibly other modules sharing internals) holds
739-
// a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now.
740-
//
741-
// For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is
742-
// called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the
743-
// unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension
744-
// loaded into an external interpreter), destroy() is never called and the unique_ptr
745-
// shell (8 bytes, not its contents) is leaked.
746-
// (See PR #5958 for ideas to eliminate this leak.)
747752
}
748753

754+
private:
755+
internals_pp_manager(char const *id, on_fetch_function *on_fetch)
756+
: holder_id_(id), on_fetch_(on_fetch) {}
757+
749758
std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() {
759+
// The `unique_ptr<InternalsType>` is intentionally leaked on interpreter shutdown.
760+
// Once an instance is created, it will never be deleted until the process exits (compare
761+
// to interpreter shutdown in multiple-interpreter scenarios).
762+
// We cannot guarantee the destruction order of capsules in the interpreter state dict on
763+
// interpreter shutdown, so deleting internals too early could cause undefined behavior
764+
// when other pybind11 objects access `get_internals()` during finalization (which would
765+
// recreate empty internals). See also create_pp_content_once() above.
766+
// See https://github.com/pybind/pybind11/pull/5958#discussion_r2717645230.
750767
auto result = atomic_get_or_create_in_state_dict<std::unique_ptr<InternalsType>>(
751-
holder_id_, &internals_shutdown);
768+
holder_id_, /*dtor=*/nullptr /* leak the capsule content */);
752769
auto *pp = result.first;
753770
bool created = result.second;
754771
// Only call on_fetch_ when fetching existing internals, not when creating new ones.
@@ -774,7 +791,12 @@ class internals_pp_manager {
774791
on_fetch_function *on_fetch_ = nullptr;
775792
// Pointer-to-pointer to the singleton internals for the first seen interpreter (may not be the
776793
// main interpreter)
777-
std::unique_ptr<InternalsType> *internals_singleton_pp_;
794+
std::unique_ptr<InternalsType> *internals_singleton_pp_ = nullptr;
795+
796+
// Track pointer-to-pointers whose internals have been created, to detect re-entrancy.
797+
// Use instance member over static due to singleton pattern of this class.
798+
std::unordered_set<std::unique_ptr<InternalsType> *> pps_have_created_content_;
799+
std::mutex pp_set_mutex_;
778800
};
779801

780802
// If We loaded the internals through `state_dict`, our `error_already_set`
@@ -815,7 +837,8 @@ PYBIND11_NOINLINE internals &get_internals() {
815837
// Slow path, something needs fetched from the state dict or created
816838
gil_scoped_acquire_simple gil;
817839
error_scope err_scope;
818-
internals_ptr.reset(new internals());
840+
841+
ppmgr.create_pp_content_once(&internals_ptr);
819842

820843
if (!internals_ptr->instance_base) {
821844
// This calls get_internals, so cannot be called from within the internals constructor
@@ -826,6 +849,31 @@ PYBIND11_NOINLINE internals &get_internals() {
826849
return *internals_ptr;
827850
}
828851

852+
/// Return the PyObject* for the internals capsule (borrowed reference).
853+
/// Returns nullptr if the capsule doesn't exist yet.
854+
inline PyObject *get_internals_capsule() {
855+
auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
856+
return dict_getitemstring(state_dict.ptr(), PYBIND11_INTERNALS_ID);
857+
}
858+
859+
/// Return the key used for local_internals in the state dict.
860+
/// This function ensures a consistent key is used across all call sites within the same
861+
/// compilation unit. The key includes the address of a static variable to make it unique per
862+
/// module (DSO), matching the behavior of get_local_internals_pp_manager().
863+
inline const std::string &get_local_internals_key() {
864+
static const std::string key
865+
= PYBIND11_MODULE_LOCAL_ID + std::to_string(reinterpret_cast<uintptr_t>(&key));
866+
return key;
867+
}
868+
869+
/// Return the PyObject* for the local_internals capsule (borrowed reference).
870+
/// Returns nullptr if the capsule doesn't exist yet.
871+
inline PyObject *get_local_internals_capsule() {
872+
const auto &key = get_local_internals_key();
873+
auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
874+
return dict_getitemstring(state_dict.ptr(), key.c_str());
875+
}
876+
829877
inline void ensure_internals() {
830878
pybind11::detail::get_internals_pp_manager().unref();
831879
#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
@@ -837,20 +885,21 @@ inline void ensure_internals() {
837885
}
838886

839887
inline internals_pp_manager<local_internals> &get_local_internals_pp_manager() {
840-
// Use the address of this static itself as part of the key, so that the value is uniquely tied
888+
// Use the address of a static variable as part of the key, so that the value is uniquely tied
841889
// to where the module is loaded in memory
842-
static const std::string this_module_idstr
843-
= PYBIND11_MODULE_LOCAL_ID
844-
+ std::to_string(reinterpret_cast<uintptr_t>(&this_module_idstr));
845-
return internals_pp_manager<local_internals>::get_instance(this_module_idstr.c_str(), nullptr);
890+
return internals_pp_manager<local_internals>::get_instance(get_local_internals_key().c_str(),
891+
nullptr);
846892
}
847893

848894
/// Works like `get_internals`, but for things which are locally registered.
849895
inline local_internals &get_local_internals() {
850896
auto &ppmgr = get_local_internals_pp_manager();
851897
auto &internals_ptr = *ppmgr.get_pp();
852898
if (!internals_ptr) {
853-
internals_ptr.reset(new local_internals());
899+
gil_scoped_acquire_simple gil;
900+
error_scope err_scope;
901+
902+
ppmgr.create_pp_content_once(&internals_ptr);
854903
}
855904
return *internals_ptr;
856905
}

tests/env.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,31 @@
2929
or GRAALPY
3030
or (CPYTHON and PY_GIL_DISABLED and (3, 13) <= sys.version_info < (3, 14))
3131
)
32+
33+
34+
def check_script_success_in_subprocess(code: str, *, rerun: int = 8) -> None:
35+
"""Runs the given code in a subprocess."""
36+
import os
37+
import subprocess
38+
import sys
39+
import textwrap
40+
41+
code = textwrap.dedent(code).strip()
42+
try:
43+
for _ in range(rerun): # run flakily failing test multiple times
44+
subprocess.check_output(
45+
[sys.executable, "-c", code],
46+
cwd=os.getcwd(),
47+
stderr=subprocess.STDOUT,
48+
text=True,
49+
)
50+
except subprocess.CalledProcessError as ex:
51+
raise RuntimeError(
52+
f"Subprocess failed with exit code {ex.returncode}.\n\n"
53+
f"Code:\n"
54+
f"```python\n"
55+
f"{code}\n"
56+
f"```\n\n"
57+
f"Output:\n"
58+
f"{ex.output}"
59+
) from None

0 commit comments

Comments
 (0)