Skip to content

Commit da6e071

Browse files
b-passhenryiii
andauthored
Destruct internals during interpreter finalization (pybind#5958)
* Add a shutdown method to internals. shutdown can safely DECREF Python objects owned by the internals. * Actually free internals during interpreter shutdown (instead of after) * Make sure python is alive before DECREFing If something triggers internals to be created during finalization, it might end up being destroyed after finalization and we don't want to do the DECREF at that point, we need the leaky behavior. * make clang-tidy happy * Check IsFinalizing and use Py_CLEAR, make capsule creation safe if the capsule already exists. * oops, put TLS destructor back how it was. * Oops, proper spelling of unstable _Py_IsFinalizing * Add cleanup step to CI workflow Added a step to clean out unused files to save space during CI. * Accept suggested comment * Avoid recreating internals during type deallocation at shutdown. --------- Co-authored-by: Henry Schreiner <HenrySchreinerIII@gmail.com>
1 parent e44aae2 commit da6e071

3 files changed

Lines changed: 85 additions & 31 deletions

File tree

.github/workflows/ci.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -485,17 +485,23 @@ jobs:
485485
steps:
486486
- uses: actions/checkout@v6
487487

488+
- name: Clean out unused stuff to save space
489+
run: |
490+
sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc /opt/hostedtoolcache/CodeQL
491+
sudo apt-get clean
492+
488493
- name: Add NVHPC Repo
489494
run: |
490495
echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | \
491496
sudo tee /etc/apt/sources.list.d/nvhpc.list
492497
493498
- name: Install 🐍 3 & NVHPC
494499
run: |
495-
sudo apt-get update -y && \
496-
sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy && \
497-
sudo apt-get install -y --no-install-recommends nvhpc-25-11 && \
500+
sudo apt-get update -y
501+
sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy
502+
sudo apt-get install -y --no-install-recommends nvhpc-25-11
498503
sudo rm -rf /var/lib/apt/lists/*
504+
apt-cache depends nvhpc-25-11
499505
python3 -m pip install --upgrade pip
500506
python3 -m pip install --upgrade pytest
501507

include/pybind11/detail/class.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P
207207

208208
/// Cleanup the type-info for a pybind11-registered type.
209209
extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
210-
with_internals([obj](internals &internals) {
210+
with_internals_if_internals([obj](internals &internals) {
211211
auto *type = (PyTypeObject *) obj;
212212

213213
// A pybind11-registered type will:

include/pybind11/detail/internals.h

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class thread_specific_storage {
103103
// However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call
104104
// requires a living Python interpreter.
105105
#ifdef GRAALVM_PYTHON
106-
if (!Py_IsInitialized() || _Py_IsFinalizing()) {
106+
if (Py_IsInitialized() == 0 || _Py_IsFinalizing() != 0) {
107107
return;
108108
}
109109
#endif
@@ -195,6 +195,14 @@ struct override_hash {
195195

196196
using instance_map = std::unordered_multimap<const void *, instance *>;
197197

198+
inline bool is_interpreter_alive() {
199+
#if PY_VERSION_HEX < 0x030D0000
200+
return Py_IsInitialized() != 0 || _Py_IsFinalizing() != 0;
201+
#else
202+
return Py_IsInitialized() != 0 || Py_IsFinalizing() != 0;
203+
#endif
204+
}
205+
198206
#ifdef Py_GIL_DISABLED
199207
// Wrapper around PyMutex to provide BasicLockable semantics
200208
class pymutex {
@@ -308,7 +316,17 @@ struct internals {
308316
internals(internals &&other) = delete;
309317
internals &operator=(const internals &other) = delete;
310318
internals &operator=(internals &&other) = delete;
311-
~internals() = default;
319+
~internals() {
320+
// Normally this destructor runs during interpreter finalization and it may DECREF things.
321+
// In odd finalization scenarios it might end up running after the interpreter has
322+
// completely shut down, In that case, we should not decref these objects because pymalloc
323+
// is gone.
324+
if (is_interpreter_alive()) {
325+
Py_CLEAR(instance_base);
326+
Py_CLEAR(default_metaclass);
327+
Py_CLEAR(static_property_type);
328+
}
329+
}
312330
};
313331

314332
// the internals struct (above) is shared between all the modules. local_internals are only
@@ -325,6 +343,16 @@ struct local_internals {
325343

326344
std::forward_list<ExceptionTranslator> registered_exception_translators;
327345
PyTypeObject *function_record_py_type = nullptr;
346+
347+
~local_internals() {
348+
// Normally this destructor runs during interpreter finalization and it may DECREF things.
349+
// In odd finalization scenarios it might end up running after the interpreter has
350+
// completely shut down, In that case, we should not decref these objects because pymalloc
351+
// is gone.
352+
if (is_interpreter_alive()) {
353+
Py_CLEAR(function_record_py_type);
354+
}
355+
}
328356
};
329357

330358
enum class holder_enum_t : uint8_t {
@@ -569,7 +597,7 @@ inline object get_python_state_dict() {
569597
// The bool follows std::map::insert convention: true = created, false = existed.
570598
template <typename Payload>
571599
std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
572-
bool clear_destructor = false) {
600+
void (*dtor)(PyObject *) = nullptr) {
573601
error_scope err_scope; // preserve any existing Python error states
574602

575603
auto state_dict = reinterpret_borrow<dict>(get_python_state_dict());
@@ -586,16 +614,13 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
586614
// Use unique_ptr for exception safety: if capsule creation throws, the storage is
587615
// automatically deleted.
588616
auto storage_ptr = std::unique_ptr<Payload>(new Payload{});
589-
// Create capsule with destructor to clean up when the interpreter shuts down.
590-
auto new_capsule = capsule(
591-
storage_ptr.get(),
592-
// The destructor will be called when the capsule is GC'ed.
593-
// - If our capsule is inserted into the dict below, it will be kept alive until
594-
// interpreter shutdown, so the destructor will be called at that time.
595-
// - If our capsule is NOT inserted (another thread inserted first), it will be
596-
// destructed when going out of scope here, so the destructor will be called
597-
// immediately, which will also free the storage.
598-
/*destructor=*/[](void *ptr) -> void { delete static_cast<Payload *>(ptr); });
617+
auto new_capsule
618+
= capsule(storage_ptr.get(),
619+
// The destructor will be called when the capsule is GC'ed.
620+
// If the insert below fails (entry already in the dict), then this
621+
// destructor will be called on the newly created capsule at the end of this
622+
// function, and we want to just release this memory.
623+
/*destructor=*/[](void *v) { delete static_cast<Payload *>(v); });
599624
// At this point, the capsule object is created successfully.
600625
// Release the unique_ptr and let the capsule object own the storage to avoid double-free.
601626
(void) storage_ptr.release();
@@ -613,17 +638,16 @@ std::pair<Payload *, bool> atomic_get_or_create_in_state_dict(const char *key,
613638
throw error_already_set();
614639
}
615640
created = (capsule_obj == new_capsule.ptr());
616-
if (clear_destructor && created) {
617-
// Our capsule was inserted.
618-
// Remove the destructor to leak the storage on interpreter shutdown.
619-
if (PyCapsule_SetDestructor(capsule_obj, nullptr) < 0) {
641+
// - If key already existed, our `new_capsule` is not inserted, it will be destructed when
642+
// going out of scope here, and will call the destructor set above.
643+
// - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state
644+
// dict will incref it. We need to set the caller's destructor on it, which will be
645+
// called when the interpreter shuts down.
646+
if (created && dtor) {
647+
if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) {
620648
throw error_already_set();
621649
}
622650
}
623-
// - If key already existed, our `new_capsule` is not inserted, it will be destructed when
624-
// going out of scope here, which will also free the storage.
625-
// - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state
626-
// dict will incref it.
627651
}
628652

629653
// Get the storage pointer from the capsule.
@@ -707,14 +731,27 @@ class internals_pp_manager {
707731
internals_pp_manager(char const *id, on_fetch_function *on_fetch)
708732
: holder_id_(id), on_fetch_(on_fetch) {}
709733

734+
static void internals_shutdown(PyObject *capsule) {
735+
auto *pp = static_cast<std::unique_ptr<InternalsType> *>(
736+
PyCapsule_GetPointer(capsule, nullptr));
737+
if (pp) {
738+
pp->reset();
739+
}
740+
// We reset the unique_ptr's contents but cannot delete the unique_ptr itself here.
741+
// The pp_manager in this module (and possibly other modules sharing internals) holds
742+
// a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now.
743+
//
744+
// For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is
745+
// called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the
746+
// unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension
747+
// loaded into an external interpreter), destroy() is never called and the unique_ptr
748+
// shell (8 bytes, not its contents) is leaked.
749+
// (See PR #5958 for ideas to eliminate this leak.)
750+
}
751+
710752
std::unique_ptr<InternalsType> *get_or_create_pp_in_state_dict() {
711-
// The `unique_ptr<InternalsType>` output is leaked on interpreter shutdown. Once an
712-
// instance is created, it will never be deleted until the process exits (compare to
713-
// interpreter shutdown in multiple-interpreter scenarios).
714-
// Because we cannot guarantee the order of destruction of capsules in the interpreter
715-
// state dict, leaking avoids potential use-after-free issues during interpreter shutdown.
716753
auto result = atomic_get_or_create_in_state_dict<std::unique_ptr<InternalsType>>(
717-
holder_id_, /*clear_destructor=*/true);
754+
holder_id_, &internals_shutdown);
718755
auto *pp = result.first;
719756
bool created = result.second;
720757
// Only call on_fetch_ when fetching existing internals, not when creating new ones.
@@ -834,6 +871,17 @@ inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) {
834871
return cb(internals);
835872
}
836873

874+
template <typename F>
875+
inline void with_internals_if_internals(const F &cb) {
876+
auto &ppmgr = get_internals_pp_manager();
877+
auto &internals_ptr = *ppmgr.get_pp();
878+
if (internals_ptr) {
879+
auto &internals = *internals_ptr;
880+
PYBIND11_LOCK_INTERNALS(internals);
881+
cb(internals);
882+
}
883+
}
884+
837885
template <typename F>
838886
inline auto with_exception_translators(const F &cb)
839887
-> decltype(cb(get_internals().registered_exception_translators,

0 commit comments

Comments
 (0)