Skip to content

Commit de89591

Browse files
authored
Make wrapped C++ functions pickleable (#30099)
* Add `test_*_repr()` to test behavior with different Python versions. * Adjust expected repr for PyPy * Adjust another expected repr for PyPy * Try again: undo mistaken adjustment for PyPy * Give up on test_pytypes test_capsule_with_name_repr (not sufficiently important); PyPy still generates 2 different kinds of errors: test_print failure on macOS with Python 3.8; Python 3.9, 3.10 have no leading `<` * `_wrapped_simple_callable` proof of concept * Add `module_::def_as_native()` * Resolve PyPy `TypeError: cannot create weak reference to builtin_function_or_method object` * Replace `PyCapsule` with `function_record_PyObject`. * function_record_PyTypeObject: Replace C++20 designated initializers with full list of values. * Introduce `PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID` and use along with `PYBIND11_PLATFORM_ABI_ID_V4` to version `function_record_PyTypeObject` `tp_name` * Move `std::once_flag` out of `inline` function (in hopes that that fixes flaky behavior of test_gil_scoped.py). IncludeCleaner fixes. * `tp_vectorcall` was introduced only with Python 3.8 * clang-tidy auto-fixes * Disable `-Wmissing-field-initializers`. Guard `PyType_Ready(&function_record_PyTypeObject)` also with a simple `static bool first_call` * Give up on the `std::call_once` idea, for Python 3.6 compatibility (it works with all other Python versions). Instead call `function_record_PyTypeObject_PyType_Ready()` from `get_internals()`. * Add `__reduce_ex__` to `function_record_PyTypeObject`. Add `_pybind11_detail_function_record_import_helper` (proof of concept). * Move `function_record_PyTypeObject_PyType_Ready()` call in `get_internals()` so that it is always called when `get_internals()` is called the first time. 
* gcc 4.8.5 and 7.5.0 reject `PYBIND11_WARNING_DISABLE_GCC("-Wmissing-field-initializers")` * `function_record_PyTypeObject_PyType_Ready()`, `get_pybind11_detail_function_record_pickle_helper()` call-once initializations triggered from `cpp_function::initialize_generic()` * gcc 4.8.5 and 7.5.0 reject `PYBIND11_WARNING_DISABLE_GCC("-Wcast-function-type")` * Python 3.6, 3.7: Skip `get_pybind11_detail_function_record_pickle_helper()` call-once initialization triggered from `cpp_function::initialize_generic()` * New version of `_function_record_pickle_helper`, using `collections.namedtuple` * Explicit `str(tup_obj[1])` to fix 🐍 3 • centos:7 • x64 segfault * Factor out detail/function_record_pyobject.h * Use PYBIND11_NAMESPACE_BEGIN/END for function_record_PyTypeObject_methods * Factor out function_record_PyTypeObject_methods::tp_name_impl, mainly to stop clang-format from breaking up a string literal. * Simplify implementation of UNEXPECTED CALL functions. * Factor out `detail::get_scope_module()` * IncludeCleaner fixes (Google toolchain). * Comment out unreachable code (to resolve MSVC Werrors). * Use built-in `eval()` instead of `function_record_pickle_helper()` Much simpler! (Note that the `function_record_pickle_helper()` code is NOT removed in this commit.) This approach was discovered in an attempt to solve the problem that stubgen picks up `_function_record_pickle_helper_v1`. For example (tensorflow_text/core/pybinds/tflite_registrar.pyi): ```diff +from typing import Any + +def _function_record_pickle_helper_v1(*args, **kwargs) -> Any: ... ``` * Remove `function_record_pickle_helper()` * Mark `internals::function_record_capsule_name` as OBSOLETE. * Add comment pointing to #30099 * Archive experimental code from video meet with @rainwoodman 2024-02-15 * Add a pickle roundtrip test starting with `m.simple_callable.__self__` and a long comment to explain the unusual behavior. 
* PyPy does not have `m.simple_callable.__self__` * Change "UNUSUAL" comment as suggested by @rainwoodman (only very slightly differently than suggested).
1 parent f468b2c commit de89591

10 files changed

Lines changed: 290 additions & 64 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ set(PYBIND11_HEADERS
142142
include/pybind11/detail/cross_extension_shared_state.h
143143
include/pybind11/detail/descr.h
144144
include/pybind11/detail/dynamic_raw_ptr_cast_if_possible.h
145+
include/pybind11/detail/function_record_pyobject.h
145146
include/pybind11/detail/init.h
146147
include/pybind11/detail/internals.h
147148
include/pybind11/detail/native_enum_data.h

include/pybind11/attr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ struct argument_record {
201201

202202
/// Internal data structure which holds metadata about a bound function (signature, overloads,
203203
/// etc.)
204+
#define PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID "v1" // PLEASE UPDATE if the struct is changed.
204205
struct function_record {
205206
function_record()
206207
: is_constructor(false), is_new_style_constructor(false), is_stateless(false),
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
// Copyright (c) 2024 The Pybind Development Team.
2+
// All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
// For background see the description of PR google/pywrapcc#30099.
6+
7+
#pragma once
8+
9+
#include "../attr.h"
10+
#include "../pytypes.h"
11+
#include "common.h"
12+
13+
#include <cstring>
14+
15+
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
16+
PYBIND11_NAMESPACE_BEGIN(detail)
17+
18+
// Python object layout that carries a pointer to a C++ `function_record`.
// `PyObject_HEAD` must stay first so the struct can be cast to and from `PyObject *`.
struct function_record_PyObject {
19+
PyObject_HEAD
20+
function_record *cpp_func_rec; // Set to nullptr by function_record_PyObject_New().
21+
};
22+
23+
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
24+
25+
PyObject *tp_new_impl(PyTypeObject *type, PyObject *args, PyObject *kwds);
26+
PyObject *tp_alloc_impl(PyTypeObject *type, Py_ssize_t nitems);
27+
int tp_init_impl(PyObject *self, PyObject *args, PyObject *kwds);
28+
void tp_dealloc_impl(PyObject *self);
29+
void tp_free_impl(void *self);
30+
31+
static PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *);
32+
33+
PYBIND11_WARNING_PUSH
34+
#if defined(__GNUC__) && __GNUC__ >= 8
35+
PYBIND11_WARNING_DISABLE_GCC("-Wcast-function-type")
36+
#endif
37+
// Method table for function_record_PyTypeObject. The only exposed method is `__reduce_ex__`;
// the all-null entry terminates the table. `reduce_ex_impl` takes three `PyObject *` arguments
// (METH_VARARGS | METH_KEYWORDS), hence the explicit cast to `PyCFunction`.
static PyMethodDef tp_methods_impl[]
38+
= {{"__reduce_ex__", (PyCFunction) reduce_ex_impl, METH_VARARGS | METH_KEYWORDS, nullptr},
39+
{nullptr, nullptr, 0, nullptr}};
40+
PYBIND11_WARNING_POP
41+
42+
// Note that this name is versioned: it is built from a fixed prefix followed by
// PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID and PYBIND11_PLATFORM_ABI_ID_V4.
// It is used as the `tp_name` of function_record_PyTypeObject.
43+
constexpr char tp_name_impl[]
44+
= "pybind11_detail_function_record_" PYBIND11_DETAIL_FUNCTION_RECORD_ABI_ID
45+
"_" PYBIND11_PLATFORM_ABI_ID_V4;
46+
47+
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
48+
49+
// Designated initializers are a C++20 feature:
50+
// https://en.cppreference.com/w/cpp/language/aggregate_initialization#Designated_initializers
51+
// MSVC rejects them unless /std:c++20 is used (error code C7555).
52+
PYBIND11_WARNING_PUSH
53+
PYBIND11_WARNING_DISABLE_CLANG("-Wmissing-field-initializers")
54+
#if defined(__GNUC__) && __GNUC__ >= 8
55+
PYBIND11_WARNING_DISABLE_GCC("-Wmissing-field-initializers")
56+
#endif
57+
// Type object for function_record_PyObject, written as a positional initializer list (one
// value per PyTypeObject field, in declaration order; each entry is labelled with the field
// it fills). Populated slots: tp_name, tp_basicsize, tp_dealloc, tp_flags, tp_methods, and
// the tp_init / tp_alloc / tp_new / tp_free guard functions. Everything else is null or zero.
// The trailing tp_vectorcall entry is only emitted for Python 3.8 and newer.
static PyTypeObject function_record_PyTypeObject = {
58+
PyVarObject_HEAD_INIT(nullptr, 0)
59+
/* const char *tp_name */ function_record_PyTypeObject_methods::tp_name_impl,
60+
/* Py_ssize_t tp_basicsize */ sizeof(function_record_PyObject),
61+
/* Py_ssize_t tp_itemsize */ 0,
62+
/* destructor tp_dealloc */ function_record_PyTypeObject_methods::tp_dealloc_impl,
63+
/* Py_ssize_t tp_vectorcall_offset */ 0,
64+
/* getattrfunc tp_getattr */ nullptr,
65+
/* setattrfunc tp_setattr */ nullptr,
66+
/* PyAsyncMethods *tp_as_async */ nullptr,
67+
/* reprfunc tp_repr */ nullptr,
68+
/* PyNumberMethods *tp_as_number */ nullptr,
69+
/* PySequenceMethods *tp_as_sequence */ nullptr,
70+
/* PyMappingMethods *tp_as_mapping */ nullptr,
71+
/* hashfunc tp_hash */ nullptr,
72+
/* ternaryfunc tp_call */ nullptr,
73+
/* reprfunc tp_str */ nullptr,
74+
/* getattrofunc tp_getattro */ nullptr,
75+
/* setattrofunc tp_setattro */ nullptr,
76+
/* PyBufferProcs *tp_as_buffer */ nullptr,
77+
/* unsigned long tp_flags */ Py_TPFLAGS_DEFAULT,
78+
/* const char *tp_doc */ nullptr,
79+
/* traverseproc tp_traverse */ nullptr,
80+
/* inquiry tp_clear */ nullptr,
81+
/* richcmpfunc tp_richcompare */ nullptr,
82+
/* Py_ssize_t tp_weaklistoffset */ 0,
83+
/* getiterfunc tp_iter */ nullptr,
84+
/* iternextfunc tp_iternext */ nullptr,
85+
/* struct PyMethodDef *tp_methods */ function_record_PyTypeObject_methods::tp_methods_impl,
86+
/* struct PyMemberDef *tp_members */ nullptr,
87+
/* struct PyGetSetDef *tp_getset */ nullptr,
88+
/* struct _typeobject *tp_base */ nullptr,
89+
/* PyObject *tp_dict */ nullptr,
90+
/* descrgetfunc tp_descr_get */ nullptr,
91+
/* descrsetfunc tp_descr_set */ nullptr,
92+
/* Py_ssize_t tp_dictoffset */ 0,
93+
/* initproc tp_init */ function_record_PyTypeObject_methods::tp_init_impl,
94+
/* allocfunc tp_alloc */ function_record_PyTypeObject_methods::tp_alloc_impl,
95+
/* newfunc tp_new */ function_record_PyTypeObject_methods::tp_new_impl,
96+
/* freefunc tp_free */ function_record_PyTypeObject_methods::tp_free_impl,
97+
/* inquiry tp_is_gc */ nullptr,
98+
/* PyObject *tp_bases */ nullptr,
99+
/* PyObject *tp_mro */ nullptr,
100+
/* PyObject *tp_cache */ nullptr,
101+
/* PyObject *tp_subclasses */ nullptr,
102+
/* PyObject *tp_weaklist */ nullptr,
103+
/* destructor tp_del */ nullptr,
104+
/* unsigned int tp_version_tag */ 0,
105+
/* destructor tp_finalize */ nullptr,
106+
#if PY_VERSION_HEX >= 0x03080000
107+
/* vectorcallfunc tp_vectorcall */ nullptr,
108+
#endif
109+
};
110+
PYBIND11_WARNING_POP
111+
112+
static bool function_record_PyTypeObject_PyType_Ready_first_call = true;
113+
114+
inline void function_record_PyTypeObject_PyType_Ready() {
115+
if (function_record_PyTypeObject_PyType_Ready_first_call) {
116+
if (PyType_Ready(&function_record_PyTypeObject) < 0) {
117+
throw error_already_set();
118+
}
119+
function_record_PyTypeObject_PyType_Ready_first_call = false;
120+
}
121+
}
122+
123+
// Returns true if `obj` is an instance of the function-record Python type.
// Type objects are rejected up front. An instance matches either because its type is this
// translation unit's function_record_PyTypeObject, or because its type's tp_name equals the
// tp_name of function_record_PyTypeObject.
inline bool is_function_record_PyObject(PyObject *obj) {
    if (PyType_Check(obj) != 0) {
        return false;
    }
    const PyTypeObject *const candidate_type = Py_TYPE(obj);
    // Fast path first (pointer comparison). The name comparison is the fallback that works
    // across extension modules; note that tp_name is versioned.
    const bool same_type_object = (candidate_type == &function_record_PyTypeObject);
    return same_type_object
           || strcmp(candidate_type->tp_name, function_record_PyTypeObject.tp_name) == 0;
}
138+
139+
// Extracts the C++ function_record pointer stored in `obj`.
// Returns nullptr when `obj` is not recognized by is_function_record_PyObject().
inline function_record *function_record_ptr_from_PyObject(PyObject *obj) {
    if (!is_function_record_PyObject(obj)) {
        return nullptr;
    }
    auto *wrapper = (detail::function_record_PyObject *) obj;
    return wrapper->cpp_func_rec;
}
145+
146+
// Allocates a new function_record_PyObject of type function_record_PyTypeObject and returns
// it as an owning `object`. Throws error_already_set if the allocation fails.
inline object function_record_PyObject_New() {
    function_record_PyObject *fresh
        = PyObject_New(function_record_PyObject, &function_record_PyTypeObject);
    if (fresh == nullptr) {
        throw error_already_set();
    }
    // For clarity/purity. Redundant in practice.
    fresh->cpp_func_rec = nullptr;
    auto *as_py_object = (PyObject *) fresh;
    return reinterpret_steal<object>(as_py_object);
}
154+
155+
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
156+
157+
// Guard against accidents & oversights, in particular when porting to future Python versions.
158+
inline PyObject *tp_new_impl(PyTypeObject *, PyObject *, PyObject *) {
159+
pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_new_impl");
160+
// return nullptr; // Unreachable.
161+
}
162+
163+
// Guard for the tp_alloc slot of function_record_PyTypeObject: the slot is not expected to
// be invoked, so reaching it fails loudly.
inline PyObject *tp_alloc_impl(PyTypeObject * /*type*/, Py_ssize_t /*nitems*/) {
    pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_alloc_impl");
    // A trailing `return nullptr;` would be unreachable and is deliberately omitted.
}
167+
168+
// Guard for the tp_init slot of function_record_PyTypeObject: the slot is not expected to
// be invoked, so reaching it fails loudly.
inline int tp_init_impl(PyObject * /*self*/, PyObject * /*args*/, PyObject * /*kwds*/) {
    pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_init_impl");
    // A trailing `return -1;` would be unreachable and is deliberately omitted.
}
172+
173+
// The implementation needs the definition of `class cpp_function`.
174+
void tp_dealloc_impl(PyObject *self);
175+
176+
// Guard for the tp_free slot of function_record_PyTypeObject: the slot is not expected to
// be invoked, so reaching it fails loudly.
inline void tp_free_impl(void * /*self*/) {
    pybind11_fail("UNEXPECTED CALL OF function_record_PyTypeObject_methods::tp_free_impl");
}
179+
180+
// Implementation of `__reduce_ex__` for function-record objects.
//
// The arguments after `self` are deliberately ignored for simplicity (expected is
// `protocol: int`).
//
// When the record has a non-empty name and its scope is a module for which
// get_scope_module() yields a value, the result is the 2-tuple
//     (builtins eval, ("__import__('importlib').import_module('<scope module>')",))
// Otherwise a RuntimeError is set and nullptr is returned. A `self` from which no
// function_record can be obtained is treated as fatal.
inline PyObject *reduce_ex_impl(PyObject *self, PyObject *, PyObject *) {
    const function_record *rec = function_record_ptr_from_PyObject(self);
    if (rec == nullptr) {
        pybind11_fail(
            "FATAL: function_record_PyTypeObject reduce_ex_impl(): cannot obtain cpp_func_rec.");
    }

    const bool has_name = rec->name != nullptr && rec->name[0] != '\0';
    if (has_name && rec->scope && PyModule_Check(rec->scope.ptr()) != 0) {
        object scope_module = get_scope_module(rec->scope);
        if (scope_module) {
            object builtin_eval = reinterpret_borrow<object>(PyEval_GetBuiltins())["eval"];
            object import_expr
                = str("__import__('importlib').import_module('") + scope_module + str("')");
            object reduced = make_tuple(builtin_eval, make_tuple(import_expr));
            return reduced.release().ptr();
        }
    }

    set_error(PyExc_RuntimeError, repr(self) + str(" is not pickleable."));
    return nullptr;
}
201+
202+
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
203+
204+
PYBIND11_NAMESPACE_END(detail)
205+
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)

include/pybind11/detail/internals.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ struct internals {
143143
# if PYBIND11_INTERNALS_VERSION > 4
144144
// Note that we have to use a std::string to allocate memory to ensure a unique address
145145
// We want unique addresses since we use pointer equality to compare function records
146+
// OBSOLETE: google/pywrapcc#30099
146147
std::string function_record_capsule_name = internals_function_record_capsule_name;
147148
# endif
148149

include/pybind11/functional.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,8 @@ struct type_caster<std::function<Return(Args...)>> {
9696
auto *cfunc_self = PyCFunction_GET_SELF(cfunc.ptr());
9797
if (cfunc_self == nullptr) {
9898
PyErr_Clear();
99-
} else if (isinstance<capsule>(cfunc_self)) {
100-
auto c = reinterpret_borrow<capsule>(cfunc_self);
101-
102-
function_record *rec = nullptr;
103-
// Check that we can safely reinterpret the capsule into a function_record
104-
if (detail::is_function_record_capsule(c)) {
105-
rec = c.get_pointer<function_record>();
106-
}
107-
99+
} else {
100+
function_record *rec = function_record_ptr_from_PyObject(cfunc_self);
108101
while (rec != nullptr) {
109102
if (rec->is_stateless
110103
&& same_type(typeid(function_type),

include/pybind11/pybind11.h

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#pragma once
1212

1313
#include "detail/class.h"
14+
#include "detail/function_record_pyobject.h"
1415
#include "detail/init.h"
1516
#include "detail/native_enum_data.h"
1617
#include "detail/smart_holder_sfinae_hooks_only.h"
@@ -20,6 +21,7 @@
2021
#include "options.h"
2122
#include "typing.h"
2223

24+
#include <cassert>
2325
#include <cstdlib>
2426
#include <cstring>
2527
#include <memory>
@@ -528,20 +530,11 @@ class cpp_function : public function {
528530
if (rec->sibling) {
529531
if (PyCFunction_Check(rec->sibling.ptr())) {
530532
auto *self = PyCFunction_GET_SELF(rec->sibling.ptr());
531-
if (!isinstance<capsule>(self)) {
533+
chain = detail::function_record_ptr_from_PyObject(self);
534+
if (chain && !chain->scope.is(rec->scope)) {
535+
/* Never append a method to an overload chain of a parent class;
536+
instead, hide the parent's overloads in this case */
532537
chain = nullptr;
533-
} else {
534-
auto rec_capsule = reinterpret_borrow<capsule>(self);
535-
if (detail::is_function_record_capsule(rec_capsule)) {
536-
chain = rec_capsule.get_pointer<detail::function_record>();
537-
/* Never append a method to an overload chain of a parent class;
538-
instead, hide the parent's overloads in this case */
539-
if (!chain->scope.is(rec->scope)) {
540-
chain = nullptr;
541-
}
542-
} else {
543-
chain = nullptr;
544-
}
545538
}
546539
}
547540
// Don't trigger for things like the default __init__, which are wrapper_descriptors
@@ -561,21 +554,14 @@ class cpp_function : public function {
561554
= reinterpret_cast<PyCFunction>(reinterpret_cast<void (*)()>(dispatcher));
562555
rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
563556

564-
capsule rec_capsule(unique_rec.release(),
565-
detail::get_function_record_capsule_name(),
566-
[](void *ptr) { destruct((detail::function_record *) ptr); });
557+
detail::function_record_PyTypeObject_PyType_Ready(); // Call-once initialization.
558+
object py_func_rec = detail::function_record_PyObject_New();
559+
((detail::function_record_PyObject *) py_func_rec.ptr())->cpp_func_rec
560+
= unique_rec.release();
567561
guarded_strdup.release();
568562

569-
object scope_module;
570-
if (rec->scope) {
571-
if (hasattr(rec->scope, "__module__")) {
572-
scope_module = rec->scope.attr("__module__");
573-
} else if (hasattr(rec->scope, "__name__")) {
574-
scope_module = rec->scope.attr("__name__");
575-
}
576-
}
577-
578-
m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr());
563+
object scope_module = detail::get_scope_module(rec->scope);
564+
m_ptr = PyCFunction_NewEx(rec->def, py_func_rec.ptr(), scope_module.ptr());
579565
if (!m_ptr) {
580566
pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
581567
}
@@ -604,9 +590,9 @@ class cpp_function : public function {
604590
// chain.
605591
chain_start = rec;
606592
rec->next = chain;
607-
auto rec_capsule
608-
= reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
609-
rec_capsule.set_pointer(unique_rec.release());
593+
auto *py_func_rec
594+
= (detail::function_record_PyObject *) PyCFunction_GET_SELF(m_ptr);
595+
py_func_rec->cpp_func_rec = unique_rec.release();
610596
guarded_strdup.release();
611597
} else {
612598
// Or end of chain (normal behavior)
@@ -680,6 +666,8 @@ class cpp_function : public function {
680666
}
681667
}
682668

669+
friend void detail::function_record_PyTypeObject_methods::tp_dealloc_impl(PyObject *);
670+
683671
/// When a cpp_function is GCed, release any memory allocated by pybind11
684672
static void destruct(detail::function_record *rec, bool free_strings = true) {
685673
// If on Python 3.9, check the interpreter "MICRO" (patch) version.
@@ -729,13 +717,11 @@ class cpp_function : public function {
729717
/// Main dispatch logic for calls to functions bound using pybind11
730718
static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) {
731719
using namespace detail;
732-
assert(isinstance<capsule>(self));
720+
const function_record *overloads = function_record_ptr_from_PyObject(self);
721+
assert(overloads != nullptr);
733722

734723
/* Iterator over the list of potentially admissible overloads */
735-
const function_record *overloads = reinterpret_cast<function_record *>(
736-
PyCapsule_GetPointer(self, get_function_record_capsule_name())),
737-
*current_overload = overloads;
738-
assert(overloads != nullptr);
724+
const function_record *current_overload = overloads;
739725

740726
/* Need to know how many arguments + keyword arguments there are to pick the right
741727
overload */
@@ -1209,6 +1195,17 @@ class cpp_function : public function {
12091195

12101196
PYBIND11_NAMESPACE_BEGIN(detail)
12111197

1198+
PYBIND11_NAMESPACE_BEGIN(function_record_PyTypeObject_methods)
1199+
1200+
// This implementation needs the definition of `class cpp_function`.
1201+
inline void tp_dealloc_impl(PyObject *self) {
1202+
auto *py_func_rec = (function_record_PyObject *) self;
1203+
cpp_function::destruct(py_func_rec->cpp_func_rec);
1204+
py_func_rec->cpp_func_rec = nullptr;
1205+
}
1206+
1207+
PYBIND11_NAMESPACE_END(function_record_PyTypeObject_methods)
1208+
12121209
/// Instance creation function for all pybind11 types. It only allocates space for the
12131210
/// C++ object, but doesn't call the constructor -- an `__init__` function must do that.
12141211
extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) {
@@ -2288,14 +2285,7 @@ class class_ : public detail::generic_type {
22882285
if (!func_self) {
22892286
throw error_already_set();
22902287
}
2291-
if (!isinstance<capsule>(func_self)) {
2292-
return nullptr;
2293-
}
2294-
auto cap = reinterpret_borrow<capsule>(func_self);
2295-
if (!detail::is_function_record_capsule(cap)) {
2296-
return nullptr;
2297-
}
2298-
return cap.get_pointer<detail::function_record>();
2288+
return detail::function_record_ptr_from_PyObject(func_self.ptr());
22992289
}
23002290
};
23012291

0 commit comments

Comments
 (0)