Skip to content

Commit d422b3d

Browse files
authored
[mypyc] Support explicit acyclic native classes that avoid gc (#20795)
Add support for `@mypyc_attr(acyclic=True)`, which makes instances of a native class not participate in cyclic GC. This sped up a microbenchmark that just allocates lots of temporary objects by ~60%. Acyclic instances also use less memory, since they have no GC header. I did some manual testing beyond the added tests to ensure acyclic classes work as expected. We can later add support for inferring acyclicity automatically in some cases, but usually it will have to be declared explicitly. This was created using Claude Code.
1 parent c67a479 commit d422b3d

7 files changed

Lines changed: 115 additions & 11 deletions

File tree

mypyc/codegen/emitclass.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,9 @@ def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None:
264264

265265
if generate_full:
266266
fields["tp_dealloc"] = f"(destructor){name_prefix}_dealloc"
267-
fields["tp_traverse"] = f"(traverseproc){name_prefix}_traverse"
268-
fields["tp_clear"] = f"(inquiry){name_prefix}_clear"
267+
if not cl.is_acyclic:
268+
fields["tp_traverse"] = f"(traverseproc){name_prefix}_traverse"
269+
fields["tp_clear"] = f"(inquiry){name_prefix}_clear"
269270
# Populate .tp_finalize and generate a finalize method only if __del__ is defined for this class.
270271
del_method = next((e.method for e in cl.vtable_entries if e.name == "__del__"), None)
271272
if del_method:
@@ -344,8 +345,9 @@ def emit_line() -> None:
344345
init_fn = cl.get_method("__init__")
345346
generate_new_for_class(cl, new_name, vtable_name, setup_name, init_fn, emitter)
346347
emit_line()
347-
generate_traverse_for_class(cl, traverse_name, emitter)
348-
emit_line()
348+
if not cl.is_acyclic:
349+
generate_traverse_for_class(cl, traverse_name, emitter)
350+
emit_line()
349351
generate_clear_for_class(cl, clear_name, emitter)
350352
emit_line()
351353
generate_dealloc_for_class(cl, dealloc_name, clear_name, bool(del_method), emitter)
@@ -378,7 +380,7 @@ def emit_line() -> None:
378380
emit_line()
379381

380382
flags = ["Py_TPFLAGS_DEFAULT", "Py_TPFLAGS_HEAPTYPE", "Py_TPFLAGS_BASETYPE"]
381-
if generate_full:
383+
if generate_full and not cl.is_acyclic:
382384
flags.append("Py_TPFLAGS_HAVE_GC")
383385
if cl.has_method("__call__"):
384386
fields["tp_vectorcall_offset"] = "offsetof({}, vectorcall)".format(
@@ -621,7 +623,8 @@ def generate_setup_for_class(
621623
emitter.emit_line(f"self = {prefix}_free_instance;")
622624
emitter.emit_line(f"{prefix}_free_instance = NULL;")
623625
emitter.emit_line("Py_SET_REFCNT(self, 1);")
624-
emitter.emit_line("PyObject_GC_Track(self);")
626+
if not cl.is_acyclic:
627+
emitter.emit_line("PyObject_GC_Track(self);")
625628
if defaults_fn is not None:
626629
emit_attr_defaults_func_call(defaults_fn, "self", emitter)
627630
emitter.emit_line("return (PyObject *)self;")
@@ -930,7 +933,8 @@ def generate_dealloc_for_class(
930933
emitter.emit_line("if (res < 0) {")
931934
emitter.emit_line("goto done;")
932935
emitter.emit_line("}")
933-
emitter.emit_line("PyObject_GC_UnTrack(self);")
936+
if not cl.is_acyclic:
937+
emitter.emit_line("PyObject_GC_UnTrack(self);")
934938
if cl.reuse_freed_instance:
935939
emit_reuse_dealloc(cl, emitter)
936940
# The trashcan is needed to handle deep recursive deallocations

mypyc/doc/native_classes.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,38 @@ refer to attributes. These are not valid::
266266

267267
__deletable__ = ('a',) # Error: not in a class body
268268

269+
Acyclic classes
270+
---------------
271+
272+
By default, native classes participate in CPython's cyclic garbage
273+
collector (GC). This adds some overhead to object allocation and
274+
deallocation. If you know that instances of a class can never be
275+
part of reference cycles, you can opt out of cyclic GC using
276+
``@mypyc_attr(acyclic=True)``::
277+
278+
from mypy_extensions import mypyc_attr
279+
280+
@mypyc_attr(acyclic=True)
281+
class Leaf:
282+
def __init__(self, x: int, name: str) -> None:
283+
self.x = x
284+
self.name = name
285+
286+
This can improve performance, especially for classes that are
287+
allocated and deallocated frequently. Acyclic instances also use
288+
less memory, since CPython doesn't need to add a GC header to them.
289+
290+
The acyclic property is not inherited by subclasses. Each subclass
291+
must explicitly use ``@mypyc_attr(acyclic=True)`` to also opt out
292+
of cyclic GC.
293+
294+
.. warning::
295+
296+
If instances of an acyclic class actually participate in reference
297+
cycles, those cycles will never be collected, resulting in memory
298+
leaks. Only use this for classes whose instances won't refer back
299+
to objects that (directly or indirectly) refer to the instance.
300+
269301
Other properties
270302
----------------
271303

mypyc/ir/class_ir.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ def __init__(
220220
# per-type free "list" of up to length 1.
221221
self.reuse_freed_instance = False
222222

223+
# If True, the class does not participate in cyclic garbage collection.
224+
# This can improve performance but is only safe if instances can never
225+
# be part of reference cycles. Derived from @mypyc_attr(acyclic=True).
226+
self.is_acyclic = False
227+
223228
# Is this a class inheriting from enum.Enum? Such classes can be special-cased.
224229
self.is_enum = False
225230

@@ -426,6 +431,7 @@ def serialize(self) -> JsonDict:
426431
"init_self_leak": self.init_self_leak,
427432
"env_user_function": self.env_user_function.id if self.env_user_function else None,
428433
"reuse_freed_instance": self.reuse_freed_instance,
434+
"is_acyclic": self.is_acyclic,
429435
"is_enum": self.is_enum,
430436
"is_coroutine": self.coroutine_name,
431437
}
@@ -484,6 +490,7 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ClassIR:
484490
ctx.functions[data["env_user_function"]] if data["env_user_function"] else None
485491
)
486492
ir.reuse_freed_instance = data["reuse_freed_instance"]
493+
ir.is_acyclic = data.get("is_acyclic", False)
487494
ir.is_enum = data["is_enum"]
488495
ir.coroutine_name = data["is_coroutine"]
489496

mypyc/irbuild/prepare.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,9 @@ def prepare_class_def(
369369
# Supports copy.copy and pickle (including subclasses)
370370
ir._serializable = True
371371

372+
if attrs.get("acyclic") is True:
373+
ir.is_acyclic = True
374+
372375
free_list_len = attrs.get("free_list_len")
373376
if free_list_len is not None:
374377
line = attrs_lines["free_list_len"]

mypyc/irbuild/util.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,14 @@
3333
from mypyc.errors import Errors
3434

3535
MYPYC_ATTRS: Final[frozenset[MypycAttr]] = frozenset(
36-
["native_class", "allow_interpreted_subclasses", "serializable", "free_list_len"]
36+
["native_class", "allow_interpreted_subclasses", "serializable", "free_list_len", "acyclic"]
3737
)
3838

3939
DATACLASS_DECORATORS: Final = frozenset(["dataclasses.dataclass", "attr.s", "attr.attrs"])
4040

4141

4242
MypycAttr = Literal[
43-
"native_class", "allow_interpreted_subclasses", "serializable", "free_list_len"
43+
"native_class", "allow_interpreted_subclasses", "serializable", "free_list_len", "acyclic"
4444
]
4545

4646

@@ -49,6 +49,7 @@ class MypycAttrs(TypedDict):
4949
allow_interpreted_subclasses: NotRequired[bool]
5050
serializable: NotRequired[bool]
5151
free_list_len: NotRequired[int]
52+
acyclic: NotRequired[bool]
5253

5354

5455
def is_final_decorator(d: Expression) -> bool:

mypyc/test-data/irbuild-classes.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2885,11 +2885,11 @@ L0:
28852885
from mypy_extensions import mypyc_attr
28862886

28872887
@mypyc_attr("allow_interpreted_subclasses", "invalid_arg") # E: "invalid_arg" is not a supported "mypyc_attr" \
2888-
# N: supported keys: "allow_interpreted_subclasses", "free_list_len", "native_class", "serializable"
2888+
# N: supported keys: "acyclic", "allow_interpreted_subclasses", "free_list_len", "native_class", "serializable"
28892889
class InvalidArg:
28902890
pass
28912891
@mypyc_attr(invalid_kwarg=True) # E: "invalid_kwarg" is not a supported "mypyc_attr" \
2892-
# N: supported keys: "allow_interpreted_subclasses", "free_list_len", "native_class", "serializable"
2892+
# N: supported keys: "acyclic", "allow_interpreted_subclasses", "free_list_len", "native_class", "serializable"
28932893
class InvalidKwarg:
28942894
pass
28952895
@mypyc_attr(str()) # E: All "mypyc_attr" positional arguments must be string literals.

mypyc/test-data/run-classes.test

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5619,3 +5619,60 @@ def test_read_corrupted_data() -> None:
56195619
print("RANDOMIZED TEST FAILURE -- please open an issue with the following context:")
56205620
print(">>>", e, data)
56215621
raise
5622+
5623+
[case testAcyclicClass]
5624+
import gc
5625+
from mypy_extensions import mypyc_attr
5626+
5627+
@mypyc_attr(acyclic=True)
5628+
class Leaf:
5629+
def __init__(self, x: int, s: str) -> None:
5630+
self.x = x
5631+
self.s = s
5632+
5633+
def test_basic() -> None:
5634+
o = Leaf(5, "hello")
5635+
assert o.x == 5
5636+
assert o.s == "hello"
5637+
o.x = 10
5638+
assert o.x == 10
5639+
5640+
def test_gc_not_tracked() -> None:
5641+
o = Leaf(1, "a")
5642+
assert not gc.is_tracked(o)
5643+
5644+
def test_dealloc() -> None:
5645+
for i in range(1000):
5646+
o = Leaf(i, str(i))
5647+
# Just verify no crash or leak during repeated alloc/dealloc
5648+
5649+
@mypyc_attr(acyclic=True)
5650+
class AcyclicBase:
5651+
def __init__(self, x: int) -> None:
5652+
self.x = x
5653+
5654+
class DerivedNotAcyclic(AcyclicBase):
5655+
"""Derived without acyclic -- still participates in GC."""
5656+
def __init__(self, x: int, y: str) -> None:
5657+
super().__init__(x)
5658+
self.y = y
5659+
5660+
@mypyc_attr(acyclic=True)
5661+
class DerivedAcyclic(AcyclicBase):
5662+
"""Derived with acyclic -- also opts out of GC."""
5663+
def __init__(self, x: int, y: str) -> None:
5664+
super().__init__(x)
5665+
self.y = y
5666+
5667+
def test_derived_not_acyclic() -> None:
5668+
d = DerivedNotAcyclic(3, "hi")
5669+
assert d.x == 3
5670+
assert d.y == "hi"
5671+
# Subclass without @mypyc_attr(acyclic=True) still participates in GC
5672+
assert gc.is_tracked(d)
5673+
5674+
def test_derived_acyclic() -> None:
5675+
d = DerivedAcyclic(3, "hi")
5676+
assert d.x == 3
5677+
assert d.y == "hi"
5678+
assert not gc.is_tracked(d)

0 commit comments

Comments
 (0)