Skip to content

Commit cae6b21

Browse files
Merge branch 'main' of https://github.com/python/cpython into jit-inline-binary
2 parents 5784b19 + 1c89817 commit cae6b21

File tree

13 files changed

+265
-64
lines changed

13 files changed

+265
-64
lines changed

Include/internal/pycore_ceval.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,7 @@ _PyCallMethodDescriptorFastWithKeywords_StackRef(
461461
int total_args);
462462

463463
PyAPI_FUNC(PyObject *)
464-
_Py_CallBuiltinClass_StackRefSteal(
464+
_Py_CallBuiltinClass_StackRef(
465465
_PyStackRef callable,
466466
_PyStackRef *arguments,
467467
int total_args);

Include/internal/pycore_opcode_metadata.h

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_ids.h

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_opt.py

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1521,6 +1521,29 @@ class Foo:
15211521
Foo.attr = 0
15221522
self.assertFalse(ex.is_valid())
15231523

1524+
def test_guard_type_version_locked_removed(self):
1525+
"""
1526+
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
1527+
eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
1528+
"""
1529+
1530+
class Foo:
1531+
def __init__(self):
1532+
self.a = 1
1533+
self.b = 2
1534+
self.c = 3
1535+
1536+
def thing(n):
1537+
for _ in range(n):
1538+
Foo()
1539+
1540+
res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
1541+
self.assertIsNotNone(ex)
1542+
opnames = list(iter_opnames(ex))
1543+
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
1544+
# Only the first store needs the guard; the rest should be NOPed.
1545+
self.assertEqual(guard_locked_count, 1)
1546+
15241547
def test_type_version_doesnt_segfault(self):
15251548
"""
15261549
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
@@ -1542,6 +1565,98 @@ def fn(a):
15421565

15431566
fn(A())
15441567

1568+
def test_init_resolves_callable(self):
1569+
"""
1570+
_CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
1571+
enabling the optimizer to propagate type information through the frame
1572+
and eliminate redundant function version and arg count checks.
1573+
"""
1574+
class MyPoint:
1575+
def __init__(self, x, y):
1576+
# If __init__ callable is propagated through, then
1577+
# these will get promoted from globals to constants.
1578+
self.x = range(1)
1579+
self.y = range(1)
1580+
1581+
def testfunc(n):
1582+
for _ in range(n):
1583+
p = MyPoint(1.0, 2.0)
1584+
1585+
_, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1586+
self.assertIsNotNone(ex)
1587+
uops = get_opnames(ex)
1588+
# The __init__ call should be traced through via _PUSH_FRAME
1589+
self.assertIn("_PUSH_FRAME", uops)
1590+
# __init__ resolution allows promotion of range to constant
1591+
self.assertNotIn("_LOAD_GLOBAL_BUILTINS", uops)
1592+
1593+
def test_guard_type_version_locked_propagates(self):
1594+
"""
1595+
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
1596+
symbol so repeated accesses to the same type can benefit.
1597+
"""
1598+
class Item:
1599+
def __init__(self, val):
1600+
self.val = val
1601+
1602+
def get(self):
1603+
return self.val
1604+
1605+
def get2(self):
1606+
return self.val + 1
1607+
1608+
def testfunc(n):
1609+
item = Item(42)
1610+
total = 0
1611+
for _ in range(n):
1612+
# Two method calls on the same object — the second
1613+
# should benefit from type info set by the first.
1614+
total += item.get() + item.get2()
1615+
return total
1616+
1617+
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1618+
self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
1619+
self.assertIsNotNone(ex)
1620+
uops = get_opnames(ex)
1621+
# Both methods should be traced through
1622+
self.assertEqual(uops.count("_PUSH_FRAME"), 2)
1623+
# Type version propagation: one guard covers both method lookups
1624+
self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
1625+
# Function checks eliminated (type info resolves the callable)
1626+
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
1627+
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)
1628+
1629+
def test_method_chain_guard_elimination(self):
1630+
"""
1631+
Calling two methods on the same object should share the outer
1632+
type guard — only one _GUARD_TYPE_VERSION for the two lookups.
1633+
"""
1634+
class Calc:
1635+
def __init__(self, val):
1636+
self.val = val
1637+
1638+
def add(self, x):
1639+
self.val += x
1640+
return self
1641+
1642+
def testfunc(n):
1643+
c = Calc(0)
1644+
for _ in range(n):
1645+
c.add(1).add(2)
1646+
return c.val
1647+
1648+
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
1649+
self.assertEqual(res, TIER2_THRESHOLD * 3)
1650+
self.assertIsNotNone(ex)
1651+
uops = get_opnames(ex)
1652+
# Both add() calls should be inlined
1653+
push_count = uops.count("_PUSH_FRAME")
1654+
self.assertEqual(push_count, 2)
1655+
# Only one outer type version guard for the two method lookups
1656+
# on the same object c (the second lookup reuses type info)
1657+
guard_version_count = uops.count("_GUARD_TYPE_VERSION")
1658+
self.assertEqual(guard_version_count, 1)
1659+
15451660
def test_func_guards_removed_or_reduced(self):
15461661
def testfunc(n):
15471662
for i in range(n):

Modules/_testinternalcapi/test_cases.c.h

Lines changed: 23 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/bytecodes.c

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3027,6 +3027,7 @@ dummy_func(
30273027

30283028
macro(STORE_ATTR_INSTANCE_VALUE) =
30293029
unused/1 +
3030+
_RECORD_TOS_TYPE +
30303031
_LOCK_OBJECT +
30313032
_GUARD_TYPE_VERSION_LOCKED +
30323033
_GUARD_DORV_NO_DICT +
@@ -4588,23 +4589,24 @@ dummy_func(
45884589
EXIT_IF(tp->tp_vectorcall == NULL);
45894590
}
45904591

4591-
op(_CALL_BUILTIN_CLASS, (callable, self_or_null, args[oparg] -- res)) {
4592+
op(_CALL_BUILTIN_CLASS, (callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
45924593
int total_args = oparg;
45934594
_PyStackRef *arguments = args;
45944595
if (!PyStackRef_IsNull(self_or_null)) {
45954596
arguments--;
45964597
total_args++;
45974598
}
45984599
STAT_INC(CALL, hit);
4599-
PyObject *res_o = _Py_CallBuiltinClass_StackRefSteal(
4600+
PyObject *res_o = _Py_CallBuiltinClass_StackRef(
46004601
callable,
46014602
arguments,
46024603
total_args);
4603-
DEAD(args);
4604-
DEAD(self_or_null);
4605-
DEAD(callable);
4606-
ERROR_IF(res_o == NULL);
4607-
res = PyStackRef_FromPyObjectSteal(res_o);
4604+
if (res_o == NULL) {
4605+
ERROR_NO_POP();
4606+
}
4607+
_PyStackRef temp = callable;
4608+
callable = PyStackRef_FromPyObjectSteal(res_o);
4609+
PyStackRef_CLOSE(temp);
46084610
}
46094611

46104612
macro(CALL_BUILTIN_CLASS) =
@@ -4613,6 +4615,8 @@ dummy_func(
46134615
unused/2 +
46144616
_GUARD_CALLABLE_BUILTIN_CLASS +
46154617
_CALL_BUILTIN_CLASS +
4618+
_POP_TOP_OPARG +
4619+
POP_TOP +
46164620
_CHECK_PERIODIC_AT_END;
46174621

46184622
op(_GUARD_CALLABLE_BUILTIN_O, (callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {

Python/ceval.c

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -894,30 +894,20 @@ _PyCallMethodDescriptorFastWithKeywords_StackRef(
894894
}
895895

896896
PyObject *
897-
_Py_CallBuiltinClass_StackRefSteal(
897+
_Py_CallBuiltinClass_StackRef(
898898
_PyStackRef callable,
899899
_PyStackRef *arguments,
900900
int total_args)
901901
{
902902
PyObject *res;
903903
STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o);
904904
if (CONVERSION_FAILED(args_o)) {
905-
res = NULL;
906-
goto cleanup;
905+
return NULL;
907906
}
908907
PyTypeObject *tp = (PyTypeObject *)PyStackRef_AsPyObjectBorrow(callable);
909908
res = tp->tp_vectorcall((PyObject *)tp, args_o, total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL);
910909
STACKREFS_TO_PYOBJECTS_CLEANUP(args_o);
911910
assert((res != NULL) ^ (PyErr_Occurred() != NULL));
912-
cleanup:
913-
// arguments is a pointer into the GC visible stack,
914-
// so we must NULL out values as we clear them.
915-
for (int i = total_args-1; i >= 0; i--) {
916-
_PyStackRef tmp = arguments[i];
917-
arguments[i] = PyStackRef_NULL;
918-
PyStackRef_CLOSE(tmp);
919-
}
920-
PyStackRef_CLOSE(callable);
921911
return res;
922912
}
923913

0 commit comments

Comments
 (0)