Skip to content

Commit c846269

Browse files
eendebakptclaude
andcommitted
gh-146306: Specialize float/float true division in tier 2 optimizer
Add inplace float true division ops that the tier 2 optimizer emits when at least one operand is a known float: - _BINARY_OP_TRUEDIV_FLOAT_INPLACE (unique LHS) - _BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT (unique RHS) The optimizer inserts _GUARD_TOS_FLOAT / _GUARD_NOS_FLOAT for operands not yet known to be float, enabling specialization in expressions like `(a + b) / c`. Also marks the result of all NB_TRUE_DIVIDE operations as unique float in the abstract interpreter, enabling downstream inplace ops even for generic `a / b` (the `+=` can reuse the division result). Speeds up chain division patterns by ~2.3x and simple `total += a/b` by ~1.5x. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6009309 commit c846269

File tree

8 files changed

+1660
-1224
lines changed

8 files changed

+1660
-1224
lines changed

Include/internal/pycore_uop_ids.h

Lines changed: 1224 additions & 1216 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_capi/test_opt.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3237,6 +3237,76 @@ def testfunc(args):
32373237
uops = get_opnames(ex)
32383238
self.assertNotIn("_UNARY_NEGATIVE_FLOAT_INPLACE", uops)
32393239

3240+
def test_float_truediv_inplace_unique_lhs(self):
3241+
# (a + b) produces a unique float; dividing by c reuses it
3242+
def testfunc(args):
3243+
a, b, c, n = args
3244+
total = 0.0
3245+
for _ in range(n):
3246+
total += (a + b) / c
3247+
return total
3248+
3249+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3250+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.25)
3251+
self.assertIsNotNone(ex)
3252+
uops = get_opnames(ex)
3253+
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE", uops)
3254+
3255+
def test_float_truediv_inplace_unique_rhs(self):
3256+
# (a + b) produces a unique float on the right side of /
3257+
def testfunc(args):
3258+
a, b, c, n = args
3259+
total = 0.0
3260+
for _ in range(n):
3261+
total += c / (a + b)
3262+
return total
3263+
3264+
res, ex = self._run_with_optimizer(testfunc, (2.0, 3.0, 4.0, TIER2_THRESHOLD))
3265+
self.assertAlmostEqual(res, TIER2_THRESHOLD * 0.8)
3266+
self.assertIsNotNone(ex)
3267+
uops = get_opnames(ex)
3268+
self.assertIn("_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT", uops)
3269+
3270+
def test_float_truediv_type_propagation(self):
3271+
# (a/b) + (c/d): inner divisions are generic _BINARY_OP but
3272+
# type propagation marks their results as float, so the +
3273+
# is specialized and the += uses inplace on the unique result
3274+
def testfunc(args):
3275+
a, b, c, d, n = args
3276+
total = 0.0
3277+
for _ in range(n):
3278+
total += (a / b) + (c / d)
3279+
return total
3280+
3281+
res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, 4.0, 5.0, TIER2_THRESHOLD))
3282+
expected = TIER2_THRESHOLD * (10.0 / 3.0 + 4.0 / 5.0)
3283+
self.assertAlmostEqual(res, expected)
3284+
self.assertIsNotNone(ex)
3285+
uops = get_opnames(ex)
3286+
# The + between the two division results should use inplace
3287+
# (the a/b result is unique from type propagation)
3288+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE", uops)
3289+
# The += should also use inplace (the + result is unique)
3290+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
3291+
3292+
def test_float_truediv_unique_result_enables_inplace_add(self):
3293+
# a / b: the generic division result is marked as unique float
3294+
# by type propagation, so total += (a / b) uses inplace add
3295+
def testfunc(args):
3296+
a, b, n = args
3297+
total = 0.0
3298+
for _ in range(n):
3299+
total += a / b
3300+
return total
3301+
3302+
res, ex = self._run_with_optimizer(testfunc, (10.0, 3.0, TIER2_THRESHOLD))
3303+
expected = TIER2_THRESHOLD * (10.0 / 3.0)
3304+
self.assertAlmostEqual(res, expected)
3305+
self.assertIsNotNone(ex)
3306+
uops = get_opnames(ex)
3307+
# The += uses inplace because the division result is unique
3308+
self.assertIn("_BINARY_OP_ADD_FLOAT_INPLACE_RIGHT", uops)
3309+
32403310
def test_load_attr_instance_value(self):
32413311
def testfunc(n):
32423312
class C():

Python/bytecodes.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,28 @@ dummy_func(
838838
INPUTS_DEAD();
839839
}
840840

841+
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE, (left, right -- res, l, r)) {
842+
FLOAT_INPLACE_DIVOP(left, right, left);
843+
if (_divop_err) {
844+
ERROR_NO_POP();
845+
}
846+
res = left;
847+
l = PyStackRef_NULL;
848+
r = right;
849+
INPUTS_DEAD();
850+
}
851+
852+
tier2 op(_BINARY_OP_TRUEDIV_FLOAT_INPLACE_RIGHT, (left, right -- res, l, r)) {
853+
FLOAT_INPLACE_DIVOP(left, right, right);
854+
if (_divop_err) {
855+
ERROR_NO_POP();
856+
}
857+
res = right;
858+
l = left;
859+
r = PyStackRef_NULL;
860+
INPUTS_DEAD();
861+
}
862+
841863
pure op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
842864
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
843865
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);

Python/ceval_macros.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,3 +562,27 @@ gen_try_set_executing(PyGenObject *gen)
562562
((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET)) \
563563
->ob_fval = _dres; \
564564
} while (0)
565+
566+
// Inplace float true division. Sets _divop_err to 1 on zero division.
567+
// Caller must check _divop_err and call ERROR_NO_POP() if set.
568+
#define FLOAT_INPLACE_DIVOP(left, right, TARGET) \
569+
int _divop_err = 0; \
570+
do { \
571+
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); \
572+
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); \
573+
assert(PyFloat_CheckExact(left_o)); \
574+
assert(PyFloat_CheckExact(right_o)); \
575+
assert(_PyObject_IsUniquelyReferenced( \
576+
PyStackRef_AsPyObjectBorrow(TARGET))); \
577+
STAT_INC(BINARY_OP, hit); \
578+
double _divisor = ((PyFloatObject *)right_o)->ob_fval; \
579+
if (_divisor == 0.0) { \
580+
PyErr_SetString(PyExc_ZeroDivisionError, \
581+
"float division by zero"); \
582+
_divop_err = 1; \
583+
break; \
584+
} \
585+
double _dres = ((PyFloatObject *)left_o)->ob_fval / _divisor; \
586+
((PyFloatObject *)PyStackRef_AsPyObjectBorrow(TARGET)) \
587+
->ob_fval = _dres; \
588+
} while (0)

Python/executor_cases.c.h

Lines changed: 184 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)