special case for concatenation

eendebakpt · eendebakpt · commit fe63c5949052 · 2026-04-06T23:13:49.000+02:00
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
@@ -503,6 +503,11 @@ typedef struct {
        aliased to either operand). Used by the tier 2 optimizer to enable
        inplace follow-up ops. */
     int result_unique;
+    /* Expected types of the left and right operands. Used by the tier 2
+       optimizer to eliminate _GUARD_BINARY_OP_EXTEND when the operand
+       types are already known. NULL means unknown/don't eliminate. */
+    PyTypeObject *lhs_type;
+    PyTypeObject *rhs_type;
 } _PyBinaryOpSpecializationDescr;
 
 /* Comparison bit masks. */
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
@@ -3878,6 +3878,30 @@ def testfunc(n):
         self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops)
         self.assertNotIn("_GUARD_TOS_TUPLE", uops)
 
+    def test_binary_op_extend_guard_elimination(self):
+        # When both operands have known types (e.g., from a prior
+        # _BINARY_OP_EXTEND result), the _GUARD_BINARY_OP_EXTEND
+        # should be eliminated.
+        def testfunc(n):
+            a = [1, 2]
+            b = [3, 4]
+            total = 0
+            for _ in range(n):
+                c = a + b    # first: guard stays, result type = list
+                d = c + c    # second: both operands are list -> guard eliminated
+                total += d[0]
+            return total
+
+        res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
+        self.assertEqual(res, TIER2_THRESHOLD)
+        self.assertIsNotNone(ex)
+        uops = get_opnames(ex)
+        # Both list additions use _BINARY_OP_EXTEND
+        self.assertEqual(uops.count("_BINARY_OP_EXTEND"), 2)
+        # But the second guard is eliminated because both operands
+        # are known to be lists from the first _BINARY_OP_EXTEND.
+        self.assertEqual(uops.count("_GUARD_BINARY_OP_EXTEND"), 1)
+
     def test_unary_invert_long_type(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
@@ -1536,8 +1536,8 @@ bytes_length(PyObject *self)
     return Py_SIZE(a);
 }
 
-/* This is also used by PyBytes_Concat() */
-static PyObject *
+/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */
+PyObject *
 bytes_concat(PyObject *a, PyObject *b)
 {
     Py_buffer va, vb;
@@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b)
     return result;
 }
 
-static PyObject *
+PyObject *
 bytes_repeat(PyObject *self, Py_ssize_t n)
 {
     PyBytesObject *a = _PyBytes_CAST(self);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
@@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self)
     return PyLong_FromSsize_t(_PyDict_SizeOf(self));
 }
 
-static PyObject *
+PyObject *
 dict_or(PyObject *self, PyObject *other)
 {
     if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) {
@@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other)
 }
 
 
-static PyObject *
+PyObject *
 dict_ior(PyObject *self, PyObject *other)
 {
     if (dict_update_arg(self, other)) {
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
@@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb)
     return (PyObject *)np;
 }
 
-static PyObject *
+PyObject *
 tuple_repeat(PyObject *self, Py_ssize_t n)
 {
     PyTupleObject *a = _PyTuple_CAST(self);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars)
 }
 
 
-static PyObject*
+PyObject*
 unicode_repeat(PyObject *str, Py_ssize_t len)
 {
     PyObject *u;
diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c
@@ -409,6 +409,16 @@ dummy_func(void) {
         r = right;
     }
 
+    op(_GUARD_BINARY_OP_EXTEND, (descr/4, left, right -- left, right)) {
+        _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
+        if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) {
+            if (sym_matches_type(left, d->lhs_type) &&
+                sym_matches_type(right, d->rhs_type)) {
+                REPLACE_OP(this_instr, _NOP, 0, 0);
+            }
+        }
+    }
+
     op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) {
         _PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
         if (d != NULL && d->result_type != NULL) {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
diff --git a/Python/specialize.c b/Python/specialize.c
@@ -2133,6 +2133,121 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs)
     return _PyTuple_Concat(lhs, rhs);
 }
 
+/* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead by
+   calling sq_repeat directly; n is converted with PyNumber_AsSsize_t. */
+
+extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n);
+extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n);
+extern PyObject *bytes_concat(PyObject *a, PyObject *b);
+extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n);
+extern PyObject *dict_or(PyObject *self, PyObject *other);
+extern PyObject *dict_ior(PyObject *self, PyObject *other);
+
+/* Convert n like PyNumber_Multiply so overflow raises the same error. */
+static inline PyObject *
+seq_int_multiply(PyObject *seq, PyObject *n, ssizeargfunc repeat)
+{
+    Py_ssize_t count = PyNumber_AsSsize_t(n, PyExc_OverflowError);
+    if (count == -1 && PyErr_Occurred()) {
+        return NULL;
+    }
+    return repeat(seq, count);
+}
+
+/* str-int and int-str */
+
+static int
+str_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyUnicode_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_str_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyUnicode_CheckExact(rhs);
+}
+
+static PyObject *
+str_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, unicode_repeat);
+}
+
+static PyObject *
+int_str_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, unicode_repeat);
+}
+
+/* bytes-bytes */
+
+static int
+bytes_bytes_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs);
+}
+
+/* bytes-int and int-bytes */
+
+static int
+bytes_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyBytes_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_bytes_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyBytes_CheckExact(rhs);
+}
+
+static PyObject *
+bytes_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, bytes_repeat);
+}
+
+static PyObject *
+int_bytes_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, bytes_repeat);
+}
+
+/* tuple-int and int-tuple */
+
+static int
+tuple_int_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyTuple_CheckExact(lhs) && PyLong_CheckExact(rhs);
+}
+
+static int
+int_tuple_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyLong_CheckExact(lhs) && PyTuple_CheckExact(rhs);
+}
+
+static PyObject *
+tuple_int_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(lhs, rhs, tuple_repeat);
+}
+
+static PyObject *
+int_tuple_multiply(PyObject *lhs, PyObject *rhs)
+{
+    return seq_int_multiply(rhs, lhs, tuple_repeat);
+}
+
+/* dict-dict */
+
+static int
+dict_dict_guard(PyObject *lhs, PyObject *rhs)
+{
+    return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs);
+}
+
 static int
 compactlongs_guard(PyObject *lhs, PyObject *rhs)
 {
@@ -2223,32 +2338,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
 #undef LONG_FLOAT_ACTION
 
 static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
-    /* long-long arithmetic */
-    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
-    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
-    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
-    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
-    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
-    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
-
-    /* float-long arithemetic */
-    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
-    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
-    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
-    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
-
-    /* long-float arithmetic */
-    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
-    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
-    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
-    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
+    /* long-long arithmetic: guards also check _PyLong_IsCompact, so
+       type alone is not sufficient to eliminate the guard. */
+    {NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
+    {NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
+    {NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
+    {NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
+
+    /* float-long arithmetic: guards also check NaN and compactness. */
+    {NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1, NULL, NULL},
+    {NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1, NULL, NULL},
+    {NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1, NULL, NULL},
+    {NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1, NULL, NULL},
+
+    /* long-float arithmetic: guards also check NaN and compactness. */
+    {NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1, NULL, NULL},
+    {NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1, NULL, NULL},
+    {NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1, NULL, NULL},
+    {NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1, NULL, NULL},
 
     /* list-list concatenation: _PyList_Concat always allocates a new list */
-    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1},
+    {NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1, &PyList_Type, &PyList_Type},
     /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
        that can return one of the operands, so the result is not guaranteed
        to be a freshly allocated object. */
-    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0},
+    {NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0, &PyTuple_Type, &PyTuple_Type},
+
+    /* str * int / int * str: call unicode_repeat directly.
+       unicode_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
+    {NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
+
+    /* bytes + bytes: call bytes_concat directly. bytes_concat may return
+       an operand when one side is empty, so result is not always unique. */
+    {NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+    {NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
+
+    /* bytes * int / int * bytes: call bytes_repeat directly.
+       bytes_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
+    {NB_INPLACE_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
+
+    /* tuple * int / int * tuple: call tuple_repeat directly.
+       tuple_repeat returns the original when n == 1. */
+    {NB_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
+    {NB_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
+    {NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
+    {NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
+
+    /* dict | dict: call dict_or directly */
+    {NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
+    {NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
 };
 
 static int

Original file line number	Diff line number	Diff line change
`@@ -1536,8 +1536,8 @@ bytes_length(PyObject *self)`
`1536`	`1536`	`return Py_SIZE(a);`
`1537`	`1537`	`}`
`1538`	`1538`
`1539`		`-/* This is also used by PyBytes_Concat() */`
`1540`		`-static PyObject *`
	`1539`	`+/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */`
	`1540`	`+PyObject *`
`1541`	`1541`	`bytes_concat(PyObject *a, PyObject *b)`
`1542`	`1542`	`{`
`1543`	`1543`	`Py_buffer va, vb;`
`@@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b)`
`1581`	`1581`	`return result;`
`1582`	`1582`	`}`
`1583`	`1583`
`1584`		`-static PyObject *`
	`1584`	`+PyObject *`
`1585`	`1585`	`bytes_repeat(PyObject *self, Py_ssize_t n)`
`1586`	`1586`	`{`
`1587`	`1587`	`PyBytesObject *a = _PyBytes_CAST(self);`
Original file line number	Diff line number	Diff line change
`@@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self)`
`5041`	`5041`	`return PyLong_FromSsize_t(_PyDict_SizeOf(self));`
`5042`	`5042`	`}`
`5043`	`5043`
`5044`		`-static PyObject *`
	`5044`	`+PyObject *`
`5045`	`5045`	`dict_or(PyObject *self, PyObject *other)`
`5046`	`5046`	`{`
`5047`	`5047`	`if (!PyAnyDict_Check(self) \|\| !PyAnyDict_Check(other)) {`
`@@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other)`
`5081`	`5081`	`}`
`5082`	`5082`
`5083`	`5083`
`5084`		`-static PyObject *`
	`5084`	`+PyObject *`
`5085`	`5085`	`dict_ior(PyObject *self, PyObject *other)`
`5086`	`5086`	`{`
`5087`	`5087`	`if (dict_update_arg(self, other)) {`
Original file line number	Diff line number	Diff line change
`@@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb)`
`594`	`594`	`return (PyObject *)np;`
`595`	`595`	`}`
`596`	`596`
`597`		`-static PyObject *`
	`597`	`+PyObject *`
`598`	`598`	`tuple_repeat(PyObject *self, Py_ssize_t n)`
`599`	`599`	`{`
`600`	`600`	`PyTupleObject *a = _PyTuple_CAST(self);`
Original file line number	Diff line number	Diff line change
`@@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars)`
`12494`	`12494`	`}`
`12495`	`12495`
`12496`	`12496`
`12497`		`-static PyObject*`
	`12497`	`+PyObject*`
`12498`	`12498`	`unicode_repeat(PyObject *str, Py_ssize_t len)`
`12499`	`12499`	`{`
`12500`	`12500`	`PyObject *u;`