Skip to content

Commit fe63c59

Browse files
committed
special case for concatenation
1 parent 77c1558 commit fe63c59

File tree

9 files changed

+225
-28
lines changed

9 files changed

+225
-28
lines changed

Include/internal/pycore_code.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,11 @@ typedef struct {
503503
aliased to either operand). Used by the tier 2 optimizer to enable
504504
inplace follow-up ops. */
505505
int result_unique;
506+
/* Expected types of the left and right operands. Used by the tier 2
507+
optimizer to eliminate _GUARD_BINARY_OP_EXTEND when the operand
508+
types are already known. Leave these NULL (don't eliminate) when the types are unknown or when the guard checks more than the exact operand types. */
509+
PyTypeObject *lhs_type;
510+
PyTypeObject *rhs_type;
506511
} _PyBinaryOpSpecializationDescr;
507512

508513
/* Comparison bit masks. */

Lib/test/test_capi/test_opt.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3878,6 +3878,30 @@ def testfunc(n):
38783878
self.assertIn("_UNPACK_SEQUENCE_TUPLE", uops)
38793879
self.assertNotIn("_GUARD_TOS_TUPLE", uops)
38803880

3881+
def test_binary_op_extend_guard_elimination(self):
3882+
# When both operands have known types (e.g., from a prior
3883+
# _BINARY_OP_EXTEND result), the _GUARD_BINARY_OP_EXTEND
3884+
# should be eliminated.
3885+
def testfunc(n):
3886+
a = [1, 2]
3887+
b = [3, 4]
3888+
total = 0
3889+
for _ in range(n):
3890+
c = a + b # first: guard stays, result type = list
3891+
d = c + c # second: both operands are list -> guard eliminated
3892+
total += d[0]
3893+
return total
3894+
3895+
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
3896+
self.assertEqual(res, TIER2_THRESHOLD)
3897+
self.assertIsNotNone(ex)
3898+
uops = get_opnames(ex)
3899+
# Both list additions use _BINARY_OP_EXTEND
3900+
self.assertEqual(uops.count("_BINARY_OP_EXTEND"), 2)
3901+
# But the second guard is eliminated because both operands
3902+
# are known to be lists from the first _BINARY_OP_EXTEND.
3903+
self.assertEqual(uops.count("_GUARD_BINARY_OP_EXTEND"), 1)
3904+
38813905
def test_unary_invert_long_type(self):
38823906
def testfunc(n):
38833907
for _ in range(n):

Objects/bytesobject.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,8 +1536,8 @@ bytes_length(PyObject *self)
15361536
return Py_SIZE(a);
15371537
}
15381538

1539-
/* This is also used by PyBytes_Concat() */
1540-
static PyObject *
1539+
/* This is also used by PyBytes_Concat() and BINARY_OP_EXTEND */
1540+
PyObject *
15411541
bytes_concat(PyObject *a, PyObject *b)
15421542
{
15431543
Py_buffer va, vb;
@@ -1581,7 +1581,7 @@ bytes_concat(PyObject *a, PyObject *b)
15811581
return result;
15821582
}
15831583

1584-
static PyObject *
1584+
PyObject *
15851585
bytes_repeat(PyObject *self, Py_ssize_t n)
15861586
{
15871587
PyBytesObject *a = _PyBytes_CAST(self);

Objects/dictobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5041,7 +5041,7 @@ dict___sizeof___impl(PyDictObject *self)
50415041
return PyLong_FromSsize_t(_PyDict_SizeOf(self));
50425042
}
50435043

5044-
static PyObject *
5044+
PyObject *
50455045
dict_or(PyObject *self, PyObject *other)
50465046
{
50475047
if (!PyAnyDict_Check(self) || !PyAnyDict_Check(other)) {
@@ -5081,7 +5081,7 @@ frozendict_or(PyObject *self, PyObject *other)
50815081
}
50825082

50835083

5084-
static PyObject *
5084+
PyObject *
50855085
dict_ior(PyObject *self, PyObject *other)
50865086
{
50875087
if (dict_update_arg(self, other)) {

Objects/tupleobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ _PyTuple_Concat(PyObject *aa, PyObject *bb)
594594
return (PyObject *)np;
595595
}
596596

597-
static PyObject *
597+
PyObject *
598598
tuple_repeat(PyObject *self, Py_ssize_t n)
599599
{
600600
PyTupleObject *a = _PyTuple_CAST(self);

Objects/unicodeobject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12494,7 +12494,7 @@ unicode_rstrip_impl(PyObject *self, PyObject *chars)
1249412494
}
1249512495

1249612496

12497-
static PyObject*
12497+
PyObject*
1249812498
unicode_repeat(PyObject *str, Py_ssize_t len)
1249912499
{
1250012500
PyObject *u;

Python/optimizer_bytecodes.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,16 @@ dummy_func(void) {
409409
r = right;
410410
}
411411

412+
op(_GUARD_BINARY_OP_EXTEND, (descr/4, left, right -- left, right)) {
413+
_PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
414+
if (d != NULL && d->lhs_type != NULL && d->rhs_type != NULL) {
415+
if (sym_matches_type(left, d->lhs_type) &&
416+
sym_matches_type(right, d->rhs_type)) {
417+
REPLACE_OP(this_instr, _NOP, 0, 0);
418+
}
419+
}
420+
}
421+
412422
op(_BINARY_OP_EXTEND, (descr/4, left, right -- res, l, r)) {
413423
_PyBinaryOpSpecializationDescr *d = (_PyBinaryOpSpecializationDescr *)descr;
414424
if (d != NULL && d->result_type != NULL) {

Python/optimizer_cases.c.h

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/specialize.c

Lines changed: 167 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,6 +2133,121 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs)
21332133
return _PyTuple_Concat(lhs, rhs);
21342134
}
21352135

2136+
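/* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead
2137+
by calling the type's repeat function directly with a PyLong_AsSsize_t count. The extern declarations below also cover the concat and dict-or functions that the descriptor table calls directly. */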
2138+
2139+
extern PyObject *unicode_repeat(PyObject *str, Py_ssize_t n);
2140+
extern PyObject *bytes_repeat(PyObject *self, Py_ssize_t n);
2141+
extern PyObject *bytes_concat(PyObject *a, PyObject *b);
2142+
extern PyObject *tuple_repeat(PyObject *self, Py_ssize_t n);
2143+
extern PyObject *dict_or(PyObject *self, PyObject *other);
2144+
extern PyObject *dict_ior(PyObject *self, PyObject *other);
2145+
2146+
static inline PyObject *
2147+
seq_int_multiply(PyObject *seq, PyObject *n,
2148+
ssizeargfunc repeat)
2149+
{
2150+
Py_ssize_t count = PyLong_AsSsize_t(n);
2151+
if (count == -1 && PyErr_Occurred()) {
2152+
return NULL;
2153+
}
2154+
return repeat(seq, count);
2155+
}
2156+
2157+
/* str-int and int-str */
2158+
2159+
static int
2160+
str_int_guard(PyObject *lhs, PyObject *rhs)
2161+
{
2162+
return PyUnicode_CheckExact(lhs) && PyLong_CheckExact(rhs);
2163+
}
2164+
2165+
static int
2166+
int_str_guard(PyObject *lhs, PyObject *rhs)
2167+
{
2168+
return PyLong_CheckExact(lhs) && PyUnicode_CheckExact(rhs);
2169+
}
2170+
2171+
static PyObject *
2172+
str_int_multiply(PyObject *lhs, PyObject *rhs)
2173+
{
2174+
return seq_int_multiply(lhs, rhs, unicode_repeat);
2175+
}
2176+
2177+
static PyObject *
2178+
int_str_multiply(PyObject *lhs, PyObject *rhs)
2179+
{
2180+
return seq_int_multiply(rhs, lhs, unicode_repeat);
2181+
}
2182+
2183+
/* bytes-bytes */
2184+
2185+
static int
2186+
bytes_bytes_guard(PyObject *lhs, PyObject *rhs)
2187+
{
2188+
return PyBytes_CheckExact(lhs) && PyBytes_CheckExact(rhs);
2189+
}
2190+
2191+
/* bytes-int and int-bytes */
2192+
2193+
static int
2194+
bytes_int_guard(PyObject *lhs, PyObject *rhs)
2195+
{
2196+
return PyBytes_CheckExact(lhs) && PyLong_CheckExact(rhs);
2197+
}
2198+
2199+
static int
2200+
int_bytes_guard(PyObject *lhs, PyObject *rhs)
2201+
{
2202+
return PyLong_CheckExact(lhs) && PyBytes_CheckExact(rhs);
2203+
}
2204+
2205+
static PyObject *
2206+
bytes_int_multiply(PyObject *lhs, PyObject *rhs)
2207+
{
2208+
return seq_int_multiply(lhs, rhs, bytes_repeat);
2209+
}
2210+
2211+
static PyObject *
2212+
int_bytes_multiply(PyObject *lhs, PyObject *rhs)
2213+
{
2214+
return seq_int_multiply(rhs, lhs, bytes_repeat);
2215+
}
2216+
2217+
/* tuple-int and int-tuple */
2218+
2219+
static int
2220+
tuple_int_guard(PyObject *lhs, PyObject *rhs)
2221+
{
2222+
return PyTuple_CheckExact(lhs) && PyLong_CheckExact(rhs);
2223+
}
2224+
2225+
static int
2226+
int_tuple_guard(PyObject *lhs, PyObject *rhs)
2227+
{
2228+
return PyLong_CheckExact(lhs) && PyTuple_CheckExact(rhs);
2229+
}
2230+
2231+
static PyObject *
2232+
tuple_int_multiply(PyObject *lhs, PyObject *rhs)
2233+
{
2234+
return seq_int_multiply(lhs, rhs, tuple_repeat);
2235+
}
2236+
2237+
static PyObject *
2238+
int_tuple_multiply(PyObject *lhs, PyObject *rhs)
2239+
{
2240+
return seq_int_multiply(rhs, lhs, tuple_repeat);
2241+
}
2242+
2243+
/* dict-dict */
2244+
2245+
static int
2246+
dict_dict_guard(PyObject *lhs, PyObject *rhs)
2247+
{
2248+
return PyDict_CheckExact(lhs) && PyDict_CheckExact(rhs);
2249+
}
2250+
21362251
static int
21372252
compactlongs_guard(PyObject *lhs, PyObject *rhs)
21382253
{
@@ -2223,32 +2338,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
22232338
#undef LONG_FLOAT_ACTION
22242339

22252340
static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
2226-
/* long-long arithmetic */
2227-
{NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
2228-
{NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
2229-
{NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
2230-
{NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
2231-
{NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
2232-
{NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
2233-
2234-
/* float-long arithemetic */
2235-
{NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
2236-
{NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
2237-
{NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
2238-
{NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
2239-
2240-
/* long-float arithmetic */
2241-
{NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
2242-
{NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
2243-
{NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
2244-
{NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
2341+
/* long-long arithmetic: guards also check _PyLong_IsCompact, so
2342+
type alone is not sufficient to eliminate the guard. */
2343+
{NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
2344+
{NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
2345+
{NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
2346+
{NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
2347+
{NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
2348+
{NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
2349+
2350+
/* float-long arithmetic: guards also check NaN and compactness. */
2351+
{NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1, NULL, NULL},
2352+
{NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1, NULL, NULL},
2353+
{NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1, NULL, NULL},
2354+
{NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1, NULL, NULL},
2355+
2356+
/* long-float arithmetic: guards also check NaN and compactness. */
2357+
{NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1, NULL, NULL},
2358+
{NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1, NULL, NULL},
2359+
{NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1, NULL, NULL},
2360+
{NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1, NULL, NULL},
22452361

22462362
/* list-list concatenation: _PyList_Concat always allocates a new list */
2247-
{NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1},
2363+
{NB_ADD, list_list_guard, list_list_add, &PyList_Type, 1, &PyList_Type, &PyList_Type},
22482364
/* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
22492365
that can return one of the operands, so the result is not guaranteed
22502366
to be a freshly allocated object. */
2251-
{NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0},
2367+
{NB_ADD, tuple_tuple_guard, tuple_tuple_add, &PyTuple_Type, 0, &PyTuple_Type, &PyTuple_Type},
2368+
2369+
/* str * int / int * str: call unicode_repeat directly.
2370+
unicode_repeat returns the original when n == 1, so the result is not always unique. */
2371+
{NB_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
2372+
{NB_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
2373+
{NB_INPLACE_MULTIPLY, str_int_guard, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
2374+
{NB_INPLACE_MULTIPLY, int_str_guard, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
2375+
2376+
/* bytes + bytes: call bytes_concat directly. bytes_concat may return
2377+
an operand when one side is empty, so result is not always unique. */
2378+
{NB_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
2379+
{NB_INPLACE_ADD, bytes_bytes_guard, bytes_concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
2380+
2381+
/* bytes * int / int * bytes: call bytes_repeat directly.
2382+
bytes_repeat returns the original when n == 1, so the result is not always unique. */
2383+
{NB_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
2384+
{NB_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
2385+
{NB_INPLACE_MULTIPLY, bytes_int_guard, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
2386+
{NB_INPLACE_MULTIPLY, int_bytes_guard, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
2387+
2388+
/* tuple * int / int * tuple: call tuple_repeat directly.
2389+
tuple_repeat returns the original when n == 1, so the result is not always unique. */
2390+
{NB_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
2391+
{NB_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
2392+
{NB_INPLACE_MULTIPLY, tuple_int_guard, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
2393+
{NB_INPLACE_MULTIPLY, int_tuple_guard, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
2394+
2395+
/* dict | dict and dict |= dict: call dict_or / dict_ior directly */
2396+
{NB_OR, dict_dict_guard, dict_or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
2397+
{NB_INPLACE_OR, dict_dict_guard, dict_ior, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
22522398
};
22532399

22542400
static int

0 commit comments

Comments
 (0)