@@ -2133,6 +2133,121 @@ tuple_tuple_add(PyObject *lhs, PyObject *rhs)
21332133 return _PyTuple_Concat (lhs , rhs );
21342134}
21352135
2136+ /* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead
2137+ by calling sq_repeat directly with PyLong_AsSsize_t. */
2138+
/* Prototypes for type-specific implementations that the helpers and the
   specialization table in this file call directly: unicode_repeat,
   bytes_repeat and tuple_repeat are passed as the `repeat` callback of
   seq_int_multiply(); bytes_concat, dict_or and dict_ior are used as table
   actions as-is.
   NOTE(review): the definitions are not visible from here -- confirm that
   they have external linkage and exactly these signatures, and consider
   moving the declarations into a shared internal header. */
2139+ extern PyObject * unicode_repeat (PyObject * str , Py_ssize_t n );
2140+ extern PyObject * bytes_repeat (PyObject * self , Py_ssize_t n );
2141+ extern PyObject * bytes_concat (PyObject * a , PyObject * b );
2142+ extern PyObject * tuple_repeat (PyObject * self , Py_ssize_t n );
2143+ extern PyObject * dict_or (PyObject * self , PyObject * other );
2144+ extern PyObject * dict_ior (PyObject * self , PyObject * other );
2145+
2146+ static inline PyObject *
2147+ seq_int_multiply (PyObject * seq , PyObject * n ,
2148+ ssizeargfunc repeat )
2149+ {
2150+ Py_ssize_t count = PyLong_AsSsize_t (n );
2151+ if (count == -1 && PyErr_Occurred ()) {
2152+ return NULL ;
2153+ }
2154+ return repeat (seq , count );
2155+ }
2156+
2157+ /* str-int and int-str */
2158+
2159+ static int
2160+ str_int_guard (PyObject * lhs , PyObject * rhs )
2161+ {
2162+ return PyUnicode_CheckExact (lhs ) && PyLong_CheckExact (rhs );
2163+ }
2164+
2165+ static int
2166+ int_str_guard (PyObject * lhs , PyObject * rhs )
2167+ {
2168+ return PyLong_CheckExact (lhs ) && PyUnicode_CheckExact (rhs );
2169+ }
2170+
2171+ static PyObject *
2172+ str_int_multiply (PyObject * lhs , PyObject * rhs )
2173+ {
2174+ return seq_int_multiply (lhs , rhs , unicode_repeat );
2175+ }
2176+
2177+ static PyObject *
2178+ int_str_multiply (PyObject * lhs , PyObject * rhs )
2179+ {
2180+ return seq_int_multiply (rhs , lhs , unicode_repeat );
2181+ }
2182+
2183+ /* bytes-bytes */
2184+
2185+ static int
2186+ bytes_bytes_guard (PyObject * lhs , PyObject * rhs )
2187+ {
2188+ return PyBytes_CheckExact (lhs ) && PyBytes_CheckExact (rhs );
2189+ }
2190+
2191+ /* bytes-int and int-bytes */
2192+
2193+ static int
2194+ bytes_int_guard (PyObject * lhs , PyObject * rhs )
2195+ {
2196+ return PyBytes_CheckExact (lhs ) && PyLong_CheckExact (rhs );
2197+ }
2198+
2199+ static int
2200+ int_bytes_guard (PyObject * lhs , PyObject * rhs )
2201+ {
2202+ return PyLong_CheckExact (lhs ) && PyBytes_CheckExact (rhs );
2203+ }
2204+
2205+ static PyObject *
2206+ bytes_int_multiply (PyObject * lhs , PyObject * rhs )
2207+ {
2208+ return seq_int_multiply (lhs , rhs , bytes_repeat );
2209+ }
2210+
2211+ static PyObject *
2212+ int_bytes_multiply (PyObject * lhs , PyObject * rhs )
2213+ {
2214+ return seq_int_multiply (rhs , lhs , bytes_repeat );
2215+ }
2216+
2217+ /* tuple-int and int-tuple */
2218+
2219+ static int
2220+ tuple_int_guard (PyObject * lhs , PyObject * rhs )
2221+ {
2222+ return PyTuple_CheckExact (lhs ) && PyLong_CheckExact (rhs );
2223+ }
2224+
2225+ static int
2226+ int_tuple_guard (PyObject * lhs , PyObject * rhs )
2227+ {
2228+ return PyLong_CheckExact (lhs ) && PyTuple_CheckExact (rhs );
2229+ }
2230+
2231+ static PyObject *
2232+ tuple_int_multiply (PyObject * lhs , PyObject * rhs )
2233+ {
2234+ return seq_int_multiply (lhs , rhs , tuple_repeat );
2235+ }
2236+
2237+ static PyObject *
2238+ int_tuple_multiply (PyObject * lhs , PyObject * rhs )
2239+ {
2240+ return seq_int_multiply (rhs , lhs , tuple_repeat );
2241+ }
2242+
2243+ /* dict-dict */
2244+
2245+ static int
2246+ dict_dict_guard (PyObject * lhs , PyObject * rhs )
2247+ {
2248+ return PyDict_CheckExact (lhs ) && PyDict_CheckExact (rhs );
2249+ }
2250+
21362251static int
21372252compactlongs_guard (PyObject * lhs , PyObject * rhs )
21382253{
@@ -2223,32 +2338,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
22232338#undef LONG_FLOAT_ACTION
22242339
/* Table of extended binary-operation specializations.  Each row lists, in
   order: an NB_* operator code, a guard predicate, the action function that
   performs the operation, a type object, an integer flag, and (in the rows
   as changed here) two further entries that are either NULL or type objects.
   NOTE(review): judging by the per-row comments below, the fourth entry looks
   like the result type, the flag appears to mark actions whose result is
   always a freshly allocated object, and the last two entries look like the
   operand types for which a plain type check can stand in for the guard
   (NULL where the guard checks more than the type) -- confirm against the
   _PyBinaryOpSpecializationDescr definition, which is not visible here. */
22252340static _PyBinaryOpSpecializationDescr binaryop_extend_descrs [ ] = {
2226- /* long-long arithmetic */
2227- {NB_OR , compactlongs_guard , compactlongs_or , & PyLong_Type , 1 },
2228- {NB_AND , compactlongs_guard , compactlongs_and , & PyLong_Type , 1 },
2229- {NB_XOR , compactlongs_guard , compactlongs_xor , & PyLong_Type , 1 },
2230- {NB_INPLACE_OR , compactlongs_guard , compactlongs_or , & PyLong_Type , 1 },
2231- {NB_INPLACE_AND , compactlongs_guard , compactlongs_and , & PyLong_Type , 1 },
2232- {NB_INPLACE_XOR , compactlongs_guard , compactlongs_xor , & PyLong_Type , 1 },
2233-
2234- /* float-long arithemetic */
2235- {NB_ADD , float_compactlong_guard , float_compactlong_add , & PyFloat_Type , 1 },
2236- {NB_SUBTRACT , float_compactlong_guard , float_compactlong_subtract , & PyFloat_Type , 1 },
2237- {NB_TRUE_DIVIDE , nonzero_float_compactlong_guard , float_compactlong_true_div , & PyFloat_Type , 1 },
2238- {NB_MULTIPLY , float_compactlong_guard , float_compactlong_multiply , & PyFloat_Type , 1 },
2239-
2240- /* long-float arithmetic */
2241- {NB_ADD , compactlong_float_guard , compactlong_float_add , & PyFloat_Type , 1 },
2242- {NB_SUBTRACT , compactlong_float_guard , compactlong_float_subtract , & PyFloat_Type , 1 },
2243- {NB_TRUE_DIVIDE , nonzero_compactlong_float_guard , compactlong_float_true_div , & PyFloat_Type , 1 },
2244- {NB_MULTIPLY , compactlong_float_guard , compactlong_float_multiply , & PyFloat_Type , 1 },
2341+ /* long-long arithmetic: guards also check _PyLong_IsCompact, so
2342+ type alone is not sufficient to eliminate the guard. */
2343+ {NB_OR , compactlongs_guard , compactlongs_or , & PyLong_Type , 1 , NULL , NULL },
2344+ {NB_AND , compactlongs_guard , compactlongs_and , & PyLong_Type , 1 , NULL , NULL },
2345+ {NB_XOR , compactlongs_guard , compactlongs_xor , & PyLong_Type , 1 , NULL , NULL },
2346+ {NB_INPLACE_OR , compactlongs_guard , compactlongs_or , & PyLong_Type , 1 , NULL , NULL },
2347+ {NB_INPLACE_AND , compactlongs_guard , compactlongs_and , & PyLong_Type , 1 , NULL , NULL },
2348+ {NB_INPLACE_XOR , compactlongs_guard , compactlongs_xor , & PyLong_Type , 1 , NULL , NULL },
2349+
2350+ /* float-long arithmetic: guards also check NaN and compactness. */
2351+ {NB_ADD , float_compactlong_guard , float_compactlong_add , & PyFloat_Type , 1 , NULL , NULL },
2352+ {NB_SUBTRACT , float_compactlong_guard , float_compactlong_subtract , & PyFloat_Type , 1 , NULL , NULL },
2353+ {NB_TRUE_DIVIDE , nonzero_float_compactlong_guard , float_compactlong_true_div , & PyFloat_Type , 1 , NULL , NULL },
2354+ {NB_MULTIPLY , float_compactlong_guard , float_compactlong_multiply , & PyFloat_Type , 1 , NULL , NULL },
2355+
2356+ /* long-float arithmetic: guards also check NaN and compactness. */
2357+ {NB_ADD , compactlong_float_guard , compactlong_float_add , & PyFloat_Type , 1 , NULL , NULL },
2358+ {NB_SUBTRACT , compactlong_float_guard , compactlong_float_subtract , & PyFloat_Type , 1 , NULL , NULL },
2359+ {NB_TRUE_DIVIDE , nonzero_compactlong_float_guard , compactlong_float_true_div , & PyFloat_Type , 1 , NULL , NULL },
2360+ {NB_MULTIPLY , compactlong_float_guard , compactlong_float_multiply , & PyFloat_Type , 1 , NULL , NULL },
22452361
22462362 /* list-list concatenation: _PyList_Concat always allocates a new list */
2247- {NB_ADD , list_list_guard , list_list_add , & PyList_Type , 1 },
2363+ {NB_ADD , list_list_guard , list_list_add , & PyList_Type , 1 , & PyList_Type , & PyList_Type },
22482364 /* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
22492365 that can return one of the operands, so the result is not guaranteed
22502366 to be a freshly allocated object. */
2251- {NB_ADD , tuple_tuple_guard , tuple_tuple_add , & PyTuple_Type , 0 },
2367+ {NB_ADD , tuple_tuple_guard , tuple_tuple_add , & PyTuple_Type , 0 , & PyTuple_Type , & PyTuple_Type },
2368+
2369+ /* str * int / int * str: call unicode_repeat directly.
2370+ unicode_repeat returns the original when n == 1. */
2371+ {NB_MULTIPLY , str_int_guard , str_int_multiply , & PyUnicode_Type , 0 , & PyUnicode_Type , & PyLong_Type },
2372+ {NB_MULTIPLY , int_str_guard , int_str_multiply , & PyUnicode_Type , 0 , & PyLong_Type , & PyUnicode_Type },
2373+ {NB_INPLACE_MULTIPLY , str_int_guard , str_int_multiply , & PyUnicode_Type , 0 , & PyUnicode_Type , & PyLong_Type },
2374+ {NB_INPLACE_MULTIPLY , int_str_guard , int_str_multiply , & PyUnicode_Type , 0 , & PyLong_Type , & PyUnicode_Type },
2375+
2376+ /* bytes + bytes: call bytes_concat directly. bytes_concat may return
2377+ an operand when one side is empty, so result is not always unique. */
2378+ {NB_ADD , bytes_bytes_guard , bytes_concat , & PyBytes_Type , 0 , & PyBytes_Type , & PyBytes_Type },
2379+ {NB_INPLACE_ADD , bytes_bytes_guard , bytes_concat , & PyBytes_Type , 0 , & PyBytes_Type , & PyBytes_Type },
2380+
2381+ /* bytes * int / int * bytes: call bytes_repeat directly.
2382+ bytes_repeat returns the original when n == 1. */
2383+ {NB_MULTIPLY , bytes_int_guard , bytes_int_multiply , & PyBytes_Type , 0 , & PyBytes_Type , & PyLong_Type },
2384+ {NB_MULTIPLY , int_bytes_guard , int_bytes_multiply , & PyBytes_Type , 0 , & PyLong_Type , & PyBytes_Type },
2385+ {NB_INPLACE_MULTIPLY , bytes_int_guard , bytes_int_multiply , & PyBytes_Type , 0 , & PyBytes_Type , & PyLong_Type },
2386+ {NB_INPLACE_MULTIPLY , int_bytes_guard , int_bytes_multiply , & PyBytes_Type , 0 , & PyLong_Type , & PyBytes_Type },
2387+
2388+ /* tuple * int / int * tuple: call tuple_repeat directly.
2389+ tuple_repeat returns the original when n == 1. */
2390+ {NB_MULTIPLY , tuple_int_guard , tuple_int_multiply , & PyTuple_Type , 0 , & PyTuple_Type , & PyLong_Type },
2391+ {NB_MULTIPLY , int_tuple_guard , int_tuple_multiply , & PyTuple_Type , 0 , & PyLong_Type , & PyTuple_Type },
2392+ {NB_INPLACE_MULTIPLY , tuple_int_guard , tuple_int_multiply , & PyTuple_Type , 0 , & PyTuple_Type , & PyLong_Type },
2393+ {NB_INPLACE_MULTIPLY , int_tuple_guard , int_tuple_multiply , & PyTuple_Type , 0 , & PyLong_Type , & PyTuple_Type },
2394+
2395+ /* dict | dict: call dict_or directly */
2396+ {NB_OR , dict_dict_guard , dict_or , & PyDict_Type , 1 , & PyDict_Type , & PyDict_Type },
2397+ {NB_INPLACE_OR , dict_dict_guard , dict_ior , & PyDict_Type , 0 , & PyDict_Type , & PyDict_Type },
22522398};
22532399
22542400static int
0 commit comments