@@ -244,6 +244,9 @@ static const _Py_ALIGNED_DEF(64, unsigned char) table_b2a_base85_a85[] =
244244#define BASE85_A85_Z 0x00000000
245245#define BASE85_A85_Y 0x20202020
246246
247+ /* pow85[k] == 85**k for k = 0..4; divisors used by the canonical encoding checks. */
248+ static const uint32_t pow85 [] = {1 , 85 , 7225 , 614125 , 52200625 };
249+
247250
248251static const _Py_ALIGNED_DEF (64 , unsigned char ) table_a2b_base32 [] = {
249252 -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 , -1 ,-1 ,-1 ,-1 ,
@@ -1178,7 +1181,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
11781181 * bin_data ++ = (leftchar >> (24 - 8 * i )) & 0xff ;
11791182 }
11801183
1181- /* Reject non-canonical encodings in the final group. */
1184+ /* Reject non-canonical encodings in the final group.
1185+ *
1186+ * A partial group of N chars (2-4) encodes N-1 bytes. The
1187+ * decoder pads missing chars with digit 84 (the maximum).
1188+ * The encoder produces the unique N chars for those bytes by
1189+ * zero-padding the bytes to a uint32 and taking the leading
1190+ * N base-85 digits. Two encodings are equivalent iff they
1191+ * yield the same leading digits, i.e. the same quotient when
1192+ * the decoded uint32 is divided by 85**(5-N).
1193+ *
1194+ * So we zero the bottom (4-chunk_len) bytes of leftchar to
1195+ * get the canonical uint32 ("canonical_top") and compare
1196+ * quotients. A 1-char group (chunk_len==0) is always
1197+ * non-canonical since no conforming encoder produces it. */
11821198 if (canonical && chunk_len < 4 ) {
11831199 if (chunk_len == 0 ) {
11841200 state = get_binascii_state (module );
@@ -1188,23 +1204,12 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
11881204 }
11891205 goto error ;
11901206 }
1191- uint32_t canon = 0 ;
1192- for (Py_ssize_t i = chunk_len ; i > 0 ; i -- ) {
1193- canon = (canon << 8 ) | bin_data [- i ];
1194- }
1195- canon <<= (4 - chunk_len ) * 8 ;
1196- unsigned char digits [5 ];
1197- uint32_t tmp = canon ;
1198- for (int i = 4 ; i >= 0 ; i -- ) {
1199- digits [i ] = tmp % 85 ;
1200- tmp /= 85 ;
1201- }
1202- uint32_t expected = 0 ;
1203- for (int i = 0 ; i < 5 ; i ++ ) {
1204- expected = expected * 85
1205- + (i <= chunk_len ? digits [i ] : 84 );
1206- }
1207- if (expected != leftchar ) {
1207+ int n_pad = 4 - chunk_len ;
1208+ uint32_t canonical_top =
1209+ (leftchar >> (n_pad * 8 )) << (n_pad * 8 );
1210+ if (canonical_top / pow85 [n_pad ]
1211+ != leftchar / pow85 [n_pad ])
1212+ {
12081213 state = get_binascii_state (module );
12091214 if (state != NULL ) {
12101215 PyErr_SetString (state -> Error ,
@@ -1461,39 +1466,23 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data,
14611466 * bin_data ++ = (leftchar >> (24 - 8 * i )) & 0xff ;
14621467 }
14631468
1464- /* Reject non-canonical encodings in the final group. */
1469+ /* Reject non-canonical encodings in the final group.
1470+ * See the comment in a2b_ascii85 for the full explanation. */
14651471 if (canonical && chunk_len < 4 ) {
14661472 if (chunk_len == 0 ) {
1467- /* 1-char partial group is never produced by a conforming
1468- * encoder. */
14691473 state = get_binascii_state (module );
14701474 if (state != NULL ) {
14711475 PyErr_SetString (state -> Error ,
14721476 "Non-canonical Base85 group size" );
14731477 }
14741478 goto error ;
14751479 }
1476- /* Re-encode the output bytes to verify canonical form.
1477- * Build the canonical uint32 from output bytes (zero-padded). */
1478- uint32_t canon = 0 ;
1479- for (Py_ssize_t i = chunk_len ; i > 0 ; i -- ) {
1480- canon = (canon << 8 ) | bin_data [- i ];
1481- }
1482- canon <<= (4 - chunk_len ) * 8 ;
1483- /* Extract first (chunk_len + 1) base85 digits. */
1484- unsigned char digits [5 ];
1485- uint32_t tmp = canon ;
1486- for (int i = 4 ; i >= 0 ; i -- ) {
1487- digits [i ] = tmp % 85 ;
1488- tmp /= 85 ;
1489- }
1490- /* Reconstruct expected value: canonical digits + 84-padding. */
1491- uint32_t expected = 0 ;
1492- for (int i = 0 ; i < 5 ; i ++ ) {
1493- expected = expected * 85
1494- + (i <= chunk_len ? digits [i ] : 84 );
1495- }
1496- if (expected != leftchar ) {
1480+ int n_pad = 4 - chunk_len ;
1481+ uint32_t canonical_top =
1482+ (leftchar >> (n_pad * 8 )) << (n_pad * 8 );
1483+ if (canonical_top / pow85 [n_pad ]
1484+ != leftchar / pow85 [n_pad ])
1485+ {
14971486 state = get_binascii_state (module );
14981487 if (state != NULL ) {
14991488 PyErr_SetString (state -> Error ,
0 commit comments