Reject 1-char base85/ascii85 groups unconditionally

gpshead · claude · gpshead · commit b5391bd378c6 · 2026-04-05T19:29:34.000Z
Per the PLRM spec (section 3.13.3), a final partial 5-tuple containing
only one character is an encoding violation. Move this check outside
the `canonical=` guard so it is always enforced.

Also change chunk_len and i from Py_ssize_t to int per review feedback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
@@ -486,20 +486,22 @@ def test_ascii85_valid(self):
                 res += b
             self.assertEqual(res, rawdata)
 
-        # Test decoding inputs with length 1 mod 5
-        params = [
-            (b"a", False, False, b"", b""),
-            (b"xbw", False, False, b"wx", b""),
-            (b"<~c~>", False, True, b"", b""),
-            (b"{d ~>", False, True, b" {", b""),
-            (b"ye", True, False, b"", b"    "),
-            (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00    "),
-            (b"<~FCfN8yg~>", True, True, b"", b"test    "),
-            (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test    "),
+        # Inputs with length 1 mod 5 end with a 1-char group, which is
+        # an encoding violation per the PLRM spec.
+        error_params = [
+            (b"a", False, False, b""),
+            (b"xbw", False, False, b"wx"),
+            (b"<~c~>", False, True, b""),
+            (b"{d ~>", False, True, b" {"),
+            (b"ye", True, False, b""),
+            (b"z\x01y\x00f", True, False, b"\x00\x01"),
+            (b"<~FCfN8yg~>", True, True, b""),
+            (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"),
         ]
-        for a, foldspaces, adobe, ignorechars, b in params:
+        for a, foldspaces, adobe, ignorechars in error_params:
             kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars}
-            self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b)
+            with self.assertRaises(binascii.Error):
+                binascii.a2b_ascii85(self.type2test(a), **kwargs)
 
     def test_ascii85_invalid(self):
         # Test Ascii85 with invalid characters interleaved
@@ -713,16 +715,18 @@ def test_base85_valid(self):
         self.assertEqual(res, self.rawdata)
 
         # Test decoding inputs with different length
-        self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'')
-        self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'')
+        # 1-char groups are rejected (encoding violation)
+        with self.assertRaises(binascii.Error):
+            binascii.a2b_base85(self.type2test(b'a'))
         self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q')
         self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa')
         self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')),
                          b'qa\x9e')
         self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')),
                          b'qa\x9e\xb6')
-        self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')),
-                         b'qa\x9e\xb6')
+        # 6-char input = full 5-char group + trailing 1-char group (rejected)
+        with self.assertRaises(binascii.Error):
+            binascii.a2b_base85(self.type2test(b'abcdef'))
         self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')),
                          b'qa\x9e\xb6\x81')
 
@@ -813,15 +817,13 @@ def test_base85_alphabet(self):
     def test_base85_canonical(self):
         # Non-canonical encodings are accepted without canonical=True
         self.assertEqual(binascii.a2b_base85(b'VF'), b'a')
-        self.assertEqual(binascii.a2b_base85(b'V'), b'')
 
-        # 1-char partial groups are never produced by a conforming encoder
+        # 1-char partial groups are always rejected (encoding violation:
+        # no conforming encoder produces them)
         with self.assertRaises(binascii.Error):
-            binascii.a2b_base85(b'V', canonical=True)
-        # Digit 0 in a 1-char group exercises the explicit chunk_len==0
-        # guard (without it the division check would see 0/P == 0/P).
+            binascii.a2b_base85(b'V')
         with self.assertRaises(binascii.Error):
-            binascii.a2b_base85(b'0', canonical=True)
+            binascii.a2b_base85(b'0')
 
         # Verify round-trip: encode then decode with canonical=True works
         for data in [b'a', b'ab', b'abc', b'abcd', b'abcde',
@@ -894,14 +896,12 @@ def test_base85_canonical_unique(self, payload):
     def test_ascii85_canonical(self):
         # Non-canonical encodings are accepted without canonical=True
         self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a')
-        self.assertEqual(binascii.a2b_ascii85(b'@'), b'')
 
-        # 1-char partial groups are never produced by a conforming encoder
+        # 1-char partial groups are always rejected (PLRM encoding violation)
         with self.assertRaises(binascii.Error):
-            binascii.a2b_ascii85(b'@', canonical=True)
-        # Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard
+            binascii.a2b_ascii85(b'@')
         with self.assertRaises(binascii.Error):
-            binascii.a2b_ascii85(b'!', canonical=True)
+            binascii.a2b_ascii85(b'!')
 
         # Verify round-trip: encode then decode with canonical=True works
         for data in [b'a', b'ab', b'abc', b'abcd', b'abcde',
diff --git a/Modules/binascii.c b/Modules/binascii.c
@@ -1176,8 +1176,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
         }
 
         /* Write current chunk. */
-        Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
-        for (Py_ssize_t i = 0; i < chunk_len; i++) {
+        int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4;
+
+        /* A final partial 5-tuple containing only one character is an
+         * encoding violation per the PLRM spec; reject unconditionally. */
+        if (chunk_len == 0) {
+            state = get_binascii_state(module);
+            if (state != NULL) {
+                PyErr_SetString(state->Error,
+                                "Incomplete Ascii85 group");
+            }
+            goto error;
+        }
+
+        for (int i = 0; i < chunk_len; i++) {
             *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff;
         }
 
@@ -1193,17 +1205,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
          *
          * So we zero the bottom (4-chunk_len) bytes of leftchar to
          * get the canonical uint32 ("canonical_top") and compare
-         * quotients.  A 1-char group (chunk_len==0) is always
-         * non-canonical since no conforming encoder produces it. */
+         * quotients. */
         if (canonical && chunk_len < 4) {
-            if (chunk_len == 0) {
-                state = get_binascii_state(module);
-                if (state != NULL) {
-                    PyErr_SetString(state->Error,
-                                    "Non-canonical Ascii85 group size");
-                }
-                goto error;
-            }
             int n_pad = 4 - chunk_len;
             uint32_t canonical_top =
                 (leftchar >> (n_pad * 8)) << (n_pad * 8);
@@ -1461,22 +1464,26 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data,
         }
 
         /* Write current chunk. */
-        Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
-        for (Py_ssize_t i = 0; i < chunk_len; i++) {
+        int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4;
+
+        /* A 1-char final group is an encoding violation (no conforming
+         * encoder produces it); reject unconditionally. */
+        if (chunk_len == 0) {
+            state = get_binascii_state(module);
+            if (state != NULL) {
+                PyErr_SetString(state->Error,
+                                "Incomplete Base85 group");
+            }
+            goto error;
+        }
+
+        for (int i = 0; i < chunk_len; i++) {
             *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff;
         }
 
         /* Reject non-canonical encodings in the final group.
          * See the comment in a2b_ascii85 for the full explanation. */
         if (canonical && chunk_len < 4) {
-            if (chunk_len == 0) {
-                state = get_binascii_state(module);
-                if (state != NULL) {
-                    PyErr_SetString(state->Error,
-                                    "Non-canonical Base85 group size");
-                }
-                goto error;
-            }
             int n_pad = 4 - chunk_len;
             uint32_t canonical_top =
                 (leftchar >> (n_pad * 8)) << (n_pad * 8);