Skip to content

Commit b5391bd

Browse files
gpshead and claude committed
Reject 1-char base85/ascii85 groups unconditionally
Per the PLRM spec (section 3.13.3), a final partial 5-tuple containing only one character is an encoding violation. Move this check outside the `canonical=` guard so that it is always enforced. Also change `chunk_len` and `i` from `Py_ssize_t` to `int`, per review feedback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent e8498dd commit b5391bd

File tree

2 files changed

+56
-49
lines changed

2 files changed

+56
-49
lines changed

Lib/test/test_binascii.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -486,20 +486,22 @@ def test_ascii85_valid(self):
486486
res += b
487487
self.assertEqual(res, rawdata)
488488

489-
# Test decoding inputs with length 1 mod 5
490-
params = [
491-
(b"a", False, False, b"", b""),
492-
(b"xbw", False, False, b"wx", b""),
493-
(b"<~c~>", False, True, b"", b""),
494-
(b"{d ~>", False, True, b" {", b""),
495-
(b"ye", True, False, b"", b" "),
496-
(b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "),
497-
(b"<~FCfN8yg~>", True, True, b"", b"test "),
498-
(b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "),
489+
# Inputs with length 1 mod 5 end with a 1-char group, which is
490+
# an encoding violation per the PLRM spec.
491+
error_params = [
492+
(b"a", False, False, b""),
493+
(b"xbw", False, False, b"wx"),
494+
(b"<~c~>", False, True, b""),
495+
(b"{d ~>", False, True, b" {"),
496+
(b"ye", True, False, b""),
497+
(b"z\x01y\x00f", True, False, b"\x00\x01"),
498+
(b"<~FCfN8yg~>", True, True, b""),
499+
(b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"),
499500
]
500-
for a, foldspaces, adobe, ignorechars, b in params:
501+
for a, foldspaces, adobe, ignorechars in error_params:
501502
kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars}
502-
self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b)
503+
with self.assertRaises(binascii.Error):
504+
binascii.a2b_ascii85(self.type2test(a), **kwargs)
503505

504506
def test_ascii85_invalid(self):
505507
# Test Ascii85 with invalid characters interleaved
@@ -713,16 +715,18 @@ def test_base85_valid(self):
713715
self.assertEqual(res, self.rawdata)
714716

715717
# Test decoding inputs with different length
716-
self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'')
717-
self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'')
718+
# 1-char groups are rejected (encoding violation)
719+
with self.assertRaises(binascii.Error):
720+
binascii.a2b_base85(self.type2test(b'a'))
718721
self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q')
719722
self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa')
720723
self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')),
721724
b'qa\x9e')
722725
self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')),
723726
b'qa\x9e\xb6')
724-
self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')),
725-
b'qa\x9e\xb6')
727+
# 6-char input = full 5-char group + trailing 1-char group (rejected)
728+
with self.assertRaises(binascii.Error):
729+
binascii.a2b_base85(self.type2test(b'abcdef'))
726730
self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')),
727731
b'qa\x9e\xb6\x81')
728732

@@ -813,15 +817,13 @@ def test_base85_alphabet(self):
813817
def test_base85_canonical(self):
814818
# Non-canonical encodings are accepted without canonical=True
815819
self.assertEqual(binascii.a2b_base85(b'VF'), b'a')
816-
self.assertEqual(binascii.a2b_base85(b'V'), b'')
817820

818-
# 1-char partial groups are never produced by a conforming encoder
821+
# 1-char partial groups are always rejected (encoding violation:
822+
# no conforming encoder produces them)
819823
with self.assertRaises(binascii.Error):
820-
binascii.a2b_base85(b'V', canonical=True)
821-
# Digit 0 in a 1-char group exercises the explicit chunk_len==0
822-
# guard (without it the division check would see 0/P == 0/P).
824+
binascii.a2b_base85(b'V')
823825
with self.assertRaises(binascii.Error):
824-
binascii.a2b_base85(b'0', canonical=True)
826+
binascii.a2b_base85(b'0')
825827

826828
# Verify round-trip: encode then decode with canonical=True works
827829
for data in [b'a', b'ab', b'abc', b'abcd', b'abcde',
@@ -894,14 +896,12 @@ def test_base85_canonical_unique(self, payload):
894896
def test_ascii85_canonical(self):
895897
# Non-canonical encodings are accepted without canonical=True
896898
self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a')
897-
self.assertEqual(binascii.a2b_ascii85(b'@'), b'')
898899

899-
# 1-char partial groups are never produced by a conforming encoder
900+
# 1-char partial groups are always rejected (PLRM encoding violation)
900901
with self.assertRaises(binascii.Error):
901-
binascii.a2b_ascii85(b'@', canonical=True)
902-
# Digit 0 ('!' in ascii85) exercises the explicit chunk_len==0 guard
902+
binascii.a2b_ascii85(b'@')
903903
with self.assertRaises(binascii.Error):
904-
binascii.a2b_ascii85(b'!', canonical=True)
904+
binascii.a2b_ascii85(b'!')
905905

906906
# Verify round-trip: encode then decode with canonical=True works
907907
for data in [b'a', b'ab', b'abc', b'abcd', b'abcde',

Modules/binascii.c

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,8 +1176,20 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
11761176
}
11771177

11781178
/* Write current chunk. */
1179-
Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
1180-
for (Py_ssize_t i = 0; i < chunk_len; i++) {
1179+
int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4;
1180+
1181+
/* A final partial 5-tuple containing only one character is an
1182+
* encoding violation per the PLRM spec; reject unconditionally. */
1183+
if (chunk_len == 0) {
1184+
state = get_binascii_state(module);
1185+
if (state != NULL) {
1186+
PyErr_SetString(state->Error,
1187+
"Incomplete Ascii85 group");
1188+
}
1189+
goto error;
1190+
}
1191+
1192+
for (int i = 0; i < chunk_len; i++) {
11811193
*bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff;
11821194
}
11831195

@@ -1193,17 +1205,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces,
11931205
*
11941206
* So we zero the bottom (4-chunk_len) bytes of leftchar to
11951207
* get the canonical uint32 ("canonical_top") and compare
1196-
* quotients. A 1-char group (chunk_len==0) is always
1197-
* non-canonical since no conforming encoder produces it. */
1208+
* quotients. */
11981209
if (canonical && chunk_len < 4) {
1199-
if (chunk_len == 0) {
1200-
state = get_binascii_state(module);
1201-
if (state != NULL) {
1202-
PyErr_SetString(state->Error,
1203-
"Non-canonical Ascii85 group size");
1204-
}
1205-
goto error;
1206-
}
12071210
int n_pad = 4 - chunk_len;
12081211
uint32_t canonical_top =
12091212
(leftchar >> (n_pad * 8)) << (n_pad * 8);
@@ -1461,22 +1464,26 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data,
14611464
}
14621465

14631466
/* Write current chunk. */
1464-
Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4;
1465-
for (Py_ssize_t i = 0; i < chunk_len; i++) {
1467+
int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4;
1468+
1469+
/* A 1-char final group is an encoding violation (no conforming
1470+
* encoder produces it); reject unconditionally. */
1471+
if (chunk_len == 0) {
1472+
state = get_binascii_state(module);
1473+
if (state != NULL) {
1474+
PyErr_SetString(state->Error,
1475+
"Incomplete Base85 group");
1476+
}
1477+
goto error;
1478+
}
1479+
1480+
for (int i = 0; i < chunk_len; i++) {
14661481
*bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff;
14671482
}
14681483

14691484
/* Reject non-canonical encodings in the final group.
14701485
* See the comment in a2b_ascii85 for the full explanation. */
14711486
if (canonical && chunk_len < 4) {
1472-
if (chunk_len == 0) {
1473-
state = get_binascii_state(module);
1474-
if (state != NULL) {
1475-
PyErr_SetString(state->Error,
1476-
"Non-canonical Base85 group size");
1477-
}
1478-
goto error;
1479-
}
14801487
int n_pad = 4 - chunk_len;
14811488
uint32_t canonical_top =
14821489
(leftchar >> (n_pad * 8)) << (n_pad * 8);

0 commit comments

Comments
 (0)