Skip to content

Commit 6261c86

Browse files
gh-145261: Fix ShareableList corruption for multi-byte strings and bytes with trailing nulls
ShareableList had two bugs: 1. It used the character count len(item) instead of the byte count len(item.encode('utf-8')) when allocating string slots, so a string containing multi-byte UTF-8 characters could be given a slot smaller than its encoded form; the stored data was then truncated, possibly in the middle of a character, causing UnicodeDecodeError on retrieval. 2. It used rstrip(b'\x00') to recover bytes values, which also stripped legitimate trailing null bytes. The fix uses the UTF-8 byte length for string slot allocation and stores the actual byte length in the format metadata for bytes values, so retrieval reads exactly the right number of bytes without needing rstrip.
1 parent 4401f23 commit 6261c86

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

Lib/multiprocessing/shared_memory.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ class ShareableList:
287287
_back_transforms_mapping = {
288288
0: lambda value: value, # int, float, bool
289289
1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str
290-
2: lambda value: value.rstrip(b'\x00'), # bytes
290+
2: lambda value: value, # bytes
291291
3: lambda _value: None, # None
292292
}
293293

@@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None):
312312
self._types_mapping[type(item)]
313313
if not isinstance(item, (str, bytes))
314314
else self._types_mapping[type(item)] % (
315-
self._alignment * (len(item) // self._alignment + 1),
315+
self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1),
316316
)
317317
for item in sequence
318318
]
@@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None):
355355
self._offset_data_start,
356356
*(v.encode(_enc) if isinstance(v, str) else v for v in sequence)
357357
)
358+
# For bytes, store actual length so retrieval is exact
359+
_stored_formats = [
360+
self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f
361+
for v, f in zip(sequence, _formats)
362+
]
358363
struct.pack_into(
359364
self._format_packing_metainfo,
360365
self.shm.buf,
361366
self._offset_packing_formats,
362-
*(v.encode(_enc) for v in _formats)
367+
*(v.encode(_enc) for v in _stored_formats)
363368
)
364369
struct.pack_into(
365370
self._format_back_transform_codes,
@@ -476,7 +481,8 @@ def __setitem__(self, position, value):
476481

477482
self._set_packing_format_and_transform(
478483
position,
479-
new_format,
484+
self._types_mapping[bytes] % (len(encoded_value),)
485+
if isinstance(value, bytes) else new_format,
480486
value
481487
)
482488
struct.pack_into(new_format, self.shm.buf, offset, encoded_value)

Lib/test/_test_multiprocessing.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self):
47574757
self.assertEqual(current_format, sl._get_packing_format(0))
47584758

47594759
# Verify attributes are readable.
4760-
self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q')
4760+
self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q')
47614761

47624762
# Exercise len().
47634763
self.assertEqual(len(sl), 7)
@@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self):
47854785
self.assertEqual(sl[3], 42)
47864786
sl[4] = 'some' # Change type at a given position.
47874787
self.assertEqual(sl[4], 'some')
4788-
self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q')
4788+
self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q')
47894789
with self.assertRaisesRegex(ValueError,
47904790
"exceeds available storage"):
47914791
sl[4] = 'far too many'
@@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self):
48174817
self.assertNotEqual(sl.shm.name, sl_copy.shm.name)
48184818
self.assertEqual(name_duplicate, sl_copy.shm.name)
48194819
self.assertEqual(list(sl), list(sl_copy))
4820-
self.assertEqual(sl.format, sl_copy.format)
48214820
sl_copy[-1] = 77
48224821
self.assertEqual(sl_copy[-1], 77)
48234822
self.assertNotEqual(sl[-1], 77)

0 commit comments

Comments
 (0)