From cf846bbc08c4471fb7e6a52b063f1c2171de185e Mon Sep 17 00:00:00 2001 From: rrad5409 Date: Sun, 15 Mar 2026 21:40:12 +1100 Subject: [PATCH 1/5] Implement new `Bytes` field --- src/marshmallow/fields.py | 38 +++++++++++++++++++++++++++++++++++ tests/test_deserialization.py | 17 ++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index dc961e631..9367292fa 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -878,6 +878,44 @@ def _deserialize(self, value, attr, data, **kwargs) -> str: raise self.make_error("invalid_utf8") from error +class Bytes(Field[bytes]): + """ + Marshmallow field type for any bytes array. + """ + + def _deserialize( + self, + value: typing.Any, + attr: str | None, + data: typing.Mapping[str, typing.Any] | None, + **kwargs: typing.Any, + ) -> bytes: + try: + match value: + case bytes() as b: + return b + case bytearray() as ba: + return bytes(ba) + case str() as s: + return bytes( + s, + encoding="utf-8", + errors="ignore", + ) + case int() as i: + return i.to_bytes( + length=max(1, (7 + i.bit_length()) // 8), + byteorder="big", + signed=i < 0, + ) + case obj: + if isinstance(obj, (typing.SupportsBytes, typing.Iterable)): + return bytes(obj) + raise ValidationError("not a bytes-like object") + except TypeError as e: + raise ValidationError("not a bytes-like object") from e + + class UUID(Field[uuid.UUID]): """A UUID field.""" diff --git a/tests/test_deserialization.py b/tests/test_deserialization.py index 7cbe28e48..95e7fd30e 100644 --- a/tests/test_deserialization.py +++ b/tests/test_deserialization.py @@ -322,6 +322,23 @@ def test_string_field_deserialization(self): with pytest.raises(ValidationError): field.deserialize({}) + def test_bytes_field_deserialization(self): + field = fields.Bytes() + assert field.deserialize(b"foo") == b"foo" + assert field.deserialize(bytearray(b"foo")) == b"foo" + assert field.deserialize("foo") == b"foo" + assert 
field.deserialize(0xDEAD) == b"\xDE\xAD" + assert field.deserialize([0xBE, 0xEF]) == b"\xBE\xEF" + assert field.deserialize((0xB, 0xA, 0xB, 0xE)) == b"\x0B\x0A\x0B\x0E" + + with pytest.raises(ValidationError) as excinfo: + field.deserialize({"hi": 222}) + assert excinfo.value.args[0] == "not a bytes-like object" + + with pytest.raises(ValidationError) as excinfo: + field.deserialize(['12345']) + assert excinfo.value.args[0] == "not a bytes-like object" + def test_boolean_field_deserialization(self): field = fields.Boolean() assert field.deserialize(True) is True From fc6cb2495c70ccc8c6d2ba595cc57e042bab2c87 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 15 Mar 2026 10:40:27 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/marshmallow/fields.py | 70 +++++++++++++++++------------------ tests/test_deserialization.py | 8 ++-- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index 9367292fa..d7e6007ea 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -879,41 +879,41 @@ def _deserialize(self, value, attr, data, **kwargs) -> str: class Bytes(Field[bytes]): - """ - Marshmallow field type for any bytes array. 
- """ - - def _deserialize( - self, - value: typing.Any, - attr: str | None, - data: typing.Mapping[str, typing.Any] | None, - **kwargs: typing.Any, - ) -> bytes: - try: - match value: - case bytes() as b: - return b - case bytearray() as ba: - return bytes(ba) - case str() as s: - return bytes( - s, - encoding="utf-8", - errors="ignore", - ) - case int() as i: - return i.to_bytes( - length=max(1, (7 + i.bit_length()) // 8), - byteorder="big", - signed=i < 0, - ) - case obj: - if isinstance(obj, (typing.SupportsBytes, typing.Iterable)): - return bytes(obj) - raise ValidationError("not a bytes-like object") - except TypeError as e: - raise ValidationError("not a bytes-like object") from e + """ + Marshmallow field type for any bytes array. + """ + + def _deserialize( + self, + value: typing.Any, + attr: str | None, + data: typing.Mapping[str, typing.Any] | None, + **kwargs: typing.Any, + ) -> bytes: + try: + match value: + case bytes() as b: + return b + case bytearray() as ba: + return bytes(ba) + case str() as s: + return bytes( + s, + encoding="utf-8", + errors="ignore", + ) + case int() as i: + return i.to_bytes( + length=max(1, (7 + i.bit_length()) // 8), + byteorder="big", + signed=i < 0, + ) + case obj: + if isinstance(obj, (typing.SupportsBytes, typing.Iterable)): + return bytes(obj) + raise ValidationError("not a bytes-like object") + except TypeError as e: + raise ValidationError("not a bytes-like object") from e class UUID(Field[uuid.UUID]): diff --git a/tests/test_deserialization.py b/tests/test_deserialization.py index 95e7fd30e..7712e62bd 100644 --- a/tests/test_deserialization.py +++ b/tests/test_deserialization.py @@ -327,16 +327,16 @@ def test_bytes_field_deserialization(self): assert field.deserialize(b"foo") == b"foo" assert field.deserialize(bytearray(b"foo")) == b"foo" assert field.deserialize("foo") == b"foo" - assert field.deserialize(0xDEAD) == b"\xDE\xAD" - assert field.deserialize([0xBE, 0xEF]) == b"\xBE\xEF" - assert 
field.deserialize((0xB, 0xA, 0xB, 0xE)) == b"\x0B\x0A\x0B\x0E" + assert field.deserialize(0xDEAD) == b"\xde\xad" + assert field.deserialize([0xBE, 0xEF]) == b"\xbe\xef" + assert field.deserialize((0xB, 0xA, 0xB, 0xE)) == b"\x0b\x0a\x0b\x0e" with pytest.raises(ValidationError) as excinfo: field.deserialize({"hi": 222}) assert excinfo.value.args[0] == "not a bytes-like object" with pytest.raises(ValidationError) as excinfo: - field.deserialize(['12345']) + field.deserialize(["12345"]) assert excinfo.value.args[0] == "not a bytes-like object" def test_boolean_field_deserialization(self): From e1b9376a2c1bf99dd9b71052b4166e4d2ff38b59 Mon Sep 17 00:00:00 2001 From: rrad5409 Date: Thu, 9 Apr 2026 11:44:06 +1000 Subject: [PATCH 3/5] Extend Bytes field - fix error message impl - add string encoding and error behaviour parameters - add serialization behaviour - add docs --- src/marshmallow/fields.py | 75 ++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 12 deletions(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index d7e6007ea..96d413e9b 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -2,6 +2,7 @@ from __future__ import annotations import abc +import base64 import collections import copy import datetime as dt @@ -48,6 +49,7 @@ "AwareDateTime", "Bool", "Boolean", + "Bytes", "Constant", "Date", "DateTime", @@ -880,8 +882,33 @@ def _deserialize(self, value, attr, data, **kwargs) -> str: class Bytes(Field[bytes]): """ - Marshmallow field type for any bytes array. + A field for deserializing strings into byte arrays. + + :param encoding: Specifies the string encoding used when encoding/decoding to/from strings. + :param errors: Error behaviour when converting to/from a :class:`str`, inherited from its constructor. + :param serialize: Specifies the return type when serializing. + `base64` and `str` use the value of `encoding` for the string. 
+ :param kwargs: The same keyword arguments that :class:`Field` receives. + + .. versionadded:: 4.3.0 """ + #: Default error messages. + default_error_messages = { + "not_bytes": "Not a bytes-like object.", + "unicode": "Invalid unicode string.", + } + + def __init__( + self, + encoding: str = "utf-8", + errors: str = "strict", + serialize: typing.Literal["int", "str", "bytes", "base64"] = "base64", + **kwargs: Unpack[_BaseFieldKwargs], + ): + super().__init__(**kwargs) + self.encoding = encoding + self.errors = errors + self.serialize = serialize def _deserialize( self, @@ -892,15 +919,11 @@ def _deserialize( ) -> bytes: try: match value: - case bytes() as b: - return b - case bytearray() as ba: - return bytes(ba) case str() as s: return bytes( s, - encoding="utf-8", - errors="ignore", + encoding=self.encoding, + errors=self.errors, ) case int() as i: return i.to_bytes( @@ -908,12 +931,40 @@ def _deserialize( byteorder="big", signed=i < 0, ) - case obj: - if isinstance(obj, (typing.SupportsBytes, typing.Iterable)): - return bytes(obj) - raise ValidationError("not a bytes-like object") + case _: + return bytes(obj) except TypeError as e: - raise ValidationError("not a bytes-like object") from e + raise self.make_error("not_bytes") from e + except UnicodeError as e: + raise self.make_error("unicode") from e + + def _serialize(self, + value: bytes, + attr: str | None, + obj: typing.Any, + **kwargs: typing.Any, + ) -> str | int | bytes: + try: + match self.serialize: + case "str": + return str( + value, + encoding=self.encoding, + errors=self.errors, + ) + case "base64": + return base64.standard_b64encode(value) + case "int": + return int.from_bytes( + value, + byteorder="big", + ) + case "bytes": + return value + case _: + typing.assert_never(self.serialize) + except UnicodeError as e: + raise self.make_error("unicode") from e class UUID(Field[uuid.UUID]): From a59efb55e8724cf9d5f5f3fafdcb7d3606ff6d60 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 01:46:17 +0000 Subject: [PATCH 4/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/marshmallow/fields.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index 96d413e9b..60af9945c 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -892,6 +892,7 @@ class Bytes(Field[bytes]): .. versionadded:: 4.3.0 """ + #: Default error messages. default_error_messages = { "not_bytes": "Not a bytes-like object.", @@ -934,11 +935,12 @@ def _deserialize( case _: return bytes(obj) except TypeError as e: - raise self.make_error("not_bytes") from e + raise self.make_error("not_bytes") from e except UnicodeError as e: - raise self.make_error("unicode") from e + raise self.make_error("unicode") from e - def _serialize(self, + def _serialize( + self, value: bytes, attr: str | None, obj: typing.Any, @@ -964,7 +966,7 @@ def _serialize(self, case _: typing.assert_never(self.serialize) except UnicodeError as e: - raise self.make_error("unicode") from e + raise self.make_error("unicode") from e class UUID(Field[uuid.UUID]): From 9004ab1df5f3a5f3f1754f8b016c87f2f92619d9 Mon Sep 17 00:00:00 2001 From: rrad5409 Date: Thu, 9 Apr 2026 11:47:36 +1000 Subject: [PATCH 5/5] Fix variable name typo in Bytes field --- src/marshmallow/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/marshmallow/fields.py b/src/marshmallow/fields.py index 60af9945c..bcc6b0fe9 100644 --- a/src/marshmallow/fields.py +++ b/src/marshmallow/fields.py @@ -933,7 +933,7 @@ def _deserialize( signed=i < 0, ) case _: - return bytes(obj) + return bytes(value) except TypeError as e: raise self.make_error("not_bytes") from e except UnicodeError as e: