Skip to content
This repository was archived by the owner on Feb 23, 2026. It is now read-only.

Commit b6f3eb6

Browse files
authored
fix: setting bytes field from python string base64 decodes before assignment (#255)
This is necessary for maintaining field integrity during round trip conversions to python dicts. E.g. class MyMessage(proto.Message): data = proto.Field(proto.BYTES, number=1) my_message = MyMessage(data=b"this is a data payload") assert my_message == MyMessage(MyMessage.to_dict(my_message)) Conversion to and from json does not exhibit this problem because the entire ser/des logic is encapsulated by the vanilla protobuf runtime, which handles this problem. Fix for #249
1 parent 92a514b commit b6f3eb6

6 files changed

Lines changed: 104 additions & 26 deletions

File tree

docs/marshal.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,34 @@ Protocol buffer type Python type Nullable
4444
If you *write* a timestamp field using a Python ``datetime`` value,
4545
any existing nanosecond precision will be overwritten.
4646

47+
.. note::
48+
49+
Setting a ``bytes`` field from a string value will first base64 decode the string.
50+
This is necessary to preserve the original protobuf semantics when converting between
51+
Python dicts and proto messages.
52+
Converting a message containing a bytes field to a dict will
53+
base64 encode the bytes field and yield a value of type str.
54+
55+
.. code-block:: python
56+
57+
import proto
58+
from google.protobuf.json_format import ParseDict
59+
60+
class MyMessage(proto.Message):
61+
data = proto.Field(proto.BYTES, number=1)
62+
63+
msg = MyMessage(data=b"this is a message")
64+
msg_dict = MyMessage.to_dict(msg)
65+
66+
# Note: the value is the base64 encoded string of the bytes field.
67+
# It has a type of str, NOT bytes.
68+
assert type(msg_dict['data']) == str
69+
70+
msg_pb = ParseDict(msg_dict, MyMessage.pb())
71+
msg_two = MyMessage(msg_dict)
72+
73+
assert msg == msg_pb == msg_two
74+
4775
4876
Wrapper types
4977
-------------

proto/fields.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,15 @@ def package(self) -> str:
126126

127127
@property
128128
def pb_type(self):
129-
"""Return the composite type of the field, or None for primitives."""
129+
"""Return the composite type of the field, or the primitive type if a primitive."""
130130
# For enums, return the Python enum.
131131
if self.enum:
132132
return self.enum
133133

134-
# For non-enum primitives, return None.
134+
# For primitive fields, we still want to know
135+
# what the type is.
135136
if not self.message:
136-
return None
137+
return self.proto_type
137138

138139
# Return the internal protobuf message.
139140
if hasattr(self.message, "_meta"):

proto/marshal/marshal.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,11 @@
2525
from proto.marshal.collections import MapComposite
2626
from proto.marshal.collections import Repeated
2727
from proto.marshal.collections import RepeatedComposite
28+
from proto.marshal.rules import bytes as pb_bytes
2829
from proto.marshal.rules import dates
2930
from proto.marshal.rules import struct
3031
from proto.marshal.rules import wrappers
32+
from proto.primitives import ProtoType
3133

3234

3335
class Rule(abc.ABC):
@@ -85,14 +87,6 @@ class TimestampRule:
8587
proto_type (type): A protocol buffer message type.
8688
rule: A marshal object
8789
"""
88-
# Sanity check: Do not register anything to a class that is not
89-
# a protocol buffer message.
90-
if not issubclass(proto_type, (message.Message, enum.IntEnum)):
91-
raise TypeError(
92-
"Only enums and protocol buffer messages may be "
93-
"registered to the marshal."
94-
)
95-
9690
# If a rule was provided, register it and be done.
9791
if rule:
9892
# Ensure the rule implements Rule.
@@ -150,6 +144,9 @@ def reset(self):
150144
self.register(struct_pb2.ListValue, struct.ListValueRule(marshal=self))
151145
self.register(struct_pb2.Struct, struct.StructRule(marshal=self))
152146

147+
# Special case for bytes to allow base64 encode/decode
148+
self.register(ProtoType.BYTES, pb_bytes.BytesRule())
149+
153150
def to_python(self, proto_type, value, *, absent: bool = None):
154151
# Internal protobuf has its own special type for lists of values.
155152
# Return a view around it that implements MutableSequence.

proto/marshal/rules/bytes.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright (C) 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import base64
17+
18+
19+
class BytesRule:
20+
"""A marshal between Python strings and protobuf bytes.
21+
22+
Note: this conversion is asymmetric because Python does have a bytes type.
23+
It is sometimes necessary to convert proto bytes fields to strings, e.g. for
24+
JSON encoding, marshalling a message to a dict. Because bytes fields can
25+
represent arbitrary data, bytes fields are base64 encoded when they need to
26+
be represented as strings.
27+
28+
It is necessary to have the conversion be bidirectional, i.e.
29+
my_message == MyMessage(MyMessage.to_dict(my_message))
30+
31+
To accomplish this, we need to intercept assignments from strings and
32+
base64 decode them back into bytes.
33+
"""
34+
35+
def to_python(self, value, *, absent: bool = None):
36+
return value
37+
38+
def to_proto(self, value):
39+
if isinstance(value, str):
40+
value = value.encode("utf-8")
41+
value += b"=" * (4 - len(value) % 4) # padding
42+
value = base64.urlsafe_b64decode(value)
43+
44+
return value

tests/test_fields_bytes.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import base64
1516
import pytest
1617

1718
import proto
@@ -71,5 +72,25 @@ class Foo(proto.Message):
7172
# for strings (but not vice versa).
7273
foo.bar = b"anything"
7374
assert foo.bar == "anything"
74-
with pytest.raises(TypeError):
75-
foo.baz = "anything"
75+
76+
# We need to permit setting bytes fields from strings,
77+
# but the marshalling needs to base64 decode the result.
78+
# This is a requirement for interop with the vanilla protobuf runtime:
79+
# converting a proto message to a dict base64 encodes the bytes
80+
# because it may be sent over the network via a protocol like HTTP.
81+
encoded_swallow: str = base64.urlsafe_b64encode(b"unladen swallow").decode("utf-8")
82+
assert type(encoded_swallow) == str
83+
foo.baz = encoded_swallow
84+
assert foo.baz == b"unladen swallow"
85+
86+
87+
def test_bytes_to_dict_bidi():
88+
class Foo(proto.Message):
89+
bar = proto.Field(proto.BYTES, number=1)
90+
91+
foo = Foo(bar=b"spam")
92+
93+
foo_dict = Foo.to_dict(foo)
94+
foo_two = Foo(foo_dict)
95+
96+
assert foo == foo_two

tests/test_marshal_register.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,6 @@ def to_python(self, value, *, absent=None):
3333
assert isinstance(marshal._rules[empty_pb2.Empty], Rule)
3434

3535

36-
def test_invalid_target_registration():
37-
marshal = BaseMarshal()
38-
with pytest.raises(TypeError):
39-
40-
@marshal.register(object)
41-
class Rule:
42-
def to_proto(self, value):
43-
return value
44-
45-
def to_python(self, value, *, absent=None):
46-
return value
47-
48-
4936
def test_invalid_marshal_class():
5037
marshal = BaseMarshal()
5138
with pytest.raises(TypeError):

0 commit comments

Comments
 (0)