Skip to content

Commit 27bd6c5

Browse files
Document Avro JSON type boundaries
1 parent cb19627 commit 27bd6c5

4 files changed

Lines changed: 101 additions & 3 deletions

File tree

README.md

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,19 @@ quiet_client = Client(
184184
)
185185
```
186186

187+
## Avro payload type boundaries
188+
189+
The default Avro codec uses a generic JSON wrapper so PHP, Python, and other
190+
workers can exchange the same wire format. It preserves JSON-native values:
191+
`None`, booleans, numbers, strings, lists, and dictionaries with string keys.
192+
193+
Class-carrying values are not encoded with type metadata. Convert pydantic
194+
models, attrs classes, dataclasses, pendulum values, `datetime` / `date` /
195+
`time`, `UUID`, `Decimal`, and plain `Enum` values to explicit dictionaries or
196+
scalars before passing them to the SDK. `IntEnum` and `StrEnum` encode because
197+
they are JSON scalar subclasses, but they decode as `int` and `str`.
198+
`OrderedDict` decodes as a plain `dict`.
199+
187200
## Authentication
188201

189202
For local servers that use one shared bearer token, pass `token=`:

src/durable_workflow/_avro.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,20 @@
66
base64( 0x00 || avro_binary( record{ json: string, version: int } ) )
77
88
The ``json`` field carries ``json.dumps(value)``; ``version`` is currently
9-
``1``. A ``0x01`` prefix is reserved for typed-schema payloads — those
10-
are not yet encodeable/decodeable from this SDK because typed schemas
11-
require a schema registry that is out of scope for the first Avro release.
9+
``1``. That means the generic wrapper preserves only JSON-native shapes:
10+
``None``, booleans, numbers, strings, lists, and mappings with string keys.
11+
Class identity is not carried on the wire. ``OrderedDict`` decodes as a plain
12+
``dict``; ``IntEnum`` decodes as ``int``; and ``StrEnum`` decodes as ``str``.
13+
Objects that the standard library JSON encoder does not know how to encode,
14+
including dataclasses, attrs classes, pydantic models, pendulum values,
15+
``datetime`` / ``date`` / ``time``, ``uuid.UUID``, ``decimal.Decimal``, and
16+
plain ``Enum`` values, raise ``TypeError`` during encode. Convert those values
17+
to explicit JSON-native dictionaries or scalars before passing them to the
18+
SDK, then rebuild domain objects in workflow or activity code.
19+
20+
A ``0x01`` prefix is reserved for typed-schema payloads — those are not yet
21+
encodeable/decodeable from this SDK because typed schemas require a schema
22+
registry that is out of scope for the first Avro release.
1223
1324
The ``avro`` third-party package is a core runtime dependency. If it is
1425
missing from a broken or partial installation, calling :func:`encode` or
@@ -49,6 +60,8 @@ def encode(value: Any) -> str:
4960
"""Encode a Python value as an Avro generic-wrapper payload blob.
5061
5162
Returns a base64 string the server accepts under ``payload_codec="avro"``.
63+
The generic wrapper accepts the same value shapes as ``json.dumps``; adapt
64+
domain objects to JSON-native data before encoding.
5265
"""
5366
try:
5467
import avro.io

src/durable_workflow/serializer.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,11 @@
1010
existing data only, not used for new workflows.
1111
- ``"avro"`` — the blob is a base64-encoded Avro generic-wrapper payload
1212
(see :mod:`durable_workflow._avro`). Default for all new v2 workflows.
13+
The wrapper carries JSON-native values only. Class-carrying values such as
14+
dataclasses, attrs classes, pydantic models, pendulum values, datetimes,
15+
UUIDs, ``Decimal``, and plain ``Enum`` values need an explicit adapter to a
16+
dictionary or scalar before encode; JSON-subclass values such as ``IntEnum``
17+
and ``StrEnum`` round-trip as their JSON scalar, not as the original class.
1318
"""
1419
from __future__ import annotations
1520

tests/test_serializer.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,10 @@
11
import logging
2+
from collections import OrderedDict
3+
from dataclasses import dataclass
4+
from datetime import date, datetime, time, timezone
5+
from decimal import Decimal
6+
from enum import Enum, IntEnum
7+
from uuid import UUID
28

39
import pytest
410

@@ -16,6 +22,33 @@
1622
not _AVRO_AVAILABLE, reason="avro package not installed"
1723
)
1824

25+
try:
26+
from enum import StrEnum
27+
except ImportError: # pragma: no cover - Python < 3.11 compatibility
28+
StrEnum = None # type: ignore[assignment,misc]
29+
30+
31+
@dataclass
32+
class SerializerDataclass:
33+
name: str
34+
count: int
35+
36+
37+
class SerializerEnum(Enum):
38+
PENDING = "pending"
39+
40+
41+
class SerializerIntEnum(IntEnum):
42+
LOW = 1
43+
44+
45+
if StrEnum is not None:
46+
47+
class SerializerStrEnum(StrEnum):
48+
HIGH = "high"
49+
else:
50+
SerializerStrEnum = None
51+
1952

2053
class TestEncode:
2154
def test_list(self) -> None:
@@ -170,6 +203,40 @@ def test_round_trip_containers(self) -> None:
170203
blob = serializer.encode(value, codec="avro")
171204
assert serializer.decode(blob, codec="avro") == value
172205

206+
@pytest.mark.parametrize(
207+
"value",
208+
[
209+
SerializerDataclass(name="Ada", count=2),
210+
datetime(2026, 4, 21, 10, 30, tzinfo=timezone.utc),
211+
date(2026, 4, 21),
212+
time(10, 30, tzinfo=timezone.utc),
213+
UUID("12345678-1234-5678-1234-567812345678"),
214+
Decimal("10.25"),
215+
SerializerEnum.PENDING,
216+
],
217+
)
218+
def test_json_unsupported_user_types_fail_encode(self, value: object) -> None:
219+
with pytest.raises(TypeError, match="not JSON serializable"):
220+
serializer.encode(value, codec="avro")
221+
222+
def test_ordered_dict_decodes_as_plain_dict(self) -> None:
223+
value = OrderedDict([("first", 1), ("second", 2)])
224+
decoded = serializer.decode(serializer.encode(value, codec="avro"), codec="avro")
225+
assert decoded == {"first": 1, "second": 2}
226+
assert type(decoded) is dict
227+
228+
def test_int_enum_decodes_as_int(self) -> None:
229+
decoded = serializer.decode(serializer.encode(SerializerIntEnum.LOW, codec="avro"), codec="avro")
230+
assert decoded == 1
231+
assert type(decoded) is int
232+
233+
@pytest.mark.skipif(StrEnum is None, reason="StrEnum requires Python 3.11+")
234+
def test_str_enum_decodes_as_str(self) -> None:
235+
assert SerializerStrEnum is not None
236+
decoded = serializer.decode(serializer.encode(SerializerStrEnum.HIGH, codec="avro"), codec="avro")
237+
assert decoded == "high"
238+
assert type(decoded) is str
239+
173240
@pytest.mark.parametrize(
174241
"name,blob,expected",
175242
[(name, blob, expected) for name, (blob, expected) in _PHP_AVRO_FIXTURES.items()],

0 commit comments

Comments
 (0)