Skip to content

Commit 2c48df1

Browse files
committed
Add type-aware serializers
1 parent a1180f5 commit 2c48df1

3 files changed

Lines changed: 608 additions & 21 deletions

File tree

azure/functions/_durable_functions.py

Lines changed: 223 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,23 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# Licensed under the MIT License.
33

4-
from typing import Union
4+
import json
5+
import logging
6+
import os
7+
import sys
8+
from typing import Any, Callable, Optional, Union
9+
510
from . import _abc
6-
from importlib import import_module
11+
12+
logger = logging.getLogger(__name__)
13+
14+
_STRICT_ENV_VAR = "AZURE_FUNCTIONS_DURABLE_STRICT_TYPING"
15+
_TRUTHY = frozenset({"1", "true", "yes"})
16+
_LEGACY_KEYS = frozenset({"__class__", "__module__", "__data__"})
17+
18+
19+
def _is_strict_mode() -> bool:
    """Report whether strict typing is switched on via the environment.

    The variable named by ``_STRICT_ENV_VAR`` is read on every call (an
    unset variable is treated as the empty string). Its value is
    stripped of surrounding whitespace, lower-cased, and then looked up
    in ``_TRUTHY``.
    """
    raw_setting = os.environ.get(_STRICT_ENV_VAR, "")
    normalized = raw_setting.strip().lower()
    return normalized in _TRUTHY
721

822

923
# Utilities
@@ -46,33 +60,52 @@ def _serialize_custom_object(obj):
4660
def _deserialize_custom_object(obj: dict) -> object:
4761
"""Deserialize a user-defined object from JSON.
4862
49-
Deserializes a dictionary encoding a custom object,
50-
if it contains class metadata suggesting that it should be
51-
decoded further.
63+
Reconstructs a custom object from a dictionary that carries the
64+
``{"__class__", "__module__", "__data__"}`` envelope produced by
65+
:func:`_serialize_custom_object`. The class is resolved by looking
66+
up ``__module__`` in :data:`sys.modules`; modules are never imported
67+
on demand.
5268
53-
Parameters:
69+
Parameters
5470
----------
55-
obj: dict
56-
Dictionary object that potentially encodes a custom class
71+
obj : dict
72+
Dictionary that potentially encodes a custom class.
5773
58-
Returns:
59-
--------
74+
Returns
75+
-------
6076
object
61-
Either the original `obj` dictionary or the custom object it encoded
77+
Either the original ``obj`` dictionary (if it is not an
78+
envelope) or the reconstructed custom object.
6279
63-
Exceptions
64-
----------
80+
Raises
81+
------
82+
ValueError
83+
If the declared module is not present in ``sys.modules``.
84+
AttributeError
85+
If the declared module is loaded but does not define the
86+
declared class.
6587
TypeError
66-
If the decoded object does not contain a `from_json` function
88+
If the resolved class does not expose a ``from_json`` function.
6789
"""
6890
if ("__class__" in obj) and ("__module__" in obj) and ("__data__" in obj):
6991
class_name = obj.pop("__class__")
7092
module_name = obj.pop("__module__")
7193
obj_data = obj.pop("__data__")
7294

73-
# Importing the clas
74-
module = import_module(module_name)
75-
class_ = getattr(module, class_name)
95+
# Resolve the class from already-loaded modules; this function
96+
# does not import modules on demand.
97+
module = sys.modules.get(module_name)
98+
if module is None:
99+
raise ValueError(
100+
f"cannot deserialize custom object: module "
101+
f"{module_name!r} is not loaded in sys.modules"
102+
)
103+
class_ = getattr(module, class_name, None)
104+
if class_ is None:
105+
raise AttributeError(
106+
f"cannot deserialize custom object: class {class_name!r} "
107+
f"not found in module {module_name!r}"
108+
)
76109

77110
if not hasattr(class_, "from_json"):
78111
raise TypeError(f"class {type(obj)} does not expose a `from_json` "
@@ -83,6 +116,179 @@ def _deserialize_custom_object(obj: dict) -> object:
83116
return obj
84117

85118

119+
# ---------------------------------------------------------------------------
120+
# Public Durable Functions JSON codec
121+
# ---------------------------------------------------------------------------
122+
123+
124+
def df_dumps(value: Any) -> str:
    """Encode *value* as a JSON string, honouring the active typing mode.

    **Loose mode** (the default) delegates to
    ``json.dumps(value, default=_serialize_custom_object)``, so every
    value the encoder cannot handle natively -- at any nesting depth --
    is passed through ``_serialize_custom_object``.

    **Strict mode** (see ``_is_strict_mode``) wraps only a top-level
    object that has a ``to_json`` attribute. The resulting envelope is
    then encoded *without* a ``default=`` hook, so ``json.dumps`` raises
    ``TypeError`` if anything inside it is not natively
    JSON-serializable. Values without ``to_json`` are encoded as plain
    JSON, again without a hook.
    """
    if not _is_strict_mode():
        return json.dumps(value, default=_serialize_custom_object)

    if hasattr(value, "to_json"):
        # Only the outermost custom object gets the envelope.
        payload = _serialize_custom_object(value)
    else:
        # Primitive / plain-JSON value.
        payload = value
    return json.dumps(payload)
146+
147+
148+
def df_loads(s: str, expected_type: Optional[type] = None) -> Any:
    """Decode the JSON string *s*, optionally checking *expected_type*.

    This function only dispatches:

    * *expected_type* given -- the work is handed to
      ``_loads_with_expected_type``, which parses the raw JSON first and
      validates it against *expected_type* (warning in loose mode,
      ``TypeError`` in strict mode on a mismatch).
    * *expected_type* is ``None`` and strict mode is active -- the work
      is handed to ``_loads_strict_no_type``, which parses without an
      ``object_hook`` and raises ``TypeError`` for a top-level
      custom-object envelope.
    * *expected_type* is ``None`` in loose mode (default) -- runs
      ``json.loads(s, object_hook=_deserialize_custom_object)`` so
      custom-object envelopes are reconstructed by the hook.
    """
    if expected_type is None:
        if _is_strict_mode():
            return _loads_strict_no_type(s)
        return json.loads(s, object_hook=_deserialize_custom_object)

    return _loads_with_expected_type(s, expected_type)
179+
180+
181+
def _get_serialize_default() -> Optional[Callable]:
    """Pick the ``default=`` callback matching the active typing mode.

    For call sites that assemble their own ``json.dumps`` invocation and
    still want to respect strict/loose typing. Strict mode yields
    ``None`` (no hook); loose mode yields ``_serialize_custom_object``.
    """
    return None if _is_strict_mode() else _serialize_custom_object
192+
193+
194+
def _loads_strict_no_type(s: str) -> Any:
    """Strict-mode decode for callers that supplied no *expected_type*.

    *s* is parsed without an ``object_hook``. Anything that is not a
    top-level legacy custom-object envelope is returned exactly as
    parsed; only the top-level value is inspected.

    Raises
    ------
    TypeError
        If the top-level value is a legacy custom-object envelope,
        because strict mode needs an ``expected_type`` to rebuild it.
    """
    parsed = json.loads(s)
    if not _is_legacy_custom_dict(parsed):
        return parsed

    declared_module = parsed["__module__"]
    declared_class = parsed["__class__"]
    raise TypeError(
        "df_loads: strict mode requires expected_type to "
        "deserialize custom-object payloads, but none was provided. "
        f"Payload declares {declared_module}.{declared_class}."
    )
209+
210+
211+
def _is_legacy_custom_dict(d: Any) -> bool:
    """Tell whether *d* is a dict carrying every key in ``_LEGACY_KEYS``."""
    if not isinstance(d, dict):
        return False
    return _LEGACY_KEYS.issubset(d)
214+
215+
216+
def _has_json_protocol(cls: type) -> bool:
    """Check that *cls* has callable ``to_json`` and ``from_json`` attributes.

    ``to_json`` is examined first; a missing or non-callable attribute
    ends the check immediately with ``False``.
    """
    for method_name in ("to_json", "from_json"):
        if not callable(getattr(cls, method_name, None)):
            return False
    return True
221+
222+
223+
def _is_compatible(value: Any, expected_type: type) -> bool:
    """Best-effort ``isinstance`` check that tolerates generic type hints.

    Plain classes are checked with ``isinstance`` directly. For a
    parameterized generic whose ``__origin__`` is a real class (for
    example ``List[int]`` -> ``list``), the container type is checked so
    that ``List[int]`` and bare ``list`` are validated the same way;
    element types are not inspected. Any construct ``isinstance`` still
    rejects with ``TypeError`` is treated as compatible.

    Parameters
    ----------
    value : Any
        The already-deserialized value to validate.
    expected_type : type
        A class or a typing construct describing the expected type.

    Returns
    -------
    bool
        ``False`` only when a concrete class check was possible and
        failed; ``True`` otherwise.
    """
    # Parameterized generics are not valid ``isinstance`` targets, but
    # they expose the runtime container class via ``__origin__``.
    origin = getattr(expected_type, "__origin__", None)
    candidate = origin if isinstance(origin, type) else expected_type
    try:
        return isinstance(value, candidate)
    except TypeError:
        # Typing constructs without a usable runtime class cannot be
        # checked here; stay permissive rather than reject the payload.
        return True
230+
231+
232+
def _loads_with_expected_type(s: str, expected_type: type) -> Any:
    """Parse *s* and validate the result against *expected_type*.

    The raw JSON is parsed without an ``object_hook`` so the payload
    shape can be inspected before any class lookup.

    * Custom-object envelope: the declared ``__class__``/``__module__``
      are compared with *expected_type*. On a mismatch strict mode
      raises ``TypeError`` and loose mode logs a warning and carries on.
      Strict mode then rebuilds the object with
      ``expected_type.from_json(raw["__data__"])``; loose mode re-parses
      *s* with ``_deserialize_custom_object`` as ``object_hook`` so
      nested custom objects inside ``__data__`` are rebuilt too.
    * Any other payload: the parsed value is checked with
      ``_is_compatible``. On a mismatch strict mode raises ``TypeError``
      and loose mode logs a warning. Strict mode returns the parsed
      value; loose mode re-parses with the ``object_hook``.

    Parameters
    ----------
    s : str
        JSON document to decode.
    expected_type : type
        Class (or typing construct) the caller expects back.

    Raises
    ------
    TypeError
        In strict mode, when the payload does not match
        *expected_type* or *expected_type* lacks the
        ``to_json``/``from_json`` protocol.
    """
    raw = json.loads(s)
    strict = _is_strict_mode()

    if _is_legacy_custom_dict(raw):
        class_name = raw["__class__"]
        module_name = raw["__module__"]

        # typing constructs such as ``List[int]`` do not always define
        # ``__name__``; read both attributes defensively so a mismatch
        # is reported as intended instead of raising AttributeError.
        expected_name = getattr(expected_type, "__name__", None)
        expected_module = getattr(expected_type, "__module__", None)
        if expected_name is None or expected_module is None:
            expected_label = repr(expected_type)
            type_matches = False
        else:
            expected_label = f"{expected_module}.{expected_name}"
            type_matches = (class_name == expected_name
                            and module_name == expected_module)

        if not type_matches:
            msg = (
                f"df_loads: payload declares class "
                f"{module_name}.{class_name} but expected "
                f"{expected_label}"
            )
            if strict:
                raise TypeError(msg)
            logger.warning(msg)
            # Fall through to the object_hook path below.

        if strict:
            if not _has_json_protocol(expected_type):
                # The guard requires both methods, so name both.
                raise TypeError(
                    f"df_loads: expected_type "
                    f"{expected_label} "
                    f"does not expose from_json and to_json"
                )
            return expected_type.from_json(raw["__data__"])

        # Loose mode -- use the object_hook path so nested custom
        # objects inside __data__ are also reconstructed. The hook only
        # runs during parsing, hence the second json.loads call.
        return json.loads(s, object_hook=_deserialize_custom_object)

    # Primitive / plain-JSON payload -- validate the Python type.
    if not _is_compatible(raw, expected_type):
        msg = (
            f"df_loads: deserialized value ({type(raw).__name__}) is not "
            f"compatible with expected type {expected_type}"
        )
        if strict:
            raise TypeError(msg)
        logger.warning(msg)

    if strict:
        return raw
    # Loose mode -- use the object_hook path so nested custom objects
    # inside dicts/lists are reconstructed.
    return json.loads(s, object_hook=_deserialize_custom_object)
290+
291+
86292
class OrchestrationContext(_abc.OrchestrationContext):
87293
"""A durable function orchestration context.
88294

azure/functions/durable_functions.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,20 @@ def decode(cls,
9292

9393
# Durable functions extension always returns a string of json
9494
# See durable functions library's call_activity_task docs
95+
#
96+
# Strict-mode caveat: when the AZURE_FUNCTIONS_DURABLE_STRICT_TYPING
97+
# environment variable is set, df_loads requires an `expected_type`
98+
# to deserialize custom-object envelopes. The worker's converter
99+
# dispatch does not currently forward the activity function's
100+
# parameter type annotation to `decode`, so we have nothing to
101+
# pass here -- a strict-mode payload carrying a custom-object
102+
# envelope will surface as TypeError below and be re-raised as
103+
# ValueError. Plumbing `expected_type` through `InConverter.decode`
104+
# is tracked as future work in the spec (see
105+
# spec-functions-sdk-df-serialization.md, section 6).
95106
if data_type in ['string', 'json']:
96107
try:
97-
callback = _durable_functions._deserialize_custom_object
98-
result = json.loads(data.value, object_hook=callback)
108+
result = _durable_functions.df_loads(data.value)
99109
except json.JSONDecodeError:
100110
# String failover if the content is not json serializable
101111
result = data.value
@@ -113,8 +123,7 @@ def decode(cls,
113123
def encode(cls, obj: typing.Any, *,
114124
expected_type: typing.Optional[type]) -> meta.Datum:
115125
try:
116-
callback = _durable_functions._serialize_custom_object
117-
result = json.dumps(obj, default=callback)
126+
result = _durable_functions.df_dumps(obj)
118127
except TypeError as e:
119128
raise ValueError(
120129
f'activity trigger output must be json serializable ({obj})') from e

0 commit comments

Comments
 (0)