Skip to content

Commit 4a4bb46

Browse files
authored
fix(serializer): add depth limit to EventSerializer to prevent hangs on complex objects (#1577)
* fix: add depth limit to EventSerializer to prevent hangs on complex objects

  EventSerializer.default() recursively traverses __dict__ and __slots__ of arbitrary objects without a depth limit. When @observe() captures function arguments containing objects like google.genai.Client (which hold aiohttp sessions, connection pools, and threading locks), json.dumps blocks indefinitely on the second invocation.

  Add a depth counter capped at _MAX_DEPTH = 20 that returns a <TypeName> placeholder once the limit is reached, preventing infinite recursion into complex object graphs while preserving all existing serialization behavior.

* test: tighten slots depth assertion to reflect double depth-counting
* fix: address Claude comments
1 parent 2263507 commit 4a4bb46

2 files changed

Lines changed: 158 additions & 19 deletions

File tree

langfuse/_utils/serializer.py

Lines changed: 34 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -36,11 +36,21 @@ class Serializable: # type: ignore
3636

3737

3838
class EventSerializer(JSONEncoder):
39+
_MAX_DEPTH = 20
40+
3941
def __init__(self, *args: Any, **kwargs: Any) -> None:
4042
super().__init__(*args, **kwargs)
4143
self.seen: set[int] = set() # Track seen objects to detect circular references
44+
self._depth = 0
4245

4346
def default(self, obj: Any) -> Any:
47+
self._depth += 1
48+
try:
49+
return self._default_inner(obj)
50+
finally:
51+
self._depth -= 1
52+
53+
def _default_inner(self, obj: Any) -> Any:
4454
try:
4555
if isinstance(obj, (datetime)):
4656
# Timezone-awareness check
@@ -82,9 +92,6 @@ def default(self, obj: Any) -> Any:
8292
if isinstance(obj, Queue):
8393
return type(obj).__name__
8494

85-
if is_dataclass(obj):
86-
return asdict(obj) # type: ignore
87-
8895
if isinstance(obj, UUID):
8996
return str(obj)
9097

@@ -97,22 +104,9 @@ def default(self, obj: Any) -> Any:
97104
if isinstance(obj, (date)):
98105
return obj.isoformat()
99106

100-
if isinstance(obj, BaseModel):
101-
obj.model_rebuild()
102-
103-
# For LlamaIndex models, we need to rebuild the raw model as well if they include OpenAI models
104-
if isinstance(raw := getattr(obj, "raw", None), BaseModel):
105-
raw.model_rebuild()
106-
107-
return obj.model_dump()
108-
109107
if isinstance(obj, Path):
110108
return str(obj)
111109

112-
# if langchain is not available, the Serializable type is NoneType
113-
if Serializable is not type(None) and isinstance(obj, Serializable): # type: ignore
114-
return obj.to_json()
115-
116110
# 64-bit integers might overflow the JavaScript safe integer range.
117111
# Since Node.js is run on the server that handles the serialized value,
118112
# we need to ensure that integers outside the safe range are converted to strings.
@@ -123,6 +117,25 @@ def default(self, obj: Any) -> Any:
123117
if isinstance(obj, (str, float, type(None))):
124118
return obj
125119

120+
if self._depth >= self._MAX_DEPTH:
121+
return f"<{type(obj).__name__}>"
122+
123+
if is_dataclass(obj):
124+
return asdict(obj) # type: ignore
125+
126+
if isinstance(obj, BaseModel):
127+
obj.model_rebuild()
128+
129+
# For LlamaIndex models, we need to rebuild the raw model as well if they include OpenAI models
130+
if isinstance(raw := getattr(obj, "raw", None), BaseModel):
131+
raw.model_rebuild()
132+
133+
return obj.model_dump()
134+
135+
# if langchain is not available, the Serializable type is NoneType
136+
if Serializable is not type(None) and isinstance(obj, Serializable): # type: ignore
137+
return obj.to_json()
138+
126139
if isinstance(obj, (tuple, set, frozenset)):
127140
return list(obj)
128141

@@ -138,9 +151,10 @@ def default(self, obj: Any) -> Any:
138151
return [self.default(item) for item in obj]
139152

140153
if hasattr(obj, "__slots__"):
141-
return self.default(
142-
{slot: getattr(obj, slot, None) for slot in obj.__slots__}
143-
)
154+
return {
155+
slot: self.default(getattr(obj, slot, None))
156+
for slot in obj.__slots__
157+
}
144158
elif hasattr(obj, "__dict__"):
145159
obj_id = id(obj)
146160

@@ -167,6 +181,7 @@ def default(self, obj: Any) -> Any:
167181

168182
def encode(self, obj: Any) -> str:
169183
self.seen.clear() # Clear seen objects before each encode call
184+
self._depth = 0
170185

171186
try:
172187
return super().encode(self.default(obj))

tests/unit/test_serializer.py

Lines changed: 124 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77
from uuid import UUID
88

9+
import pytest
910
from pydantic import BaseModel
1011

1112
from langfuse._utils.serializer import (
@@ -174,3 +175,126 @@ def __init__(self):
174175
obj = SlotClass()
175176
serializer = EventSerializer()
176177
assert json.loads(serializer.encode(obj)) == {"field": "value"}
178+
179+
180+
def test_deeply_nested_object_does_not_hang():
181+
class Inner:
182+
def __init__(self):
183+
self.lock = threading.Lock()
184+
self.value = "deep"
185+
186+
class Connection:
187+
def __init__(self):
188+
self._inner = Inner()
189+
self._pool = [Inner() for _ in range(3)]
190+
191+
class Client:
192+
def __init__(self):
193+
self._connection = Connection()
194+
self._config = {"key": "value"}
195+
196+
class Platform:
197+
def __init__(self):
198+
self._client = Client()
199+
200+
obj = {"args": (Platform(),), "kwargs": {}}
201+
serializer = EventSerializer()
202+
result = serializer.encode(obj)
203+
204+
# Must complete without hanging and produce valid JSON
205+
parsed = json.loads(result)
206+
assert "args" in parsed
207+
208+
209+
def test_max_depth_returns_type_name():
210+
class Level:
211+
def __init__(self, child=None):
212+
self.child = child
213+
214+
# Build a chain deeper than _MAX_DEPTH
215+
obj = None
216+
for _ in range(EventSerializer._MAX_DEPTH + 10):
217+
obj = Level(child=obj)
218+
219+
serializer = EventSerializer()
220+
result = json.loads(serializer.encode(obj))
221+
222+
# Walk down the chain — at some point it should be truncated to "Level"
223+
node = result
224+
found_truncation = False
225+
while isinstance(node, dict) and "child" in node:
226+
if node["child"] == "Level" or node["child"] == "<Level>":
227+
found_truncation = True
228+
break
229+
node = node["child"]
230+
231+
assert found_truncation, "Expected depth limit to truncate deep nesting"
232+
233+
234+
def test_deeply_nested_slots_object_is_truncated():
235+
class SlotLevel:
236+
__slots__ = ["child"]
237+
238+
def __init__(self, child=None):
239+
self.child = child
240+
241+
obj = None
242+
for _ in range(EventSerializer._MAX_DEPTH + 10):
243+
obj = SlotLevel(child=obj)
244+
245+
serializer = EventSerializer()
246+
result = json.loads(serializer.encode(obj))
247+
248+
# Walk the nested structure and verify it terminates
249+
node = result
250+
depth = 0
251+
while isinstance(node, dict):
252+
depth += 1
253+
if "child" in node:
254+
node = node["child"]
255+
else:
256+
break
257+
258+
assert EventSerializer._MAX_DEPTH - 2 <= depth <= EventSerializer._MAX_DEPTH + 2, (
259+
f"Nesting depth {depth} not near _MAX_DEPTH ({EventSerializer._MAX_DEPTH}) — "
260+
"serializer truncated too early or too late"
261+
)
262+
263+
264+
def test_deeply_nested_dict_preserves_keys_at_depth_boundary(monkeypatch):
265+
monkeypatch.setattr(EventSerializer, "_MAX_DEPTH", 3)
266+
267+
input_obj = {"a": {"b": {"c": "leaf"}}}
268+
expected = {"a": {"b": "<dict>"}}
269+
270+
serializer = EventSerializer()
271+
result = json.loads(serializer.encode(input_obj))
272+
273+
assert result == expected
274+
275+
276+
class _Color(Enum):
277+
RED = "red"
278+
NUMERIC = 7
279+
280+
281+
@pytest.mark.parametrize(
282+
"input_obj, expected",
283+
[
284+
(
285+
{datetime(2024, 1, 1, tzinfo=timezone.utc): "v"},
286+
{"2024-01-01T00:00:00Z": "v"},
287+
),
288+
(
289+
{UUID("12345678-1234-5678-1234-567812345678"): "v"},
290+
{"12345678-1234-5678-1234-567812345678": "v"},
291+
),
292+
({_Color.RED: "v"}, {"red": "v"}),
293+
({_Color.NUMERIC: "v"}, {"7": "v"}),
294+
],
295+
ids=["datetime", "uuid", "enum_str_value", "enum_int_value"],
296+
)
297+
def test_dict_with_non_string_keys_is_serialized(input_obj, expected):
298+
result = json.loads(EventSerializer().encode(input_obj))
299+
300+
assert result == expected

0 commit comments

Comments
 (0)