Skip to content

Commit 82e8720

Browse files
authored
bugsnag: chain-aware grouping key and chain title in error list (#262)
`normalize_error_chain` walks `__cause__`/`__context__` (cycle-safe, at most four levels) and joins each level's normalized string with `" <- "`. This replaces the single-exception `normalize_error_message` call used for the grouping key, so that `"LauncherError <- TimeoutError"` and `"LauncherError <- ApiException"` land in separate, stable groups instead of being keyed on a single exception's message alone. `build_chain_title` does the same walk but truncates each level more aggressively (80 chars plus `...`, versus 200 chars for the grouping key) and returns `None` for single exceptions. `_before_notify` uses it to override the root-cause `errorClass` that Bugsnag displays in the error list, giving reviewers the full raise path at a glance without needing to open the stacktrace.
1 parent 394c09c commit 82e8720

3 files changed

Lines changed: 143 additions & 4 deletions

File tree

cloud_pipelines_backend/instrumentation/bugsnag_instrumentation.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,36 @@ def _before_notify(event: bugsnag_event.Event) -> None:
4343
if context:
4444
event.add_tab("tangle_context", context)
4545
if _CUSTOM_GROUPING_KEY and event.original_error:
46-
normalized = error_normalization.normalize_error_message(
46+
# Use the full chain for grouping so that "LauncherError <- TimeoutError"
47+
# and "LauncherError <- ApiException" land in separate, stable groups.
48+
chain = error_normalization.normalize_error_chain(
4749
exception=event.original_error
4850
)
4951
prefix = (event.metadata.get("extra") or {}).get("grouping_prefix")
50-
key_value = f"{prefix}: {normalized}" if prefix else normalized
52+
key_value = f"{prefix}: {chain}" if prefix else chain
5153
event.add_tab("custom", {_CUSTOM_GROUPING_KEY: key_value})
5254
if prefix and event.errors:
5355
try:
5456
for error in event.errors:
5557
error.error_class = f"{prefix}: {error.error_class}"
5658
except Exception:
57-
_logger.debug(
59+
_logger.warning(
5860
"Could not prepend grouping prefix to errorClass", exc_info=True
5961
)
62+
# For chained exceptions, override the root-cause title (what Bugsnag
63+
# displays in the error list) with the full chain so reviewers can see
64+
# the complete raise path without opening the stacktrace.
65+
chain_title = error_normalization.build_chain_title(
66+
exception=event.original_error
67+
)
68+
if chain_title and event.errors:
69+
try:
70+
title = f"{prefix}: {chain_title}" if prefix else chain_title
71+
event.errors[-1].error_class = title
72+
except Exception:
73+
_logger.warning(
74+
"Could not set chain title on errorClass", exc_info=True
75+
)
6076

6177

6278
def setup(*, service_name: str | None = None) -> None:

cloud_pipelines_backend/instrumentation/error_normalization.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def _normalize_launcher_error(*, exception: BaseException) -> str | None:
107107

108108

109109
def normalize_error_message(*, exception: BaseException) -> str:
110-
"""Return a stable normalized string for error grouping."""
110+
"""Return a stable normalized string for a single exception (no chain traversal)."""
111111
for normalizer in (
112112
_normalize_k8s_api_exception,
113113
_normalize_max_retry_error,
@@ -120,3 +120,53 @@ def normalize_error_message(*, exception: BaseException) -> str:
120120
return result
121121

122122
return f"{type(exception).__name__}: {_strip_generic(message=str(exception))}"
123+
124+
125+
# Per-level character cap that ``build_chain_title`` applies to display titles.
_CHAIN_PART_MAX_LEN = 80
# Per-level character cap that ``normalize_error_chain`` applies to grouping keys.
_CHAIN_GROUPING_KEY_MAX_PART_LEN = 200
# Maximum number of exceptions ``_walk_chain`` returns (the starting one included).
_CHAIN_MAX_DEPTH = 4


def _walk_chain(exception: BaseException) -> list[BaseException]:
    """Return the exception chain up to ``_CHAIN_MAX_DEPTH`` levels, cycle-safe.

    The list starts with ``exception`` itself. From each exception the walk
    follows the explicit ``__cause__`` when one is set; otherwise it follows
    the implicit ``__context__`` unless ``__suppress_context__`` is true.

    Links are tested with ``is not None`` rather than by truthiness, so a
    cause whose class defines a falsy ``__bool__`` or ``__len__`` is still
    followed instead of being silently dropped.

    Args:
        exception: The exception to start from.

    Returns:
        The visited exceptions in walk order, never longer than
        ``_CHAIN_MAX_DEPTH`` and never containing the same object twice.
    """
    chain: list[BaseException] = []
    # Track object identities so a self-referential chain terminates.
    visited: set[int] = set()
    current: BaseException | None = exception
    while (
        current is not None
        and id(current) not in visited
        and len(chain) < _CHAIN_MAX_DEPTH
    ):
        visited.add(id(current))
        chain.append(current)
        if current.__cause__ is not None:
            current = current.__cause__
        elif current.__suppress_context__:
            current = None
        else:
            current = current.__context__
    return chain
140+
141+
142+
def normalize_error_chain(*, exception: BaseException) -> str:
    """Build one normalized string describing ``exception`` and its chain.

    Every exception returned by ``_walk_chain`` is normalized with
    ``normalize_error_message`` and cut to
    ``_CHAIN_GROUPING_KEY_MAX_PART_LEN`` characters; the pieces are then
    joined with `` <- ``, starting from ``exception`` itself.

    Intended for grouping keys: chains whose normalized levels differ (for
    example ``LauncherError <- TimeoutError`` versus
    ``LauncherError <- ApiException``) yield different strings, while a
    single unchained exception yields just its own normalized message.
    """
    segments: list[str] = []
    for link in _walk_chain(exception):
        normalized = normalize_error_message(exception=link)
        segments.append(normalized[:_CHAIN_GROUPING_KEY_MAX_PART_LEN])
    return " <- ".join(segments)
155+
156+
157+
def build_chain_title(*, exception: BaseException) -> str | None:
    """Build a short `` <- ``-joined title for a chained exception.

    Each exception returned by ``_walk_chain`` is normalized with
    ``normalize_error_message``. A level longer than ``_CHAIN_PART_MAX_LEN``
    characters is cut to that length, stripped of trailing whitespace, and
    suffixed with ``...``.

    Returns ``None`` when the walk yields at most one exception, so callers
    can leave the default title untouched.
    """
    # Normalize every level before the length check, as the chain is short.
    levels = [
        normalize_error_message(exception=link) for link in _walk_chain(exception)
    ]
    if len(levels) < 2:
        return None
    return " <- ".join(
        level
        if len(level) <= _CHAIN_PART_MAX_LEN
        else level[:_CHAIN_PART_MAX_LEN].rstrip() + "..."
        for level in levels
    )

tests/instrumentation/test_error_normalization.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,76 @@ def test_strips_json_object_double_quotes(self):
260260
exc = RuntimeError('operation failed: {"key": "value"}')
261261
result = error_normalization.normalize_error_message(exception=exc)
262262
assert result == "RuntimeError: operation failed: {...}"
263+
264+
265+
class TestNormalizeErrorChain:
    """Checks for ``error_normalization.normalize_error_chain``."""

    def _chained(
        self, outer_msg: str, inner: BaseException, outer_cls: type = RuntimeError
    ) -> BaseException:
        """Raise ``outer_cls(outer_msg)`` from ``inner`` and return the caught exception."""
        try:
            raise outer_cls(outer_msg) from inner
        except outer_cls as caught:
            return caught

    def test_single_exception_no_arrow(self):
        normalized = error_normalization.normalize_error_chain(
            exception=ValueError("something went wrong")
        )
        assert normalized == "ValueError: something went wrong"
        assert " <- " not in normalized

    def test_two_level_chain(self):
        cause = TimeoutError("The read operation timed out")
        wrapped = self._chained("Failed to create pod: {'apiVersion': 'v1'}", cause)
        expected = (
            "RuntimeError: Failed to create pod: {...} <- TimeoutError: The read operation timed out"
        )
        assert error_normalization.normalize_error_chain(exception=wrapped) == expected

    def test_launcher_error_chain(self):
        try:
            from cloud_pipelines_backend.launchers.interfaces import LauncherError
        except ImportError:
            pytest.skip("LauncherError not importable")
        cause = TimeoutError("The read operation timed out")
        wrapped = self._chained(
            "Failed to create pod: {spec}", cause, outer_cls=LauncherError
        )
        normalized = error_normalization.normalize_error_chain(exception=wrapped)
        assert normalized == (
            "LauncherError: Failed to create pod: {spec} <- TimeoutError: The read operation timed out"
        )

    def test_caps_at_four_levels(self):
        chain: BaseException = ValueError("level 4")
        # Wrap from the inside out: levels 3..1 make the chain four deep,
        # and level 0 adds a fifth link that must be cut off.
        for level in (3, 2, 1, 0):
            chain = self._chained(f"level {level}", chain)
        normalized = error_normalization.normalize_error_chain(exception=chain)
        assert normalized.count(" <- ") == 3  # 4 parts max
310+
311+
312+
class TestBuildChainTitle:
    """Checks for ``error_normalization.build_chain_title``."""

    @staticmethod
    def _runtime_error_from(message: str, cause: BaseException) -> RuntimeError:
        """Raise ``RuntimeError(message)`` from ``cause`` and return the caught exception."""
        try:
            raise RuntimeError(message) from cause
        except RuntimeError as caught:
            return caught

    def test_single_exception_returns_none(self):
        unchained = ValueError("nothing to chain")
        assert error_normalization.build_chain_title(exception=unchained) is None

    def test_two_level_chain_returns_string(self):
        wrapped = self._runtime_error_from(
            "outer problem", TimeoutError("The read operation timed out")
        )
        title = error_normalization.build_chain_title(exception=wrapped)
        assert title == (
            "RuntimeError: outer problem <- TimeoutError: The read operation timed out"
        )

    def test_truncates_long_parts(self):
        wrapped = self._runtime_error_from("outer", ValueError("x" * 200))
        title = error_normalization.build_chain_title(exception=wrapped)
        assert title is not None
        _, cause_part = title.split(" <- ")[:2]
        assert len(cause_part) <= 83  # 80 + "..."

0 commit comments

Comments
 (0)