Skip to content

Commit 563c451

Browse files
committed
bugsnag: chain-aware grouping key and chain title in error list
normalize_error_chain walks __cause__/__context__ and joins each level's normalized string with " <- ". This replaces the single-exception normalize_error_message call used for the grouping key so that "LauncherError <- TimeoutError" and "LauncherError <- ApiException" land in separate, stable groups rather than one per root cause. build_chain_title does the same walk but truncates each level to 80 chars and returns None for single exceptions. _before_notify uses it to override the root-cause errorClass shown in the Bugsnag error list, giving reviewers the full raise path at a glance.
1 parent cff8f3a commit 563c451

3 files changed

Lines changed: 133 additions & 3 deletions

File tree

cloud_pipelines_backend/instrumentation/bugsnag_instrumentation.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,13 @@ def _before_notify(event: bugsnag_event.Event) -> None:
4343
if context:
4444
event.add_tab("tangle_context", context)
4545
if _CUSTOM_GROUPING_KEY and event.original_error:
46-
normalized = error_normalization.normalize_error_message(
46+
# Use the full chain for grouping so that "LauncherError <- TimeoutError"
47+
# and "LauncherError <- ApiException" land in separate, stable groups.
48+
chain = error_normalization.normalize_error_chain(
4749
exception=event.original_error
4850
)
4951
prefix = (event.metadata.get("extra") or {}).get("grouping_prefix")
50-
key_value = f"{prefix}: {normalized}" if prefix else normalized
52+
key_value = f"{prefix}: {chain}" if prefix else chain
5153
event.add_tab("custom", {_CUSTOM_GROUPING_KEY: key_value})
5254
if prefix and event.errors:
5355
try:
@@ -57,6 +59,18 @@ def _before_notify(event: bugsnag_event.Event) -> None:
5759
_logger.debug(
5860
"Could not prepend grouping prefix to errorClass", exc_info=True
5961
)
62+
# For chained exceptions, override the root-cause title (what Bugsnag
63+
# displays in the error list) with the full chain so reviewers can see
64+
# the complete raise path without opening the stacktrace.
65+
chain_title = error_normalization.build_chain_title(
66+
exception=event.original_error
67+
)
68+
if chain_title and event.errors:
69+
try:
70+
title = f"{prefix}: {chain_title}" if prefix else chain_title
71+
event.errors[-1].error_class = title
72+
except Exception:
73+
_logger.debug("Could not set chain title on errorClass", exc_info=True)
6074

6175

6276
def setup(*, service_name: str | None = None) -> None:

cloud_pipelines_backend/instrumentation/error_normalization.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def _normalize_launcher_error(*, exception: BaseException) -> str | None:
104104

105105

106106
def normalize_error_message(*, exception: BaseException) -> str:
107-
"""Return a stable normalized string for error grouping."""
107+
"""Return a stable normalized string for a single exception (no chain traversal)."""
108108
for normalizer in (
109109
_normalize_k8s_api_exception,
110110
_normalize_max_retry_error,
@@ -117,3 +117,46 @@ def normalize_error_message(*, exception: BaseException) -> str:
117117
return result
118118

119119
return f"{type(exception).__name__}: {_strip_generic(message=str(exception))}"
120+
121+
122+
# Per-level character budget used by build_chain_title: a normalized part longer
# than this is cut to this length and suffixed with "...".
_CHAIN_PART_MAX_LEN = 80
123+
124+
125+
def normalize_error_chain(*, exception: BaseException) -> str:
    """Return a stable normalized string covering the full exception chain.

    Starting at ``exception``, follows ``__cause__`` when it is set, otherwise
    ``__context__`` unless ``__suppress_context__`` is true. Each level is
    normalized with ``normalize_error_message`` and the levels are joined with
    `` <- ``, outermost first (e.g. ``LauncherError: ... <- TimeoutError: ...``).

    At most four levels are included, and the walk stops early if the same
    exception object is reached twice. For an exception with no cause or
    context the result is just that exception's normalized message.
    """
    parts: list[str] = []
    seen: set[int] = set()
    exc: BaseException | None = exception
    while exc is not None and id(exc) not in seen and len(parts) < 4:
        seen.add(id(exc))
        parts.append(normalize_error_message(exception=exc))
        # Compare against None explicitly: an exception class may define
        # __bool__ or __len__ and evaluate as falsy, in which case
        # ``exc.__cause__ or ...`` would silently drop it from the chain.
        if exc.__cause__ is not None:
            exc = exc.__cause__
        elif exc.__suppress_context__:
            exc = None
        else:
            exc = exc.__context__
    return " <- ".join(parts)
141+
142+
143+
def build_chain_title(*, exception: BaseException) -> str | None:
    """Return a human-readable chain title for display, or ``None`` for single exceptions.

    Walks the chain the same way as ``normalize_error_chain``: ``__cause__``
    when it is set, otherwise ``__context__`` unless ``__suppress_context__``
    is true, for at most four levels and stopping if an exception object
    repeats. Each level's normalized message longer than
    ``_CHAIN_PART_MAX_LEN`` is cut to that length, right-stripped, and
    suffixed with ``...`` (so a truncated part is at most three characters
    over the limit). Levels are joined with `` <- ``, outermost first.

    Returns ``None`` when only one level was collected, so callers can skip
    overriding the default title.
    """
    parts: list[str] = []
    seen: set[int] = set()
    exc: BaseException | None = exception
    while exc is not None and id(exc) not in seen and len(parts) < 4:
        seen.add(id(exc))
        part = normalize_error_message(exception=exc)
        if len(part) > _CHAIN_PART_MAX_LEN:
            part = part[:_CHAIN_PART_MAX_LEN].rstrip() + "..."
        parts.append(part)
        # Compare against None explicitly: an exception class may define
        # __bool__ or __len__ and evaluate as falsy, in which case
        # ``exc.__cause__ or ...`` would silently drop it from the chain.
        if exc.__cause__ is not None:
            exc = exc.__cause__
        elif exc.__suppress_context__:
            exc = None
        else:
            exc = exc.__context__
    if len(parts) <= 1:
        return None
    return " <- ".join(parts)

tests/instrumentation/test_error_normalization.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,76 @@ def test_strips_json_object_double_quotes(self):
249249
exc = RuntimeError('operation failed: {"key": "value"}')
250250
result = error_normalization.normalize_error_message(exception=exc)
251251
assert result == "RuntimeError: operation failed: {...}"
252+
253+
254+
class TestNormalizeErrorChain:
    """Tests for ``error_normalization.normalize_error_chain``."""

    def _chained(
        self, outer_msg: str, inner: BaseException, outer_cls: type = RuntimeError
    ) -> BaseException:
        """Raise ``outer_cls(outer_msg)`` from ``inner`` and return the caught exception."""
        try:
            raise outer_cls(outer_msg) from inner
        except outer_cls as caught:
            return caught

    def test_single_exception_no_arrow(self):
        """An unchained exception normalizes to a single part with no separator."""
        lone = ValueError("something went wrong")
        normalized = error_normalization.normalize_error_chain(exception=lone)
        assert normalized == "ValueError: something went wrong"
        assert " <- " not in normalized

    def test_two_level_chain(self):
        """An explicit ``raise ... from ...`` yields both levels, outermost first."""
        root_cause = TimeoutError("The read operation timed out")
        wrapper = self._chained(
            "Failed to create pod: {'apiVersion': 'v1'}", root_cause
        )
        expected = (
            "RuntimeError: Failed to create pod: {...} <- TimeoutError: The read operation timed out"
        )
        normalized = error_normalization.normalize_error_chain(exception=wrapper)
        assert normalized == expected

    def test_launcher_error_chain(self):
        """A LauncherError wrapping a TimeoutError keeps both levels in the key."""
        try:
            from cloud_pipelines_backend.launchers.interfaces import LauncherError
        except ImportError:
            pytest.skip("LauncherError not importable")
        root_cause = TimeoutError("The read operation timed out")
        try:
            raise LauncherError("Failed to create pod: {spec}") from root_cause
        except LauncherError as wrapper:
            normalized = error_normalization.normalize_error_chain(exception=wrapper)
            expected = (
                "LauncherError: Failed to create pod <- TimeoutError: The read operation timed out"
            )
            assert normalized == expected

    def test_caps_at_four_levels(self):
        """A five-deep chain is reported with only four parts."""
        chain: BaseException = ValueError("level 4")
        # Wrap the innermost error in levels 3, 2, 1 to build a 4-deep chain.
        for level in reversed(range(1, 4)):
            chain = self._chained(f"level {level}", chain)
        # Add a fifth, outermost level on top.
        chain = self._chained("level 0", chain)
        normalized = error_normalization.normalize_error_chain(exception=chain)
        separator_count = normalized.count(" <- ")
        assert separator_count == 3  # 4 parts max
299+
300+
301+
class TestBuildChainTitle:
    """Tests for ``error_normalization.build_chain_title``."""

    def test_single_exception_returns_none(self):
        """An unchained exception has no chain title."""
        lone = ValueError("nothing to chain")
        title = error_normalization.build_chain_title(exception=lone)
        assert title is None

    def test_two_level_chain_returns_string(self):
        """A two-level chain produces both parts joined by the arrow separator."""
        root_cause = TimeoutError("The read operation timed out")
        try:
            raise RuntimeError("outer problem") from root_cause
        except RuntimeError as wrapper:
            title = error_normalization.build_chain_title(exception=wrapper)
            expected = (
                "RuntimeError: outer problem <- TimeoutError: The read operation timed out"
            )
            assert title == expected

    def test_truncates_long_parts(self):
        """An over-long inner message is shortened in the title."""
        root_cause = ValueError("x" * 200)
        try:
            raise RuntimeError("outer") from root_cause
        except RuntimeError as wrapper:
            title = error_normalization.build_chain_title(exception=wrapper)
            assert title is not None
            pieces = title.split(" <- ")
            inner_part = pieces[1]
            assert len(inner_part) <= 83  # 80 + "..."

0 commit comments

Comments
 (0)