Skip to content

Commit 1de7eee

Browse files
yuvmen and claude
authored and committed
feat(seer): Add lightweight RCA clustering endpoint integration (#112229)
Integrate Seer's new `/v0/issues/supergroups/cluster-lightweight` endpoint for lightweight root cause analysis (RCA) and supergroup clustering.

When a new error issue is created and its organization is listed in the `supergroups.lightweight-enabled-orgs` sentry-option, we send the issue's event data to Seer. Seer generates a lightweight RCA via a single LLM call and clusters the issue into supergroups based on embedding similarity. This is separate from the existing Explorer-based agentic RCA flow.

**Changes:**

- Register `supergroups.active-rca-source` and `supergroups.lightweight-enabled-orgs` sentry-options
- Add `LightweightRCAClusterRequest` type and `make_lightweight_rca_cluster_request()` API function
- Add `trigger_lightweight_rca_cluster()` core function and Celery task
- Add `kick_off_lightweight_rca_cluster` pipeline step in post_process for new error issues
- Rename existing `lightweight_rca.py` → `explorer_lightweight_rca.py` to clarify it's the Explorer-based flow

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d5ac7a2 commit 1de7eee

File tree

12 files changed

+272
-30
lines changed

12 files changed

+272
-30
lines changed

src/sentry/conf/server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
990990
"sentry.workflow_engine.tasks.cleanup",
991991
"sentry.tasks.seer.explorer_index",
992992
"sentry.tasks.seer.context_engine_index",
993+
"sentry.tasks.seer.lightweight_rca_cluster",
993994
# Used for tests
994995
"sentry.taskworker.tasks.examples",
995996
)

src/sentry/options/defaults.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,14 @@
13741374
flags=FLAG_MODIFIABLE_RATE | FLAG_AUTOMATOR_MODIFIABLE,
13751375
)
13761376

1377+
# Supergroups / Lightweight RCA
1378+
# Organization ids for which new error issues are sent to Seer's lightweight
# RCA clustering endpoint. The post_process step checks membership of the
# group's organization id in this sequence; an empty list enables no one.
register(
    "supergroups.lightweight-enabled-orgs",
    type=Sequence,
    default=[],
    flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)
1384+
13771385
# ## sentry.killswitches
13781386
#
13791387
# The following options are documented in sentry.killswitches in more detail

src/sentry/seer/autofix/issue_summary.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
make_signed_seer_api_request,
4646
make_summarize_issue_request,
4747
)
48-
from sentry.seer.supergroups.lightweight_rca import trigger_lightweight_rca
48+
from sentry.seer.supergroups.explorer_lightweight_rca import trigger_explorer_lightweight_rca
4949
from sentry.services import eventstore
5050
from sentry.services.eventstore.models import Event, GroupEvent
5151
from sentry.tasks.base import instrumented_task
@@ -226,10 +226,10 @@ def _trigger_autofix_task(
226226
stopping_point=stopping_point,
227227
)
228228
try:
229-
trigger_lightweight_rca(group)
229+
trigger_explorer_lightweight_rca(group)
230230
except Exception:
231231
logger.exception(
232-
"lightweight_rca.trigger_error_in_trigger_autofix_task",
232+
"explorer_lightweight_rca.trigger_error_in_trigger_autofix_task",
233233
extra={"group_id": group_id},
234234
)
235235
else:

src/sentry/seer/signed_seer_api.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,14 @@ class SupergroupsEmbeddingRequest(TypedDict):
376376
artifact_data: dict[str, Any]
377377

378378

379+
class LightweightRCAClusterRequest(TypedDict):
    """Request body sent to Seer's `/v0/issues/supergroups/cluster-lightweight` endpoint."""

    # Id of the group (issue) being analyzed and clustered.
    group_id: int
    # Issue payload. The caller added in this change sends the keys
    # "id", "title", "short_id" and "events" (a list of serialized events).
    issue: dict[str, Any]
    organization_slug: str
    organization_id: int
    project_id: int
385+
386+
379387
class SupergroupsGetRequest(TypedDict):
380388
organization_id: int
381389
supergroup_id: int
@@ -501,6 +509,20 @@ def make_supergroups_embedding_request(
501509
)
502510

503511

512+
def make_lightweight_rca_cluster_request(
    body: LightweightRCAClusterRequest,
    timeout: int | float | None = None,
    viewer_context: SeerViewerContext | None = None,
) -> BaseHTTPResponse:
    """
    Send a signed lightweight RCA clustering request to Seer.

    Args:
        body: Request payload; serialized to JSON bytes with orjson before sending.
        timeout: Optional timeout forwarded unchanged to `make_signed_seer_api_request`.
        viewer_context: Optional viewer context forwarded unchanged.

    Returns:
        The response object returned by `make_signed_seer_api_request`. No status
        check is performed here; callers are expected to inspect the response.
    """
    return make_signed_seer_api_request(
        seer_autofix_default_connection_pool,
        "/v0/issues/supergroups/cluster-lightweight",
        # OPT_NON_STR_KEYS lets orjson serialize dicts whose keys are not strings.
        body=orjson.dumps(body, option=orjson.OPT_NON_STR_KEYS),
        timeout=timeout,
        viewer_context=viewer_context,
    )
524+
525+
504526
def make_supergroups_get_request(
505527
body: SupergroupsGetRequest,
506528
viewer_context: SeerViewerContext,

src/sentry/seer/supergroups/lightweight_rca.py renamed to src/sentry/seer/supergroups/explorer_lightweight_rca.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
logger = logging.getLogger(__name__)
1111

1212

13-
def trigger_lightweight_rca(group: Group) -> int | None:
13+
def trigger_explorer_lightweight_rca(group: Group) -> int | None:
1414
"""
1515
Trigger a lightweight Explorer RCA run for the given group.
1616
@@ -26,7 +26,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
2626
"""
2727
has_feature = features.has("projects:supergroup-lightweight-rca", group.project)
2828
logger.info(
29-
"lightweight_rca.feature_flag_check",
29+
"explorer_lightweight_rca.feature_flag_check",
3030
extra={
3131
"group_id": group.id,
3232
"project_id": group.project.id,
@@ -66,7 +66,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
6666
)
6767

6868
logger.info(
69-
"lightweight_rca.starting_run",
69+
"explorer_lightweight_rca.starting_run",
7070
extra={
7171
"group_id": group.id,
7272
"project_id": group.project.id,
@@ -83,7 +83,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
8383
)
8484

8585
logger.info(
86-
"lightweight_rca.run_started",
86+
"explorer_lightweight_rca.run_started",
8787
extra={
8888
"group_id": group.id,
8989
"project_id": group.project.id,
@@ -94,7 +94,7 @@ def trigger_lightweight_rca(group: Group) -> int | None:
9494
return run_id
9595
except Exception:
9696
logger.exception(
97-
"lightweight_rca.trigger_failed",
97+
"explorer_lightweight_rca.trigger_failed",
9898
extra={
9999
"group_id": group.id,
100100
"organization_id": group.organization.id,
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
5+
from sentry.api.serializers import EventSerializer, serialize
6+
from sentry.eventstore import backend as eventstore
7+
from sentry.models.group import Group
8+
from sentry.seer.models import SeerApiError
9+
from sentry.seer.signed_seer_api import (
10+
LightweightRCAClusterRequest,
11+
SeerViewerContext,
12+
make_lightweight_rca_cluster_request,
13+
)
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
def trigger_lightweight_rca_cluster(group: Group) -> None:
    """
    Call Seer's lightweight RCA clustering endpoint for the given group.

    Sends issue event data to Seer, which generates a lightweight root cause analysis
    and clusters the issue into supergroups based on embedding similarity.

    Returns early (after an info log) without calling Seer when the group has no
    latest event, or when that event cannot be loaded from the event store.

    Raises:
        SeerApiError: If Seer responds with an HTTP status of 400 or above.
    """
    event = group.get_latest_event()
    if not event:
        logger.info(
            "lightweight_rca_cluster.no_event",
            extra={"group_id": group.id},
        )
        return

    # Re-fetch the event by id from the event store; bail out if it is not available.
    ready_event = eventstore.get_event_by_id(group.project.id, event.event_id, group_id=group.id)
    if not ready_event:
        logger.info(
            "lightweight_rca_cluster.event_not_ready",
            extra={"group_id": group.id, "event_id": event.event_id},
        )
        return

    # Serialized with no requesting user (second argument is None).
    serialized_event = serialize(ready_event, None, EventSerializer())

    body = LightweightRCAClusterRequest(
        group_id=group.id,
        issue={
            "id": group.id,
            "title": group.title,
            "short_id": group.qualified_short_id,
            "events": [serialized_event],
        },
        organization_slug=group.organization.slug,
        organization_id=group.organization.id,
        project_id=group.project.id,
    )
    viewer_context = SeerViewerContext(organization_id=group.organization.id)

    response = make_lightweight_rca_cluster_request(body, timeout=30, viewer_context=viewer_context)
    if response.status >= 400:
        # Surface HTTP errors to the caller instead of silently logging success.
        raise SeerApiError("Lightweight RCA cluster request failed", response.status)

    logger.info(
        "lightweight_rca_cluster.success",
        extra={
            "group_id": group.id,
            "project_id": group.project.id,
            "organization_id": group.organization.id,
        },
    )

src/sentry/tasks/post_process.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,6 +1584,22 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
15841584
)
15851585

15861586

1587+
def kick_off_lightweight_rca_cluster(job: PostProcessJob) -> None:
    """
    Enqueue the lightweight RCA clustering task for a newly created group.

    Does nothing unless the job's group state marks the group as new and the
    group's organization id is listed in the `supergroups.lightweight-enabled-orgs`
    option.
    """
    # Imported inside the function rather than at module level, as in the original.
    from sentry.tasks.seer.lightweight_rca_cluster import trigger_lightweight_rca_cluster_task

    if not job["group_state"]["is_new"]:
        return

    event = job["event"]
    group = event.group

    enabled_orgs: list[int] = options.get("supergroups.lightweight-enabled-orgs")
    if group.organization.id not in enabled_orgs:
        return

    # Only the group id is passed; the task re-loads the group itself.
    trigger_lightweight_rca_cluster_task.delay(group.id)
1601+
1602+
15871603
GROUP_CATEGORY_POST_PROCESS_PIPELINE: dict[
15881604
GroupCategory, list[Callable[[PostProcessJob], None]]
15891605
] = {
@@ -1596,6 +1612,7 @@ def kick_off_seer_automation(job: PostProcessJob) -> None:
15961612
handle_owner_assignment,
15971613
handle_auto_assignment,
15981614
kick_off_seer_automation,
1615+
kick_off_lightweight_rca_cluster,
15991616
process_workflow_engine_issue_alerts,
16001617
process_resource_change_bounds,
16011618
process_data_forwarding,
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import logging
2+
3+
from sentry.models.group import Group
4+
from sentry.seer.supergroups.lightweight_rca_cluster import trigger_lightweight_rca_cluster
5+
from sentry.tasks.base import instrumented_task
6+
from sentry.taskworker.namespaces import ingest_errors_tasks
7+
8+
logger = logging.getLogger(__name__)
9+
10+
11+
@instrumented_task(
    name="sentry.tasks.seer.lightweight_rca_cluster.trigger_lightweight_rca_cluster_task",
    namespace=ingest_errors_tasks,
)
def trigger_lightweight_rca_cluster_task(group_id: int, **kwargs) -> None:
    """
    Task wrapper that loads a group by id and runs `trigger_lightweight_rca_cluster` on it.

    If the group no longer exists, the task logs at info level and returns.
    Any exception raised by the trigger is logged with its traceback and re-raised.
    """
    try:
        group = Group.objects.get(id=group_id)
    except Group.DoesNotExist:
        logger.info(
            "lightweight_rca_cluster_task.group_not_found",
            extra={"group_id": group_id},
        )
        return

    try:
        trigger_lightweight_rca_cluster(group)
    except Exception:
        # Log with the group id for context, then propagate the failure.
        logger.exception(
            "lightweight_rca_cluster_task.failed",
            extra={"group_id": group_id},
        )
        raise

tests/sentry/seer/autofix/test_issue_summary.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -960,14 +960,14 @@ def setUp(self) -> None:
960960
event_data = load_data("python")
961961
self.event = self.store_event(data=event_data, project_id=self.project.id)
962962

963-
@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
963+
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
964964
@patch("sentry.seer.autofix.issue_summary.trigger_autofix_explorer", return_value=42)
965965
def test_lightweight_rca_called_on_explorer_path(
966966
self,
967967
mock_explorer,
968-
mock_lightweight_rca,
968+
mock_explorer_lightweight_rca,
969969
):
970-
"""trigger_lightweight_rca is called when the explorer path is taken"""
970+
"""trigger_explorer_lightweight_rca is called when the explorer path is taken"""
971971
_trigger_autofix_task(
972972
group_id=self.group.id,
973973
event_id=self.event.event_id,
@@ -976,18 +976,18 @@ def test_lightweight_rca_called_on_explorer_path(
976976
)
977977

978978
mock_explorer.assert_called_once()
979-
mock_lightweight_rca.assert_called_once_with(self.group)
979+
mock_explorer_lightweight_rca.assert_called_once_with(self.group)
980980

981-
@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
981+
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
982982
@patch(
983983
"sentry.seer.autofix.issue_summary.trigger_autofix", return_value=Mock(data={"run_id": 42})
984984
)
985985
def test_lightweight_rca_not_called_on_legacy_path(
986986
self,
987987
mock_autofix,
988-
mock_lightweight_rca,
988+
mock_explorer_lightweight_rca,
989989
):
990-
"""trigger_lightweight_rca is NOT called on the legacy autofix path"""
990+
"""trigger_explorer_lightweight_rca is NOT called on the legacy autofix path"""
991991
with self.feature(
992992
{
993993
"organizations:seer-explorer": False,
@@ -1002,17 +1002,17 @@ def test_lightweight_rca_not_called_on_legacy_path(
10021002
)
10031003

10041004
mock_autofix.assert_called_once()
1005-
mock_lightweight_rca.assert_not_called()
1005+
mock_explorer_lightweight_rca.assert_not_called()
10061006

1007-
@patch("sentry.seer.autofix.issue_summary.trigger_lightweight_rca")
1007+
@patch("sentry.seer.autofix.issue_summary.trigger_explorer_lightweight_rca")
10081008
@patch("sentry.seer.autofix.issue_summary.trigger_autofix_explorer", return_value=42)
10091009
def test_lightweight_rca_failure_does_not_block_explorer(
10101010
self,
10111011
mock_explorer,
1012-
mock_lightweight_rca,
1012+
mock_explorer_lightweight_rca,
10131013
):
1014-
"""Failure in trigger_lightweight_rca doesn't prevent the explorer autofix from completing"""
1015-
mock_lightweight_rca.side_effect = Exception("lightweight RCA failed")
1014+
"""Failure in trigger_explorer_lightweight_rca doesn't prevent the explorer autofix from completing"""
1015+
mock_explorer_lightweight_rca.side_effect = Exception("lightweight RCA failed")
10161016

10171017
_trigger_autofix_task(
10181018
group_id=self.group.id,
@@ -1022,7 +1022,7 @@ def test_lightweight_rca_failure_does_not_block_explorer(
10221022
)
10231023

10241024
mock_explorer.assert_called_once()
1025-
mock_lightweight_rca.assert_called_once_with(self.group)
1025+
mock_explorer_lightweight_rca.assert_called_once_with(self.group)
10261026

10271027

10281028
class TestFetchUserPreference:

tests/sentry/seer/supergroups/test_lightweight_rca.py renamed to tests/sentry/seer/supergroups/test_explorer_lightweight_rca.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from unittest.mock import MagicMock, patch
22

3-
from sentry.seer.supergroups.lightweight_rca import trigger_lightweight_rca
3+
from sentry.seer.supergroups.explorer_lightweight_rca import trigger_explorer_lightweight_rca
44
from sentry.testutils.cases import TestCase
55

66

7-
class TestTriggerLightweightRca(TestCase):
7+
class TestTriggerExplorerLightweightRca(TestCase):
88
def setUp(self) -> None:
99
super().setUp()
1010
self.user = self.create_user()
@@ -13,18 +13,18 @@ def setUp(self) -> None:
1313
self.group = self.create_group(project=self.project)
1414

1515
def test_returns_none_when_feature_flag_off(self) -> None:
16-
run_id = trigger_lightweight_rca(self.group)
16+
run_id = trigger_explorer_lightweight_rca(self.group)
1717

1818
assert run_id is None
1919

20-
@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
20+
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
2121
def test_creates_client_with_correct_params(self, mock_client_cls):
2222
mock_client = MagicMock()
2323
mock_client.start_run.return_value = 42
2424
mock_client_cls.return_value = mock_client
2525

2626
with self.feature("projects:supergroup-lightweight-rca"):
27-
run_id = trigger_lightweight_rca(self.group)
27+
run_id = trigger_explorer_lightweight_rca(self.group)
2828

2929
assert run_id == 42
3030
mock_client_cls.assert_called_once()
@@ -38,14 +38,14 @@ def test_creates_client_with_correct_params(self, mock_client_cls):
3838
assert kwargs["category_key"] == "lightweight_rca"
3939
assert kwargs["category_value"] == str(self.group.id)
4040

41-
@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
41+
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
4242
def test_start_run_called_with_correct_params(self, mock_client_cls):
4343
mock_client = MagicMock()
4444
mock_client.start_run.return_value = 42
4545
mock_client_cls.return_value = mock_client
4646

4747
with self.feature("projects:supergroup-lightweight-rca"):
48-
trigger_lightweight_rca(self.group)
48+
trigger_explorer_lightweight_rca(self.group)
4949

5050
mock_client.start_run.assert_called_once()
5151
kwargs = mock_client.start_run.call_args[1]
@@ -54,11 +54,11 @@ def test_start_run_called_with_correct_params(self, mock_client_cls):
5454
assert kwargs["metadata"] == {"group_id": self.group.id}
5555
assert "root cause" in kwargs["prompt"].lower()
5656

57-
@patch("sentry.seer.supergroups.lightweight_rca.SeerExplorerClient")
57+
@patch("sentry.seer.supergroups.explorer_lightweight_rca.SeerExplorerClient")
5858
def test_returns_none_on_error(self, mock_client_cls):
5959
mock_client_cls.side_effect = Exception("connection failed")
6060

6161
with self.feature("projects:supergroup-lightweight-rca"):
62-
run_id = trigger_lightweight_rca(self.group)
62+
run_id = trigger_explorer_lightweight_rca(self.group)
6363

6464
assert run_id is None

0 commit comments

Comments
 (0)