Skip to content

Commit c6b102a

Browse files
anonpranauditclaude
authored
[WEB-4956] Add no_tools_found Sentry event to diagnose empty discovery scans (#215)
* Add no_tools_found Sentry event to diagnose empty discovery scans A scan that finds zero tools was previously silent — indistinguishable from an errored scan, a missed detection, or a genuinely empty device. Emit one fire-and-forget Sentry warning (phase=no_tools_found) carrying the discriminators needed to tell them apart: - homes_enumerated: pre-fallback enumerated-user count (0 = enumeration miss; the existing post-fallback user_count masks 0 as 1) - users_scanned, used_fallback_user, is_root, os, duration_ms Counts/booleans only (no PII); reuses the existing curl-based report_to_sentry; wrapped so telemetry can never raise into or slow the scan. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * Let no_tools_found bypass the per-run Sentry cap (Greptile P2) A noisy empty scan (many earlier detector errors) could exhaust the shared 30-event per-run cap before the terminal no_tools_found summary runs, dropping exactly the diagnostic you most need on a likely missed-detection. Add a `priority` flag to report_to_sentry that bypasses the COUNT cap only — it still honors the circuit breaker (a dead transport can't be helped) and dedup (no spam) — and pass priority=True for the no_tools_found event. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * Let priority Sentry events bypass the circuit breaker too (Greptile P1) The priority flag bypassed the count cap but the circuit breaker — tripped by 3 consecutive, possibly transient, send failures — could still skip the terminal no_tools_found event without even an attempt. Allow a priority event ONE bounded attempt past the breaker (at most one ~4s curl at end of run) so a transient mid-scan outage doesn't drop the terminal diagnostic. Dedup is still honored, so it can never spam. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: audit <audit@local> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 0675c9f commit c6b102a

4 files changed

Lines changed: 201 additions & 3 deletions

File tree

scripts/coding_discovery_tools/ai_tools_discovery.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2475,6 +2475,10 @@ def _on_term_signal(signum, _frame) -> None:
24752475
else:
24762476
all_users = []
24772477

2478+
# Pre-fallback count is the key no-tools discriminator: 0 here means the
2479+
# macOS/AD enumeration missed every account (the fallback below masks 0->1).
2480+
homes_enumerated = len(all_users)
2481+
24782482
# If no users found, fall back to current user
24792483
if not all_users:
24802484
all_users = [get_user_info()]
@@ -2875,6 +2879,32 @@ def _on_term_signal(signum, _frame) -> None:
28752879
except Exception as metrics_err:
28762880
logger.debug(f"Building/sending discovery metrics failed: {metrics_err}")
28772881

2882+
# No-tools telemetry: a zero-tool scan is otherwise silent — we cannot tell
2883+
# an enumeration miss from a timing race from a genuinely empty device.
2884+
# Emit ONE enriched warning with the discriminators. Fire-and-forget:
2885+
# telemetry must never raise into / slow the scan.
2886+
if not tools:
2887+
try:
2888+
no_tools_ctx = {
2889+
**sentry_ctx,
2890+
"phase": "no_tools_found",
2891+
"homes_enumerated": homes_enumerated,
2892+
"users_scanned": len(all_users),
2893+
"used_fallback_user": homes_enumerated == 0,
2894+
"os": platform.system(),
2895+
"duration_ms": round((time.monotonic() - t_start) * 1000),
2896+
}
2897+
if hasattr(os, "getuid"):
2898+
no_tools_ctx["is_root"] = os.getuid() == 0
2899+
report_to_sentry(
2900+
RuntimeError("Discovery found no tools"),
2901+
context=no_tools_ctx,
2902+
level="warning",
2903+
priority=True,
2904+
)
2905+
except Exception:
2906+
pass
2907+
28782908
# Send scan completed event AFTER all scanning
28792909
logger.info("Sending scan completed event...")
28802910
success, _ = send_scan_event(

scripts/coding_discovery_tools/utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,13 +1611,20 @@ def report_to_sentry(
16111611
exception: Exception,
16121612
context: Optional[Dict] = None,
16131613
level: str = "error",
1614+
priority: bool = False,
16141615
) -> None:
16151616
"""Send an event to Sentry using the raw HTTP store endpoint.
16161617
16171618
Args:
16181619
exception: The exception to report.
16191620
context: Extra tags/context (e.g. phase, tool_name, http_code).
16201621
level: Sentry level -- "error" for crashes, "warning" for HTTP send failures.
1622+
priority: Best-effort guarantee a terminal once-per-run diagnostic
1623+
(e.g. the no_tools_found summary) is delivered. Bypasses both the
1624+
per-run event cap AND the circuit breaker so earlier transient
1625+
per-tool send failures can't silently skip it -- it still gets ONE
1626+
attempt at the end of the run (bounded: at most one ~4s curl). Dedup
1627+
is still honored (no spam). Reserve for a single terminal event/run.
16211628
"""
16221629
try:
16231630
dsn = _parse_sentry_dsn(_SENTRY_DSN)
@@ -1630,11 +1637,17 @@ def report_to_sentry(
16301637
global _sentry_event_count, _sentry_consecutive_fails, _sentry_dead_this_run
16311638
# Circuit breaker: once the transport looks dead, stop calling Sentry for the
16321639
# rest of the run so a blocked endpoint can't add its timeout to every failure.
1633-
if _sentry_dead_this_run:
1640+
# priority events bypass it for ONE bounded attempt so a transient mid-scan
1641+
# outage doesn't silently drop the terminal diagnostic.
1642+
if _sentry_dead_this_run and not priority:
16341643
return
16351644
# Collapse duplicate events and hard-cap the synchronous curls per run.
1645+
# priority events skip the count cap + breaker (but never dedup) so a
1646+
# terminal once-per-run diagnostic isn't starved by earlier per-tool errors.
16361647
signature = (type(exception).__name__, ctx.get("phase"), ctx.get("tool_name"))
1637-
if signature in _sentry_sent_signatures or _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN:
1648+
if signature in _sentry_sent_signatures:
1649+
return
1650+
if not priority and _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN:
16381651
return
16391652
_sentry_sent_signatures.add(signature)
16401653
_sentry_event_count += 1

tests/test_discovery_flow.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,5 +1351,93 @@ def test_extract_failure_reports_warning_and_returns_empty(self):
13511351
self.assertEqual(context.get("phase"), "extract")
13521352

13531353

1354+
class TestNoToolsSentryEvent(unittest.TestCase):
1355+
"""main() emits exactly one enriched 'no_tools_found' warning on a zero-tool
1356+
scan (the only signal that distinguishes an enumeration miss from a genuinely
1357+
empty device) and stays silent when any tool is detected."""
1358+
1359+
@staticmethod
1360+
def _context_of(call):
1361+
# report_to_sentry is invoked two ways in the codebase: context as the
1362+
# keyword `context=` (the no-tools path) and context positionally as the
1363+
# 2nd arg (extraction/process paths). Read whichever is present.
1364+
args, kwargs = call
1365+
if "context" in kwargs:
1366+
return kwargs["context"]
1367+
return args[1] if len(args) > 1 else {}
1368+
1369+
def _run_main(self, detect_return, mock_sentry):
1370+
import scripts.coding_discovery_tools.ai_tools_discovery as adm
1371+
1372+
argv = ["ai_tools_discovery.py", "--api-key", "k", "--domain", "http://127.0.0.1:1"]
1373+
detector_inst = Mock()
1374+
detector_inst.get_device_id.return_value = "dev"
1375+
detector_inst.detect_all_tools.return_value = detect_return
1376+
with patch.object(adm.platform, "system", return_value="Linux"), \
1377+
patch.object(adm.discovery_cache, "acquire_lock", return_value="acquired"), \
1378+
patch.object(adm.discovery_cache, "heartbeat_start", Mock(return_value=threading.Event())), \
1379+
patch.object(adm.discovery_cache, "release_lock", Mock()), \
1380+
patch.object(adm, "AIToolsDetector", return_value=detector_inst), \
1381+
patch.object(adm, "report_to_sentry", mock_sentry), \
1382+
patch.object(adm, "send_scan_event", Mock(return_value=(True, None))), \
1383+
patch.object(adm, "send_discovery_metrics", Mock()), \
1384+
patch.object(adm, "load_pending_reports", return_value=[]), \
1385+
patch.object(adm, "save_failed_reports", Mock()), \
1386+
patch.object(adm, "get_all_users_linux", return_value=[]), \
1387+
patch.object(adm, "get_user_info", return_value="runner"), \
1388+
patch.object(utils_mod, "_SENTRY_DSN", ""), \
1389+
patch.object(sys, "argv", argv):
1390+
detector_inst._set_canonical_vscode_copilot = Mock()
1391+
try:
1392+
adm.main()
1393+
except SystemExit:
1394+
pass
1395+
1396+
@unittest.skipUnless(os.name == "posix", "POSIX signal handling")
1397+
def test_fires_on_zero_tool_scan(self):
1398+
mock_sentry = Mock()
1399+
self._run_main([], mock_sentry)
1400+
1401+
no_tools_calls = [
1402+
c for c in mock_sentry.call_args_list
1403+
if self._context_of(c).get("phase") == "no_tools_found"
1404+
]
1405+
self.assertEqual(len(no_tools_calls), 1, "expected exactly one no_tools_found event")
1406+
1407+
call = no_tools_calls[0]
1408+
args, kwargs = call
1409+
# Exception is positional, RuntimeError with the fixed message.
1410+
self.assertIsInstance(args[0], RuntimeError)
1411+
self.assertEqual(str(args[0]), "Discovery found no tools")
1412+
self.assertEqual(kwargs.get("level"), "warning")
1413+
1414+
ctx = self._context_of(call)
1415+
# get_all_users_linux -> [] means enumeration missed every account, so the
1416+
# current-user fallback supplies the single scanned home.
1417+
self.assertEqual(ctx.get("homes_enumerated"), 0)
1418+
self.assertEqual(ctx.get("users_scanned"), 1)
1419+
self.assertIs(ctx.get("used_fallback_user"), True)
1420+
self.assertIn("device_id", ctx)
1421+
self.assertIn("run_id", ctx)
1422+
self.assertIn("duration_ms", ctx)
1423+
# PII guard: no list-valued context (e.g. the raw user list) may leak.
1424+
for key, value in ctx.items():
1425+
self.assertNotIsInstance(value, list, f"context key {key!r} is a list")
1426+
1427+
@unittest.skipUnless(os.name == "posix", "POSIX signal handling")
1428+
def test_does_not_fire_when_tools_found(self):
1429+
mock_sentry = Mock()
1430+
self._run_main(
1431+
[{"name": "Claude Code", "version": "1.0", "install_path": "/home/runner/.claude"}],
1432+
mock_sentry,
1433+
)
1434+
1435+
no_tools_calls = [
1436+
c for c in mock_sentry.call_args_list
1437+
if self._context_of(c).get("phase") == "no_tools_found"
1438+
]
1439+
self.assertEqual(no_tools_calls, [], "no_tools_found must not fire when a tool is detected")
1440+
1441+
13541442
if __name__ == "__main__":
13551443
unittest.main()

tests/test_send_and_persist.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from datetime import datetime, timezone, timedelta
1616
from http.server import HTTPServer, BaseHTTPRequestHandler
1717
from pathlib import Path
18-
from unittest.mock import patch
18+
from unittest.mock import patch, Mock
1919

2020
import scripts.coding_discovery_tools.utils as utils_mod
2121
from scripts.coding_discovery_tools.utils import (
@@ -491,5 +491,72 @@ def test_scan_failed_device_level_error(self, _sleep):
491491
self.assertEqual(payload["scan_error"]["error_type"], "RuntimeError")
492492

493493

494+
class TestSentryPriorityBypassesCap(unittest.TestCase):
495+
"""A priority=True event (the terminal no_tools_found summary) bypasses the
496+
per-run event cap, but still respects the circuit breaker and dedup."""
497+
498+
def setUp(self):
499+
self._reset_budget()
500+
501+
def tearDown(self):
502+
self._reset_budget()
503+
504+
@staticmethod
505+
def _reset_budget():
506+
utils_mod._sentry_event_count = 0
507+
utils_mod._sentry_sent_signatures = set()
508+
utils_mod._sentry_consecutive_fails = 0
509+
utils_mod._sentry_dead_this_run = False
510+
511+
@patch.object(utils_mod, "subprocess")
512+
@patch.object(
513+
utils_mod, "_parse_sentry_dsn",
514+
return_value={"key": "k", "store_url": "http://sentry.invalid/store/"},
515+
)
516+
def test_priority_event_bypasses_count_cap(self, _dsn, mock_subprocess):
517+
mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="")
518+
utils_mod._sentry_event_count = utils_mod._SENTRY_MAX_EVENTS_PER_RUN
519+
520+
# Non-priority event with a fresh signature is dropped: cap reached.
521+
utils_mod.report_to_sentry(
522+
RuntimeError("capped"), {"phase": "detect", "tool_name": "X"}
523+
)
524+
self.assertEqual(mock_subprocess.run.call_count, 0)
525+
526+
# The terminal priority event still sends despite the exhausted budget.
527+
utils_mod.report_to_sentry(
528+
RuntimeError("Discovery found no tools"),
529+
{"phase": "no_tools_found"},
530+
level="warning",
531+
priority=True,
532+
)
533+
self.assertEqual(mock_subprocess.run.call_count, 1)
534+
535+
@patch.object(utils_mod, "subprocess")
536+
@patch.object(
537+
utils_mod, "_parse_sentry_dsn",
538+
return_value={"key": "k", "store_url": "http://sentry.invalid/store/"},
539+
)
540+
def test_priority_bypasses_breaker_but_respects_dedup(self, _dsn, mock_subprocess):
541+
mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="")
542+
543+
# Breaker open from earlier (possibly transient) failures => a priority event
544+
# STILL gets its one bounded attempt, so the terminal diagnostic isn't lost.
545+
utils_mod._sentry_dead_this_run = True
546+
utils_mod.report_to_sentry(
547+
RuntimeError("Discovery found no tools"),
548+
{"phase": "no_tools_found"}, priority=True,
549+
)
550+
self.assertEqual(mock_subprocess.run.call_count, 1)
551+
552+
# Dedup is still honored even with priority: the same signature (added by the
553+
# send above) is not re-sent, so a priority event can never spam.
554+
utils_mod.report_to_sentry(
555+
RuntimeError("Discovery found no tools"),
556+
{"phase": "no_tools_found"}, priority=True,
557+
)
558+
self.assertEqual(mock_subprocess.run.call_count, 1)
559+
560+
494561
if __name__ == "__main__":
495562
unittest.main()

0 commit comments

Comments
 (0)