From 29e59bce384e668e8588c8cb1bd9665df0b5d8c1 Mon Sep 17 00:00:00 2001 From: audit Date: Fri, 26 Jun 2026 08:55:22 +0530 Subject: [PATCH 1/3] Add no_tools_found Sentry event to diagnose empty discovery scans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A scan that finds zero tools was previously silent — indistinguishable from an errored scan, a missed detection, or a genuinely empty device. Emit one fire-and-forget Sentry warning (phase=no_tools_found) carrying the discriminators needed to tell them apart: - homes_enumerated: pre-fallback enumerated-user count (0 = enumeration miss; the existing post-fallback user_count masks 0 as 1) - users_scanned, used_fallback_user, is_root, os, duration_ms Counts/booleans only (no PII); reuses the existing curl-based report_to_sentry; wrapped so telemetry can never raise into or slow the scan. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai_tools_discovery.py | 29 ++++++ tests/test_discovery_flow.py | 88 +++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/scripts/coding_discovery_tools/ai_tools_discovery.py b/scripts/coding_discovery_tools/ai_tools_discovery.py index 2639248..68038db 100644 --- a/scripts/coding_discovery_tools/ai_tools_discovery.py +++ b/scripts/coding_discovery_tools/ai_tools_discovery.py @@ -2467,6 +2467,10 @@ def _on_term_signal(signum, _frame) -> None: else: all_users = [] + # Pre-fallback count is the key no-tools discriminator: 0 here means the + # macOS/AD enumeration missed every account (the fallback below masks 0->1). + homes_enumerated = len(all_users) + # If no users found, fall back to current user if not all_users: all_users = [get_user_info()] @@ -2856,6 +2860,31 @@ def _on_term_signal(signum, _frame) -> None: except Exception as metrics_err: logger.debug(f"Building/sending discovery metrics failed: {metrics_err}") + # No-tools telemetry: a zero-tool scan is otherwise silent — we cannot tell + # an enumeration miss from a timing race from a genuinely empty device. + # Emit ONE enriched warning with the discriminators. Fire-and-forget: + # telemetry must never raise into / slow the scan. + if not tools: + try: + no_tools_ctx = { + **sentry_ctx, + "phase": "no_tools_found", + "homes_enumerated": homes_enumerated, + "users_scanned": len(all_users), + "used_fallback_user": homes_enumerated == 0, + "os": platform.system(), + "duration_ms": round((time.monotonic() - t_start) * 1000), + } + if hasattr(os, "getuid"): + no_tools_ctx["is_root"] = os.getuid() == 0 + report_to_sentry( + RuntimeError("Discovery found no tools"), + context=no_tools_ctx, + level="warning", + ) + except Exception: + pass + # Send scan completed event AFTER all scanning logger.info("Sending scan completed event...") success, _ = send_scan_event( diff --git a/tests/test_discovery_flow.py b/tests/test_discovery_flow.py index 14595ee..b82fb21 100644 --- a/tests/test_discovery_flow.py +++ b/tests/test_discovery_flow.py @@ -1351,5 +1351,93 @@ def test_extract_failure_reports_warning_and_returns_empty(self): self.assertEqual(context.get("phase"), "extract") +class TestNoToolsSentryEvent(unittest.TestCase): + """main() emits exactly one enriched 'no_tools_found' warning on a zero-tool + scan (the only signal that distinguishes an enumeration miss from a genuinely + empty device) and stays silent when any tool is detected.""" + + @staticmethod + def _context_of(call): + # report_to_sentry is invoked two ways in the codebase: context as the + # keyword `context=` (the no-tools path) and context positionally as the + # 2nd arg (extraction/process paths). Read whichever is present. + args, kwargs = call + if "context" in kwargs: + return kwargs["context"] + return args[1] if len(args) > 1 else {} + + def _run_main(self, detect_return, mock_sentry): + import scripts.coding_discovery_tools.ai_tools_discovery as adm + + argv = ["ai_tools_discovery.py", "--api-key", "k", "--domain", "http://127.0.0.1:1"] + detector_inst = Mock() + detector_inst.get_device_id.return_value = "dev" + detector_inst.detect_all_tools.return_value = detect_return + with patch.object(adm.platform, "system", return_value="Linux"), \ + patch.object(adm.discovery_cache, "acquire_lock", return_value="acquired"), \ + patch.object(adm.discovery_cache, "heartbeat_start", Mock(return_value=threading.Event())), \ + patch.object(adm.discovery_cache, "release_lock", Mock()), \ + patch.object(adm, "AIToolsDetector", return_value=detector_inst), \ + patch.object(adm, "report_to_sentry", mock_sentry), \ + patch.object(adm, "send_scan_event", Mock(return_value=(True, None))), \ + patch.object(adm, "send_discovery_metrics", Mock()), \ + patch.object(adm, "load_pending_reports", return_value=[]), \ + patch.object(adm, "save_failed_reports", Mock()), \ + patch.object(adm, "get_all_users_linux", return_value=[]), \ + patch.object(adm, "get_user_info", return_value="runner"), \ + patch.object(utils_mod, "_SENTRY_DSN", ""), \ + patch.object(sys, "argv", argv): + detector_inst._set_canonical_vscode_copilot = Mock() + try: + adm.main() + except SystemExit: + pass + + @unittest.skipUnless(os.name == "posix", "POSIX signal handling") + def test_fires_on_zero_tool_scan(self): + mock_sentry = Mock() + self._run_main([], mock_sentry) + + no_tools_calls = [ + c for c in mock_sentry.call_args_list + if self._context_of(c).get("phase") == "no_tools_found" + ] + self.assertEqual(len(no_tools_calls), 1, "expected exactly one no_tools_found event") + + call = no_tools_calls[0] + args, kwargs = call + # Exception is positional, RuntimeError with the fixed message. + self.assertIsInstance(args[0], RuntimeError) + self.assertEqual(str(args[0]), "Discovery found no tools") + self.assertEqual(kwargs.get("level"), "warning") + + ctx = self._context_of(call) + # get_all_users_linux -> [] means enumeration missed every account, so the + # current-user fallback supplies the single scanned home. + self.assertEqual(ctx.get("homes_enumerated"), 0) + self.assertEqual(ctx.get("users_scanned"), 1) + self.assertIs(ctx.get("used_fallback_user"), True) + self.assertIn("device_id", ctx) + self.assertIn("run_id", ctx) + self.assertIn("duration_ms", ctx) + # PII guard: no list-valued context (e.g. the raw user list) may leak. + for key, value in ctx.items(): + self.assertNotIsInstance(value, list, f"context key {key!r} is a list") + + @unittest.skipUnless(os.name == "posix", "POSIX signal handling") + def test_does_not_fire_when_tools_found(self): + mock_sentry = Mock() + self._run_main( + [{"name": "Claude Code", "version": "1.0", "install_path": "/home/runner/.claude"}], + mock_sentry, + ) + + no_tools_calls = [ + c for c in mock_sentry.call_args_list + if self._context_of(c).get("phase") == "no_tools_found" + ] + self.assertEqual(no_tools_calls, [], "no_tools_found must not fire when a tool is detected") + + if __name__ == "__main__": unittest.main() From d18efd2ab8f60f51fbf2ee8497698f8e255a1b4b Mon Sep 17 00:00:00 2001 From: audit Date: Fri, 26 Jun 2026 09:20:03 +0530 Subject: [PATCH 2/3] Let no_tools_found bypass the per-run Sentry cap (Greptile P2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A noisy empty scan (many earlier detector errors) could exhaust the shared 30-event per-run cap before the terminal no_tools_found summary runs, dropping exactly the diagnostic you most need on a likely missed-detection. Add a `priority` flag to report_to_sentry that bypasses the COUNT cap only — it still honors the circuit breaker (a dead transport can't be helped) and dedup (no spam) — and pass priority=True for the no_tools_found event. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai_tools_discovery.py | 1 + scripts/coding_discovery_tools/utils.py | 11 ++- tests/test_send_and_persist.py | 69 ++++++++++++++++++- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/scripts/coding_discovery_tools/ai_tools_discovery.py b/scripts/coding_discovery_tools/ai_tools_discovery.py index 68038db..27dfa21 100644 --- a/scripts/coding_discovery_tools/ai_tools_discovery.py +++ b/scripts/coding_discovery_tools/ai_tools_discovery.py @@ -2881,6 +2881,7 @@ def _on_term_signal(signum, _frame) -> None: RuntimeError("Discovery found no tools"), context=no_tools_ctx, level="warning", + priority=True, ) except Exception: pass diff --git a/scripts/coding_discovery_tools/utils.py b/scripts/coding_discovery_tools/utils.py index 03d0d6e..bbf4430 100644 --- a/scripts/coding_discovery_tools/utils.py +++ b/scripts/coding_discovery_tools/utils.py @@ -1479,6 +1479,7 @@ def report_to_sentry( exception: Exception, context: Optional[Dict] = None, level: str = "error", + priority: bool = False, ) -> None: """Send an event to Sentry using the raw HTTP store endpoint. @@ -1486,6 +1487,10 @@ def report_to_sentry( exception: The exception to report. context: Extra tags/context (e.g. phase, tool_name, http_code). level: Sentry level -- "error" for crashes, "warning" for HTTP send failures. + priority: Bypass the per-run event cap so a terminal once-per-run + diagnostic (e.g. the no_tools_found summary) is not starved by + earlier per-tool errors. Still honors the circuit breaker (a dead + transport can't be helped) and dedup (no spam). """ try: dsn = _parse_sentry_dsn(_SENTRY_DSN) @@ -1501,8 +1506,12 @@ def report_to_sentry( if _sentry_dead_this_run: return # Collapse duplicate events and hard-cap the synchronous curls per run. + # priority events skip the count cap (but never dedup or the breaker) so a + # terminal once-per-run diagnostic isn't starved by earlier per-tool errors. signature = (type(exception).__name__, ctx.get("phase"), ctx.get("tool_name")) - if signature in _sentry_sent_signatures or _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN: + if signature in _sentry_sent_signatures: + return + if not priority and _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN: return _sentry_sent_signatures.add(signature) _sentry_event_count += 1 diff --git a/tests/test_send_and_persist.py b/tests/test_send_and_persist.py index a815ab3..76362c9 100644 --- a/tests/test_send_and_persist.py +++ b/tests/test_send_and_persist.py @@ -15,7 +15,7 @@ from datetime import datetime, timezone, timedelta from http.server import HTTPServer, BaseHTTPRequestHandler from pathlib import Path -from unittest.mock import patch +from unittest.mock import patch, Mock import scripts.coding_discovery_tools.utils as utils_mod from scripts.coding_discovery_tools.utils import ( @@ -457,5 +457,72 @@ def test_scan_failed_device_level_error(self, _sleep): self.assertEqual(payload["scan_error"]["error_type"], "RuntimeError") +class TestSentryPriorityBypassesCap(unittest.TestCase): + """A priority=True event (the terminal no_tools_found summary) bypasses the + per-run event cap, but still respects the circuit breaker and dedup.""" + + def setUp(self): + self._reset_budget() + + def tearDown(self): + self._reset_budget() + + @staticmethod + def _reset_budget(): + utils_mod._sentry_event_count = 0 + utils_mod._sentry_sent_signatures = set() + utils_mod._sentry_consecutive_fails = 0 + utils_mod._sentry_dead_this_run = False + + @patch.object(utils_mod, "subprocess") + @patch.object( + utils_mod, "_parse_sentry_dsn", + return_value={"key": "k", "store_url": "http://sentry.invalid/store/"}, + ) + def test_priority_event_bypasses_count_cap(self, _dsn, mock_subprocess): + mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="") + utils_mod._sentry_event_count = utils_mod._SENTRY_MAX_EVENTS_PER_RUN + + # Non-priority event with a fresh signature is dropped: cap reached. + utils_mod.report_to_sentry( + RuntimeError("capped"), {"phase": "detect", "tool_name": "X"} + ) + self.assertEqual(mock_subprocess.run.call_count, 0) + + # The terminal priority event still sends despite the exhausted budget. + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, + level="warning", + priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 1) + + @patch.object(utils_mod, "subprocess") + @patch.object( + utils_mod, "_parse_sentry_dsn", + return_value={"key": "k", "store_url": "http://sentry.invalid/store/"}, + ) + def test_priority_still_respects_breaker_and_dedup(self, _dsn, mock_subprocess): + mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="") + + # Breaker open => even a priority event is suppressed (dead transport). + utils_mod._sentry_dead_this_run = True + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 0) + + # Breaker closed but signature already sent => deduped even with priority. + utils_mod._sentry_dead_this_run = False + utils_mod._sentry_sent_signatures = {("RuntimeError", "no_tools_found", None)} + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 0) + + if __name__ == "__main__": unittest.main() From ab61e8eb4cfaf325db5c82b849522bb7754aa97f Mon Sep 17 00:00:00 2001 From: audit Date: Fri, 26 Jun 2026 09:27:41 +0530 Subject: [PATCH 3/3] Let priority Sentry events bypass the circuit breaker too (Greptile P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The priority flag bypassed the count cap but the circuit breaker — tripped by 3 consecutive, possibly transient, send failures — could still skip the terminal no_tools_found event without even an attempt. Allow a priority event ONE bounded attempt past the breaker (at most one ~4s curl at end of run) so a transient mid-scan outage doesn't drop the terminal diagnostic. Dedup is still honored, so it can never spam. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/coding_discovery_tools/utils.py | 16 ++++++++++------ tests/test_send_and_persist.py | 14 +++++++------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/scripts/coding_discovery_tools/utils.py b/scripts/coding_discovery_tools/utils.py index bbf4430..41e98c4 100644 --- a/scripts/coding_discovery_tools/utils.py +++ b/scripts/coding_discovery_tools/utils.py @@ -1487,10 +1487,12 @@ def report_to_sentry( exception: The exception to report. context: Extra tags/context (e.g. phase, tool_name, http_code). level: Sentry level -- "error" for crashes, "warning" for HTTP send failures. - priority: Bypass the per-run event cap so a terminal once-per-run - diagnostic (e.g. the no_tools_found summary) is not starved by - earlier per-tool errors. Still honors the circuit breaker (a dead - transport can't be helped) and dedup (no spam). + priority: Best-effort guarantee a terminal once-per-run diagnostic + (e.g. the no_tools_found summary) is delivered. Bypasses both the + per-run event cap AND the circuit breaker so earlier transient + per-tool send failures can't silently skip it -- it still gets ONE + attempt at the end of the run (bounded: at most one ~4s curl). Dedup + is still honored (no spam). Reserve for a single terminal event/run. """ try: dsn = _parse_sentry_dsn(_SENTRY_DSN) @@ -1503,10 +1505,12 @@ def report_to_sentry( global _sentry_event_count, _sentry_consecutive_fails, _sentry_dead_this_run # Circuit breaker: once the transport looks dead, stop calling Sentry for the # rest of the run so a blocked endpoint can't add its timeout to every failure. - if _sentry_dead_this_run: + # priority events bypass it for ONE bounded attempt so a transient mid-scan + # outage doesn't silently drop the terminal diagnostic. + if _sentry_dead_this_run and not priority: return # Collapse duplicate events and hard-cap the synchronous curls per run. - # priority events skip the count cap (but never dedup or the breaker) so a + # priority events skip the count cap + breaker (but never dedup) so a # terminal once-per-run diagnostic isn't starved by earlier per-tool errors. signature = (type(exception).__name__, ctx.get("phase"), ctx.get("tool_name")) if signature in _sentry_sent_signatures: diff --git a/tests/test_send_and_persist.py b/tests/test_send_and_persist.py index 76362c9..6418821 100644 --- a/tests/test_send_and_persist.py +++ b/tests/test_send_and_persist.py @@ -503,25 +503,25 @@ def test_priority_event_bypasses_count_cap(self, _dsn, mock_subprocess): utils_mod, "_parse_sentry_dsn", return_value={"key": "k", "store_url": "http://sentry.invalid/store/"}, ) - def test_priority_still_respects_breaker_and_dedup(self, _dsn, mock_subprocess): + def test_priority_bypasses_breaker_but_respects_dedup(self, _dsn, mock_subprocess): mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="") - # Breaker open => even a priority event is suppressed (dead transport). + # Breaker open from earlier (possibly transient) failures => a priority event + # STILL gets its one bounded attempt, so the terminal diagnostic isn't lost. utils_mod._sentry_dead_this_run = True utils_mod.report_to_sentry( RuntimeError("Discovery found no tools"), {"phase": "no_tools_found"}, priority=True, ) - self.assertEqual(mock_subprocess.run.call_count, 0) + self.assertEqual(mock_subprocess.run.call_count, 1) - # Breaker closed but signature already sent => deduped even with priority. - utils_mod._sentry_dead_this_run = False - utils_mod._sentry_sent_signatures = {("RuntimeError", "no_tools_found", None)} + # Dedup is still honored even with priority: the same signature (added by the + # send above) is not re-sent, so a priority event can never spam. utils_mod.report_to_sentry( RuntimeError("Discovery found no tools"), {"phase": "no_tools_found"}, priority=True, ) - self.assertEqual(mock_subprocess.run.call_count, 0) + self.assertEqual(mock_subprocess.run.call_count, 1) if __name__ == "__main__":