diff --git a/scripts/coding_discovery_tools/ai_tools_discovery.py b/scripts/coding_discovery_tools/ai_tools_discovery.py index 2639248..27dfa21 100644 --- a/scripts/coding_discovery_tools/ai_tools_discovery.py +++ b/scripts/coding_discovery_tools/ai_tools_discovery.py @@ -2467,6 +2467,10 @@ def _on_term_signal(signum, _frame) -> None: else: all_users = [] + # Pre-fallback count is the key no-tools discriminator: 0 here means the + # macOS/AD enumeration missed every account (the fallback below masks 0->1). + homes_enumerated = len(all_users) + # If no users found, fall back to current user if not all_users: all_users = [get_user_info()] @@ -2856,6 +2860,32 @@ def _on_term_signal(signum, _frame) -> None: except Exception as metrics_err: logger.debug(f"Building/sending discovery metrics failed: {metrics_err}") + # No-tools telemetry: a zero-tool scan is otherwise silent — we cannot tell + # an enumeration miss from a timing race from a genuinely empty device. + # Emit ONE enriched warning with the discriminators. Fire-and-forget: + # telemetry must never raise into / slow the scan. + if not tools: + try: + no_tools_ctx = { + **sentry_ctx, + "phase": "no_tools_found", + "homes_enumerated": homes_enumerated, + "users_scanned": len(all_users), + "used_fallback_user": homes_enumerated == 0, + "os": platform.system(), + "duration_ms": round((time.monotonic() - t_start) * 1000), + } + if hasattr(os, "getuid"): + no_tools_ctx["is_root"] = os.getuid() == 0 + report_to_sentry( + RuntimeError("Discovery found no tools"), + context=no_tools_ctx, + level="warning", + priority=True, + ) + except Exception: + pass + # Send scan completed event AFTER all scanning logger.info("Sending scan completed event...") success, _ = send_scan_event( diff --git a/scripts/coding_discovery_tools/utils.py b/scripts/coding_discovery_tools/utils.py index 03d0d6e..41e98c4 100644 --- a/scripts/coding_discovery_tools/utils.py +++ b/scripts/coding_discovery_tools/utils.py @@ -1479,6 +1479,7 @@ def report_to_sentry( exception: Exception, context: Optional[Dict] = None, level: str = "error", + priority: bool = False, ) -> None: """Send an event to Sentry using the raw HTTP store endpoint. @@ -1486,6 +1487,12 @@ def report_to_sentry( exception: The exception to report. context: Extra tags/context (e.g. phase, tool_name, http_code). level: Sentry level -- "error" for crashes, "warning" for HTTP send failures. + priority: Best-effort guarantee a terminal once-per-run diagnostic + (e.g. the no_tools_found summary) is delivered. Bypasses both the + per-run event cap AND the circuit breaker so earlier transient + per-tool send failures can't silently skip it -- it still gets ONE + attempt at the end of the run (bounded: at most one ~4s curl). Dedup + is still honored (no spam). Reserve for a single terminal event/run. """ try: dsn = _parse_sentry_dsn(_SENTRY_DSN) @@ -1498,11 +1505,17 @@ def report_to_sentry( global _sentry_event_count, _sentry_consecutive_fails, _sentry_dead_this_run # Circuit breaker: once the transport looks dead, stop calling Sentry for the # rest of the run so a blocked endpoint can't add its timeout to every failure. - if _sentry_dead_this_run: + # priority events bypass it for ONE bounded attempt so a transient mid-scan + # outage doesn't silently drop the terminal diagnostic. + if _sentry_dead_this_run and not priority: return # Collapse duplicate events and hard-cap the synchronous curls per run. + # priority events skip the count cap + breaker (but never dedup) so a + # terminal once-per-run diagnostic isn't starved by earlier per-tool errors. signature = (type(exception).__name__, ctx.get("phase"), ctx.get("tool_name")) - if signature in _sentry_sent_signatures or _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN: + if signature in _sentry_sent_signatures: + return + if not priority and _sentry_event_count >= _SENTRY_MAX_EVENTS_PER_RUN: return _sentry_sent_signatures.add(signature) _sentry_event_count += 1 diff --git a/tests/test_discovery_flow.py b/tests/test_discovery_flow.py index 14595ee..b82fb21 100644 --- a/tests/test_discovery_flow.py +++ b/tests/test_discovery_flow.py @@ -1351,5 +1351,93 @@ def test_extract_failure_reports_warning_and_returns_empty(self): self.assertEqual(context.get("phase"), "extract") +class TestNoToolsSentryEvent(unittest.TestCase): + """main() emits exactly one enriched 'no_tools_found' warning on a zero-tool + scan (the only signal that distinguishes an enumeration miss from a genuinely + empty device) and stays silent when any tool is detected.""" + + @staticmethod + def _context_of(call): + # report_to_sentry is invoked two ways in the codebase: context as the + # keyword `context=` (the no-tools path) and context positionally as the + # 2nd arg (extraction/process paths). Read whichever is present. + args, kwargs = call + if "context" in kwargs: + return kwargs["context"] + return args[1] if len(args) > 1 else {} + + def _run_main(self, detect_return, mock_sentry): + import scripts.coding_discovery_tools.ai_tools_discovery as adm + + argv = ["ai_tools_discovery.py", "--api-key", "k", "--domain", "http://127.0.0.1:1"] + detector_inst = Mock() + detector_inst.get_device_id.return_value = "dev" + detector_inst.detect_all_tools.return_value = detect_return + with patch.object(adm.platform, "system", return_value="Linux"), \ + patch.object(adm.discovery_cache, "acquire_lock", return_value="acquired"), \ + patch.object(adm.discovery_cache, "heartbeat_start", Mock(return_value=threading.Event())), \ + patch.object(adm.discovery_cache, "release_lock", Mock()), \ + patch.object(adm, "AIToolsDetector", return_value=detector_inst), \ + patch.object(adm, "report_to_sentry", mock_sentry), \ + patch.object(adm, "send_scan_event", Mock(return_value=(True, None))), \ + patch.object(adm, "send_discovery_metrics", Mock()), \ + patch.object(adm, "load_pending_reports", return_value=[]), \ + patch.object(adm, "save_failed_reports", Mock()), \ + patch.object(adm, "get_all_users_linux", return_value=[]), \ + patch.object(adm, "get_user_info", return_value="runner"), \ + patch.object(utils_mod, "_SENTRY_DSN", ""), \ + patch.object(sys, "argv", argv): + detector_inst._set_canonical_vscode_copilot = Mock() + try: + adm.main() + except SystemExit: + pass + + @unittest.skipUnless(os.name == "posix", "POSIX signal handling") + def test_fires_on_zero_tool_scan(self): + mock_sentry = Mock() + self._run_main([], mock_sentry) + + no_tools_calls = [ + c for c in mock_sentry.call_args_list + if self._context_of(c).get("phase") == "no_tools_found" + ] + self.assertEqual(len(no_tools_calls), 1, "expected exactly one no_tools_found event") + + call = no_tools_calls[0] + args, kwargs = call + # Exception is positional, RuntimeError with the fixed message. + self.assertIsInstance(args[0], RuntimeError) + self.assertEqual(str(args[0]), "Discovery found no tools") + self.assertEqual(kwargs.get("level"), "warning") + + ctx = self._context_of(call) + # get_all_users_linux -> [] means enumeration missed every account, so the + # current-user fallback supplies the single scanned home. + self.assertEqual(ctx.get("homes_enumerated"), 0) + self.assertEqual(ctx.get("users_scanned"), 1) + self.assertIs(ctx.get("used_fallback_user"), True) + self.assertIn("device_id", ctx) + self.assertIn("run_id", ctx) + self.assertIn("duration_ms", ctx) + # PII guard: no list-valued context (e.g. the raw user list) may leak. + for key, value in ctx.items(): + self.assertNotIsInstance(value, list, f"context key {key!r} is a list") + + @unittest.skipUnless(os.name == "posix", "POSIX signal handling") + def test_does_not_fire_when_tools_found(self): + mock_sentry = Mock() + self._run_main( + [{"name": "Claude Code", "version": "1.0", "install_path": "/home/runner/.claude"}], + mock_sentry, + ) + + no_tools_calls = [ + c for c in mock_sentry.call_args_list + if self._context_of(c).get("phase") == "no_tools_found" + ] + self.assertEqual(no_tools_calls, [], "no_tools_found must not fire when a tool is detected") + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_send_and_persist.py b/tests/test_send_and_persist.py index a815ab3..6418821 100644 --- a/tests/test_send_and_persist.py +++ b/tests/test_send_and_persist.py @@ -15,7 +15,7 @@ from datetime import datetime, timezone, timedelta from http.server import HTTPServer, BaseHTTPRequestHandler from pathlib import Path -from unittest.mock import patch +from unittest.mock import patch, Mock import scripts.coding_discovery_tools.utils as utils_mod from scripts.coding_discovery_tools.utils import ( @@ -457,5 +457,72 @@ def test_scan_failed_device_level_error(self, _sleep): self.assertEqual(payload["scan_error"]["error_type"], "RuntimeError") +class TestSentryPriorityBypassesCap(unittest.TestCase): + """A priority=True event (the terminal no_tools_found summary) bypasses the + per-run event cap, but still respects the circuit breaker and dedup.""" + + def setUp(self): + self._reset_budget() + + def tearDown(self): + self._reset_budget() + + @staticmethod + def _reset_budget(): + utils_mod._sentry_event_count = 0 + utils_mod._sentry_sent_signatures = set() + utils_mod._sentry_consecutive_fails = 0 + utils_mod._sentry_dead_this_run = False + + @patch.object(utils_mod, "subprocess") + @patch.object( + utils_mod, "_parse_sentry_dsn", + return_value={"key": "k", "store_url": "http://sentry.invalid/store/"}, + ) + def test_priority_event_bypasses_count_cap(self, _dsn, mock_subprocess): + mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="") + utils_mod._sentry_event_count = utils_mod._SENTRY_MAX_EVENTS_PER_RUN + + # Non-priority event with a fresh signature is dropped: cap reached. + utils_mod.report_to_sentry( + RuntimeError("capped"), {"phase": "detect", "tool_name": "X"} + ) + self.assertEqual(mock_subprocess.run.call_count, 0) + + # The terminal priority event still sends despite the exhausted budget. + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, + level="warning", + priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 1) + + @patch.object(utils_mod, "subprocess") + @patch.object( + utils_mod, "_parse_sentry_dsn", + return_value={"key": "k", "store_url": "http://sentry.invalid/store/"}, + ) + def test_priority_bypasses_breaker_but_respects_dedup(self, _dsn, mock_subprocess): + mock_subprocess.run.return_value = Mock(returncode=0, stdout="200", stderr="") + + # Breaker open from earlier (possibly transient) failures => a priority event + # STILL gets its one bounded attempt, so the terminal diagnostic isn't lost. + utils_mod._sentry_dead_this_run = True + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 1) + + # Dedup is still honored even with priority: the same signature (added by the + # send above) is not re-sent, so a priority event can never spam. + utils_mod.report_to_sentry( + RuntimeError("Discovery found no tools"), + {"phase": "no_tools_found"}, priority=True, + ) + self.assertEqual(mock_subprocess.run.call_count, 1) + + if __name__ == "__main__": unittest.main()