websentry-ai · anonpran · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/scripts/coding_discovery_tools/ai_tools_discovery.py b/scripts/coding_discovery_tools/ai_tools_discovery.py
@@ -370,7 +370,7 @@ def get_device_id(self) -> str:
         """
         return self._device_id_extractor.extract_device_id()
 
-    def detect_all_tools(self, user_home: Optional[Path] = None) -> List[Dict]:
+    def detect_all_tools(self, user_home: Optional[Path] = None, failures: Optional[set] = None) -> List[Dict]:
         """
         Detect all supported AI tools.
 
@@ -401,6 +401,9 @@ def detect_all_tools(self, user_home: Optional[Path] = None) -> List[Dict]:
             except Exception as e:
                 logger.warning(f"Error detecting {detector.tool_name}: {e}")
                 report_to_sentry(e, {"phase": "detect", "tool_name": detector.tool_name}, level="warning")
+                # Detection errored: record the tool so the caller can keep it (presence unknown != uninstalled).
+                if failures is not None:
+                    failures.add(detector.tool_name)
 
         return tools
 
@@ -2802,6 +2805,11 @@ def _on_term_signal(signum, _frame) -> None:
         # Track failed reports for persistence
         failed_reports = []
 
+        # (home_user, tool_name) detected present this run; backend set-diffs it in "completed" to prune the rest.
+        scanned_manifest = set()
+        # Detection/extraction errors this run; if non-empty the scan is marked unclean so the backend skips pruning.
+        incomplete_reasons = []
+
         # --- Drain pending reports from previous run ---
         with time_step("drain_pending_queue", "queue"):
             pending = load_pending_reports()
@@ -2896,7 +2904,20 @@ def _on_term_signal(signum, _frame) -> None:
                 user_home = Path.home()
             logger.info(f"  Detecting tools for user: {user} (home: {user_home})")
             with time_step("detect_tools", "detect"):
-                user_tools = detector.detect_all_tools(user_home=user_home)
+                user_detect_failures = set()
+                user_tools = detector.detect_all_tools(
+                    user_home=user_home, failures=user_detect_failures
+                )
+            # Record per-user presence at detection: a tool this user actually has stays in the
+            # manifest even if reading/uploading it later errors (a read failure isn't an
+            # uninstall), and only users who detected the tool get an entry (no phantom ownership).
+            for detected in user_tools:
+                scanned_manifest.add((user, detected.get('name', 'Unknown')))
+            # A detector ERRORED -> presence unknown. detector.tool_name is an umbrella label
+            # (e.g. "GitHub Copilot"), not the concrete row name ("GitHub Copilot (VS Code)"),
+            # so it can't safely target the manifest. Skip pruning this run instead.
+            if user_detect_failures:
+                incomplete_reasons.append(f"detector error for user {user}")
 
             if user_tools:
                 logger.info(f"    Found {len(user_tools)} tool(s) for {user}:")
@@ -2959,6 +2980,13 @@ def _on_term_signal(signum, _frame) -> None:
                         user_home = Path.home()
 
                     try:
+                        # Only report a tool for users who actually detected it (the manifest is
+                        # the per-user presence set). all_tools is deduped globally, so without this
+                        # a user-scoped tool one user has would otherwise be reported for every
+                        # enumerated user — a phantom install the backend could never prune.
+                        if (user_name, tool_name) not in scanned_manifest:
+                            continue
+
                         # Filter projects to only include this user's projects
                         with time_step("filter_projects", "process"):
                             tool_filtered = detector.filter_tool_projects_by_user(tool_with_projects, user_home)
@@ -2974,6 +3002,8 @@ def _on_term_signal(signum, _frame) -> None:
                                 f"{tool_filtered.get('_config_path') or tool_filtered.get('install_path')!r} "
                                 f"not owned by this user and no per-user data"
                             )
+                            # Detected globally but not owned by this user -> drop the presence entry.
+                            scanned_manifest.discard((user_name, tool_name))
                             continue
 
                         # Ownership gate (Augment surfaces): same ~/.augment-keyed
@@ -2986,6 +3016,8 @@ def _on_term_signal(signum, _frame) -> None:
                                 f"{tool_filtered.get('_config_path') or tool_filtered.get('install_path')!r} "
                                 f"not owned by this user and no per-user data"
                             )
+                            # Detected globally but not owned by this user -> drop the presence entry.
+                            scanned_manifest.discard((user_name, tool_name))
                             continue
 
                         # Detect subscription plan for Claude Code
@@ -3197,6 +3229,8 @@ def _on_term_signal(signum, _frame) -> None:
 
             except Exception as e:
                 logger.error(f"Error processing tool {tool_name}: {e}", exc_info=True)
+                # Detected tools are already in the manifest from the detection phase, so a
+                # device-wide extraction failure here cannot drop a live tool (no re-add needed).
                 report_to_sentry(e, {**sentry_ctx, "phase": "process_tool", "tool_name": tool_name}, level="warning")
                 logger.info("")
 
@@ -3233,6 +3267,8 @@ def _on_term_signal(signum, _frame) -> None:
                     "os": platform.system(),
                     "tool_count": len(tools),
                     "user_count": len(all_users),
+                    "manifest_size": len(scanned_manifest),
+                    "scan_incomplete": bool(incomplete_reasons),
                     "python_version": f"{sys.version_info.major}.{sys.version_info.minor}",
                     "script_version": SCRIPT_VERSION,
                 },
@@ -3244,11 +3280,30 @@ def _on_term_signal(signum, _frame) -> None:
         except Exception as metrics_err:
             logger.debug(f"Building/sending discovery metrics failed: {metrics_err}")
 
-        # Send scan completed event AFTER all scanning
+        # Detection/extraction hit an error this run -> mark the scan unclean BEFORE the
+        # completed event so the backend's reconcile skips pruning (a missing tool may mean
+        # "couldn't read", not "uninstalled"). Sent first so scan_error is persisted before
+        # the completed event dispatches the reconcile.
+        if incomplete_reasons:
+            send_scan_event(
+                args.domain, args.api_key, device_id, run_id, "failed",
+                args.app_name,
+                scan_error={
+                    "error_type": "ScanIncomplete",
+                    "message": "; ".join(incomplete_reasons[:20]),
+                    "timestamp": datetime.utcnow().isoformat() + "Z",
+                },
+                sentry_context=sentry_ctx, system_user=system_user,
+            )
+
+        # only the completed event carries manifest + covered users (backend prunes from them)
         logger.info("Sending scan completed event...")
+        # manifest = (home_user, tool_name) detected present; backend set-diffs it to prune the rest.
+        manifest = [{"home_user": hu, "tool_name": tn} for hu, tn in sorted(scanned_manifest)]
         success, _ = send_scan_event(
             args.domain, args.api_key, device_id, run_id, "completed",
-            args.app_name, sentry_context=sentry_ctx, system_user=system_user
+            args.app_name, sentry_context=sentry_ctx, system_user=system_user,
+            manifest=manifest, covered_home_users=all_users,
         )
         if success:
             logger.info("✓ Scan completed event sent successfully")

diff --git a/scripts/coding_discovery_tools/utils.py b/scripts/coding_discovery_tools/utils.py
@@ -716,6 +716,8 @@ def send_scan_event(
     scan_error: Optional[Dict] = None,
     sentry_context: Optional[Dict] = None,
     system_user: Optional[str] = None,
+    manifest: Optional[List[Dict]] = None,
+    covered_home_users: Optional[List[str]] = None,
 ) -> Tuple[bool, bool]:
     """
     Send scan lifecycle event to backend (in_progress, completed, failed).
@@ -733,6 +735,10 @@ def send_scan_event(
         system_user: Optional real human user running the scan (or None). Used by
             the backend to attribute empty machines. MUST be a real human or
             None (see ``get_audit_user``), never a junk/service identity.
+        manifest: Optional [{"home_user", "tool_name"}] seen this run; sent only on
+            "completed" so the backend set-diffs it to prune the rest.
+        covered_home_users: Optional home users covered; sent only on "completed" to
+            bound the prune scope.
 
     Returns:
         Tuple of (success, retryable): success=True if sent, retryable=True if caller should queue
@@ -755,6 +761,12 @@ def send_scan_event(
     if scan_error:
         payload["scan_error"] = scan_error
 
+    if manifest is not None:
+        payload["manifest"] = manifest
+
+    if covered_home_users is not None:
+        payload["covered_home_users"] = covered_home_users
+
     return send_report_to_backend(
         backend_url,
         api_key,