-
Notifications
You must be signed in to change notification settings - Fork 0
WEB-4679: declare a scan manifest so the backend can prune uninstalled tools #187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: staging
Are you sure you want to change the base?
Changes from all commits
3f13480
6402e54
ff39524
a2087ba
f6987a6
bcb482e
e7e7a9e
83a19b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -370,7 +370,7 @@ def get_device_id(self) -> str: | |
| """ | ||
| return self._device_id_extractor.extract_device_id() | ||
|
|
||
| def detect_all_tools(self, user_home: Optional[Path] = None) -> List[Dict]: | ||
| def detect_all_tools(self, user_home: Optional[Path] = None, failures: Optional[set] = None) -> List[Dict]: | ||
| """ | ||
| Detect all supported AI tools. | ||
|
|
||
|
|
@@ -401,6 +401,9 @@ def detect_all_tools(self, user_home: Optional[Path] = None) -> List[Dict]: | |
| except Exception as e: | ||
| logger.warning(f"Error detecting {detector.tool_name}: {e}") | ||
| report_to_sentry(e, {"phase": "detect", "tool_name": detector.tool_name}, level="warning") | ||
| # Detection errored: record the tool so the caller can keep it (presence unknown != uninstalled). | ||
| if failures is not None: | ||
| failures.add(detector.tool_name) | ||
|
|
||
| return tools | ||
|
|
||
|
|
@@ -2802,6 +2805,11 @@ def _on_term_signal(signum, _frame) -> None: | |
| # Track failed reports for persistence | ||
| failed_reports = [] | ||
|
|
||
| # (home_user, tool_name) detected present this run; backend set-diffs it in "completed" to prune the rest. | ||
| scanned_manifest = set() | ||
| # Detection/extraction errors this run; if non-empty the scan is marked unclean so the backend skips pruning. | ||
| incomplete_reasons = [] | ||
|
|
||
| # --- Drain pending reports from previous run --- | ||
| with time_step("drain_pending_queue", "queue"): | ||
| pending = load_pending_reports() | ||
|
|
@@ -2896,7 +2904,20 @@ def _on_term_signal(signum, _frame) -> None: | |
| user_home = Path.home() | ||
| logger.info(f" Detecting tools for user: {user} (home: {user_home})") | ||
| with time_step("detect_tools", "detect"): | ||
| user_tools = detector.detect_all_tools(user_home=user_home) | ||
| user_detect_failures = set() | ||
| user_tools = detector.detect_all_tools( | ||
| user_home=user_home, failures=user_detect_failures | ||
| ) | ||
| # Record per-user presence at detection: a tool this user actually has stays in the | ||
| # manifest even if reading/uploading it later errors (a read failure isn't an | ||
| # uninstall), and only users who detected the tool get an entry (no phantom ownership). | ||
| for detected in user_tools: | ||
| scanned_manifest.add((user, detected.get('name', 'Unknown'))) | ||
| # A detector ERRORED -> presence unknown. detector.tool_name is an umbrella label | ||
| # (e.g. "GitHub Copilot"), not the concrete row name ("GitHub Copilot (VS Code)"), | ||
| # so it can't safely target the manifest. Skip pruning this run instead. | ||
| if user_detect_failures: | ||
| incomplete_reasons.append(f"detector error for user {user}") | ||
|
|
||
| if user_tools: | ||
| logger.info(f" Found {len(user_tools)} tool(s) for {user}:") | ||
|
|
@@ -2959,6 +2980,13 @@ def _on_term_signal(signum, _frame) -> None: | |
| user_home = Path.home() | ||
|
|
||
| try: | ||
| # Only report a tool for users who actually detected it (the manifest is | ||
| # the per-user presence set). all_tools is deduped globally, so without this | ||
| # a user-scoped tool one user has would otherwise be reported for every | ||
| # enumerated user — a phantom install the backend could never prune. | ||
| if (user_name, tool_name) not in scanned_manifest: | ||
| continue | ||
|
|
||
| # Filter projects to only include this user's projects | ||
| with time_step("filter_projects", "process"): | ||
| tool_filtered = detector.filter_tool_projects_by_user(tool_with_projects, user_home) | ||
|
|
@@ -2974,6 +3002,8 @@ def _on_term_signal(signum, _frame) -> None: | |
| f"{tool_filtered.get('_config_path') or tool_filtered.get('install_path')!r} " | ||
| f"not owned by this user and no per-user data" | ||
| ) | ||
| # Detected globally but not owned by this user -> drop the presence entry. | ||
| scanned_manifest.discard((user_name, tool_name)) | ||
| continue | ||
|
|
||
| # Ownership gate (Augment surfaces): same ~/.augment-keyed | ||
|
|
@@ -2986,6 +3016,8 @@ def _on_term_signal(signum, _frame) -> None: | |
| f"{tool_filtered.get('_config_path') or tool_filtered.get('install_path')!r} " | ||
| f"not owned by this user and no per-user data" | ||
| ) | ||
| # Detected globally but not owned by this user -> drop the presence entry. | ||
| scanned_manifest.discard((user_name, tool_name)) | ||
| continue | ||
|
|
||
| # Detect subscription plan for Claude Code | ||
|
|
@@ -3197,6 +3229,8 @@ def _on_term_signal(signum, _frame) -> None: | |
|
|
||
| except Exception as e: | ||
| logger.error(f"Error processing tool {tool_name}: {e}", exc_info=True) | ||
| # Detected tools are already in the manifest from the detection phase, so a | ||
| # device-wide extraction failure here cannot drop a live tool (no re-add needed). | ||
| report_to_sentry(e, {**sentry_ctx, "phase": "process_tool", "tool_name": tool_name}, level="warning") | ||
| logger.info("") | ||
|
|
||
|
|
@@ -3233,6 +3267,8 @@ def _on_term_signal(signum, _frame) -> None: | |
| "os": platform.system(), | ||
| "tool_count": len(tools), | ||
| "user_count": len(all_users), | ||
| "manifest_size": len(scanned_manifest), | ||
| "scan_incomplete": bool(incomplete_reasons), | ||
| "python_version": f"{sys.version_info.major}.{sys.version_info.minor}", | ||
| "script_version": SCRIPT_VERSION, | ||
| }, | ||
|
|
@@ -3244,11 +3280,30 @@ def _on_term_signal(signum, _frame) -> None: | |
| except Exception as metrics_err: | ||
| logger.debug(f"Building/sending discovery metrics failed: {metrics_err}") | ||
|
|
||
| # Send scan completed event AFTER all scanning | ||
| # Detection/extraction hit an error this run -> mark the scan unclean BEFORE the | ||
| # completed event so the backend's reconcile skips pruning (a missing tool may mean | ||
| # "couldn't read", not "uninstalled"). Sent first so scan_error is persisted before | ||
| # the completed event dispatches the reconcile. | ||
| if incomplete_reasons: | ||
| send_scan_event( | ||
| args.domain, args.api_key, device_id, run_id, "failed", | ||
| args.app_name, | ||
| scan_error={ | ||
| "error_type": "ScanIncomplete", | ||
| "message": "; ".join(incomplete_reasons[:20]), | ||
| "timestamp": datetime.utcnow().isoformat() + "Z", | ||
| }, | ||
| sentry_context=sentry_ctx, system_user=system_user, | ||
| ) | ||
|
|
||
| # only the completed event carries manifest + covered users (backend prunes from them) | ||
| logger.info("Sending scan completed event...") | ||
| # manifest = (home_user, tool_name) detected present; backend set-diffs it to prune the rest. | ||
| manifest = [{"home_user": hu, "tool_name": tn} for hu, tn in sorted(scanned_manifest)] | ||
| success, _ = send_scan_event( | ||
| args.domain, args.api_key, device_id, run_id, "completed", | ||
| args.app_name, sentry_context=sentry_ctx, system_user=system_user | ||
| args.app_name, sentry_context=sentry_ctx, system_user=system_user, | ||
| manifest=manifest, covered_home_users=all_users, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incomplete scan still sends prune manifestMedium Severity When Reviewed by Cursor Bugbot for commit 83a19b3. Configure here. |
||
| ) | ||
| if success: | ||
| logger.info("✓ Scan completed event sent successfully") | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This records the current outer-loop user for every tool returned by the detector, but some extension detectors can return tools from other home directories during root scans. For example, if Alice has Roo Code or Cline and Bob does not, the detector can still return Alice's extension while the loop is scanning Bob. That adds
(Bob, tool_name)to the manifest, lets the later manifest guard pass, and can send Bob a phantom install with no per-user projects. The backend then treats Bob's row as still installed and cannot prune it. The manifest entry needs to come from the detected tool's actual owner, or these detector families need an ownership check before the pair is added.