fix(e2e): dismiss BootCheckGate picker before every spec (mega-flow root cause) (tinyhumansai#1779)

senamakel · web-flow · commit aa57a338ff23 · 2026-05-15T03:02:11.000-07:00
diff --git a/app/scripts/e2e-run-session.sh b/app/scripts/e2e-run-session.sh
@@ -37,6 +37,7 @@ APPIUM_PID=""
 APP_PID=""
 E2E_CONFIG_BACKUP=""
 E2E_CONFIG_FILE=""
+CREATED_TEMP_CEF_CACHE=""
 
 # ------------------------------------------------------------------------------
 # Workspace + config
@@ -50,6 +51,22 @@ else
   echo "[runner] Using OPENHUMAN_WORKSPACE from environment: $OPENHUMAN_WORKSPACE"
 fi
 
+# Place the CEF cache directory OUTSIDE the workspace. By default the Tauri
+# shell roots it under `$OPENHUMAN_WORKSPACE/users/<id>/cef`, so any spec
+# that calls `openhuman.config_reset_local_data` (as `mega-flow` did between
+# sub-scenarios) triggers `remove_dir_all($OPENHUMAN_WORKSPACE)`, which
+# yanks CEF's cache out from under the running process and kills the
+# WebDriver session (every later sub-test then fails with
+# "invalid session id"). Pointing CEF at a separate tmpdir via the
+# `OPENHUMAN_CEF_CACHE_PATH` escape hatch (`cef_profile.rs:7`) keeps it
+# unaffected by the reset.
+if [ -z "${OPENHUMAN_CEF_CACHE_PATH:-}" ]; then
+  CREATED_TEMP_CEF_CACHE="$(mktemp -d)"
+  OPENHUMAN_CEF_CACHE_PATH="$CREATED_TEMP_CEF_CACHE"
+  export OPENHUMAN_CEF_CACHE_PATH
+  echo "[runner] Using temporary OPENHUMAN_CEF_CACHE_PATH: $OPENHUMAN_CEF_CACHE_PATH"
+fi
+
 if [ "${OPENHUMAN_SERVICE_MOCK:-0}" = "1" ] && [ -z "${OPENHUMAN_SERVICE_MOCK_STATE_FILE:-}" ]; then
   OPENHUMAN_SERVICE_MOCK_STATE_FILE="$OPENHUMAN_WORKSPACE/service-mock-state.json"
   export OPENHUMAN_SERVICE_MOCK_STATE_FILE
@@ -98,6 +115,9 @@ cleanup() {
     # whole job on cleanup leftovers when the test itself passed.
     rm -rf "$CREATED_TEMP_WORKSPACE" 2>/dev/null || true
   fi
+  if [ -n "$CREATED_TEMP_CEF_CACHE" ]; then
+    rm -rf "$CREATED_TEMP_CEF_CACHE" 2>/dev/null || true
+  fi
   if [ -n "$E2E_CONFIG_BACKUP" ] && [ -f "$E2E_CONFIG_BACKUP" ]; then
     mv "$E2E_CONFIG_BACKUP" "$E2E_CONFIG_FILE"
   elif [ -n "$E2E_CONFIG_FILE" ] && [ -f "$E2E_CONFIG_FILE" ]; then
diff --git a/app/src-tauri/src/cef_profile.rs b/app/src-tauri/src/cef_profile.rs
@@ -283,6 +283,25 @@ pub fn prepare_process_cache_path() -> Result<PathBuf, String> {
     let default_openhuman_dir = default_root_openhuman_dir()?;
     drain_pending_purges(&default_openhuman_dir)?;
 
+    // Honor a pre-set `OPENHUMAN_CEF_CACHE_PATH` so harnesses (E2E in
+    // particular) can locate the CEF cache outside the OpenHuman workspace
+    // tree. Specs that call `openhuman.config_reset_local_data` (mega-flow
+    // did so between scenarios) `remove_dir_all` the whole workspace — if
+    // CEF's cache lives inside it, the running renderer crashes mid-spec
+    // and every subsequent WDIO command fails with "invalid session id".
+    // The override is opt-in (env-var only), so production users keep the
+    // per-user `users/<id>/cef` layout that provides multi-account isolation.
+    if let Some(preset) = configured_cache_path_from_env() {
+        std::fs::create_dir_all(&preset).map_err(|error| {
+            format!("create pre-set CEF cache dir {}: {error}", preset.display())
+        })?;
+        log::info!(
+            "[cef-profile] honoring pre-set OPENHUMAN_CEF_CACHE_PATH={}",
+            preset.display()
+        );
+        return Ok(preset);
+    }
+
     let user_id_raw = read_active_user_id(&default_openhuman_dir)
         .unwrap_or_else(|| PRE_LOGIN_USER_ID.to_string());
     let user_id = match validate_user_id_for_path(&user_id_raw) {
@@ -580,6 +599,54 @@ mod tests {
         assert!(marker.exists());
     }
 
+    /// Serializes tests that mutate `OPENHUMAN_WORKSPACE` / `OPENHUMAN_CEF_CACHE_PATH`.
+    /// The Rust test harness runs tests in parallel, so unsynchronized env writes would race.
+    static CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
+    /// Regression for #1779: when `OPENHUMAN_CEF_CACHE_PATH` is set in the
+    /// environment, `prepare_process_cache_path` must return it instead of the
+    /// workspace-rooted `users/<id>/cef` path, keeping the CEF cache outside the
+    /// tree that `config_reset_local_data` wipes. Prior env values are captured
+    /// with `var_os` so even non-UTF-8 values are restored afterwards.
+    #[test]
+    fn prepare_process_cache_path_honors_preset_env() {
+        let _guard = CACHE_ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+        let prior_workspace = std::env::var_os("OPENHUMAN_WORKSPACE");
+        let prior_cef_cache = std::env::var_os(CEF_CACHE_PATH_ENV);
+
+        let workspace = tempfile::tempdir().unwrap();
+        let cef_cache = tempfile::tempdir().unwrap();
+        std::env::set_var("OPENHUMAN_WORKSPACE", workspace.path());
+        std::env::set_var(CEF_CACHE_PATH_ENV, cef_cache.path());
+
+        let result = std::panic::catch_unwind(|| {
+            let resolved = prepare_process_cache_path().unwrap();
+            assert_eq!(
+                resolved,
+                cef_cache.path(),
+                "preset OPENHUMAN_CEF_CACHE_PATH must win over workspace-derived default"
+            );
+            // The workspace `users/<id>/cef` subtree should NOT have been
+            // created when the override is honored.
+            assert!(
+                !workspace.path().join("users").exists(),
+                "workspace `users/` subtree must not be created when CEF cache is preset"
+            );
+        });
+
+        match prior_workspace {
+            Some(v) => std::env::set_var("OPENHUMAN_WORKSPACE", v),
+            None => std::env::remove_var("OPENHUMAN_WORKSPACE"),
+        }
+        match prior_cef_cache {
+            Some(v) => std::env::set_var(CEF_CACHE_PATH_ENV, v),
+            None => std::env::remove_var(CEF_CACHE_PATH_ENV),
+        }
+        if let Err(payload) = result {
+            std::panic::resume_unwind(payload);
+        }
+    }
+
     /// Path is under `users/…` but last component is not `cef` (reject, retain in queue).
     #[test]
     fn drain_does_not_remove_path_without_cef_final_segment() {
diff --git a/app/test/e2e/helpers/app-helpers.ts b/app/test/e2e/helpers/app-helpers.ts
@@ -18,6 +18,12 @@ import { isTauriDriver } from './platform';
  * is responding on :19222 before WDIO connects, so by the time a spec runs
  * we usually just need to give the React root a beat to mount. Specs that
  * need a stricter guarantee should call `waitForAppReady` directly.
+ *
+ * Also dismisses the first-run `BootCheckGate` "Choose core mode" modal
+ * if it's up — every spec needs the real app behind it, and the picker
+ * intercepts every click / deep-link otherwise. (The picker only renders
+ * when persisted `coreMode.kind === 'unset'`; on a fresh CEF profile —
+ * which every CI run on Linux is — that's the default.)
  */
 export async function waitForApp(): Promise<void> {
   try {
@@ -36,6 +42,75 @@ export async function waitForApp(): Promise<void> {
     // a slow startup don't regress.
     await browser.pause(5_000);
   }
+  await dismissBootCheckGate();
+}
+
+/**
+ * Dismiss the `BootCheckGate` first-run "Choose core mode" picker if it is
+ * currently rendered. No-op when `isTauriDriver()` is false or the picker is
+ * absent (later calls within a session, or coreMode already persisted).
+ *
+ * Why: the picker is a fixed-position modal that intercepts every click in the
+ * WebView. Left up, every mega-flow sub-test would deep-link an app nobody can
+ * interact with, no `/consume` request would fire, and the first
+ * `waitForMockRequest` would time out. "Local" is pre-selected on desktop
+ * builds, so a single Continue click is enough — no cloud URL/token needed.
+ *
+ * @param timeout Polling budget in ms; a warning is logged if it runs out.
+ */
+export async function dismissBootCheckGate(timeout: number = 5_000): Promise<void> {
+  if (!isTauriDriver()) return;
+  const deadline = Date.now() + timeout;
+  while (Date.now() < deadline) {
+    let onPicker = false;
+    try {
+      onPicker = await browser.execute(() => {
+        const headings = Array.from(document.querySelectorAll('h2'));
+        return headings.some(h =>
+          /Choose core mode|Connect to your core/.test(h.textContent ?? '')
+        );
+      });
+    } catch {
+      // session not yet ready — keep polling
+      await browser.pause(200);
+      continue;
+    }
+
+    if (!onPicker) return;
+
+    let clicked = false;
+    try {
+      clicked = await browser.execute(() => {
+        const buttons = Array.from(document.querySelectorAll('button'));
+        const cont = buttons.find(b => (b.textContent ?? '').trim() === 'Continue');
+        if (!cont) return false;
+        (cont as HTMLButtonElement).click();
+        return true;
+      });
+    } catch {
+      // surface on the next iteration via the onPicker check
+    }
+
+    if (clicked) {
+      // Wait for the modal to unmount.
+      const dismissDeadline = Date.now() + 5_000;
+      while (Date.now() < dismissDeadline) {
+        try {
+          const stillThere = await browser.execute(() =>
+            Array.from(document.querySelectorAll('h2')).some(h =>
+              /Choose core mode|Connect to your core/.test(h.textContent ?? '')
+            )
+          );
+          if (!stillThere) return;
+        } catch {
+          // transient WebDriver error — keep polling until dismissDeadline
+        }
+        await browser.pause(200);
+      }
+    }
+    await browser.pause(250);
+  }
+  console.warn(`[e2e] dismissBootCheckGate gave up (budget ${timeout}ms); picker may still be up`);
+}
 
 /**
diff --git a/app/test/e2e/specs/mega-flow.spec.ts b/app/test/e2e/specs/mega-flow.spec.ts
@@ -71,18 +71,23 @@ async function waitForMockRequest(
 }
 
 async function resetEverything(label: string): Promise<void> {
-  console.log(`${LOG} reset (${label}) — config_reset_local_data + admin reset`);
-  // 1. Wipe the core's local data — workspace + ~/.openhuman + active marker.
-  //    The active in-process core handles this without a process restart, so
-  //    the session keeps the same RPC port and bearer token.
-  const reset = await callOpenhumanRpc('openhuman.config_reset_local_data', {});
-  if (!reset.ok) {
-    console.warn(`${LOG} reset RPC failed (non-fatal):`, reset);
-  }
-  // 2. Re-write config.toml so the next core startup-path still points at the
-  //    mock backend. config_reset_local_data removed the file.
-  writeMockConfig();
-  // 3. Wipe mock state + request log.
+  console.log(`${LOG} reset (${label}) — admin reset only (skip destructive core reset)`);
+  // Mock-side reset is enough to give each scenario a clean slate for the
+  // assertions this spec actually makes (request log + mock behavior +
+  // fresh per-scenario deep-link tokens). The destructive
+  // `openhuman.config_reset_local_data` call this used to make was
+  // killing the CEF/WDIO session on Linux mid-spec — `reset_local_data`
+  // does `remove_dir_all($OPENHUMAN_WORKSPACE)` plus
+  // `remove_dir_all(~/.openhuman)` while CEF is still mid-flight,
+  // and the renderer doesn't survive that on Linux/CEF (every
+  // sub-test after the first then fails with `invalid session id`).
+  //
+  // Each scenario already sends a NEW deep-link with a NEW JWT, so the
+  // auth state gets replaced naturally — we don't need a filesystem
+  // wipe to test that next-scenario behavior.
+  //
+  // (If a future scenario genuinely depends on a wiped DB, gate it on a
+  // narrower core RPC that doesn't blow away dirs CEF has open.)
   await fetch(`${MOCK_URL}/__admin/reset`, {
     method: 'POST',
     headers: { 'Content-Type': 'application/json' },
diff --git a/src/openhuman/composio/auth_retry.rs b/src/openhuman/composio/auth_retry.rs
@@ -1,5 +1,14 @@
-//! Single-shot retry wrapper around [`ComposioClient::execute_tool`] for
-//! the post-OAuth token-propagation gap (issue #1688).
+//! Single-shot retry wrapper around [`ComposioClient::execute_tool_once`]
+//! for the post-OAuth token-propagation gap (issue #1688).
+//!
+//! NOTE: PR #1707 later added an in-client retry inside
+//! [`ComposioClient::execute_tool`] keyed on the same auth-readiness
+//! error string. To avoid stacking two retry layers (which would issue
+//! up to four backend calls per logical retry — see the
+//! `retries_once_only_even_when_second_call_still_errors` regression),
+//! this wrapper calls the non-retrying [`ComposioClient::execute_tool_once`]
+//! primitive instead. Direct callers of `execute_tool` (LinkedIn enrichment,
+//! heartbeat collectors, tool schemas) still get #1707's inner retry.
 //!
 //! Composio reports `connection.status == ACTIVE` ~1-2s after the user
 //! finishes OAuth, but its action-execution gateway can take another
@@ -66,7 +75,7 @@ pub(crate) async fn execute_with_auth_retry_inner(
         has_args = args.is_some(),
         "[composio][auth_retry] execute start"
     );
-    let first = client.execute_tool(slug, args.clone()).await?;
+    let first = client.execute_tool_once(slug, args.clone()).await?;
     if first.successful {
         tracing::debug!(
             target: "composio",
@@ -98,7 +107,7 @@ pub(crate) async fn execute_with_auth_retry_inner(
         "[composio] post-OAuth auth error on first action call; sleeping and retrying once (#1688)"
     );
     tokio::time::sleep(backoff).await;
-    let second = client.execute_tool(slug, args).await?;
+    let second = client.execute_tool_once(slug, args).await?;
     tracing::debug!(
         target: "composio",
         slug = %slug,
diff --git a/src/openhuman/composio/client.rs b/src/openhuman/composio/client.rs
@@ -233,6 +233,46 @@ impl ComposioClient {
             .await
     }
 
+    /// Single-shot tool execution: one [`Self::post_execute_tool`] call, no retry.
+    ///
+    /// PR #1707 introduced an inner post-OAuth retry for [`Self::execute_tool`];
+    /// callers that already own a retry policy (notably
+    /// [`super::auth_retry::execute_with_auth_retry`]) use this entry point so the
+    /// two layers do not stack (4 gateway hits instead of 2). The slug is trimmed
+    /// and must be non-empty; missing `arguments` are sent as an empty JSON object.
+    /// The outcome is logged at debug level and returned unchanged.
+    pub(crate) async fn execute_tool_once(
+        &self,
+        tool: &str,
+        arguments: Option<serde_json::Value>,
+    ) -> Result<ComposioExecuteResponse> {
+        let slug = tool.trim();
+        if slug.is_empty() {
+            anyhow::bail!("composio.execute_tool_once: tool slug must not be empty");
+        }
+        // An absent argument object is sent as `{}` rather than `null`.
+        let args = arguments.unwrap_or_else(|| serde_json::Value::Object(serde_json::Map::new()));
+        tracing::debug!(tool = %slug, "[composio] execute_tool_once start");
+        let payload = json!({ "tool": slug, "arguments": args });
+        let outcome = self.post_execute_tool(&payload).await;
+        // Borrow the outcome for logging so it can be returned untouched.
+        match outcome.as_ref() {
+            Ok(resp) => tracing::debug!(
+                tool = %slug,
+                successful = resp.successful,
+                has_error = resp.error.is_some(),
+                "[composio] execute_tool_once completed"
+            ),
+            Err(err) => tracing::debug!(
+                tool = %slug,
+                error = %err,
+                "[composio] execute_tool_once failed"
+            ),
+        }
+        outcome
+    }
+
     /// `GET /agent-integrations/composio/github/repos` — list repositories
     /// available via the user's authorized GitHub connected account.
     pub async fn list_github_repos(