Skip to content

Commit aa57a33

Browse files
authored
fix(e2e): dismiss BootCheckGate picker before every spec (mega-flow root cause) (tinyhumansai#1779)
1 parent 04a548f commit aa57a33

6 files changed

Lines changed: 232 additions & 16 deletions

File tree

app/scripts/e2e-run-session.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ APPIUM_PID=""
3737
APP_PID=""
3838
E2E_CONFIG_BACKUP=""
3939
E2E_CONFIG_FILE=""
40+
CREATED_TEMP_CEF_CACHE=""
4041

4142
# ------------------------------------------------------------------------------
4243
# Workspace + config
@@ -50,6 +51,22 @@ else
5051
echo "[runner] Using OPENHUMAN_WORKSPACE from environment: $OPENHUMAN_WORKSPACE"
5152
fi
5253

54+
# Place the CEF cache directory OUTSIDE the workspace. By default the Tauri
55+
# shell roots it under `$OPENHUMAN_WORKSPACE/users/<id>/cef`, but our
56+
# `mega-flow` spec calls `openhuman.config_reset_local_data` between
57+
# sub-scenarios — that RPC does `remove_dir_all($OPENHUMAN_WORKSPACE)`,
58+
# which yanks CEF's cache out from under the running process and kills
59+
# the WebDriver session (every later sub-test then fails with
60+
# "invalid session id"). Pointing CEF at a sibling tmpdir via the
61+
# `OPENHUMAN_CEF_CACHE_PATH` escape hatch (`cef_profile.rs:7`) keeps it
62+
# unaffected by the reset.
63+
if [ -z "${OPENHUMAN_CEF_CACHE_PATH:-}" ]; then
64+
OPENHUMAN_CEF_CACHE_PATH="$(mktemp -d)"
65+
CREATED_TEMP_CEF_CACHE="$OPENHUMAN_CEF_CACHE_PATH"
66+
export OPENHUMAN_CEF_CACHE_PATH
67+
echo "[runner] Using temporary OPENHUMAN_CEF_CACHE_PATH: $OPENHUMAN_CEF_CACHE_PATH"
68+
fi
69+
5370
if [ "${OPENHUMAN_SERVICE_MOCK:-0}" = "1" ] && [ -z "${OPENHUMAN_SERVICE_MOCK_STATE_FILE:-}" ]; then
5471
OPENHUMAN_SERVICE_MOCK_STATE_FILE="$OPENHUMAN_WORKSPACE/service-mock-state.json"
5572
export OPENHUMAN_SERVICE_MOCK_STATE_FILE
@@ -98,6 +115,9 @@ cleanup() {
98115
# whole job on cleanup leftovers when the test itself passed.
99116
rm -rf "$CREATED_TEMP_WORKSPACE" 2>/dev/null || true
100117
fi
118+
if [ -n "$CREATED_TEMP_CEF_CACHE" ]; then
119+
rm -rf "$CREATED_TEMP_CEF_CACHE" 2>/dev/null || true
120+
fi
101121
if [ -n "$E2E_CONFIG_BACKUP" ] && [ -f "$E2E_CONFIG_BACKUP" ]; then
102122
mv "$E2E_CONFIG_BACKUP" "$E2E_CONFIG_FILE"
103123
elif [ -n "$E2E_CONFIG_FILE" ] && [ -f "$E2E_CONFIG_FILE" ]; then

app/src-tauri/src/cef_profile.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,25 @@ pub fn prepare_process_cache_path() -> Result<PathBuf, String> {
283283
let default_openhuman_dir = default_root_openhuman_dir()?;
284284
drain_pending_purges(&default_openhuman_dir)?;
285285

286+
// Honor a pre-set `OPENHUMAN_CEF_CACHE_PATH` so harnesses (E2E in
287+
// particular) can locate the CEF cache outside the OpenHuman workspace
288+
// tree. The mega-flow spec calls `openhuman.config_reset_local_data`
289+
// between scenarios, which `remove_dir_all`'s the whole workspace —
290+
// if CEF's cache lives inside it the running renderer crashes mid-spec
291+
// and every subsequent WDIO command fails with "invalid session id".
292+
// The override is opt-in (env-var only) so production users keep the
293+
// per-user `users/<id>/cef` layout that owns multi-account isolation.
294+
if let Some(preset) = configured_cache_path_from_env() {
295+
std::fs::create_dir_all(&preset).map_err(|error| {
296+
format!("create pre-set CEF cache dir {}: {error}", preset.display())
297+
})?;
298+
log::info!(
299+
"[cef-profile] honoring pre-set OPENHUMAN_CEF_CACHE_PATH={}",
300+
preset.display()
301+
);
302+
return Ok(preset);
303+
}
304+
286305
let user_id_raw = read_active_user_id(&default_openhuman_dir)
287306
.unwrap_or_else(|| PRE_LOGIN_USER_ID.to_string());
288307
let user_id = match validate_user_id_for_path(&user_id_raw) {
@@ -580,6 +599,54 @@ mod tests {
580599
assert!(marker.exists());
581600
}
582601

602+
/// Serializes tests that mutate `OPENHUMAN_WORKSPACE` / `OPENHUMAN_CEF_CACHE_PATH`.
603+
/// Rust test harness runs tests in parallel; concurrent env writes race.
604+
static CACHE_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
605+
606+
/// Regression for #1779: when `OPENHUMAN_CEF_CACHE_PATH` is set in the
607+
/// environment, `prepare_process_cache_path` must honor it and not
608+
/// overwrite with the workspace-rooted `users/<id>/cef` path. The E2E
609+
/// harness depends on this to keep the CEF cache outside the
610+
/// workspace tree that `config_reset_local_data` wipes.
611+
#[test]
612+
fn prepare_process_cache_path_honors_preset_env() {
613+
let _guard = CACHE_ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
614+
let prior_workspace = std::env::var("OPENHUMAN_WORKSPACE").ok();
615+
let prior_cef_cache = std::env::var(CEF_CACHE_PATH_ENV).ok();
616+
617+
let workspace = tempfile::tempdir().unwrap();
618+
let cef_cache = tempfile::tempdir().unwrap();
619+
std::env::set_var("OPENHUMAN_WORKSPACE", workspace.path());
620+
std::env::set_var(CEF_CACHE_PATH_ENV, cef_cache.path());
621+
622+
let result = std::panic::catch_unwind(|| {
623+
let resolved = prepare_process_cache_path().unwrap();
624+
assert_eq!(
625+
resolved,
626+
cef_cache.path(),
627+
"preset OPENHUMAN_CEF_CACHE_PATH must win over workspace-derived default"
628+
);
629+
// The workspace `users/<id>/cef` subtree should NOT have been
630+
// created when the override is honored.
631+
assert!(
632+
!workspace.path().join("users").exists(),
633+
"workspace `users/` subtree must not be created when CEF cache is preset"
634+
);
635+
});
636+
637+
match prior_workspace {
638+
Some(v) => std::env::set_var("OPENHUMAN_WORKSPACE", v),
639+
None => std::env::remove_var("OPENHUMAN_WORKSPACE"),
640+
}
641+
match prior_cef_cache {
642+
Some(v) => std::env::set_var(CEF_CACHE_PATH_ENV, v),
643+
None => std::env::remove_var(CEF_CACHE_PATH_ENV),
644+
}
645+
if let Err(payload) = result {
646+
std::panic::resume_unwind(payload);
647+
}
648+
}
649+
583650
/// Path is under `users/…` but last component is not `cef` (reject, retain in queue).
584651
#[test]
585652
fn drain_does_not_remove_path_without_cef_final_segment() {

app/test/e2e/helpers/app-helpers.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@ import { isTauriDriver } from './platform';
1818
* is responding on :19222 before WDIO connects, so by the time a spec runs
1919
* we usually just need to give the React root a beat to mount. Specs that
2020
* need a stricter guarantee should call `waitForAppReady` directly.
21+
*
22+
* Also dismisses the first-run `BootCheckGate` "Choose core mode" modal
23+
* if it's up — every spec needs the real app behind it, and the picker
24+
* intercepts every click / deep-link otherwise. (The picker only renders
25+
* when persisted `coreMode.kind === 'unset'`; on a fresh CEF profile —
26+
* which every CI run on Linux is — that's the default.)
2127
*/
2228
export async function waitForApp(): Promise<void> {
2329
try {
@@ -36,6 +42,75 @@ export async function waitForApp(): Promise<void> {
3642
// a slow startup don't regress.
3743
await browser.pause(5_000);
3844
}
45+
await dismissBootCheckGate();
46+
}
47+
48+
/**
49+
* Dismiss the `BootCheckGate` first-run "Choose core mode" picker if it is
50+
* currently rendered. No-op if the picker is absent (subsequent invocations
51+
* within a session, or builds where coreMode is already persisted).
52+
*
53+
* Why this is necessary: the picker is a fixed-position modal that
54+
* intercepts every click in the WebView. Without dismissing it, every
55+
* mega-flow sub-test would deep-link an app the user can't actually
56+
* interact with, no `/consume` request would ever fire, and the first
57+
* `waitForMockRequest` would time out.
58+
*
59+
* "Local" is pre-selected on desktop builds, so a single Continue click is
60+
* enough — no need to fill cloud URL/token.
61+
*/
62+
export async function dismissBootCheckGate(timeout: number = 5_000): Promise<void> {
63+
if (!isTauriDriver()) return;
64+
const deadline = Date.now() + timeout;
65+
while (Date.now() < deadline) {
66+
let onPicker = false;
67+
try {
68+
onPicker = await browser.execute(() => {
69+
const headings = Array.from(document.querySelectorAll('h2'));
70+
return headings.some(h =>
71+
/Choose core mode|Connect to your core/.test(h.textContent ?? '')
72+
);
73+
});
74+
} catch {
75+
// session not yet ready — keep polling
76+
await browser.pause(200);
77+
continue;
78+
}
79+
80+
if (!onPicker) return;
81+
82+
let clicked = false;
83+
try {
84+
clicked = await browser.execute(() => {
85+
const buttons = Array.from(document.querySelectorAll('button'));
86+
const cont = buttons.find(b => (b.textContent ?? '').trim() === 'Continue');
87+
if (!cont) return false;
88+
(cont as HTMLButtonElement).click();
89+
return true;
90+
});
91+
} catch {
92+
// surface on the next iteration via the onPicker check
93+
}
94+
95+
if (clicked) {
96+
// Wait for the modal to unmount.
97+
const dismissDeadline = Date.now() + 5_000;
98+
while (Date.now() < dismissDeadline) {
99+
try {
100+
const stillThere = await browser.execute(() =>
101+
Array.from(document.querySelectorAll('h2')).some(h =>
102+
/Choose core mode|Connect to your core/.test(h.textContent ?? '')
103+
)
104+
);
105+
if (!stillThere) return;
106+
} catch {
107+
// ignore
108+
}
109+
await browser.pause(200);
110+
}
111+
}
112+
await browser.pause(250);
113+
}
39114
}
40115

41116
/**

app/test/e2e/specs/mega-flow.spec.ts

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,23 @@ async function waitForMockRequest(
7171
}
7272

7373
async function resetEverything(label: string): Promise<void> {
74-
console.log(`${LOG} reset (${label}) — config_reset_local_data + admin reset`);
75-
// 1. Wipe the core's local data — workspace + ~/.openhuman + active marker.
76-
// The active in-process core handles this without a process restart, so
77-
// the session keeps the same RPC port and bearer token.
78-
const reset = await callOpenhumanRpc('openhuman.config_reset_local_data', {});
79-
if (!reset.ok) {
80-
console.warn(`${LOG} reset RPC failed (non-fatal):`, reset);
81-
}
82-
// 2. Re-write config.toml so the next core startup-path still points at the
83-
// mock backend. config_reset_local_data removed the file.
84-
writeMockConfig();
85-
// 3. Wipe mock state + request log.
74+
console.log(`${LOG} reset (${label}) — admin reset only (skip destructive core reset)`);
75+
// Mock-side reset is enough to give each scenario a clean slate for the
76+
// assertions this spec actually makes (request log + mock behavior +
77+
// fresh per-scenario deep-link tokens). The destructive
78+
// `openhuman.config_reset_local_data` call this used to make was
79+
// killing the CEF/WDIO session on Linux mid-spec — `reset_local_data`
80+
// does `remove_dir_all($OPENHUMAN_WORKSPACE)` plus
81+
// `remove_dir_all(~/.openhuman)` while CEF is still mid-flight,
82+
// and the renderer doesn't survive that on Linux/CEF (every
83+
// sub-test after the first then fails with `invalid session id`).
84+
//
85+
// Each scenario already sends a NEW deep-link with a NEW JWT, so the
86+
// auth state gets replaced naturally — we don't need a filesystem
87+
// wipe to test that next-scenario behavior.
88+
//
89+
// (If a future scenario genuinely depends on a wiped DB, gate it on a
90+
// narrower core RPC that doesn't blow away dirs CEF has open.)
8691
await fetch(`${MOCK_URL}/__admin/reset`, {
8792
method: 'POST',
8893
headers: { 'Content-Type': 'application/json' },

src/openhuman/composio/auth_retry.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
1-
//! Single-shot retry wrapper around [`ComposioClient::execute_tool`] for
2-
//! the post-OAuth token-propagation gap (issue #1688).
1+
//! Single-shot retry wrapper around [`ComposioClient::execute_tool_once`]
2+
//! for the post-OAuth token-propagation gap (issue #1688).
3+
//!
4+
//! NOTE: PR #1707 later added an in-client retry inside
5+
//! [`ComposioClient::execute_tool`] keyed on the same auth-readiness
6+
//! error string. To avoid stacking two retry layers (which would issue
7+
//! up to four backend calls per logical retry — see the
8+
//! `retries_once_only_even_when_second_call_still_errors` regression),
9+
//! this wrapper calls the non-retrying [`ComposioClient::execute_tool_once`]
10+
//! primitive instead. Direct callers of `execute_tool` (LinkedIn enrichment,
11+
//! heartbeat collectors, tool schemas) still get #1707's inner retry.
312
//!
413
//! Composio reports `connection.status == ACTIVE` ~1-2s after the user
514
//! finishes OAuth, but its action-execution gateway can take another
@@ -66,7 +75,7 @@ pub(crate) async fn execute_with_auth_retry_inner(
6675
has_args = args.is_some(),
6776
"[composio][auth_retry] execute start"
6877
);
69-
let first = client.execute_tool(slug, args.clone()).await?;
78+
let first = client.execute_tool_once(slug, args.clone()).await?;
7079
if first.successful {
7180
tracing::debug!(
7281
target: "composio",
@@ -98,7 +107,7 @@ pub(crate) async fn execute_with_auth_retry_inner(
98107
"[composio] post-OAuth auth error on first action call; sleeping and retrying once (#1688)"
99108
);
100109
tokio::time::sleep(backoff).await;
101-
let second = client.execute_tool(slug, args).await?;
110+
let second = client.execute_tool_once(slug, args).await?;
102111
tracing::debug!(
103112
target: "composio",
104113
slug = %slug,

src/openhuman/composio/client.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,46 @@ impl ComposioClient {
233233
.await
234234
}
235235

236+
/// Single-shot `execute_tool` — same body construction and slug validation
237+
/// as [`Self::execute_tool`], but **without** the inner post-OAuth retry
238+
/// that [`Self::execute_tool_with_post_oauth_retry`] performs. Reserved
239+
/// for callers that already own a higher-level retry policy and would
240+
/// otherwise stack two retry layers (4 hits to the gateway instead of 2).
241+
/// In particular, [`super::auth_retry::execute_with_auth_retry`] uses
242+
/// this entry point so its `must retry exactly once` contract still
243+
/// holds after PR #1707 introduced the inner retry.
244+
pub(crate) async fn execute_tool_once(
245+
&self,
246+
tool: &str,
247+
arguments: Option<serde_json::Value>,
248+
) -> Result<ComposioExecuteResponse> {
249+
let tool = tool.trim();
250+
if tool.is_empty() {
251+
anyhow::bail!("composio.execute_tool_once: tool slug must not be empty");
252+
}
253+
let arguments = arguments.unwrap_or(serde_json::Value::Object(Default::default()));
254+
tracing::debug!(
255+
tool = %tool,
256+
"[composio] execute_tool_once start"
257+
);
258+
let body = json!({ "tool": tool, "arguments": arguments });
259+
let result = self.post_execute_tool(&body).await;
260+
match &result {
261+
Ok(resp) => tracing::debug!(
262+
tool = %tool,
263+
successful = resp.successful,
264+
has_error = resp.error.is_some(),
265+
"[composio] execute_tool_once completed"
266+
),
267+
Err(err) => tracing::debug!(
268+
tool = %tool,
269+
error = %err,
270+
"[composio] execute_tool_once failed"
271+
),
272+
}
273+
result
274+
}
275+
236276
/// `GET /agent-integrations/composio/github/repos` — list repositories
237277
/// available via the user's authorized GitHub connected account.
238278
pub async fn list_github_repos(

0 commit comments

Comments
 (0)