Skip to content

Commit 22946a1

Browse files
authored
fix(boot): unblock cold-boot core start on dev hosts (tinyhumansai#1324)
1 parent a6c7b0f commit 22946a1

6 files changed

Lines changed: 175 additions & 30 deletions

File tree

app/src-tauri/permissions/allow-core-process.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ allow = [
77
"core_rpc_url",
88
"core_rpc_token",
99
"restart_core_process",
10+
# `start_core_process` is invoked by BootCheckGate after the user picks
11+
# Local mode, before redux-persist hydrates the rest of the app (#1316).
12+
# Without this allow entry the invoke is rejected with "Command not
13+
# found" and the boot gate stalls.
14+
"start_core_process",
1015
# `restart_app` triggers `app.restart()` so CEF re-initializes against
1116
# the active user's `users/<id>/cef` profile after an identity flip
1217
# (#900). Without this allow entry, the invoke is silently denied by

app/src-tauri/src/core_process.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,27 @@ impl CoreProcessHandle {
104104
}
105105

106106
pub async fn ensure_running(&self) -> Result<(), String> {
107+
// Idempotent fast path: if we already spawned the embedded server in
108+
// *this* process and it's still alive on the port, the listener is
109+
// us — return Ok without identifying or taking over. Without this,
110+
// a second `start_core_process` call (e.g. HMR re-mounting the boot
111+
// gate) sees its own port as bound, classifies the listener as
112+
// "stale OpenHuman", and walks into the SIGTERM/SIGKILL takeover
113+
// path against itself. (#1130 takeover is meant to recover from
114+
// *external* leftover binaries, not our own in-process spawn.)
115+
{
116+
let guard = self.task.lock().await;
117+
if let Some(task) = guard.as_ref() {
118+
if !task.is_finished() && self.is_rpc_port_open().await {
119+
log::debug!(
120+
"[core] ensure_running: embedded task already running on port {} — no-op",
121+
self.port
122+
);
123+
return Ok(());
124+
}
125+
}
126+
}
127+
107128
if self.is_rpc_port_open().await {
108129
if reuse_existing_listener_enabled() {
109130
log::warn!(

app/src-tauri/src/lib.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,80 @@ pub fn run() {
13541354
return Err("webview_apis bridge failed to start — aborting setup".into());
13551355
}
13561356

1357+
// Purge stray LaunchAgent left over from a prior worktree's
1358+
// `service install`. KeepAlive=true on the plist re-spawns the
1359+
// daemon after every SIGKILL, fighting `ensure_running`'s
1360+
// stale-listener takeover and re-binding port 7788 on cold boot.
1361+
// (Symptom: "Failed to start local core: signaled pid <X> but
1362+
// port 7788 remained bound after 5000ms".)
1363+
//
1364+
// Tightly scoped to avoid clobbering a legitimate `service
1365+
// install`:
1366+
// - dev builds only (`cfg!(debug_assertions)`)
1367+
// - skip when this process IS the daemon (`!daemon_mode`)
1368+
// - only purge when the plist's ProgramArguments[0] points
1369+
// somewhere other than the currently-running executable —
1370+
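// i.e. a sibling worktree's stale binary, not us. (Note: a plist that exists but cannot be read or parsed, or a failed `current_exe()` lookup, also compares as "not us" via `unwrap_or(false)` and is purged.)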
1371+
#[cfg(target_os = "macos")]
1372+
if cfg!(debug_assertions) && !daemon_mode {
1373+
const STALE_LABEL: &str = "com.openhuman.core";
1374+
1375+
if let Ok(home) = std::env::var("HOME") {
1376+
let plist = std::path::PathBuf::from(&home)
1377+
.join("Library")
1378+
.join("LaunchAgents")
1379+
.join(format!("{STALE_LABEL}.plist"));
1380+
1381+
let plist_targets_us = std::fs::read_to_string(&plist)
1382+
.ok()
1383+
.and_then(|contents| {
1384+
// ProgramArguments[0] is the first <string>...</string>
1385+
// after the <key>ProgramArguments</key> marker. The
1386+
// service installer always writes it as an absolute
1387+
// path to the openhuman-core binary (see
1388+
// src/openhuman/service/macos.rs).
1389+
let after_key = contents.split("<key>ProgramArguments</key>").nth(1)?;
1390+
let start = after_key.find("<string>")? + "<string>".len();
1391+
let rest = &after_key[start..];
1392+
let end = rest.find("</string>")?;
1393+
Some(std::path::PathBuf::from(rest[..end].trim()))
1394+
})
1395+
.zip(std::env::current_exe().ok())
1396+
.map(|(plist_bin, self_bin)| plist_bin == self_bin)
1397+
.unwrap_or(false);
1398+
1399+
if plist.exists() && !plist_targets_us {
1400+
let uid = std::process::Command::new("id")
1401+
.arg("-u")
1402+
.output()
1403+
.ok()
1404+
.and_then(|o| String::from_utf8(o.stdout).ok())
1405+
.map(|s| s.trim().to_string());
1406+
1407+
if let Some(uid) = uid {
1408+
let target = format!("gui/{uid}/{STALE_LABEL}");
1409+
let _ = std::process::Command::new("launchctl")
1410+
.arg("bootout")
1411+
.arg(&target)
1412+
.status();
1413+
}
1414+
1415+
match std::fs::remove_file(&plist) {
1416+
Ok(()) => log::warn!(
1417+
"[boot] removed stale LaunchAgent plist at {} \
1418+
(points at a different binary than this build — \
1419+
likely a sibling worktree's `service install`)",
1420+
plist.display()
1421+
),
1422+
Err(err) => log::warn!(
1423+
"[boot] failed to remove stale LaunchAgent plist {}: {err}",
1424+
plist.display()
1425+
),
1426+
}
1427+
}
1428+
}
1429+
}
1430+
13571431
let core_handle =
13581432
core_process::CoreProcessHandle::new(core_process::default_core_port());
13591433
std::env::set_var("OPENHUMAN_CORE_RPC_URL", core_handle.rpc_url());

app/src/lib/bootCheck/index.test.ts

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ describe('runBootCheck — local mode', () => {
4242

4343
const transport = makeTransport({
4444
callRpc: rpcResponder({
45-
'openhuman.ping': {},
45+
'core.ping': {},
4646
'openhuman.service_status': { installed: false, running: false },
47-
'openhuman.update_version': { version_info: { version: appVersion } },
47+
'openhuman.update_version': { result: { version: appVersion } },
4848
}),
4949
});
5050

@@ -57,9 +57,9 @@ describe('runBootCheck — local mode', () => {
5757

5858
const transport = makeTransport({
5959
callRpc: rpcResponder({
60-
'openhuman.ping': {},
60+
'core.ping': {},
6161
'openhuman.service_status': { installed: true, running: false },
62-
'openhuman.update_version': { version_info: { version: appVersion } },
62+
'openhuman.update_version': { result: { version: appVersion } },
6363
}),
6464
});
6565

@@ -70,9 +70,9 @@ describe('runBootCheck — local mode', () => {
7070
it('returns daemonDetected when service_status shows running=true', async () => {
7171
const transport = makeTransport({
7272
callRpc: rpcResponder({
73-
'openhuman.ping': {},
73+
'core.ping': {},
7474
'openhuman.service_status': { installed: false, running: true },
75-
'openhuman.update_version': { version_info: { version: 'x' } },
75+
'openhuman.update_version': { result: { version: 'x' } },
7676
}),
7777
});
7878

@@ -83,9 +83,9 @@ describe('runBootCheck — local mode', () => {
8383
it('returns outdatedLocal when core version differs from app version', async () => {
8484
const transport = makeTransport({
8585
callRpc: rpcResponder({
86-
'openhuman.ping': {},
86+
'core.ping': {},
8787
'openhuman.service_status': { installed: false, running: false },
88-
'openhuman.update_version': { version_info: { version: '0.0.0-different' } },
88+
'openhuman.update_version': { result: { version: '0.0.0-different' } },
8989
}),
9090
});
9191

@@ -96,7 +96,7 @@ describe('runBootCheck — local mode', () => {
9696
it('returns noVersionMethod when update_version returns -32601', async () => {
9797
const transport = makeTransport({
9898
callRpc: rpcResponder({
99-
'openhuman.ping': {},
99+
'core.ping': {},
100100
'openhuman.service_status': { installed: false, running: false },
101101
'openhuman.update_version': new Error('JSON-RPC error -32601 Method not found'),
102102
}),
@@ -109,7 +109,7 @@ describe('runBootCheck — local mode', () => {
109109
it('returns noVersionMethod on "method not found" text variant', async () => {
110110
const transport = makeTransport({
111111
callRpc: rpcResponder({
112-
'openhuman.ping': {},
112+
'core.ping': {},
113113
'openhuman.service_status': { installed: false, running: false },
114114
'openhuman.update_version': new Error('method not found'),
115115
}),
@@ -155,9 +155,7 @@ describe('runBootCheck — cloud mode', () => {
155155
const appVersion = (await import('../../utils/config')).APP_VERSION;
156156

157157
const transport = makeTransport({
158-
callRpc: rpcResponder({
159-
'openhuman.update_version': { version_info: { version: appVersion } },
160-
}),
158+
callRpc: rpcResponder({ 'openhuman.update_version': { result: { version: appVersion } } }),
161159
});
162160

163161
const result = await runBootCheck(
@@ -169,9 +167,7 @@ describe('runBootCheck — cloud mode', () => {
169167

170168
it('returns outdatedCloud when version differs', async () => {
171169
const transport = makeTransport({
172-
callRpc: rpcResponder({
173-
'openhuman.update_version': { version_info: { version: '0.0.0-old' } },
174-
}),
170+
callRpc: rpcResponder({ 'openhuman.update_version': { result: { version: '0.0.0-old' } } }),
175171
});
176172

177173
const result = await runBootCheck(
@@ -228,22 +224,22 @@ describe('runBootCheck — error and edge branches', () => {
228224

229225
const transport = makeTransport({
230226
callRpc: rpcResponder({
231-
'openhuman.ping': {},
227+
'core.ping': {},
232228
'openhuman.service_status': new Error('rpc transport blew up'),
233-
'openhuman.update_version': { version_info: { version: appVersion } },
229+
'openhuman.update_version': { result: { version: appVersion } },
234230
}),
235231
});
236232

237233
const result = await runBootCheck({ kind: 'local' }, transport);
238234
expect(result.kind).toBe('match');
239235
});
240236

241-
it('treats empty version_info.version as outdatedLocal', async () => {
237+
it('treats empty version as outdatedLocal', async () => {
242238
const transport = makeTransport({
243239
callRpc: rpcResponder({
244-
'openhuman.ping': {},
240+
'core.ping': {},
245241
'openhuman.service_status': { installed: false, running: false },
246-
'openhuman.update_version': { version_info: { version: '' } },
242+
'openhuman.update_version': { result: { version: '' } },
247243
}),
248244
});
249245

@@ -268,7 +264,7 @@ describe('runBootCheck — error and edge branches', () => {
268264
let pingCalls = 0;
269265
const transport: BootCheckTransport = {
270266
callRpc: vi.fn(async (method: string) => {
271-
if (method === 'openhuman.ping') {
267+
if (method === 'core.ping') {
272268
pingCalls += 1;
273269
if (pingCalls === 1) return {};
274270
throw new Error('subsequent failure');

app/src/lib/bootCheck/index.ts

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,12 @@ function isMethodNotFound(err: unknown): boolean {
6464
}
6565

6666
/**
67-
* Poll `openhuman.ping` with exponential back-off until the core responds or
68-
* we exhaust the budget.
67+
* Poll `core.ping` with exponential back-off until the core responds or we
68+
* exhaust the budget. `core.ping` is a Tier-1 dispatcher method (see
69+
* `src/core/dispatch.rs`) that responds before any domain controller is
70+
* registered, which is exactly what we want for a liveness probe — it tells
71+
* us "the HTTP server is up and the dispatcher is wired" without coupling to
72+
* any specific subsystem's readiness.
6973
*
7074
* Returns true when the core is reachable, false on timeout.
7175
*/
@@ -79,7 +83,7 @@ async function waitForCore(
7983
const elapsedAtStart = Date.now() - startedAt;
8084
try {
8185
log('[boot-check] ping attempt elapsed=%dms', elapsedAtStart);
82-
await callRpc('openhuman.ping', {});
86+
await callRpc('core.ping', {});
8387
log('[boot-check] ping succeeded elapsed=%dms', elapsedAtStart);
8488
return true;
8589
} catch {
@@ -131,11 +135,18 @@ type VersionCheckResult = 'match' | 'outdated' | 'noVersionMethod' | 'unreachabl
131135

132136
async function checkVersion(callRpc: BootCheckTransport['callRpc']): Promise<VersionCheckResult> {
133137
try {
134-
const result = await callRpc<{ version_info?: { version?: string } }>(
138+
// `openhuman.update_version` is wrapped by RpcOutcome::single_log
139+
// (see src/openhuman/update/ops.rs + src/rpc/mod.rs::into_cli_compatible_json):
140+
// when logs are present the response shape is `{ result: VersionInfo, logs }`,
141+
// and VersionInfo is `{ version, target_triple, asset_prefix }`. Earlier
142+
// attempts read `result.version_info.version` (no such field) and then
143+
// `result.version` (skipped the RpcOutcome `result` wrapper) — both
144+
// yielded '' and pinned every boot to "outdated local".
145+
const response = await callRpc<{ result?: { version?: string } }>(
135146
'openhuman.update_version',
136147
{}
137148
);
138-
const coreVersion = result?.version_info?.version ?? '';
149+
const coreVersion = response?.result?.version ?? '';
139150
log('[boot-check] version_check app=%s core=%s', APP_VERSION, coreVersion);
140151

141152
if (!coreVersion) {
@@ -164,7 +175,7 @@ async function checkVersion(callRpc: BootCheckTransport['callRpc']): Promise<Ver
164175
*
165176
* Local mode:
166177
* 1. Invoke `start_core_process` Tauri command to spawn the embedded core.
167-
* 2. Poll `openhuman.ping` until reachable (≤10 s).
178+
* 2. Poll `core.ping` until reachable (≤10 s).
168179
* 3. Check for a legacy daemon via `service_status`.
169180
* 4. Version-check via `update_version`.
170181
*

app/src/store/index.ts

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import {
1010
REGISTER,
1111
REHYDRATE,
1212
} from 'redux-persist';
13-
import defaultStorage from 'redux-persist/lib/storage';
1413

1514
import { IS_DEV } from '../utils/config';
1615
import accountsReducer from './accountsSlice';
@@ -30,7 +29,46 @@ const storage = userScopedStorage;
3029

3130
// coreMode is pre-login and not user-scoped — use plain localStorage so the
3231
// setting survives across user switches without leaking per-user state.
33-
const coreModePersistConfig = { key: 'coreMode', storage: defaultStorage, whitelist: ['mode'] };
32+
// Inline adapter rather than `redux-persist/lib/storage`'s default export,
33+
// which Vite's CJS dep-pre-bundling can resolve to the module namespace
34+
// (then `storage.getItem` is undefined and rehydrate throws on cold boot).
35+
const localStorageAdapter = {
36+
getItem: (key: string) =>
37+
Promise.resolve(
38+
(() => {
39+
try {
40+
return localStorage.getItem(key);
41+
} catch {
42+
return null;
43+
}
44+
})()
45+
),
46+
setItem: (key: string, value: string) =>
47+
Promise.resolve(
48+
(() => {
49+
try {
50+
localStorage.setItem(key, value);
51+
} catch {
52+
/* ignore quota / unavailable */
53+
}
54+
})()
55+
),
56+
removeItem: (key: string) =>
57+
Promise.resolve(
58+
(() => {
59+
try {
60+
localStorage.removeItem(key);
61+
} catch {
62+
/* ignore */
63+
}
64+
})()
65+
),
66+
};
67+
const coreModePersistConfig = {
68+
key: 'coreMode',
69+
storage: localStorageAdapter,
70+
whitelist: ['mode'],
71+
};
3472
const persistedCoreModeReducer = persistReducer(coreModePersistConfig, coreModeReducer);
3573

3674
const channelConnectionsPersistConfig = {

0 commit comments

Comments
 (0)