Skip to content

Commit 20aec43

Browse files
committed
fix: wrap fireHeartbeat in try/catch/finally to prevent timer death
Bug: if pi.sendMessage() or saveState() threw, the exception propagated uncaught from the setTimeout callback. The armTimer() call at the end was never reached, permanently killing the heartbeat loop after a single failure. The consecutiveErrors counter was also never incremented, making the backoff machinery dead code. Fix: - try/catch/finally around the entire fire path - Success resets consecutiveErrors to 0 - Catch increments consecutiveErrors (drives exponential backoff) - finally always calls armTimer() so the loop never dies - saveState() in catch is best-effort (nested try/catch) - Removed stale comment referencing nonexistent agent_end handler Added 6 tests simulating the error handling contract.
1 parent c0e7814 commit 20aec43

2 files changed

Lines changed: 128 additions & 37 deletions

File tree

pi/extensions/heartbeat.test.mjs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,84 @@ describe("heartbeat: computeBackoffMs", () => {
332332
});
333333
});
334334

335+
describe("heartbeat: fireHeartbeat error handling", () => {
336+
// Simulate the try/catch/finally pattern from fireHeartbeat to verify
337+
// that the error counter and re-arm behavior work correctly.
338+
339+
function simulateFireHeartbeat(state, sendThrows, saveThrows) {
340+
let timerArmed = false;
341+
342+
try {
343+
state.totalRuns += 1;
344+
345+
if (sendThrows) throw new Error("sendMessage failed");
346+
347+
// Success — reset error counter
348+
state.consecutiveErrors = 0;
349+
350+
if (saveThrows) throw new Error("saveState failed");
351+
} catch {
352+
state.consecutiveErrors += 1;
353+
try {
354+
if (saveThrows) throw new Error("saveState failed in catch");
355+
} catch {
356+
// Best-effort — don't prevent re-arm
357+
}
358+
} finally {
359+
timerArmed = true;
360+
}
361+
362+
return { timerArmed, state };
363+
}
364+
365+
it("resets consecutiveErrors on success", () => {
366+
const state = { consecutiveErrors: 3, totalRuns: 5 };
367+
const result = simulateFireHeartbeat(state, false, false);
368+
assert.equal(result.state.consecutiveErrors, 0);
369+
assert.equal(result.state.totalRuns, 6);
370+
assert.equal(result.timerArmed, true);
371+
});
372+
373+
it("increments consecutiveErrors on sendMessage failure", () => {
374+
const state = { consecutiveErrors: 0, totalRuns: 5 };
375+
const result = simulateFireHeartbeat(state, true, false);
376+
assert.equal(result.state.consecutiveErrors, 1);
377+
assert.equal(result.timerArmed, true, "timer should always re-arm");
378+
});
379+
380+
it("accumulates errors across multiple failures", () => {
381+
const state = { consecutiveErrors: 4, totalRuns: 10 };
382+
const result = simulateFireHeartbeat(state, true, false);
383+
assert.equal(result.state.consecutiveErrors, 5);
384+
assert.equal(result.timerArmed, true, "timer should always re-arm");
385+
});
386+
387+
it("re-arms timer even when both send and save throw", () => {
388+
const state = { consecutiveErrors: 0, totalRuns: 0 };
389+
const result = simulateFireHeartbeat(state, true, true);
390+
assert.equal(result.timerArmed, true, "timer must re-arm regardless of errors");
391+
assert.equal(result.state.consecutiveErrors, 1);
392+
});
393+
394+
it("consecutive errors increase backoff delay", () => {
395+
const base = 600_000;
396+
// After 1 error: 2x
397+
assert.equal(computeBackoffMs(1, base), 1_200_000);
398+
// After 2 errors: 4x (capped at max)
399+
assert.equal(computeBackoffMs(2, base), 2_400_000);
400+
// After 3 errors: would be 8x = 4.8M but capped at 3.6M
401+
assert.equal(computeBackoffMs(3, base), MAX_BACKOFF_MS);
402+
});
403+
404+
it("success after errors resets to base interval", () => {
405+
const state = { consecutiveErrors: 5, totalRuns: 10 };
406+
const result = simulateFireHeartbeat(state, false, false);
407+
assert.equal(result.state.consecutiveErrors, 0);
408+
// With 0 errors, backoff returns base interval
409+
assert.equal(computeBackoffMs(result.state.consecutiveErrors, 600_000), 600_000);
410+
});
411+
});
412+
335413
describe("heartbeat: deploy checklist file", () => {
336414
beforeEach(setup);
337415
afterEach(teardown);

pi/extensions/heartbeat.ts

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -120,45 +120,58 @@ export default function heartbeatExtension(pi: ExtensionAPI): void {
120120
}
121121

122122
function fireHeartbeat() {
123-
const content = readHeartbeatFile(state.heartbeatFile);
124-
if (!content) {
125-
// No checklist — skip silently, re-arm for next interval
126-
armTimer();
127-
return;
128-
}
129-
130-
const now = Date.now();
131-
state.lastRunAt = now;
132-
state.totalRuns += 1;
133-
134-
const prompt = [
135-
`🫀 **Heartbeat** (run #${state.totalRuns}, ${new Date(now).toISOString()})`,
136-
``,
137-
`Review the following checklist and take action on any items that need attention.`,
138-
`If everything is healthy, respond briefly with what you checked. Do NOT take action unless something is wrong.`,
139-
``,
140-
`---`,
141-
content,
142-
`---`,
143-
``,
144-
`If you find issues, fix them. If everything looks good, say so briefly and move on.`,
145-
].join("\n");
146-
147-
pi.sendMessage(
148-
{
149-
customType: "heartbeat",
150-
content: prompt,
151-
display: true,
152-
},
153-
{
154-
deliverAs: "followUp",
155-
triggerTurn: true,
123+
try {
124+
const content = readHeartbeatFile(state.heartbeatFile);
125+
if (!content) {
126+
// No checklist — skip silently, re-arm for next interval
127+
armTimer();
128+
return;
156129
}
157-
);
158130

159-
saveState();
160-
// Re-arm after firing (the agent_end handler will also re-arm on error)
161-
armTimer();
131+
const now = Date.now();
132+
state.lastRunAt = now;
133+
state.totalRuns += 1;
134+
135+
const prompt = [
136+
`🫀 **Heartbeat** (run #${state.totalRuns}, ${new Date(now).toISOString()})`,
137+
``,
138+
`Review the following checklist and take action on any items that need attention.`,
139+
`If everything is healthy, respond briefly with what you checked. Do NOT take action unless something is wrong.`,
140+
``,
141+
`---`,
142+
content,
143+
`---`,
144+
``,
145+
`If you find issues, fix them. If everything looks good, say so briefly and move on.`,
146+
].join("\n");
147+
148+
pi.sendMessage(
149+
{
150+
customType: "heartbeat",
151+
content: prompt,
152+
display: true,
153+
},
154+
{
155+
deliverAs: "followUp",
156+
triggerTurn: true,
157+
}
158+
);
159+
160+
// Success — reset error counter
161+
state.consecutiveErrors = 0;
162+
saveState();
163+
} catch (err) {
164+
// Increment error counter for backoff — never let the heartbeat die
165+
state.consecutiveErrors += 1;
166+
try {
167+
saveState();
168+
} catch {
169+
// Best-effort state persistence — don't let a save failure prevent re-arm
170+
}
171+
} finally {
172+
// Always re-arm the timer, even after errors (with backoff)
173+
armTimer();
174+
}
162175
}
163176

164177
function stopTimer() {

0 commit comments

Comments
 (0)