2525import type { ExtensionAPI } from "@mariozechner/pi-coding-agent" ;
2626import { Type } from "@sinclair/typebox" ;
2727import { StringEnum } from "@mariozechner/pi-ai" ;
28- import { appendFileSync , closeSync , existsSync , mkdirSync , openSync , readdirSync , readFileSync , readSync , statSync } from "node:fs" ;
28+ import { appendFileSync , closeSync , existsSync , mkdirSync , openSync , readdirSync , readFileSync , readlinkSync , readSync , statSync , unlinkSync } from "node:fs" ;
29+ import { execFileSync , execSync } from "node:child_process" ;
2930import { homedir } from "node:os" ;
3031import { dirname , join } from "node:path" ;
3132import { discoverSubagentPackages , readSubagentState , resolveEffectiveState } from "./subagent-registry.ts" ;
@@ -132,7 +133,6 @@ function checkSessions(): CheckResult[] {
132133
133134 // Check that the symlink target (.sock file) exists
134135 try {
135- const { readlinkSync } = require ( "node:fs" ) ;
136136 const target = readlinkSync ( aliasPath ) ;
137137 const sockPath = join ( SOCKET_DIR , target ) ;
138138 if ( ! existsSync ( sockPath ) ) {
@@ -353,8 +353,7 @@ function checkUnansweredMentions(): CheckResult[] {
353353 // Support both bridge implementations:
354354 // - broker-bridge.mjs: "... (type: app_mention, ts: 1234.5678)"
355355 // - bridge.mjs: "app_mention ... ts: 1234.5678"
356- const { execSync } = require ( "node:child_process" ) ;
357- const logTail = execSync ( `tail -500 "${ bridgeLogPath } "` , { encoding : "utf-8" } ) ;
356+ const logTail = execFileSync ( "tail" , [ "-500" , bridgeLogPath ] , { encoding : "utf-8" } ) ;
358357
359358 const mentionThreadTsSet = new Set < string > ( extractMentionThreadTs ( logTail ) ) ;
360359
@@ -638,7 +637,6 @@ function logRecovery(entry: RecoveryAction): void {
638637 */
639638async function tryAutoRecover ( failures : CheckResult [ ] ) : Promise < RecoveryAction [ ] > {
640639 const actions : RecoveryAction [ ] = [ ] ;
641- const { execSync } = require ( "node:child_process" ) ;
642640
643641 for ( const failure of failures ) {
644642 // Auto-recover: bridge down → restart the bridge tmux session
@@ -659,7 +657,7 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
659657
660658 // Kill existing bridge tmux session
661659 try {
662- execSync ( ' tmux kill-session -t baudbot-gateway-bridge 2>/dev/null' , { timeout : 5000 } ) ;
660+ execFileSync ( " tmux" , [ " kill-session" , "-t" , " baudbot-gateway-bridge" ] , { timeout : 5000 } ) ;
663661 } catch {
664662 // May not exist — that's fine
665663 }
@@ -673,33 +671,56 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
673671
674672 // Restart via startup script
675673 const startupScript = join ( homedir ( ) , ".pi" , "agent" , "skills" , "control-agent" , "startup-pi.sh" ) ;
676- if ( existsSync ( startupScript ) ) {
677- // Get live session UUIDs from session-control dir
678- const sockFiles = readdirSync ( SOCKET_DIR ) . filter ( ( f ) => f . endsWith ( ".sock" ) ) ;
679- const uuids = sockFiles . map ( ( f ) => f . replace ( ".sock" , "" ) ) . join ( " " ) ;
680- if ( uuids ) {
681- execSync ( `bash "${ startupScript } " ${ uuids } 2>&1` , {
682- timeout : 30000 ,
683- encoding : "utf-8" ,
684- } ) ;
685-
686- // Verify bridge came back
687- await new Promise ( ( resolve ) => setTimeout ( resolve , 3000 ) ) ;
688- const verifyResult = await checkBridge ( ) ;
674+ if ( ! existsSync ( startupScript ) ) {
675+ const entry : RecoveryAction = {
676+ timestamp : new Date ( ) . toISOString ( ) ,
677+ check : failure . name ,
678+ action : "bridge_restart" ,
679+ success : false ,
680+ detail : `Bridge session killed but startup script not found at ${ startupScript } — cannot restart` ,
681+ } ;
682+ actions . push ( entry ) ;
683+ logRecovery ( entry ) ;
684+ continue ;
685+ }
689686
690- const entry : RecoveryAction = {
691- timestamp : new Date ( ) . toISOString ( ) ,
692- check : failure . name ,
693- action : "bridge_restart" ,
694- success : verifyResult . ok ,
695- detail : verifyResult . ok
696- ? "Bridge restarted and verified healthy"
697- : `Bridge restart attempted but still failing: ${ verifyResult . detail } ` ,
698- } ;
699- actions . push ( entry ) ;
700- logRecovery ( entry ) ;
701- }
687+ // Get live session UUIDs from session-control dir
688+ const sockFiles = readdirSync ( SOCKET_DIR ) . filter ( ( f ) => f . endsWith ( ".sock" ) ) ;
689+ const uuids = sockFiles . map ( ( f ) => f . replace ( ".sock" , "" ) ) ;
690+ if ( uuids . length === 0 ) {
691+ const entry : RecoveryAction = {
692+ timestamp : new Date ( ) . toISOString ( ) ,
693+ check : failure . name ,
694+ action : "bridge_restart" ,
695+ success : false ,
696+ detail : "Bridge session killed but no live socket UUIDs found — cannot restart" ,
697+ } ;
698+ actions . push ( entry ) ;
699+ logRecovery ( entry ) ;
700+ continue ;
702701 }
702+
703+ // Pass UUIDs as separate args to avoid shell injection
704+ execFileSync ( "bash" , [ startupScript , ...uuids ] , {
705+ timeout : 30000 ,
706+ encoding : "utf-8" ,
707+ } ) ;
708+
709+ // Verify bridge came back
710+ await new Promise ( ( resolve ) => setTimeout ( resolve , 3000 ) ) ;
711+ const verifyResult = await checkBridge ( ) ;
712+
713+ const entry : RecoveryAction = {
714+ timestamp : new Date ( ) . toISOString ( ) ,
715+ check : failure . name ,
716+ action : "bridge_restart" ,
717+ success : verifyResult . ok ,
718+ detail : verifyResult . ok
719+ ? "Bridge restarted and verified healthy"
720+ : `Bridge restart attempted but still failing: ${ verifyResult . detail } ` ,
721+ } ;
722+ actions . push ( entry ) ;
723+ logRecovery ( entry ) ;
703724 } catch ( err : any ) {
704725 const entry : RecoveryAction = {
705726 timestamp : new Date ( ) . toISOString ( ) ,
@@ -717,17 +738,34 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
717738 if ( failure . name . startsWith ( "orphan:" ) ) {
718739 const sessionName = failure . name . replace ( "orphan:" , "" ) ;
719740 try {
720- // Kill the tmux session
741+ // Kill the tmux session (use execFileSync to avoid shell injection)
742+ let tmuxKilled = false ;
721743 try {
722- execSync ( `tmux kill-session -t "${ sessionName } " 2>/dev/null` , { timeout : 5000 } ) ;
723- } catch {
724- // May already be dead
744+ execFileSync ( "tmux" , [ "kill-session" , "-t" , sessionName ] , { timeout : 5000 } ) ;
745+ tmuxKilled = true ;
746+ } catch ( killErr : any ) {
747+ // "session not found" means it's already dead — that's fine
748+ const msg = killErr . message || String ( killErr ) ;
749+ if ( msg . includes ( "session not found" ) || msg . includes ( "can't find session" ) ) {
750+ tmuxKilled = true ; // Already gone — counts as success
751+ } else {
752+ // Real error (tmux daemon down, permissions, timeout, etc.)
753+ const entry : RecoveryAction = {
754+ timestamp : new Date ( ) . toISOString ( ) ,
755+ check : failure . name ,
756+ action : "orphan_cleanup" ,
757+ success : false ,
758+ detail : `Failed to kill tmux session "${ sessionName } ": ${ msg } ` ,
759+ } ;
760+ actions . push ( entry ) ;
761+ logRecovery ( entry ) ;
762+ continue ;
763+ }
725764 }
726765
727766 // Remove the stale alias
728767 const aliasPath = join ( SOCKET_DIR , `${ sessionName } .alias` ) ;
729768 if ( existsSync ( aliasPath ) ) {
730- const { unlinkSync } = require ( "node:fs" ) ;
731769 unlinkSync ( aliasPath ) ;
732770 }
733771
0 commit comments