2525import type { ExtensionAPI } from "@mariozechner/pi-coding-agent" ;
2626import { Type } from "@sinclair/typebox" ;
2727import { StringEnum } from "@mariozechner/pi-ai" ;
28- import { appendFileSync , existsSync , mkdirSync , readdirSync , readFileSync , statSync } from "node:fs" ;
28+ import { appendFileSync , existsSync , mkdirSync , readdirSync , readFileSync , readlinkSync , statSync , unlinkSync } from "node:fs" ;
29+ import { execFileSync , execSync } from "node:child_process" ;
2930import { homedir } from "node:os" ;
3031import { dirname , join } from "node:path" ;
3132import { discoverSubagentPackages , readSubagentState , resolveEffectiveState } from "./subagent-registry.ts" ;
@@ -127,7 +128,6 @@ function checkSessions(): CheckResult[] {
127128
128129 // Check that the symlink target (.sock file) exists
129130 try {
130- const { readlinkSync } = require ( "node:fs" ) ;
131131 const target = readlinkSync ( aliasPath ) ;
132132 const sockPath = join ( SOCKET_DIR , target ) ;
133133 if ( ! existsSync ( sockPath ) ) {
@@ -348,8 +348,7 @@ function checkUnansweredMentions(): CheckResult[] {
348348 // Support both bridge implementations:
349349 // - broker-bridge.mjs: "... (type: app_mention, ts: 1234.5678)"
350350 // - bridge.mjs: "app_mention ... ts: 1234.5678"
351- const { execSync } = require ( "node:child_process" ) ;
352- const logTail = execSync ( `tail -500 "${ bridgeLogPath } "` , { encoding : "utf-8" } ) ;
351+ const logTail = execFileSync ( "tail" , [ "-500" , bridgeLogPath ] , { encoding : "utf-8" } ) ;
353352
354353 const mentionThreadTsSet = new Set < string > ( extractMentionThreadTs ( logTail ) ) ;
355354
@@ -592,7 +591,6 @@ function logRecovery(entry: RecoveryAction): void {
592591 */
593592async function tryAutoRecover ( failures : CheckResult [ ] ) : Promise < RecoveryAction [ ] > {
594593 const actions : RecoveryAction [ ] = [ ] ;
595- const { execSync } = require ( "node:child_process" ) ;
596594
597595 for ( const failure of failures ) {
598596 // Auto-recover: bridge down → restart the bridge tmux session
@@ -613,7 +611,7 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
613611
614612 // Kill existing bridge tmux session
615613 try {
616- execSync ( ' tmux kill-session -t baudbot-gateway-bridge 2>/dev/null' , { timeout : 5000 } ) ;
614+ execFileSync ( " tmux" , [ " kill-session" , "-t" , " baudbot-gateway-bridge" ] , { timeout : 5000 } ) ;
617615 } catch {
618616 // May not exist — that's fine
619617 }
@@ -627,33 +625,56 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
627625
628626 // Restart via startup script
629627 const startupScript = join ( homedir ( ) , ".pi" , "agent" , "skills" , "control-agent" , "startup-pi.sh" ) ;
630- if ( existsSync ( startupScript ) ) {
631- // Get live session UUIDs from session-control dir
632- const sockFiles = readdirSync ( SOCKET_DIR ) . filter ( ( f ) => f . endsWith ( ".sock" ) ) ;
633- const uuids = sockFiles . map ( ( f ) => f . replace ( ".sock" , "" ) ) . join ( " " ) ;
634- if ( uuids ) {
635- execSync ( `bash "${ startupScript } " ${ uuids } 2>&1` , {
636- timeout : 30000 ,
637- encoding : "utf-8" ,
638- } ) ;
639-
640- // Verify bridge came back
641- await new Promise ( ( resolve ) => setTimeout ( resolve , 3000 ) ) ;
642- const verifyResult = await checkBridge ( ) ;
628+ if ( ! existsSync ( startupScript ) ) {
629+ const entry : RecoveryAction = {
630+ timestamp : new Date ( ) . toISOString ( ) ,
631+ check : failure . name ,
632+ action : "bridge_restart" ,
633+ success : false ,
634+ detail : `Bridge session killed but startup script not found at ${ startupScript } — cannot restart` ,
635+ } ;
636+ actions . push ( entry ) ;
637+ logRecovery ( entry ) ;
638+ continue ;
639+ }
643640
644- const entry : RecoveryAction = {
645- timestamp : new Date ( ) . toISOString ( ) ,
646- check : failure . name ,
647- action : "bridge_restart" ,
648- success : verifyResult . ok ,
649- detail : verifyResult . ok
650- ? "Bridge restarted and verified healthy"
651- : `Bridge restart attempted but still failing: ${ verifyResult . detail } ` ,
652- } ;
653- actions . push ( entry ) ;
654- logRecovery ( entry ) ;
655- }
641+ // Get live session UUIDs from session-control dir
642+ const sockFiles = readdirSync ( SOCKET_DIR ) . filter ( ( f ) => f . endsWith ( ".sock" ) ) ;
643+ const uuids = sockFiles . map ( ( f ) => f . replace ( ".sock" , "" ) ) ;
644+ if ( uuids . length === 0 ) {
645+ const entry : RecoveryAction = {
646+ timestamp : new Date ( ) . toISOString ( ) ,
647+ check : failure . name ,
648+ action : "bridge_restart" ,
649+ success : false ,
650+ detail : "Bridge session killed but no live socket UUIDs found — cannot restart" ,
651+ } ;
652+ actions . push ( entry ) ;
653+ logRecovery ( entry ) ;
654+ continue ;
656655 }
656+
657+ // Pass UUIDs as separate args to avoid shell injection
658+ execFileSync ( "bash" , [ startupScript , ...uuids ] , {
659+ timeout : 30000 ,
660+ encoding : "utf-8" ,
661+ } ) ;
662+
663+ // Verify bridge came back
664+ await new Promise ( ( resolve ) => setTimeout ( resolve , 3000 ) ) ;
665+ const verifyResult = await checkBridge ( ) ;
666+
667+ const entry : RecoveryAction = {
668+ timestamp : new Date ( ) . toISOString ( ) ,
669+ check : failure . name ,
670+ action : "bridge_restart" ,
671+ success : verifyResult . ok ,
672+ detail : verifyResult . ok
673+ ? "Bridge restarted and verified healthy"
674+ : `Bridge restart attempted but still failing: ${ verifyResult . detail } ` ,
675+ } ;
676+ actions . push ( entry ) ;
677+ logRecovery ( entry ) ;
657678 } catch ( err : any ) {
658679 const entry : RecoveryAction = {
659680 timestamp : new Date ( ) . toISOString ( ) ,
@@ -671,17 +692,34 @@ async function tryAutoRecover(failures: CheckResult[]): Promise<RecoveryAction[]
671692 if ( failure . name . startsWith ( "orphan:" ) ) {
672693 const sessionName = failure . name . replace ( "orphan:" , "" ) ;
673694 try {
674- // Kill the tmux session
695+ // Kill the tmux session (use execFileSync to avoid shell injection)
696+ let tmuxKilled = false ;
675697 try {
676- execSync ( `tmux kill-session -t "${ sessionName } " 2>/dev/null` , { timeout : 5000 } ) ;
677- } catch {
678- // May already be dead
698+ execFileSync ( "tmux" , [ "kill-session" , "-t" , sessionName ] , { timeout : 5000 } ) ;
699+ tmuxKilled = true ;
700+ } catch ( killErr : any ) {
701+ // "session not found" means it's already dead — that's fine
702+ const msg = killErr . message || String ( killErr ) ;
703+ if ( msg . includes ( "session not found" ) || msg . includes ( "can't find session" ) ) {
704+ tmuxKilled = true ; // Already gone — counts as success
705+ } else {
706+ // Real error (tmux daemon down, permissions, timeout, etc.)
707+ const entry : RecoveryAction = {
708+ timestamp : new Date ( ) . toISOString ( ) ,
709+ check : failure . name ,
710+ action : "orphan_cleanup" ,
711+ success : false ,
712+ detail : `Failed to kill tmux session "${ sessionName } ": ${ msg } ` ,
713+ } ;
714+ actions . push ( entry ) ;
715+ logRecovery ( entry ) ;
716+ continue ;
717+ }
679718 }
680719
681720 // Remove the stale alias
682721 const aliasPath = join ( SOCKET_DIR , `${ sessionName } .alias` ) ;
683722 if ( existsSync ( aliasPath ) ) {
684- const { unlinkSync } = require ( "node:fs" ) ;
685723 unlinkSync ( aliasPath ) ;
686724 }
687725
0 commit comments