@@ -36,6 +36,7 @@ const POLL_INTERVAL_MS = parseInt(process.env.SLACK_BROKER_POLL_INTERVAL_MS || "
3636const MAX_MESSAGES = parseInt ( process . env . SLACK_BROKER_MAX_MESSAGES || "10" , 10 ) ;
3737const DEDUPE_TTL_MS = parseInt ( process . env . SLACK_BROKER_DEDUPE_TTL_MS || String ( 20 * 60 * 1000 ) , 10 ) ;
3838const MAX_BACKOFF_MS = 30_000 ;
39+ const BROKER_HEALTH_PATH = path . join ( homedir ( ) , ".pi" , "agent" , "broker-health.json" ) ;
3940
4041function ts ( ) {
4142 return new Date ( ) . toISOString ( ) ;
@@ -95,6 +96,114 @@ let cryptoState = null;
9596
9697const dedupe = new Map ( ) ;
9798
99+ const brokerHealth = {
100+ started_at : new Date ( ) . toISOString ( ) ,
101+ updated_at : new Date ( ) . toISOString ( ) ,
102+ outbound_mode : outboundMode ,
103+ broker_url : brokerBaseUrl ,
104+ workspace_id : workspaceId ,
105+ poll : {
106+ last_ok_at : null ,
107+ last_error_at : null ,
108+ consecutive_failures : 0 ,
109+ last_error : null ,
110+ } ,
111+ inbound : {
112+ last_decrypt_ok_at : null ,
113+ last_decrypt_error_at : null ,
114+ last_process_ok_at : null ,
115+ last_process_error_at : null ,
116+ last_error : null ,
117+ } ,
118+ ack : {
119+ last_ok_at : null ,
120+ last_error_at : null ,
121+ last_error : null ,
122+ } ,
123+ outbound : {
124+ last_ok_at : null ,
125+ last_error_at : null ,
126+ last_error : null ,
127+ } ,
128+ } ;
129+
130+ function trimError ( err ) {
131+ const msg = err instanceof Error ? err . message : String ( err || "unknown error" ) ;
132+ return msg . slice ( 0 , 400 ) ;
133+ }
134+
135+ function persistBrokerHealth ( ) {
136+ brokerHealth . updated_at = new Date ( ) . toISOString ( ) ;
137+ const dir = path . dirname ( BROKER_HEALTH_PATH ) ;
138+ const tmp = `${ BROKER_HEALTH_PATH } .tmp` ;
139+ fs . mkdirSync ( dir , { recursive : true } ) ;
140+ fs . writeFileSync ( tmp , `${ JSON . stringify ( brokerHealth , null , 2 ) } \n` , { mode : 0o600 } ) ;
141+ fs . renameSync ( tmp , BROKER_HEALTH_PATH ) ;
142+ }
143+
144+ function markHealth ( section , ok , err = null ) {
145+ const now = new Date ( ) . toISOString ( ) ;
146+
147+ if ( section === "poll" ) {
148+ if ( ok ) {
149+ brokerHealth . poll . last_ok_at = now ;
150+ brokerHealth . poll . consecutive_failures = 0 ;
151+ brokerHealth . poll . last_error = null ;
152+ } else {
153+ brokerHealth . poll . last_error_at = now ;
154+ brokerHealth . poll . consecutive_failures += 1 ;
155+ brokerHealth . poll . last_error = trimError ( err ) ;
156+ }
157+ persistBrokerHealth ( ) ;
158+ return ;
159+ }
160+
161+ if ( section === "inbound_decrypt" ) {
162+ if ( ok ) {
163+ brokerHealth . inbound . last_decrypt_ok_at = now ;
164+ } else {
165+ brokerHealth . inbound . last_decrypt_error_at = now ;
166+ brokerHealth . inbound . last_error = trimError ( err ) ;
167+ }
168+ persistBrokerHealth ( ) ;
169+ return ;
170+ }
171+
172+ if ( section === "inbound_process" ) {
173+ if ( ok ) {
174+ brokerHealth . inbound . last_process_ok_at = now ;
175+ } else {
176+ brokerHealth . inbound . last_process_error_at = now ;
177+ brokerHealth . inbound . last_error = trimError ( err ) ;
178+ }
179+ persistBrokerHealth ( ) ;
180+ return ;
181+ }
182+
183+ if ( section === "ack" ) {
184+ if ( ok ) {
185+ brokerHealth . ack . last_ok_at = now ;
186+ brokerHealth . ack . last_error = null ;
187+ } else {
188+ brokerHealth . ack . last_error_at = now ;
189+ brokerHealth . ack . last_error = trimError ( err ) ;
190+ }
191+ persistBrokerHealth ( ) ;
192+ return ;
193+ }
194+
195+ if ( section === "outbound" ) {
196+ if ( ok ) {
197+ brokerHealth . outbound . last_ok_at = now ;
198+ brokerHealth . outbound . last_error = null ;
199+ } else {
200+ brokerHealth . outbound . last_error_at = now ;
201+ brokerHealth . outbound . last_error = trimError ( err ) ;
202+ }
203+ persistBrokerHealth ( ) ;
204+ }
205+ }
206+
98207function toBase64 ( bytes ) {
99208 return Buffer . from ( bytes ) . toString ( "base64" ) ;
100209}
@@ -329,15 +438,22 @@ async function sendViaBroker({ action, routing, body }) {
329438 const sig = sodium . crypto_sign_detached ( canonical , cryptoState . serverSignSecretKey ) ;
330439 const signature = toBase64 ( sig ) ;
331440
332- return brokerFetch ( "/api/send" , {
333- workspace_id : workspaceId ,
334- action,
335- routing,
336- encrypted_body : encryptedBody ,
337- nonce : nonceB64 ,
338- timestamp,
339- signature,
340- } ) ;
441+ try {
442+ const result = await brokerFetch ( "/api/send" , {
443+ workspace_id : workspaceId ,
444+ action,
445+ routing,
446+ encrypted_body : encryptedBody ,
447+ nonce : nonceB64 ,
448+ timestamp,
449+ signature,
450+ } ) ;
451+ markHealth ( "outbound" , true ) ;
452+ return result ;
453+ } catch ( err ) {
454+ markHealth ( "outbound" , false , err ) ;
455+ throw err ;
456+ }
341457}
342458
343459/**
@@ -386,11 +502,13 @@ async function sendDirectToSlack(apiMethod, params) {
386502 const error = data . error || response . statusText ;
387503 throw new Error ( `Slack API ${ apiMethod } failed: ${ sanitizeError ( error ) } ` ) ;
388504 }
389-
505+
506+ markHealth ( "outbound" , true ) ;
390507 return data ;
391508 } catch ( err ) {
392509 // Sanitize any error messages to prevent token leakage
393510 const sanitizedMessage = sanitizeError ( err . message || String ( err ) ) ;
511+ markHealth ( "outbound" , false , sanitizedMessage ) ;
394512 throw new Error ( sanitizedMessage ) ;
395513 }
396514}
@@ -519,7 +637,15 @@ async function processPulledMessage(message) {
519637 throw new Error ( "invalid broker envelope signature" ) ;
520638 }
521639
522- const payload = decryptEnvelope ( message ) ;
640+ let payload ;
641+ try {
642+ payload = decryptEnvelope ( message ) ;
643+ markHealth ( "inbound_decrypt" , true ) ;
644+ } catch ( err ) {
645+ markHealth ( "inbound_decrypt" , false , err ) ;
646+ throw err ;
647+ }
648+
523649 logInfo ( `📦 decrypted envelope — type: ${ payload ?. type || "unknown" } ` ) ;
524650
525651 if ( payload ?. type !== "event_callback" ) {
@@ -723,6 +849,7 @@ async function startPollLoop() {
723849 pruneDedupe ( ) ;
724850
725851 const messages = await pullInbox ( ) ;
852+ markHealth ( "poll" , true ) ;
726853 pollCount ++ ;
727854 const ackIds = [ ] ;
728855
@@ -756,13 +883,15 @@ async function startPollLoop() {
756883 logInfo ( `📩 processing message ${ message . message_id } ` ) ;
757884 const ok = await processPulledMessage ( message ) ;
758885 if ( ok ) {
886+ markHealth ( "inbound_process" , true ) ;
759887 dedupe . set ( message . message_id , Date . now ( ) + DEDUPE_TTL_MS ) ;
760888 ackIds . push ( message . message_id ) ;
761889 logInfo ( `✅ processed & acked message ${ message . message_id } ` ) ;
762890 } else {
763891 logWarn ( `⚠️ message ${ message . message_id } returned not-ok, will retry next poll` ) ;
764892 }
765893 } catch ( err ) {
894+ markHealth ( "inbound_process" , false , err ) ;
766895 const errMsg = err instanceof Error ? err . message : "unknown error" ;
767896 const errStack = err instanceof Error ? err . stack : "" ;
768897 logError ( `❌ message processing failed (${ message . message_id } ): ${ errMsg } ` ) ;
@@ -777,13 +906,20 @@ async function startPollLoop() {
777906 }
778907
779908 if ( ackIds . length > 0 ) {
780- await ackInbox ( ackIds ) ;
781- logInfo ( `📤 acked ${ ackIds . length } message(s)` ) ;
909+ try {
910+ await ackInbox ( ackIds ) ;
911+ markHealth ( "ack" , true ) ;
912+ logInfo ( `📤 acked ${ ackIds . length } message(s)` ) ;
913+ } catch ( err ) {
914+ markHealth ( "ack" , false , err ) ;
915+ throw err ;
916+ }
782917 }
783918
784919 backoffMs = POLL_INTERVAL_MS ;
785920 await sleep ( POLL_INTERVAL_MS ) ;
786921 } catch ( err ) {
922+ markHealth ( "poll" , false , err ) ;
787923 const errMsg = err instanceof Error ? err . message : "unknown error" ;
788924 const errStack = err instanceof Error ? err . stack : "" ;
789925 logError ( `❌ inbox poll failed: ${ errMsg } ` ) ;
@@ -811,6 +947,7 @@ async function startPollLoop() {
811947
812948 refreshSocket ( ) ;
813949 startApiServer ( ) ;
950+ persistBrokerHealth ( ) ;
814951 logInfo ( "⚡ Slack broker pull bridge is running!" ) ;
815952 logInfo ( ` outbound mode: ${ outboundMode } ${ outboundMode === "direct" ? "(using SLACK_BOT_TOKEN)" : "(via broker)" } ` ) ;
816953 logInfo ( ` broker: ${ brokerBaseUrl } ` ) ;
0 commit comments