@@ -123,6 +123,8 @@ enum ErrorCode {
123123 Consumed ,
124124 /// Internal / unexpected failure (lock poison, task join error, etc.).
125125 Internal ,
126+ /// Reentrant call detected — calling sandbox methods from a host callback.
127+ Reentrant ,
126128}
127129
128130impl ErrorCode {
@@ -135,6 +137,7 @@ impl ErrorCode {
135137 Self :: InvalidArg => "ERR_INVALID_ARG" ,
136138 Self :: Consumed => "ERR_CONSUMED" ,
137139 Self :: Internal => "ERR_INTERNAL" ,
140+ Self :: Reentrant => "ERR_REENTRANT" ,
138141 }
139142 }
140143}
@@ -230,6 +233,15 @@ fn join_error(err: tokio::task::JoinError) -> napi::Error {
230233 )
231234}
232235
236+ /// Creates an error for reentrant calls detected at runtime.
237+ fn reentrant_error ( ) -> napi:: Error {
238+ hl_error (
239+ ErrorCode :: Reentrant ,
240+ "Cannot call sandbox methods from a host callback — the sandbox lock is held during \
241+ guest execution. Perform this operation after callHandler() resolves instead",
242+ )
243+ }
244+
233245// ── Snapshot ─────────────────────────────────────────────────────────
234246
235247/// A captured point-in-time state of a sandbox.
@@ -435,9 +447,10 @@ impl SandboxBuilderWrapper {
435447 //
436448 // **Reentrancy note:** The print callback runs while the sandbox
437449 // Mutex is held (inside `call_handler`'s `spawn_blocking`).
438- // Calling Hyperlight APIs that acquire the same lock from within
439- // the callback (e.g. `snapshot()`, `restore()`, `unload()`) will
440- // deadlock. Keep print callbacks simple — logging only.
450+ // If user code in the callback attempts to call methods that
451+ // acquire the same lock (e.g. `snapshot()`, `restore()`,
452+ // `unload()`, `callHandler()`), the `executing_flag` deadlock
453+ // detection will return `ERR_REENTRANT` instead of hanging.
441454 let print_fn = move |msg : String | -> i32 {
442455 let status = callback. call ( msg, ThreadsafeFunctionCallMode :: Blocking ) ;
443456 if status == Status :: Ok {
@@ -853,6 +866,7 @@ impl JSSandboxWrapper {
853866 poisoned_flag,
854867 last_call_stats : Arc :: new ( ArcSwapOption :: empty ( ) ) ,
855868 disposed_flag : Arc :: new ( AtomicBool :: new ( false ) ) ,
869+ executing_flag : Arc :: new ( AtomicBool :: new ( false ) ) ,
856870 } )
857871 }
858872
@@ -935,17 +949,37 @@ pub struct LoadedJSSandboxWrapper {
935949 /// `unload()`), for lock-free checks in sync getters that don't
936950 /// consult the inner Mutex.
937951 disposed_flag : Arc < AtomicBool > ,
952+
953+ /// Tracks whether guest code is currently executing inside `call_handler`.
954+ ///
955+ /// Used for deadlock detection: if a host callback (print fn, host function)
956+ /// tries to call a method that needs the inner Mutex while this is `true`,
957+ /// we return `ERR_REENTRANT` immediately instead of deadlocking.
958+ executing_flag : Arc < AtomicBool > ,
938959}
939960
940961type LoadedJSSandboxGuard = OwnedMappedMutexGuard < Option < LoadedJSSandbox > , LoadedJSSandbox > ;
941962
942963impl LoadedJSSandboxWrapper {
943964 /// Borrow the inner value mutably via Mutex, or error if consumed.
965+ ///
966+ /// Performs deadlock detection: if `executing_flag` is set (meaning we're
967+ /// inside a `call_handler` on a background thread), `try_lock` is attempted
968+ /// first. If it fails, we know this is a reentrant call from a host callback
969+ /// and return `ERR_REENTRANT` immediately instead of deadlocking.
944970 async fn with_inner < R > (
945971 & self ,
946972 f : impl AsyncFnOnce ( LoadedJSSandboxGuard ) -> napi:: Result < R > ,
947973 ) -> napi:: Result < R > {
948- let sandbox = self . inner . clone ( ) . lock_owned ( ) . await ;
974+ let sandbox = if self . executing_flag . load ( Ordering :: Acquire ) {
975+ // We're inside a host callback — try_lock to detect reentrancy.
976+ match self . inner . clone ( ) . try_lock_owned ( ) {
977+ Ok ( guard) => guard,
978+ Err ( _) => return Err ( reentrant_error ( ) ) ,
979+ }
980+ } else {
981+ self . inner . clone ( ) . lock_owned ( ) . await
982+ } ;
949983 let sandbox = OwnedMutexGuard :: try_map ( sandbox, Option :: as_mut)
950984 . map_err ( |_| consumed_error ( "LoadedJSSandbox" ) ) ?;
951985 f ( sandbox) . await
@@ -954,30 +988,42 @@ impl LoadedJSSandboxWrapper {
954988 /// Borrow the inner value mutably via Mutex, or error if consumed.
955989 /// The closure `f` will run using spawn_blocking, so it can perform long-running operations without
956990 /// blocking the Node.js event loop. This is the main way to interact with the inner `LoadedJSSandbox`.
991+ ///
992+ /// Sets `executing_flag` for the duration of the blocking closure so that
993+ /// reentrant calls from host callbacks are detected instead of deadlocking.
957994 async fn with_blocking_inner < R : Send + ' static > (
958995 & self ,
959996 f : impl FnOnce ( LoadedJSSandboxGuard ) -> napi:: Result < R > + Send + ' static ,
960997 ) -> napi:: Result < R > {
998+ let executing_flag = self . executing_flag . clone ( ) ;
961999 self . with_inner ( async move |sandbox| {
962- tokio:: task:: spawn_blocking ( move || f ( sandbox) )
1000+ executing_flag. store ( true , Ordering :: Release ) ;
1001+ let result = tokio:: task:: spawn_blocking ( move || f ( sandbox) )
9631002 . await
964- . map_err ( join_error) ?
1003+ . map_err ( join_error) ?;
1004+ executing_flag. store ( false , Ordering :: Release ) ;
1005+ result
9651006 } )
9661007 . await
9671008 }
9681009
9691010 /// Take ownership of the inner value, returning a consumed-state error if
9701011 /// this instance has already been used.
1012+ ///
1013+ /// Performs the same deadlock detection as `with_inner`.
9711014 async fn take_inner_with < R > (
9721015 & self ,
9731016 f : impl AsyncFnOnce ( LoadedJSSandbox ) -> napi:: Result < R > ,
9741017 ) -> napi:: Result < R > {
975- let sandbox = self
976- . inner
977- . lock ( )
978- . await
979- . take ( )
980- . ok_or_else ( || consumed_error ( "LoadedJSSandbox" ) ) ?;
1018+ let sandbox = if self . executing_flag . load ( Ordering :: Acquire ) {
1019+ match self . inner . try_lock ( ) {
1020+ Ok ( mut guard) => guard. take ( ) ,
1021+ Err ( _) => return Err ( reentrant_error ( ) ) ,
1022+ }
1023+ } else {
1024+ self . inner . lock ( ) . await . take ( )
1025+ }
1026+ . ok_or_else ( || consumed_error ( "LoadedJSSandbox" ) ) ?;
9811027 self . disposed_flag . store ( true , Ordering :: Release ) ;
9821028 f ( sandbox) . await
9831029 }
@@ -986,14 +1032,20 @@ impl LoadedJSSandboxWrapper {
9861032 /// this instance has already been used.
9871033 /// The closure `f` will run using spawn_blocking, so it can perform long-running operations without
9881034 /// blocking the Node.js event loop. This is the main way to interact with the inner `LoadedJSSandbox`.
1035+ ///
1036+ /// Sets `executing_flag` for the duration of the blocking closure.
9891037 async fn take_blocking_inner_with < R : Send + ' static > (
9901038 & self ,
9911039 f : impl FnOnce ( LoadedJSSandbox ) -> napi:: Result < R > + Send + ' static ,
9921040 ) -> napi:: Result < R > {
1041+ let executing_flag = self . executing_flag . clone ( ) ;
9931042 self . take_inner_with ( async move |sandbox| {
994- tokio:: task:: spawn_blocking ( move || f ( sandbox) )
1043+ executing_flag. store ( true , Ordering :: Release ) ;
1044+ let result = tokio:: task:: spawn_blocking ( move || f ( sandbox) )
9951045 . await
996- . map_err ( join_error) ?
1046+ . map_err ( join_error) ?;
1047+ executing_flag. store ( false , Ordering :: Release ) ;
1048+ result
9971049 } )
9981050 . await
9991051 }
0 commit comments