Skip to content

Commit 4b9b1ee

Browse files
committed
Update
1 parent 9ec5aed commit 4b9b1ee

1 file changed

Lines changed: 10 additions & 3 deletions

File tree

src/pb_stub.cc

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1496,8 +1496,16 @@ Stub::GetCUDAMemoryPoolAddress(std::unique_ptr<IPCMessage>& ipc_message)
14961496
*(ipc_message->ResponseMutex())};
14971497
cuda_pool_message_ptr->waiting_on_stub = true;
14981498
ipc_message->ResponseCondition()->notify_all();
1499-
while (cuda_pool_message_ptr->waiting_on_stub) {
1500-
ipc_message->ResponseCondition()->wait(lock);
1499+
// This handler runs on the single ParentToStubMQMonitor thread,
1500+
// which is also the only thread that delivers decoupled BLS responses,
1501+
// so it must not block on the success path.
1502+
// It should only wait when an error message has been written to
1503+
// error_string_shm, so the parent can finish reading it before
1504+
// this function returns and frees that shared memory.
1505+
if (has_exception) {
1506+
while (cuda_pool_message_ptr->waiting_on_stub) {
1507+
ipc_message->ResponseCondition()->wait(lock);
1508+
}
15011509
}
15021510
}
15031511
#endif
@@ -1849,7 +1857,6 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
18491857
auto stub = Stub::GetOrCreateInstance();
18501858
py::object loop =
18511859
py::module_::import("asyncio").attr("get_running_loop")();
1852-
// Capture 'stub' by value (it is a shared_ptr).
18531860
py::cpp_function callback = [stub, infer_request, decoupled]() {
18541861
std::shared_ptr<InferResponse> response =
18551862
infer_request->Exec(decoupled);

0 commit comments

Comments
 (0)