Skip to content

Commit fcb95c8

Browse files
Fix Groot2Publisher destructor infinite loop (#1057) (#1100)
The destructor could hang indefinitely when the ZMQ server thread was waiting on recv() while active_server_ remained true. Changes: - Set active_server_=false before joining threads - Call zmq_context.shutdown() to interrupt blocking recv() - Add try-catch around ZMQ operations to handle context termination - Reorder destructor to remove hooks after threads are joined Includes tests for: - Destructor completion after exception - Destructor with multiple tree nodes attached - Rapid create/destroy cycles Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 2c71b41 commit fcb95c8

File tree

2 files changed

+88
-5
lines changed

2 files changed

+88
-5
lines changed

src/loggers/groot2_publisher.cpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,15 @@ std::chrono::milliseconds Groot2Publisher::maxHeartbeatDelay() const
183183

184184
Groot2Publisher::~Groot2Publisher()
185185
{
186-
removeAllHooks();
187-
186+
// First, signal threads to stop
188187
_p->active_server = false;
188+
189+
// Shutdown the ZMQ context to unblock any recv() calls immediately.
190+
// This prevents waiting for the recv timeout (100ms) before threads can exit.
191+
// Context shutdown will cause all blocking operations to return with ETERM error.
192+
_p->context.shutdown();
193+
194+
// Now join the threads - they should exit quickly
189195
if(_p->server_thread.joinable())
190196
{
191197
_p->server_thread.join();
@@ -196,6 +202,9 @@ Groot2Publisher::~Groot2Publisher()
196202
_p->heartbeat_thread.join();
197203
}
198204

205+
// Remove hooks after threads are stopped to avoid race conditions
206+
removeAllHooks();
207+
199208
flush();
200209

201210
{
@@ -260,9 +269,17 @@ void Groot2Publisher::serverLoop()
260269
while(_p->active_server)
261270
{
262271
zmq::multipart_t requestMsg;
263-
if(!requestMsg.recv(socket) || requestMsg.size() == 0)
272+
try
264273
{
265-
continue;
274+
if(!requestMsg.recv(socket) || requestMsg.size() == 0)
275+
{
276+
continue;
277+
}
278+
}
279+
catch(const zmq::error_t&)
280+
{
281+
// Context was terminated or socket error - exit the loop
282+
break;
266283
}
267284

268285
// this heartbeat will help establishing if Groot is connected or not
@@ -490,7 +507,15 @@ void Groot2Publisher::serverLoop()
490507
}
491508
}
492509
// send the reply
493-
reply_msg.send(socket);
510+
try
511+
{
512+
reply_msg.send(socket);
513+
}
514+
catch(const zmq::error_t&)
515+
{
516+
// Context was terminated or socket error - exit the loop
517+
break;
518+
}
494519
}
495520
}
496521

tests/gtest_groot2_publisher.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,17 @@ static const char* xml_text = R"(
1818
</root>
1919
)";
2020

21+
static const char* xml_text_sequence = R"(
22+
<root BTCPP_format="4">
23+
<BehaviorTree ID="MainTree">
24+
<Sequence>
25+
<AlwaysSuccess/>
26+
<ThrowRuntimeError/>
27+
</Sequence>
28+
</BehaviorTree>
29+
</root>
30+
)";
31+
2132
void throwRuntimeError()
2233
{
2334
BT::BehaviorTreeFactory factory;
@@ -46,3 +57,50 @@ TEST(Groot2PublisherTest, EnsureNoInfiniteLoopOnThrow)
4657
},
4758
::testing::ExitedWithCode(EXIT_SUCCESS), ".*");
4859
}
60+
61+
// Test that destructor completes quickly even after exception
62+
// This test runs multiple times to catch race conditions
63+
TEST(Groot2PublisherTest, DestructorCompletesAfterException)
64+
{
65+
for(int i = 0; i < 5; i++)
66+
{
67+
BT::BehaviorTreeFactory factory;
68+
factory.registerSimpleAction("ThrowRuntimeError",
69+
[](BT::TreeNode&) -> BT::NodeStatus {
70+
throw BT::RuntimeError("Test exception");
71+
});
72+
73+
auto tree = factory.createTreeFromText(xml_text);
74+
BT::Groot2Publisher publisher(tree, 1667 + i * 2);
75+
EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError);
76+
}
77+
}
78+
79+
// Test that destructor completes quickly when tree has multiple nodes
80+
TEST(Groot2PublisherTest, DestructorCompletesWithMultipleNodes)
81+
{
82+
BT::BehaviorTreeFactory factory;
83+
factory.registerSimpleAction("ThrowRuntimeError", [](BT::TreeNode&) -> BT::NodeStatus {
84+
throw BT::RuntimeError("Test exception in sequence");
85+
});
86+
87+
auto tree = factory.createTreeFromText(xml_text_sequence);
88+
BT::Groot2Publisher publisher(tree, 1677);
89+
EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError);
90+
}
91+
92+
// Test rapid creation and destruction of publishers
93+
TEST(Groot2PublisherTest, RapidCreateDestroy)
94+
{
95+
for(int i = 0; i < 3; i++)
96+
{
97+
BT::BehaviorTreeFactory factory;
98+
factory.registerSimpleAction(
99+
"ThrowRuntimeError",
100+
[](BT::TreeNode&) -> BT::NodeStatus { throw BT::RuntimeError("Rapid test"); });
101+
102+
auto tree = factory.createTreeFromText(xml_text);
103+
BT::Groot2Publisher publisher(tree, 1687 + i * 2);
104+
EXPECT_THROW(tree.tickExactlyOnce(), BT::RuntimeError);
105+
}
106+
}

0 commit comments

Comments
 (0)