Skip to content

Commit 5b792e3

Browse files
author
gophergogo
committed
Cover ConnectionPoolImpl timeout timer against stack-capture UAF (#222)
Drive the pool's timeout path with real IO: bind to TEST-NET-1 (192.0.2.1:1, RFC 5737 non-routable) and set a short 150ms connection_timeout so the timer fires long before the kernel's SYN retransmit budget gives up. On the pre-fix code this segfaults in the timer callback (the lambda dereferences a destroyed stack slot); with the fix applied the dispatcher delivers onPoolFailure(Timeout) cleanly in ~150ms. Use RealIoTestBase so pool construction, newConnection, and teardown all run inside the dispatcher thread — the only context libevent and ConnectionImpl will accept for creating file events and timers.
1 parent 20d6d99 commit 5b792e3

2 files changed

Lines changed: 183 additions & 0 deletions

File tree

tests/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ add_executable(test_filter_chain_state_machine network/test_filter_chain_state_m
168168
# Real IO integration tests (using real sockets instead of mocks)
169169
add_executable(test_event_loop_real_io event/test_event_loop_real_io.cc)
170170
add_executable(test_connection_manager_real_io network/test_connection_manager_real_io.cc)
171+
add_executable(test_connection_pool_pending_timeout network/test_connection_pool_pending_timeout.cc)
171172
add_executable(test_event_handling integration/test_event_handling.cc)
172173

173174
# Link libraries
@@ -1271,6 +1272,14 @@ target_link_libraries(test_connection_manager_real_io
12711272
Threads::Threads
12721273
)
12731274

1275+
target_link_libraries(test_connection_pool_pending_timeout
1276+
gopher-mcp
1277+
gopher-mcp-event
1278+
gtest
1279+
gtest_main
1280+
Threads::Threads
1281+
)
1282+
12741283
target_link_libraries(test_event_handling
12751284
gopher-mcp
12761285
gopher-mcp-event
@@ -1379,6 +1388,7 @@ add_test(NAME FilterChainStateMachineTest COMMAND test_filter_chain_state_machin
13791388
# Real IO integration tests
13801389
add_test(NAME EventLoopRealIoTest COMMAND test_event_loop_real_io)
13811390
add_test(NAME ConnectionManagerRealIoTest COMMAND test_connection_manager_real_io)
1391+
add_test(NAME ConnectionPoolPendingTimeoutTest COMMAND test_connection_pool_pending_timeout)
13821392
add_test(NAME EventHandlingTest COMMAND test_event_handling)
13831393

13841394
# Filter state machine tests
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
/**
2+
* Regression test for ConnectionPoolImpl::createNewConnection lifetime bug.
3+
*
4+
* Prior code constructed a PendingConnection on the caller's stack, set up a
5+
* timeout Timer whose callback captured `&pending` by reference, and only
6+
* afterwards std::move-pushed the PendingConnection onto pending_connections_.
7+
* The captured pointer therefore pointed at a destroyed stack slot by the time
8+
* the timer fired, giving a classic use-after-free on the timeout path.
9+
*
10+
* The companion cleanup in onConnectionTimeout used the non-member
11+
* std::remove_if + list::erase idiom, which moves list elements to compact
12+
* them — moving out from under a PendingConnection whose timer lambda is
13+
* currently executing. std::list::remove_if (member) unlinks nodes in place
14+
* and is the correct tool.
15+
*
16+
* This test drives the timeout path with real IO: a real dispatcher thread,
17+
* a real socket, and a connect() to TEST-NET-1 (192.0.2.0/24, RFC 5737) so
18+
* the SYN has nowhere to go. The short pool-level connection_timeout fires
19+
* long before the kernel's SYN retransmit budget, exercising exactly the
20+
* path that used to touch the destroyed stack slot. Running under ASAN
21+
 * (-DENABLE_ASAN=ON) is the second half of this regression net — the plain
22+
* build can miss a UAF whose reused stack memory happens to still hold
23+
* plausible-looking bytes.
24+
*/
25+
26+
#include <atomic>
#include <chrono>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "mcp/network/address.h"
#include "mcp/network/connection_manager.h"
#include "mcp/network/socket_interface.h"
#include "mcp/network/transport_socket.h"

#include "../integration/real_io_test_base.h"
37+
38+
namespace mcp {
39+
namespace network {
40+
namespace {
41+
42+
// Minimal client transport factory. The pool test only exercises the
43+
// SYN-is-pending window — no bytes are ever read or written — so the
44+
// transport can be a pure stub.
45+
class StubClientTransportSocketFactory : public UniversalTransportSocketFactory {
46+
public:
47+
bool implementsSecureTransport() const override { return false; }
48+
std::string name() const override { return "stub-client"; }
49+
50+
TransportSocketPtr createTransportSocket(
51+
TransportSocketOptionsSharedPtr /*options*/) const override {
52+
return std::make_unique<StubTransportSocket>();
53+
}
54+
55+
void hashKey(std::vector<uint8_t>& key,
56+
TransportSocketOptionsSharedPtr /*options*/) const override {
57+
const std::string tag = "stub-client";
58+
key.insert(key.end(), tag.begin(), tag.end());
59+
}
60+
61+
TransportSocketPtr createTransportSocket() const override {
62+
return createTransportSocket(nullptr);
63+
}
64+
65+
private:
66+
class StubTransportSocket : public TransportSocket {
67+
public:
68+
void setTransportSocketCallbacks(
69+
TransportSocketCallbacks& callbacks) override {
70+
callbacks_ = &callbacks;
71+
}
72+
std::string protocol() const override { return "stub"; }
73+
std::string failureReason() const override { return ""; }
74+
bool canFlushClose() override { return true; }
75+
VoidResult connect(Socket&) override { return makeVoidSuccess(); }
76+
void closeSocket(ConnectionEvent) override {}
77+
TransportIoResult doRead(Buffer&) override {
78+
return TransportIoResult::success(0);
79+
}
80+
TransportIoResult doWrite(Buffer&, bool) override {
81+
return TransportIoResult::success(0);
82+
}
83+
void onConnected() override {}
84+
85+
TransportSocketCallbacks* callbacks_{nullptr};
86+
};
87+
};
88+
89+
// Records pool-level outcomes. Counters are touched only from the dispatcher
90+
// thread; the test polls them from the main thread via waitFor() on atomics.
91+
class RecordingPoolCallbacks : public ConnectionPool::Callbacks {
92+
public:
93+
void onPoolReady(ClientConnection&, const std::string&) override {
94+
ready_called_.fetch_add(1, std::memory_order_relaxed);
95+
}
96+
97+
void onPoolFailure(ConnectionPool::PoolFailureReason reason,
98+
const std::string&) override {
99+
last_reason_.store(static_cast<int>(reason), std::memory_order_relaxed);
100+
failure_called_.fetch_add(1, std::memory_order_release);
101+
}
102+
103+
std::atomic<int> ready_called_{0};
104+
std::atomic<int> failure_called_{0};
105+
std::atomic<int> last_reason_{-1};
106+
};
107+
108+
// Fixture: RealIoTestBase provides the live dispatcher thread plus the
// executeInDispatcher()/waitFor() helpers the test below relies on.
class ConnectionPoolPendingTimeoutTest : public mcp::test::RealIoTestBase {};
109+
110+
// Exercises the timeout path that previously used-after-free'd a stack-local
111+
// PendingConnection. With the fix, the timer callback dereferences a stable
112+
// list-element address and onPoolFailure(Timeout) is delivered cleanly.
113+
TEST_F(ConnectionPoolPendingTimeoutTest, TimeoutFiresCleanly) {
114+
RecordingPoolCallbacks callbacks;
115+
std::unique_ptr<ConnectionManagerImpl> manager;
116+
std::unique_ptr<ConnectionPoolImpl> pool;
117+
118+
executeInDispatcher([&]() {
119+
ConnectionManagerConfig config;
120+
config.max_connections = 1;
121+
// Tight enough that a broken timeout path surfaces as a test hang-then-
122+
// failure rather than a multi-second wait, loose enough to not race with
123+
// libevent's timer quantization on a loaded CI host.
124+
config.connection_timeout = std::chrono::milliseconds(150);
125+
config.per_connection_buffer_limit = 64 * 1024;
126+
config.client_transport_socket_factory =
127+
std::make_shared<StubClientTransportSocketFactory>();
128+
129+
manager = std::make_unique<ConnectionManagerImpl>(
130+
*dispatcher_, socketInterface(), config);
131+
132+
// TEST-NET-1 (RFC 5737) — reserved for documentation, guaranteed
133+
// non-routable in any sane network. The kernel will retransmit SYNs
134+
// for tens of seconds before giving up, which is orders of magnitude
135+
// longer than our 150ms pool timeout. The timeout timer wins the race
136+
// deterministically.
137+
auto unreachable =
138+
Address::parseInternetAddress("192.0.2.1", /*port=*/1);
139+
pool = std::make_unique<ConnectionPoolImpl>(*dispatcher_, unreachable,
140+
config, *manager);
141+
142+
pool->newConnection(callbacks);
143+
// One pending slot should be present — the new connection is in SYN_SENT
144+
// against an unroutable peer and hasn't failed yet.
145+
EXPECT_EQ(1u, pool->numPendingConnections());
146+
});
147+
148+
// The timer must fire on the dispatcher thread; we only observe the effect.
149+
// 3s covers the 150ms timer plus generous scheduling slack under ASAN.
150+
ASSERT_TRUE(waitFor(
151+
[&]() {
152+
return callbacks.failure_called_.load(std::memory_order_acquire) > 0;
153+
},
154+
std::chrono::seconds(3)))
155+
<< "connection_timeout never delivered onPoolFailure — the timer's "
156+
"capture was likely cancelled or pointed at freed memory";
157+
158+
EXPECT_EQ(1, callbacks.failure_called_.load());
159+
EXPECT_EQ(0, callbacks.ready_called_.load());
160+
EXPECT_EQ(static_cast<int>(ConnectionPool::PoolFailureReason::Timeout),
161+
callbacks.last_reason_.load());
162+
163+
// Tear pool/manager down inside the dispatcher — their destructors touch
164+
// libevent state that must be mutated on the dispatcher thread.
165+
executeInDispatcher([&]() {
166+
pool.reset();
167+
manager.reset();
168+
});
169+
}
170+
171+
} // namespace
172+
} // namespace network
173+
} // namespace mcp

0 commit comments

Comments
 (0)