Skip to content

Commit 6112a78

Browse files
authored
NetAcceptAction::cancel() use-after-free fix: part 2 (#12874)
This is a revised version of PR #12803 which was reverted in PR #12841 because it lacked the idempotent cancel guard, causing its own set of crashes. There is a race between NetAcceptAction::cancel() and NetAccept::acceptEvent() where the server pointer could be dereferenced after the NetAccept object was deleted. Thread A calls cancel(), which sets cancelled=true via Action::cancel(). Thread B running acceptEvent() sees cancelled==true and deletes the NetAccept (including the embedded Server). Thread A then calls server->close() on freed memory. This is fixed by making the server pointer in NetAcceptAction atomic and using exchange(nullptr) so only one thread can obtain and close the server. Additionally, cancel() is made idempotent by checking !cancelled before calling Action::cancel(), which prevents ink_assert(!cancelled) assertion failures when cancel is called from both external callers (TSActionCancel) and internal error paths (acceptEvent, acceptFastEvent, acceptLoopEvent).
1 parent 2ffe0ee commit 6112a78

4 files changed

Lines changed: 26 additions & 18 deletions

File tree

src/iocore/net/P_NetAccept.h

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "iocore/net/NetAcceptEventIO.h"
4444
#include "Server.h"
4545

46+
#include <atomic>
4647
#include <vector>
4748

4849
struct NetAccept;
@@ -60,21 +61,30 @@ AcceptFunction net_accept;
6061

6162
class UnixNetVConnection;
6263

63-
// TODO fix race between cancel accept and call back
6464
struct NetAcceptAction : public Action, public RefCountObjInHeap {
65-
Server *server;
65+
std::atomic<Server *> server{nullptr};
6666

67-
void
68-
cancel(Continuation *cont = nullptr) override
67+
NetAcceptAction(Continuation *cont, Server *s)
6968
{
70-
Action::cancel(cont);
71-
server->close();
69+
continuation = cont;
70+
if (cont != nullptr) {
71+
mutex = cont->mutex;
72+
}
73+
server.store(s, std::memory_order_release);
7274
}
7375

74-
Continuation *
75-
operator=(Continuation *acont)
76+
void
77+
cancel(Continuation *cont = nullptr) override
7678
{
77-
return Action::operator=(acont);
79+
// Use atomic exchange so only one thread closes the server, preventing
80+
// use-after-free races between cancel() and acceptEvent() cleanup.
81+
Server *s = server.exchange(nullptr, std::memory_order_acq_rel);
82+
if (s != nullptr) {
83+
s->close();
84+
}
85+
if (!cancelled) {
86+
Action::cancel(cont);
87+
}
7888
}
7989

8090
~NetAcceptAction() override

src/iocore/net/QUICNetProcessor.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,7 @@ QUICNetProcessor::main_accept(Continuation *cont, SOCKET fd, AcceptOptions const
251251
na->server.sock = UnixSocket{fd};
252252
ats_ip_copy(&na->server.accept_addr, &accept_ip);
253253

254-
na->action_ = new NetAcceptAction();
255-
*na->action_ = cont;
256-
na->action_->server = &na->server;
254+
na->action_ = new NetAcceptAction(cont, &na->server);
257255
na->init_accept();
258256

259257
return na->action_.get();

src/iocore/net/UnixNetAccept.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,7 @@ NetAccept::init_accept_per_thread()
317317
void
318318
NetAccept::stop_accept()
319319
{
320-
if (!action_->cancelled) {
321-
action_->cancel();
322-
}
320+
action_->cancel();
323321
server.close();
324322
}
325323

@@ -479,6 +477,7 @@ NetAccept::acceptEvent(int event, void *ep)
479477
MUTEX_TRY_LOCK(lock, m, e->ethread);
480478
if (lock.is_locked()) {
481479
if (action_->cancelled) {
480+
// Server was already closed by whoever called cancel().
482481
e->cancel();
483482
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
484483
delete this;
@@ -487,6 +486,7 @@ NetAccept::acceptEvent(int event, void *ep)
487486

488487
int res;
489488
if ((res = net_accept(this, e, false)) < 0) {
489+
action_->cancel();
490490
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
491491
/* INKqa11179 */
492492
Warning("Accept on port %d failed with error no %d", ats_ip_port_host_order(&server.addr), res);
@@ -637,6 +637,7 @@ NetAccept::acceptFastEvent(int event, void *ep)
637637
return EVENT_CONT;
638638

639639
Lerror:
640+
action_->cancel();
640641
server.close();
641642
e->cancel();
642643
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
@@ -656,6 +657,7 @@ NetAccept::acceptLoopEvent(int event, Event *e)
656657
}
657658

658659
// Don't think this ever happens ...
660+
action_->cancel();
659661
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
660662
delete this;
661663
return EVENT_DONE;

src/iocore/net/UnixNetProcessor.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,7 @@ UnixNetProcessor::accept_internal(Continuation *cont, int fd, AcceptOptions cons
133133
na->proxyPort = sa ? sa->proxyPort : nullptr;
134134
na->snpa = dynamic_cast<SSLNextProtocolAccept *>(cont);
135135

136-
na->action_ = new NetAcceptAction();
137-
*na->action_ = cont;
138-
na->action_->server = &na->server;
136+
na->action_ = new NetAcceptAction(cont, &na->server);
139137

140138
if (opt.frequent_accept) { // true
141139
if (accept_threads > 0 && listen_per_thread == 0) {

0 commit comments

Comments
 (0)