Skip to content

Commit fc11f1a

Browse files
committed
fix(nif): validate event-loop fd handles via a registry
The asyncio reader/writer integration handed Python a raw fd_resource pointer cast to an integer, then cast it back and dereferenced it in remove/update/clear/release. A stale, duplicate, or fabricated id was a double-free or arbitrary-pointer deref that crashed the node. Hand out opaque ids tracked in a validating registry instead: producers register and return an id; removing consumers take (and release) the entry so a duplicate id is a no-op; non-removing consumers look up with a temporary keep. An unknown id is a safe no-op or a clean ValueError. fd_read/fd_write also moved to ERL_NIF_DIRTY_JOB_IO_BOUND. Adds py_fd_ops_SUITE:fd_registry_invalid_id_test.
1 parent e5ee85f commit fc11f1a

4 files changed

Lines changed: 195 additions & 42 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44

55
### Security
66

7+
- **Validated event-loop fd handles** - The asyncio reader/writer integration no longer
8+
hands Python a raw `fd_resource` pointer as an integer key. Each handle is an opaque id
9+
validated against a registry on every use, so a stale, duplicate, or fabricated id is a
10+
safe no-op (or clean error) instead of a double-free or arbitrary-pointer dereference
11+
that crashed the node. `fd_read`/`fd_write` also moved to dirty IO schedulers.
712
- **OWN_GIL worker robustness** (Python 3.14+) - A per-request allocation failure in
813
a subinterpreter worker no longer `break`s (and permanently kills) the worker command
914
loop; it returns an error and keeps serving. The `owngil_*` dispatch NIFs now run on

c_src/py_event_loop.c

Lines changed: 169 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6516,6 +6516,83 @@ static PyObject *py_get_isolation_mode(PyObject *self, PyObject *args) {
65166516
return PyUnicode_FromString("global");
65176517
}
65186518

6519+
/* ============================================================================
6520+
* Validating fd-handle registry
6521+
*
6522+
* Python receives an opaque integer id for each fd resource, never a raw
6523+
* pointer. Every consumer validates the id against this table, so a stale,
6524+
* duplicate, or fabricated id is a safe no-op instead of a use-after-free or
6525+
* arbitrary-pointer dereference. The producer's resource reference lives in the
6526+
* table entry: fd_reg_get hands out a temporary keep, fd_reg_take transfers the
6527+
* reference to the caller to release.
6528+
* ============================================================================ */
6529+
typedef struct { uint64_t id; fd_resource_t *res; } fd_reg_entry_t;
6530+
static fd_reg_entry_t *g_fd_reg = NULL;
6531+
static size_t g_fd_reg_len = 0;
6532+
static size_t g_fd_reg_cap = 0;
6533+
static uint64_t g_fd_reg_next_id = 1;
6534+
static pthread_mutex_t g_fd_reg_mutex = PTHREAD_MUTEX_INITIALIZER;
6535+
6536+
/* Store res and return its opaque id (0 on allocation failure). The caller's
6537+
* existing resource reference becomes owned by the table entry. */
6538+
static uint64_t fd_reg_add(fd_resource_t *res) {
6539+
pthread_mutex_lock(&g_fd_reg_mutex);
6540+
if (g_fd_reg_len == g_fd_reg_cap) {
6541+
size_t ncap = g_fd_reg_cap ? g_fd_reg_cap * 2 : 16;
6542+
fd_reg_entry_t *n = enif_realloc(g_fd_reg, ncap * sizeof(*n));
6543+
if (n == NULL) {
6544+
pthread_mutex_unlock(&g_fd_reg_mutex);
6545+
return 0;
6546+
}
6547+
g_fd_reg = n;
6548+
g_fd_reg_cap = ncap;
6549+
}
6550+
uint64_t id = g_fd_reg_next_id++;
6551+
if (id == 0) id = g_fd_reg_next_id++; /* never hand out 0 */
6552+
g_fd_reg[g_fd_reg_len].id = id;
6553+
g_fd_reg[g_fd_reg_len].res = res;
6554+
g_fd_reg_len++;
6555+
pthread_mutex_unlock(&g_fd_reg_mutex);
6556+
return id;
6557+
}
6558+
6559+
/* Look up id; on hit, keep the resource and return it (caller must release).
6560+
* Returns NULL for an unknown/stale/fabricated id. */
6561+
static fd_resource_t *fd_reg_get(uint64_t id) {
6562+
fd_resource_t *res = NULL;
6563+
if (id != 0) {
6564+
pthread_mutex_lock(&g_fd_reg_mutex);
6565+
for (size_t i = 0; i < g_fd_reg_len; i++) {
6566+
if (g_fd_reg[i].id == id) {
6567+
res = g_fd_reg[i].res;
6568+
enif_keep_resource(res);
6569+
break;
6570+
}
6571+
}
6572+
pthread_mutex_unlock(&g_fd_reg_mutex);
6573+
}
6574+
return res;
6575+
}
6576+
6577+
/* Remove id; on hit, return the resource (caller owns the entry's reference and
6578+
* must release it). Returns NULL for an unknown/stale/duplicate id. */
6579+
static fd_resource_t *fd_reg_take(uint64_t id) {
6580+
fd_resource_t *res = NULL;
6581+
if (id != 0) {
6582+
pthread_mutex_lock(&g_fd_reg_mutex);
6583+
for (size_t i = 0; i < g_fd_reg_len; i++) {
6584+
if (g_fd_reg[i].id == id) {
6585+
res = g_fd_reg[i].res;
6586+
g_fd_reg[i] = g_fd_reg[g_fd_reg_len - 1]; /* swap with last */
6587+
g_fd_reg_len--;
6588+
break;
6589+
}
6590+
}
6591+
pthread_mutex_unlock(&g_fd_reg_mutex);
6592+
}
6593+
return res;
6594+
}
6595+
65196596
/* Python function: _add_reader(fd, callback_id) -> fd_key */
65206597
static PyObject *py_add_reader(PyObject *self, PyObject *args) {
65216598
(void)self;
@@ -6567,8 +6644,17 @@ static PyObject *py_add_reader(PyObject *self, PyObject *args) {
65676644
}
65686645

65696646
/* Return a key that can be used to remove the reader */
6570-
unsigned long long key = (unsigned long long)(uintptr_t)fd_res;
6571-
return PyLong_FromUnsignedLongLong(key);
6647+
/* Hand Python an opaque, validated id instead of a raw pointer. The
6648+
* producer's resource reference is taken over by the registry entry. */
6649+
uint64_t id = fd_reg_add(fd_res);
6650+
if (id == 0) {
6651+
enif_select(loop->msg_env, (ErlNifEvent)fd, ERL_NIF_SELECT_STOP,
6652+
fd_res, NULL, ATOM_UNDEFINED);
6653+
enif_release_resource(fd_res);
6654+
PyErr_SetString(PyExc_MemoryError, "fd registry full");
6655+
return NULL;
6656+
}
6657+
return PyLong_FromUnsignedLongLong((unsigned long long)id);
65726658
}
65736659

65746660
/* Python function: _remove_reader(fd_key) -> None */
@@ -6581,11 +6667,13 @@ static PyObject *py_remove_reader(PyObject *self, PyObject *args) {
65816667
}
65826668

65836669
/* Use per-interpreter event loop lookup - but still allow cleanup even if loop is gone */
6584-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
6585-
if (fd_res != NULL && fd_res->loop != NULL) {
6586-
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
6587-
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
6588-
fd_res->reader_active = false;
6670+
fd_resource_t *fd_res = fd_reg_take(fd_key);
6671+
if (fd_res != NULL) {
6672+
if (fd_res->loop != NULL) {
6673+
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
6674+
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
6675+
fd_res->reader_active = false;
6676+
}
65896677
enif_release_resource(fd_res);
65906678
}
65916679

@@ -6643,8 +6731,17 @@ static PyObject *py_add_writer(PyObject *self, PyObject *args) {
66436731
}
66446732

66456733
/* Return a key that can be used to remove the writer */
6646-
unsigned long long key = (unsigned long long)(uintptr_t)fd_res;
6647-
return PyLong_FromUnsignedLongLong(key);
6734+
/* Hand Python an opaque, validated id instead of a raw pointer. The
6735+
* producer's resource reference is taken over by the registry entry. */
6736+
uint64_t id = fd_reg_add(fd_res);
6737+
if (id == 0) {
6738+
enif_select(loop->msg_env, (ErlNifEvent)fd, ERL_NIF_SELECT_STOP,
6739+
fd_res, NULL, ATOM_UNDEFINED);
6740+
enif_release_resource(fd_res);
6741+
PyErr_SetString(PyExc_MemoryError, "fd registry full");
6742+
return NULL;
6743+
}
6744+
return PyLong_FromUnsignedLongLong((unsigned long long)id);
66486745
}
66496746

66506747
/* Python function: _remove_writer(fd_key) -> None */
@@ -6657,11 +6754,13 @@ static PyObject *py_remove_writer(PyObject *self, PyObject *args) {
66576754
}
66586755

66596756
/* Use fd_res->loop directly - allows cleanup even if interpreter's loop is gone */
6660-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
6661-
if (fd_res != NULL && fd_res->loop != NULL) {
6662-
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
6663-
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
6664-
fd_res->writer_active = false;
6757+
fd_resource_t *fd_res = fd_reg_take(fd_key);
6758+
if (fd_res != NULL) {
6759+
if (fd_res->loop != NULL) {
6760+
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
6761+
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
6762+
fd_res->writer_active = false;
6763+
}
66656764
enif_release_resource(fd_res);
66666765
}
66676766

@@ -7391,8 +7490,17 @@ static PyObject *py_add_reader_for(PyObject *self, PyObject *args) {
73917490
return NULL;
73927491
}
73937492

7394-
unsigned long long key = (unsigned long long)(uintptr_t)fd_res;
7395-
return PyLong_FromUnsignedLongLong(key);
7493+
/* Hand Python an opaque, validated id instead of a raw pointer. The
7494+
* producer's resource reference is taken over by the registry entry. */
7495+
uint64_t id = fd_reg_add(fd_res);
7496+
if (id == 0) {
7497+
enif_select(loop->msg_env, (ErlNifEvent)fd, ERL_NIF_SELECT_STOP,
7498+
fd_res, NULL, ATOM_UNDEFINED);
7499+
enif_release_resource(fd_res);
7500+
PyErr_SetString(PyExc_MemoryError, "fd registry full");
7501+
return NULL;
7502+
}
7503+
return PyLong_FromUnsignedLongLong((unsigned long long)id);
73967504
}
73977505

73987506
/* Python function: _remove_reader_for(capsule, fd_key) -> None */
@@ -7411,11 +7519,13 @@ static PyObject *py_remove_reader_for(PyObject *self, PyObject *args) {
74117519
return NULL;
74127520
}
74137521

7414-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7415-
if (fd_res != NULL && fd_res->loop != NULL) {
7416-
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7417-
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7418-
fd_res->reader_active = false;
7522+
fd_resource_t *fd_res = fd_reg_take(fd_key);
7523+
if (fd_res != NULL) {
7524+
if (fd_res->loop != NULL) {
7525+
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7526+
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7527+
fd_res->reader_active = false;
7528+
}
74197529
enif_release_resource(fd_res);
74207530
}
74217531

@@ -7472,8 +7582,17 @@ static PyObject *py_add_writer_for(PyObject *self, PyObject *args) {
74727582
return NULL;
74737583
}
74747584

7475-
unsigned long long key = (unsigned long long)(uintptr_t)fd_res;
7476-
return PyLong_FromUnsignedLongLong(key);
7585+
/* Hand Python an opaque, validated id instead of a raw pointer. The
7586+
* producer's resource reference is taken over by the registry entry. */
7587+
uint64_t id = fd_reg_add(fd_res);
7588+
if (id == 0) {
7589+
enif_select(loop->msg_env, (ErlNifEvent)fd, ERL_NIF_SELECT_STOP,
7590+
fd_res, NULL, ATOM_UNDEFINED);
7591+
enif_release_resource(fd_res);
7592+
PyErr_SetString(PyExc_MemoryError, "fd registry full");
7593+
return NULL;
7594+
}
7595+
return PyLong_FromUnsignedLongLong((unsigned long long)id);
74777596
}
74787597

74797598
/* Python function: _remove_writer_for(capsule, fd_key) -> None */
@@ -7492,11 +7611,13 @@ static PyObject *py_remove_writer_for(PyObject *self, PyObject *args) {
74927611
return NULL;
74937612
}
74947613

7495-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7496-
if (fd_res != NULL && fd_res->loop != NULL) {
7497-
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7498-
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7499-
fd_res->writer_active = false;
7614+
fd_resource_t *fd_res = fd_reg_take(fd_key);
7615+
if (fd_res != NULL) {
7616+
if (fd_res->loop != NULL) {
7617+
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7618+
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7619+
fd_res->writer_active = false;
7620+
}
75007621
enif_release_resource(fd_res);
75017622
}
75027623

@@ -7516,8 +7637,9 @@ static PyObject *py_update_fd_read(PyObject *self, PyObject *args) {
75167637
return NULL;
75177638
}
75187639

7519-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7640+
fd_resource_t *fd_res = fd_reg_get(fd_key);
75207641
if (fd_res == NULL || fd_res->loop == NULL) {
7642+
if (fd_res) enif_release_resource(fd_res);
75217643
PyErr_SetString(PyExc_ValueError, "Invalid fd resource");
75227644
return NULL;
75237645
}
@@ -7531,6 +7653,7 @@ static PyObject *py_update_fd_read(PyObject *self, PyObject *args) {
75317653
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
75327654
ERL_NIF_SELECT_READ, fd_res, target_pid, ATOM_UNDEFINED);
75337655

7656+
enif_release_resource(fd_res);
75347657
Py_RETURN_NONE;
75357658
}
75367659

@@ -7547,8 +7670,9 @@ static PyObject *py_update_fd_write(PyObject *self, PyObject *args) {
75477670
return NULL;
75487671
}
75497672

7550-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7673+
fd_resource_t *fd_res = fd_reg_get(fd_key);
75517674
if (fd_res == NULL || fd_res->loop == NULL) {
7675+
if (fd_res) enif_release_resource(fd_res);
75527676
PyErr_SetString(PyExc_ValueError, "Invalid fd resource");
75537677
return NULL;
75547678
}
@@ -7562,6 +7686,7 @@ static PyObject *py_update_fd_write(PyObject *self, PyObject *args) {
75627686
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
75637687
ERL_NIF_SELECT_WRITE, fd_res, target_pid, ATOM_UNDEFINED);
75647688

7689+
enif_release_resource(fd_res);
75657690
Py_RETURN_NONE;
75667691
}
75677692

@@ -7577,9 +7702,10 @@ static PyObject *py_clear_fd_read(PyObject *self, PyObject *args) {
75777702
return NULL;
75787703
}
75797704

7580-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7705+
fd_resource_t *fd_res = fd_reg_get(fd_key);
75817706
if (fd_res == NULL || fd_res->loop == NULL) {
7582-
Py_RETURN_NONE; /* Already cleaned up */
7707+
if (fd_res) enif_release_resource(fd_res);
7708+
Py_RETURN_NONE; /* Already cleaned up / invalid id */
75837709
}
75847710

75857711
if (fd_res->reader_active) {
@@ -7590,6 +7716,7 @@ static PyObject *py_clear_fd_read(PyObject *self, PyObject *args) {
75907716
fd_res->read_callback_id = 0;
75917717
}
75927718

7719+
enif_release_resource(fd_res);
75937720
Py_RETURN_NONE;
75947721
}
75957722

@@ -7605,9 +7732,10 @@ static PyObject *py_clear_fd_write(PyObject *self, PyObject *args) {
76057732
return NULL;
76067733
}
76077734

7608-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7735+
fd_resource_t *fd_res = fd_reg_get(fd_key);
76097736
if (fd_res == NULL || fd_res->loop == NULL) {
7610-
Py_RETURN_NONE; /* Already cleaned up */
7737+
if (fd_res) enif_release_resource(fd_res);
7738+
Py_RETURN_NONE; /* Already cleaned up / invalid id */
76117739
}
76127740

76137741
if (fd_res->writer_active) {
@@ -7618,6 +7746,7 @@ static PyObject *py_clear_fd_write(PyObject *self, PyObject *args) {
76187746
fd_res->write_callback_id = 0;
76197747
}
76207748

7749+
enif_release_resource(fd_res);
76217750
Py_RETURN_NONE;
76227751
}
76237752

@@ -7633,10 +7762,12 @@ static PyObject *py_release_fd_resource(PyObject *self, PyObject *args) {
76337762
return NULL;
76347763
}
76357764

7636-
fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key;
7637-
if (fd_res != NULL && fd_res->loop != NULL) {
7638-
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7639-
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7765+
fd_resource_t *fd_res = fd_reg_take(fd_key);
7766+
if (fd_res != NULL) {
7767+
if (fd_res->loop != NULL) {
7768+
enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd,
7769+
ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED);
7770+
}
76407771
enif_release_resource(fd_res);
76417772
}
76427773

c_src/py_nif.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8032,8 +8032,8 @@ static ErlNifFunc nif_funcs[] = {
80328032
{"reactor_close_fd", 2, nif_reactor_close_fd, 0},
80338033

80348034
/* Direct FD operations */
8035-
{"fd_read", 2, nif_fd_read, 0},
8036-
{"fd_write", 2, nif_fd_write, 0},
8035+
{"fd_read", 2, nif_fd_read, ERL_NIF_DIRTY_JOB_IO_BOUND},
8036+
{"fd_write", 2, nif_fd_write, ERL_NIF_DIRTY_JOB_IO_BOUND},
80378037
{"fd_select_read", 1, nif_fd_select_read, 0},
80388038
{"fd_select_write", 1, nif_fd_select_write, 0},
80398039
{"fd_close", 1, nif_fd_close, 0},

test/py_fd_ops_SUITE.erl

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
fd_read_write_test/1,
2929
fd_close_test/1,
3030
fd_select_test/1,
31-
dup_fd_test/1
31+
dup_fd_test/1,
32+
fd_registry_invalid_id_test/1
3233
]).
3334

3435
all() ->
@@ -37,7 +38,8 @@ all() ->
3738
fd_read_write_test,
3839
fd_close_test,
3940
fd_select_test,
40-
dup_fd_test
41+
dup_fd_test,
42+
fd_registry_invalid_id_test
4143
].
4244

4345
init_per_suite(Config) ->
@@ -176,3 +178,18 @@ dup_fd_test(_Config) ->
176178
ok = py_nif:fd_close(DupFd2),
177179
ok = py_nif:fd_close(Fd1),
178180
ok.
181+
182+
%% @doc A stale, duplicate, or fabricated fd id must be a safe no-op (or clean
183+
%% error), not a raw-pointer dereference. Regression for the validating
184+
%% fd-handle registry: pre-fix the id was cast straight to a pointer and
185+
%% dereferenced, so a bogus id crashed the node.
186+
fd_registry_invalid_id_test(_Config) ->
187+
{ok, none} = py:eval(<<"__import__('py_event_loop')._remove_reader(999999999)">>),
188+
{ok, none} = py:eval(<<"__import__('py_event_loop')._remove_writer(123456789)">>),
189+
{ok, none} = py:eval(<<"__import__('py_event_loop')._release_fd_resource(42)">>),
190+
{ok, none} = py:eval(<<"__import__('py_event_loop')._clear_fd_read(7)">>),
191+
%% _update_fd_* rejects an unknown id with a clean ValueError, not a crash.
192+
{error, _} = py:eval(<<"__import__('py_event_loop')._update_fd_read(54321, 1)">>),
193+
%% Node still alive and usable.
194+
{ok, 2} = py:eval(<<"1+1">>),
195+
ok.

0 commit comments

Comments
 (0)