Skip to content

Commit bc38307

Browse files
author
Marko Vejnovic
committed
bug: Fix lockup when RLIMIT_NOFILE is large
1 parent 1d1e48b commit bc38307

File tree

1 file changed

+112
-7
lines changed

1 file changed

+112
-7
lines changed

Sources/libreprl/libreprl-posix.c

Lines changed: 112 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
#include <assert.h>
2424
#include <errno.h>
2525
#include <fcntl.h>
26+
#include <limits.h>
2627
#include <poll.h>
2728
#include <signal.h>
2829
#include <stdarg.h>
30+
#include <stdbool.h>
2931
#include <stdio.h>
3032
#include <stdlib.h>
3133
#include <string.h>
@@ -36,16 +38,28 @@
3638
#include <sys/wait.h>
3739
#include <time.h>
3840
#include <unistd.h>
41+
#ifdef __linux__
42+
#include <sys/utsname.h>
43+
#endif
3944

4045
// Well-known file descriptor numbers for reprl <-> child communication, child process side
46+
// Make sure you modify reprl_fds[] below if you change these.
4147
#define REPRL_CHILD_CTRL_IN 100
4248
#define REPRL_CHILD_CTRL_OUT 101
4349
#define REPRL_CHILD_DATA_IN 102
4450
#define REPRL_CHILD_DATA_OUT 103
4551

52+
// Table of every well-known child-side REPRL fd. The fd-closing helpers consult this table to
// decide which descriptors must be left open.
static const int reprl_fds[] = {
    REPRL_CHILD_CTRL_IN, REPRL_CHILD_CTRL_OUT,
    REPRL_CHILD_DATA_IN, REPRL_CHILD_DATA_OUT,
};
58+
4659
/// Maximum timeout in microseconds. Mostly just limited by the fact that the timeout in milliseconds has to fit into a 32-bit integer.
4760
#define REPRL_MAX_TIMEOUT_IN_MICROSECONDS ((uint64_t)(INT_MAX) * 1000)
4861

62+
/// Number of elements in an array object. Only valid for true arrays: applied to a pointer
/// (including an array parameter, which decays to a pointer) it yields a meaningless value.
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
4963
/// Returns the smaller of the two given sizes.
static size_t min(size_t x, size_t y) {
    if (y < x) {
        return y;
    }
    return x;
}
@@ -79,6 +93,103 @@ static void free_string_array(char** arr)
7993
free(arr);
8094
}
8195

96+
/// Returns the file descriptor table size reported by getdtablesize() as an unsigned value.
/// If a negative size is reported, prints a diagnostic to stderr and terminates the process.
static unsigned int getdtablesize_or_crash() {
    const int reported = getdtablesize();
    if (reported >= 0) {
        return (unsigned int)reported;
    }

    fprintf(stderr, "getdtablesize() failed: %s. This likely means the system is borked.\n",
            strerror(errno));
    exit(-1);
}
106+
107+
/// Converts an unsigned file descriptor value to int.
/// Values above INT_MAX cannot be represented; in that case a diagnostic is printed to stderr
/// and the process is terminated.
static int fd_cast(unsigned int fd) {
    if (fd <= INT_MAX) {
        return (int)fd;
    }

    fprintf(stderr, "File descriptor value %u is too large to fit into int. This likely means "
                    "the system is borked.\n", fd);
    exit(-1);
}
116+
117+
/// Reports whether the running kernel is recent enough (Linux 5.9 or newer) for close_range().
///
/// Returns false on non-Linux platforms, when uname() fails, or when the kernel release string
/// does not begin with "<major>.<minor>".
static bool system_supports_close_range(void) {
#ifdef __linux__
    struct utsname info;
    if (uname(&info) != 0) {
        // Should not happen with a valid buffer, but never parse a buffer that was not filled in.
        return false;
    }

    // Only major and minor take part in the decision, so a release string without a third
    // component is still accepted.
    int major = 0, minor = 0;
    if (sscanf(info.release, "%d.%d", &major, &minor) != 2) {
        return false;
    }

    return major > 5 || (major == 5 && minor >= 9);
#else
    // TODO: Technically, FreeBSD does support close_range, but I don't need support for it right
    // now, so leaving this unimplemented.
    // https://man.freebsd.org/cgi/man.cgi?close_range(2)
    return false;
#endif
}
135+
136+
/// qsort() comparator that orders int file descriptors ascending.
/// Returns -1, 0 or 1 without subtracting the operands, so it cannot overflow.
static int fd_qsort_compare(const void* a, const void* b) {
    const int* lhs = a;
    const int* rhs = b;

    if (*lhs < *rhs) {
        return -1;
    }
    return *lhs > *rhs ? 1 : 0;
}
141+
142+
/// Fast path which uses close_range() to close ranges of fds.
143+
static void close_all_non_reprl_fds_fast() {
144+
// Unfortunately we cannot trust the reprl_fds array to be sorted since an accidental edit to
145+
// reprl_fds could have broken that assumption. So let's create a new sorted array. It's cheap
146+
// anyways.
147+
int sorted_reprl_fds[ARRAY_SIZE(reprl_fds) + 2];
148+
sorted_reprl_fds[0] = 3; // Skip the well-known stdin, stdout, stderr fds.
149+
memcpy(sorted_reprl_fds + 1, reprl_fds, sizeof(reprl_fds));
150+
sorted_reprl_fds[ARRAY_SIZE(sorted_reprl_fds) - 1] = fd_cast(getdtablesize_or_crash());
151+
qsort(sorted_reprl_fds, ARRAY_SIZE(sorted_reprl_fds), sizeof(int), fd_qsort_compare);
152+
153+
// Cool, now we will iterate the sorted fds in ranges and close everything in between.
154+
int start_fd = 3, end_fd;
155+
for (size_t i = 0; i < ARRAY_SIZE(sorted_reprl_fds); i++) {
156+
end_fd = sorted_reprl_fds[i];
157+
if (start_fd < end_fd) {
158+
// Close the range [start_fd, end_fd)
159+
close_range(start_fd, end_fd - 1, 0);
160+
}
161+
start_fd = end_fd + 1;
162+
}
163+
}
164+
165+
/// Fallback path which makes a close() syscall for each non-REPRL fd.
166+
static void close_all_non_reprl_fds_slow() {
167+
const int tablesize = fd_cast(getdtablesize_or_crash());
168+
169+
for (int i = 3; i < tablesize; i++) {
170+
bool is_reprl_fd = false;
171+
for (size_t j = 0; j < ARRAY_SIZE(reprl_fds); j++) {
172+
if (i == reprl_fds[j]) {
173+
is_reprl_fd = true;
174+
break;
175+
}
176+
}
177+
178+
if (!is_reprl_fd) {
179+
close(i);
180+
}
181+
}
182+
}
183+
184+
/// Closes every file descriptor other than stdin/stdout/stderr and the well-known REPRL fds,
/// picking the close_range()-based implementation when the system supports it and the
/// per-fd close() loop otherwise.
static void close_all_non_reprl_fds() {
    if (!system_supports_close_range()) {
        close_all_non_reprl_fds_slow();
        return;
    }

    close_all_non_reprl_fds_fast();
}
192+
82193
// A unidirectional communication channel for larger amounts of data, up to a maximum size (REPRL_MAX_DATA_SIZE).
83194
// Implemented as a (RAM-backed) file for which the file descriptor is shared with the child process and which is mapped into our address space.
84195
struct data_channel {
@@ -250,13 +361,7 @@ static int reprl_spawn_child(struct reprl_context* ctx)
250361
close(devnull);
251362

252363
// close all other FDs. We try to use FD_CLOEXEC everywhere, but let's be extra sure we don't leak any fds to the child.
253-
int tablesize = getdtablesize();
254-
for (int i = 3; i < tablesize; i++) {
255-
if (i == REPRL_CHILD_CTRL_IN || i == REPRL_CHILD_CTRL_OUT || i == REPRL_CHILD_DATA_IN || i == REPRL_CHILD_DATA_OUT) {
256-
continue;
257-
}
258-
close(i);
259-
}
364+
close_all_non_reprl_fds();
260365

261366
execve(ctx->argv[0], ctx->argv, ctx->envp);
262367

0 commit comments

Comments
 (0)