Skip to content

Commit df0e588

Browse files
committed
Skip overlong-UTF-8 dirents in getdents64
sys_getdents64 aborted the whole directory stream the first time path_translate_dirent_name reported ENAMETOOLONG. On macOS APFS, filename byte length can exceed Linux NAME_MAX because the per-component limit is in Unicode characters, not bytes; a single 89-CJK-character entry already crosses 255 bytes. The pre-fix path therefore truncated the ls / find / coreutils listings seen by the guest for any APFS source tree containing one such name. A guest libc cannot represent an oversize entry in its 256-byte dirent buffer regardless of what elfuse does, so the only sensible behavior is to skip the unrepresentable name and keep delivering the rest of the stream. Skip only on ENAMETOOLONG; any other translation failure keeps the existing partial-return path so genuine errors are not silently dropped. A single process-wide log_warn records the first hit via an atomic latch. The regression test stages five 268-byte UTF-8 names plus one normal entry host-side and walks the listing with a 64-byte buffer (at most two short entries per call), which forces at least one call to begin fresh on an overlong entry under any APFS hash ordering. That is the exact condition under which pre-fix code returned -ENAMETOOLONG to userspace; the test fails three out of three pre-fix and passes three out of three post-fix.
1 parent 8441714 commit df0e588

3 files changed

Lines changed: 222 additions & 2 deletions

File tree

mk/tests.mk

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
test-rosetta-glibc test-rosetta-all bench-rosetta \
1010
test-matrix test-matrix-elfuse-aarch64 test-matrix-qemu-aarch64 \
1111
test-full test-multi-vcpu test-rwx test-sysroot-rename \
12-
test-case-collision test-case-collision-fallback test-sysroot-create-paths \
12+
test-case-collision test-case-collision-fallback test-getdents64-overlong \
13+
test-sysroot-create-paths \
1314
test-proctitle-host test-proctitle-low-stack \
1415
test-sysroot-procfs-exec test-timeout-disable test-fuse-alpine \
1516
test-sysroot-nofollow test-sysroot-chdir perf
@@ -48,6 +49,8 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage \
4849
@$(MAKE) --no-print-directory test-busybox
4950
@printf "\n$(BLUE)━━━ sysroot procfs exec validation ━━━$(RESET)\n"
5051
@$(MAKE) --no-print-directory test-sysroot-procfs-exec
52+
@printf "\n$(BLUE)━━━ getdents64 overlong-UTF-8 dirent skip ━━━$(RESET)\n"
53+
@$(MAKE) --no-print-directory test-getdents64-overlong
5154
@printf "\n$(BLUE)━━━ Alpine sysroot FUSE validation ━━━$(RESET)\n"
5255
@$(MAKE) --no-print-directory test-fuse-alpine
5356
@printf "\n$(BLUE)━━━ timeout=0 validation ━━━$(RESET)\n"
@@ -117,6 +120,26 @@ test-case-collision-fallback: $(ELFUSE_BIN) $(BUILD_DIR)/test-case-collision
117120
trap 'rm -rf "$$tmpdir"' EXIT; \
118121
$(ELFUSE_BIN) --sysroot "$$tmpdir" $(BUILD_DIR)/test-case-collision
119122

123+
# Build APFS-side dirents whose UTF-8 byte length exceeds Linux
124+
# NAME_MAX (255). 89 copies of U+3042 (3-byte UTF-8) plus a 1-byte
125+
# ASCII tag = 268 bytes per name; the guest cannot forge this via
126+
# openat (NAME_MAX is enforced), so the harness stages it host-side
127+
# and the guest scans the listing. Five overlong files plus one
128+
# normal entry: with a one-entry-per-call buffer on the guest side,
129+
# any APFS hash ordering puts an overlong entry in a position where
130+
# pre-fix code returned -ENAMETOOLONG to userspace.
131+
# The fixture lives in a mktemp directory that the EXIT trap removes;
# expected.txt is the single normal entry the guest test looks for.
test-getdents64-overlong: $(ELFUSE_BIN) $(BUILD_DIR)/test-getdents64-overlong
132+
@set -e; \
133+
tmpdir=$$(mktemp -d); \
134+
trap 'rm -rf "$$tmpdir"' EXIT; \
135+
mkdir -p "$$tmpdir/fixture"; \
136+
: > "$$tmpdir/fixture/expected.txt"; \
137+
for tag in a b c d e; do \
138+
overlong=$$(printf '\343\201\202%.0s' $$(seq 1 89))$$tag; \
139+
: > "$$tmpdir/fixture/$$overlong"; \
140+
done; \
141+
$(ELFUSE_BIN) $(BUILD_DIR)/test-getdents64-overlong "$$tmpdir/fixture"
142+
120143
test-sysroot-create-paths: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-create-paths
121144
@set -e; \
122145
tmpdir=$$(mktemp -d); \

src/syscall/fs.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1107,8 +1107,35 @@ int64_t sys_getdents64(guest_t *g, int fd, uint64_t buf_gva, uint64_t count)
11071107
sizeof(guest_name));
11081108
if (name_rc > 0)
11091109
continue;
1110-
if (name_rc < 0)
1110+
if (name_rc < 0) {
1111+
/* macOS APFS accepts UTF-8 filenames whose byte length exceeds
1112+
* Linux NAME_MAX (255). A guest libc cannot represent such a
1113+
* name in its 256-byte dirent buffer at all, so elfuse silently
1114+
* skips the unrepresentable entry and keeps the rest of the
1115+
* stream intact. This is an elfuse compatibility policy, not
1116+
* Linux kernel behavior: real getdents64 has no equivalent
1117+
* skip path because Linux NAME_MAX is enforced at the
1118+
* filesystem layer, so no oversize entry ever reaches
1119+
* verify_dirent_name. Aborting the whole stream the way the
1120+
* pre-fix code did truncated ls / find / coreutils listings
1121+
* against APFS-mounted source trees. Skip on ENAMETOOLONG;
1122+
* keep the existing partial-return path for any other
1123+
* translation failure so genuine errors are not silently
1124+
* dropped.
1125+
*/
1126+
if (errno == ENAMETOOLONG) {
1127+
static bool overlong_warned;
1128+
if (!__atomic_exchange_n(&overlong_warned, true,
1129+
__ATOMIC_RELAXED))
1130+
log_warn(
1131+
"getdents64: skipping host dirent whose name "
1132+
"exceeds Linux NAME_MAX (%u); first hit was "
1133+
"%zu bytes on fd %d",
1134+
NAME_MAX, strlen(de->d_name), fd);
1135+
continue;
1136+
}
11111137
return guest_pos > 0 ? (int64_t) guest_pos : linux_errno();
1138+
}
11121139

11131140
size_t name_len = strlen(guest_name);
11141141
/* Linux dirent64: 19-byte header + name + null, padded to 8 */

tests/test-getdents64-overlong.c

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/* getdents64 overlong-UTF-8 dirent skip regression
2+
*
3+
* Copyright 2026 elfuse contributors
4+
* SPDX-License-Identifier: Apache-2.0
5+
*
6+
* macOS APFS lets filenames exceed Linux NAME_MAX (255 bytes) on the
7+
* UTF-8 byte axis: ~90 CJK codepoints already crosses the cap while
8+
* staying under APFS's per-component character limit. The guest
9+
* cannot create such a name through Linux syscalls (NAME_MAX is
10+
* enforced at openat), so the surrounding harness builds the fixture
11+
* host-side and passes the directory path as argv[1].
12+
*
13+
* Pre-fix behavior: sys_getdents64 aborted the whole stream with
14+
* ENAMETOOLONG the first time path_translate_dirent_name reported an
15+
* oversize entry, truncating ls / find / coreutils listings.
16+
* Post-fix: the overlong entry is skipped and the rest of the stream
17+
* is delivered (an elfuse policy; Linux never yields such entries).
18+
*/
19+
20+
#include <dirent.h>
21+
#include <errno.h>
22+
#include <fcntl.h>
23+
#include <stdio.h>
24+
#include <stdlib.h>
25+
#include <string.h>
26+
#include <sys/syscall.h>
27+
#include <unistd.h>
28+
29+
#include "test-harness.h"
30+
31+
#ifndef SYS_getdents64
32+
#define SYS_getdents64 61
33+
#endif
34+
35+
/* Test tallies; main() derives the exit status from fails.
 * NOTE(review): presumably updated by the PASS/FAIL macros from
 * test-harness.h -- confirm there.
 */
int passes = 0, fails = 0;
36+
37+
/* Record layout this test expects in the getdents64 buffer. The four
 * fixed fields occupy 8 + 8 + 2 + 1 = 19 bytes ahead of d_name, which is
 * the header length scan_directory validates d_reclen against.
 */
typedef struct {
38+
unsigned long long d_ino;
39+
long long d_off;
40+
unsigned short d_reclen; /* total record length; used to step to the next record */
41+
unsigned char d_type;
42+
char d_name[]; /* NUL-terminated name; scan_directory verifies the terminator */
43+
} linux_dirent64_t;
44+
45+
/* Name of the one normal fixture entry scan_directory looks for. */
static const char EXPECTED_NAME[] = "expected.txt";
46+
47+
/* Drain the directory via raw getdents64. Counts how many real entries
48+
* (skipping "." and "..") show up and whether EXPECTED_NAME is seen.
49+
* Returns -errno on the first non-EOF failure so the caller can tell a
50+
* mid-stream ENAMETOOLONG from an empty directory.
51+
*
52+
* The buffer is sized just past a single small dirent so each call
53+
* returns at most two short entries. With multiple overlong files in the
54+
* fixture, this guarantees at least one call starts fresh (guest_pos
55+
* == 0) on an overlong entry, which is the exact condition under
56+
* which the pre-fix code returns -ENAMETOOLONG to userspace and
57+
* truncates the listing for ls / find. Larger buffers can mask the
58+
* bug because APFS hash order may bury every overlong after a
59+
* partial-return point.
60+
*/
61+
static int scan_directory(const char *path,
62+
int *out_entries,
63+
int *out_saw_expected)
64+
{
65+
*out_entries = 0;
66+
*out_saw_expected = 0;
67+
68+
int fd = open(path, O_RDONLY | O_DIRECTORY);
69+
if (fd < 0)
70+
return -errno;
71+
72+
/* 64 bytes caps each call at one entry for the visible names
73+
* (reclen 24 for ".", 24 for "..", 32 for "expected.txt"; ". + .."
74+
* could pack into 48, but five overlong files outnumber three
75+
* visible normals so at least one call still starts fresh on an
76+
* overlong entry with guest_pos == 0 -- the exact condition under
77+
* which pre-fix sys_getdents64 returned -ENAMETOOLONG to userspace
78+
* and truncated the listing).
79+
*/
80+
char buf[64];
81+
for (;;) {
82+
long n = syscall(SYS_getdents64, fd, buf, sizeof(buf));
83+
if (n < 0) {
84+
int err = errno;
85+
close(fd);
86+
return -err;
87+
}
88+
if (n == 0)
89+
break;
90+
91+
/* Validate the binary ABI strictly: an unterminated d_name or a
92+
* forged d_reclen could otherwise let strcmp walk off the buffer.
93+
* Header is 19 bytes; max valid record fits in n-off.
94+
*/
95+
for (long off = 0; off < n;) {
96+
linux_dirent64_t *de = (linux_dirent64_t *) (buf + off);
97+
if (de->d_reclen < 19 || de->d_reclen > (unsigned) (n - off)) {
98+
close(fd);
99+
return -EIO;
100+
}
101+
size_t name_cap = (size_t) de->d_reclen - 19;
102+
if (!memchr(de->d_name, '\0', name_cap)) {
103+
close(fd);
104+
return -EIO;
105+
}
106+
const char *name = de->d_name;
107+
if (strcmp(name, ".") != 0 && strcmp(name, "..") != 0) {
108+
(*out_entries)++;
109+
if (strcmp(name, EXPECTED_NAME) == 0)
110+
*out_saw_expected = 1;
111+
}
112+
off += de->d_reclen;
113+
}
114+
}
115+
116+
close(fd);
117+
return 0;
118+
}
119+
120+
/* Scans the fixture directory named by argv[1] and runs three checks on
 * the result. Returns 2 on bad usage, otherwise 0 when fails is zero and
 * 1 when it is not.
 */
int main(int argc, char **argv)
121+
{
122+
if (argc != 2) {
123+
fprintf(stderr, "usage: %s <fixture-dir>\n", argv[0]);
124+
return 2;
125+
}
126+
127+
const char *dir = argv[1];
128+
printf("test-getdents64-overlong: scanning %s\n", dir);
129+
130+
/* One scan feeds all three checks below. */
int entries = 0, saw_expected = 0;
131+
int rc = scan_directory(dir, &entries, &saw_expected);
132+
133+
TEST("getdents64 does not abort with ENAMETOOLONG");
134+
/* NOTE(review): errno is assigned before each FAIL, presumably so the
 * harness can report it -- confirm in test-harness.h.
 */
if (rc == -ENAMETOOLONG) {
135+
errno = ENAMETOOLONG;
136+
FAIL("stream aborted on overlong entry");
137+
} else if (rc < 0) {
138+
errno = -rc;
139+
FAIL("getdents64 returned unexpected error");
140+
} else {
141+
PASS();
142+
}
143+
144+
TEST("normal entry survives the scan");
145+
if (rc < 0) {
146+
errno = -rc;
147+
FAIL("scan failed before reaching expected entry");
148+
} else if (!saw_expected) {
149+
FAIL("expected.txt missing from listing");
150+
} else {
151+
PASS();
152+
}
153+
154+
TEST("listing has only the normal entry");
155+
/* The overlong files are present on disk but must be silently
156+
* skipped, so the visible-entry count is exactly 1.
157+
*/
158+
if (rc < 0) {
159+
errno = -rc;
160+
FAIL("scan failed before count check");
161+
} else if (entries != 1) {
162+
fprintf(stderr, " observed %d visible entries\n", entries);
163+
FAIL("unexpected visible entry count");
164+
} else {
165+
PASS();
166+
}
167+
168+
SUMMARY("test-getdents64-overlong");
169+
return fails == 0 ? 0 : 1;
170+
}

0 commit comments

Comments
 (0)