Skip to content

Commit 2be64ed

Browse files
tanyifenggvisor-bot
authored andcommitted
poll: Write back only revents to userspace, matching Linux behavior
doPoll() was modifying pfd.Events (adding POLLHUP|POLLERR) and writing back the entire pollfd struct. Linux only writes back revents via unsafe_put_user() in do_sys_poll(). The polluted events field broke libevent's poll_del(), causing a busy-loop in tmux (CPU 96.6%). Fix: write back only the 2-byte revents field per fd in doPoll(). FUTURE_COPYBARA_INTEGRATE_REVIEW=#12851 from tanyifeng:poll-revents-writeback 3169ea6 PiperOrigin-RevId: 896548818
1 parent 4343914 commit 2be64ed

2 files changed

Lines changed: 75 additions & 8 deletions

File tree

pkg/sentry/syscalls/linux/sys_poll.go

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ package linux
1717
import (
1818
"fmt"
1919
"time"
20+
"unsafe"
2021

2122
"gvisor.dev/gvisor/pkg/abi/linux"
2223
"gvisor.dev/gvisor/pkg/errors/linuxerr"
2324
"gvisor.dev/gvisor/pkg/hostarch"
25+
"gvisor.dev/gvisor/pkg/marshal/primitive"
2426
"gvisor.dev/gvisor/pkg/sentry/arch"
2527
"gvisor.dev/gvisor/pkg/sentry/kernel"
2628
"gvisor.dev/gvisor/pkg/sentry/ktime"
@@ -34,6 +36,15 @@ import (
3436
// unrecoverable.
3537
const fileCap = 1024 * 1024
3638

39+
var (
40+
// sizeofPollFD is the size of linux.PollFD struct in bytes.
41+
sizeofPollFD = (*linux.PollFD)(nil).SizeBytes()
42+
43+
// reventsOffsetInPollFD is the byte offset of the REvents field within
44+
// linux.PollFD.
45+
reventsOffsetInPollFD = int(unsafe.Offsetof(linux.PollFD{}.REvents))
46+
)
47+
3748
// Masks for "readable", "writable", and "exceptional" events as defined by
3849
// select(2).
3950
const (
@@ -184,21 +195,28 @@ func doPoll(t *kernel.Task, addr hostarch.Addr, nfds uint, timeout time.Duration
184195
return timeout, 0, err
185196
}
186197

187-
// Compatibility warning: Linux adds POLLHUP and POLLERR just before
188-
// polling, in fs/select.c:do_pollfd(). Since pfd is copied out after
189-
// polling, changing event masks here is an application-visible difference.
190-
// (Linux also doesn't copy out event masks at all, only revents.)
198+
// Linux adds POLLHUP and POLLERR just before polling, in
199+
// fs/select.c:do_pollfd(). We can modify pfd[i].Events because
200+
// it is not copied out after polling (consistent with Linux).
191201
for i := range pfd {
192202
pfd[i].Events |= linux.POLLHUP | linux.POLLERR
193203
}
194204
remainingTimeout, n, err := pollBlock(t, pfd, timeout)
195205
err = linuxerr.ConvertIntr(err, linuxerr.EINTR)
196206

197-
// The poll entries are copied out regardless of whether
198-
// any are set or not. This aligns with the Linux behavior.
207+
// Copy out only the revents field, matching Linux behavior.
208+
// Linux's do_sys_poll() only writes back revents via
209+
// unsafe_put_user(fds->revents, &ufds->revents), never the
210+
// events field. Writing back the full struct would corrupt
211+
// the caller's events mask (e.g. libevent's poll backend),
212+
// causing busy-loops when event_del() fails to fully remove
213+
// an fd from the pollfd array due to stale POLLHUP/POLLERR bits.
199214
if nfds > 0 && err == nil {
200-
if _, err := linux.CopyPollFDSliceOut(t, addr, pfd); err != nil {
201-
return remainingTimeout, 0, err
215+
for i := range pfd {
216+
off := hostarch.Addr(i*sizeofPollFD + reventsOffsetInPollFD)
217+
if _, copyErr := primitive.CopyInt16Out(t, addr+off, pfd[i].REvents); copyErr != nil {
218+
return remainingTimeout, 0, copyErr
219+
}
202220
}
203221
}
204222

test/syscalls/linux/poll.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,55 @@ TEST_F(PollTest, UnpollableFile) {
364364
EXPECT_EQ(poll_fd.revents, POLLIN | POLLOUT);
365365
}
366366

367+
// Test that poll(2) does not write back the events field to userspace.
368+
// Linux's do_sys_poll() only writes back revents via unsafe_put_user(),
369+
// never the events field. This verifies that POLLHUP/POLLERR are not
370+
// injected into the events field.
371+
TEST_F(PollTest, EventsFieldNotModified) {
372+
// Create a pipe.
373+
int fds[2];
374+
ASSERT_THAT(pipe(fds), SyscallSucceeds());
375+
376+
FileDescriptor fd0(fds[0]);
377+
FileDescriptor fd1(fds[1]);
378+
379+
// Close the writer fd so the reader gets POLLHUP.
380+
fd1.reset();
381+
382+
// Poll with only POLLIN in events.
383+
struct pollfd poll_fd = {fd0.get(), POLLIN, 0};
384+
EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
385+
386+
// revents should contain POLLHUP (since writer is closed).
387+
EXPECT_NE(poll_fd.revents & POLLHUP, 0);
388+
389+
// The events field must remain unchanged (only POLLIN, no POLLHUP/POLLERR).
390+
EXPECT_EQ(poll_fd.events, POLLIN);
391+
}
392+
393+
// Test that poll(2) does not write back the events field when POLLERR occurs.
394+
TEST_F(PollTest, EventsFieldNotModifiedOnError) {
395+
// Create a pipe.
396+
int fds[2];
397+
ASSERT_THAT(pipe(fds), SyscallSucceeds());
398+
399+
FileDescriptor fd0(fds[0]);
400+
FileDescriptor fd1(fds[1]);
401+
402+
// Close the reader fd so the writer gets POLLERR.
403+
fd0.reset();
404+
405+
// Poll with only POLLOUT in events.
406+
struct pollfd poll_fd = {fd1.get(), POLLOUT, 0};
407+
EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
408+
409+
// revents should contain POLLERR (since reader is closed).
410+
EXPECT_NE(poll_fd.revents & POLLERR, 0);
411+
412+
// The events field must remain unchanged (only POLLOUT, no POLLERR/POLLHUP).
413+
EXPECT_EQ(poll_fd.events, POLLOUT);
414+
}
415+
367416
} // namespace
368417
} // namespace testing
369418
} // namespace gvisor

0 commit comments

Comments
 (0)