Skip to content

Commit 1cd9371

Browse files
committed
fix(io_uring): handle EINTR more robustly in io_uring_enter()
io_uring_enter() might return EINTR when called with IORING_ENTER_GETEVENTS. Make the submit() call a bit more robust by retrying when we observe this error. Retry up to 3 times. This is a semi-arbitrary choice. The assumption is that if an interrupt arrives, a subsequent call to the system call will most likely succeed. If we keep receiving interrupts, something is more severely broken, so propagate the error to the caller. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent bd85e43 commit 1cd9371

1 file changed

Lines changed: 33 additions & 20 deletions

File tree

src/vmm/src/io_uring/queue/submission.rs

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::fmt::Debug;
5-
use std::io::Error as IOError;
5+
use std::io::{Error as IOError, ErrorKind};
66
use std::mem;
77
use std::num::Wrapping;
88
use std::os::unix::io::RawFd;
@@ -130,26 +130,39 @@ impl SubmissionQueue {
130130
if min_complete > 0 {
131131
flags |= generated::IORING_ENTER_GETEVENTS;
132132
}
133-
// SAFETY: Safe because values are valid and we check the return value.
134-
let submitted = SyscallReturnCode(unsafe {
135-
libc::syscall(
136-
libc::SYS_io_uring_enter,
137-
self.io_uring_fd,
138-
self.to_submit,
139-
min_complete,
140-
flags,
141-
std::ptr::null::<libc::sigset_t>(),
142-
)
143-
})
144-
.into_result()?;
145-
// It's safe to convert to u32 since the syscall didn't return an error.
146-
let submitted = u32::try_from(submitted).unwrap();
147-
148-
// This is safe since submitted <= self.to_submit. However we use a saturating_sub
149-
// for extra safety.
150-
self.to_submit = self.to_submit.saturating_sub(submitted);
151133

152-
Ok(submitted)
134+
// The number of retries is completely arbitrary here. I assume that this
135+
// will happen rarely and that, if it happens, a subsequent retry will immediately
136+
// succeed. If we hit a storm of interrupts, something else is probably wrong,
137+
// so let the consumer know.
138+
let mut eintr_retries = 3;
139+
loop {
140+
// SAFETY: Safe because values are valid and we check the return value.
141+
let ret = SyscallReturnCode(unsafe {
142+
libc::syscall(
143+
libc::SYS_io_uring_enter,
144+
self.io_uring_fd,
145+
self.to_submit,
146+
min_complete,
147+
flags,
148+
std::ptr::null::<libc::sigset_t>(),
149+
)
150+
})
151+
.into_result();
152+
match ret {
153+
Ok(num) => {
154+
// It's safe to convert to u32 since the syscall didn't return an error.
155+
let submitted = u32::try_from(num).unwrap();
156+
self.to_submit = self.to_submit.saturating_sub(submitted);
157+
return Ok(submitted);
158+
}
159+
Err(err) if err.kind() == ErrorKind::Interrupted && eintr_retries > 0 => {
160+
eintr_retries -= 1;
161+
continue;
162+
}
163+
Err(err) => return Err(SQueueError::from(err)),
164+
}
165+
}
153166
}
154167

155168
fn mmap(

0 commit comments

Comments
 (0)