Skip to content

Commit 362dac5

Browse files
committed
Disable inlining of custom io::Error destructor
1 parent 5165714 commit 362dac5

1 file changed

Lines changed: 44 additions & 10 deletions

File tree

library/core/src/io/error/repr_bitpacked.rs

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -227,22 +227,56 @@ impl Repr {
227227
}
228228

229229
impl Drop for Repr {
230+
// What this will end up inlining into the destructor call site is merely a
231+
// bitwise AND of the pointer with the constant TAG_MASK, a comparison to
232+
// the constant TAG_CUSTOM, and a call to drop_custom that is jumped over
233+
// if the tag isn't TAG_CUSTOM.
230234
#[inline]
231235
fn drop(&mut self) {
232-
// SAFETY: We're a Repr, decode_repr is fine. The `Box::from_raw` is
233-
// safe because we're being dropped.
234-
unsafe {
235-
let _ = decode_repr(self.0, |p| {
236-
CustomOwner::from_raw(core::ptr::NonNull::new_unchecked(p))
237-
});
236+
// Inlining the destructor of a custom error does not confer any
237+
// optimization opportunities, as it is type-erased. The cost,
238+
// meanwhile, is a code size increase by a factor of up to 5.4 in the
239+
// case of dropping multiple io::Results in the same function
240+
// (https://godbolt.org/z/8hfGchjsT).
241+
//
242+
// FIXME the 32-bit variant still has the code bloat that is being
243+
// avoided here, see the discussion in
244+
// https://github.com/rust-lang/rust/pull/149146
245+
#[inline(never)]
246+
// The destructor of a custom error value is normally only called
247+
// after it has been displayed or logged, or if it has been downcasted
248+
// and handled appropriately. Displaying and logging errors is
249+
// considered a cold path in most programs, and downcasting is
250+
// similarly not something that is considered performance-critical.
251+
// Thus, in the interest of happy path performance, have block
252+
// placement get the calls to this function out of the way.
253+
#[cold]
254+
unsafe fn drop_custom(ptr: NonNull<()>) {
255+
unsafe {
256+
// SAFETY: we're being dropped
257+
let _ = CustomOwner::from_raw(ptr.byte_sub(TAG_CUSTOM).cast::<Custom>());
258+
}
259+
}
260+
// The sum of the below code and the body of drop_custom is a
261+
// transclusion from decode_repr. This used to be a call to
262+
// decode_repr, but we need the amount of IR produced here to be the
263+
// smallest it can possibly be, as this destructor is ubiquitously
264+
// inlined into every io::Error destruction site. It has been confirmed
265+
// via visual inspection of the disassembly that the codegen for that
266+
// was absolutely atrocious. As such, the duplication of logic is
267+
// justified.
268+
if self.0.addr().get() & TAG_MASK == TAG_CUSTOM {
269+
// SAFETY: as per decode_repr
270+
unsafe { drop_custom(self.0) };
238271
}
239272
}
240273
}
241274

242-
// Shared helper to decode a `Repr`'s internal pointer into an ErrorData.
243-
//
244-
// Safety: `ptr`'s bits should be encoded as described in the document at the
245-
// top (it should `some_repr.0`)
275+
/// Shared helper to decode a [`Repr`]'s internal pointer into an [`ErrorData`].
276+
///
277+
/// # Safety
278+
/// `ptr`'s bits should be encoded as described in the document at the
279+
/// top (it should be `some_repr.0`).
246280
#[inline]
247281
unsafe fn decode_repr<C, F>(ptr: NonNull<()>, make_custom: F) -> ErrorData<C>
248282
where

0 commit comments

Comments
 (0)