@@ -227,22 +227,56 @@ impl Repr {
227227}
228228
229229impl Drop for Repr {
230+ // What this will end up inlining into the destructor call site is merely a
231+ // bitwise AND of the pointer with the constant TAG_MASK, a comparison to
232+ // the constant TAG_CUSTOM, and a call to drop_custom that is jumped over
233+ // if the tag isn't TAG_CUSTOM.
230234 #[ inline]
231235 fn drop ( & mut self ) {
232- // SAFETY: We're a Repr, decode_repr is fine. The `Box::from_raw` is
233- // safe because we're being dropped.
234- unsafe {
235- let _ = decode_repr ( self . 0 , |p| {
236- CustomOwner :: from_raw ( core:: ptr:: NonNull :: new_unchecked ( p) )
237- } ) ;
236+ // Inlining the destructor of a custom error does not confer any
237+ // optimization opportunities, as it is type-erased. The cost,
238+ // meanwhile, is a code size increase by a factor of up to 5.4 in the
239+ // case of dropping multiple io::Results in the same function
240+ // (https://godbolt.org/z/8hfGchjsT).
241+ //
242+ // FIXME the 32-bit variant still has the code bloat that is being
243+ // avoided here, see the discussion in
244+ // https://github.com/rust-lang/rust/pull/149146
245+ #[ inline( never) ]
246+ // The destructor of a custom error value is normally only called
247+ // after it has been displayed or logged, or if it has been downcasted
248+ // and handled appropriately. Displaying and logging errors is
249+ // considered a cold path in most programs, and downcasting is
250+ // similarly not something that is considered performance-critical.
251+ // Thus, in the interest of happy path performance, have block
252+ // placement get the calls to this function out of the way.
253+ #[ cold]
254+ unsafe fn drop_custom ( ptr : NonNull < ( ) > ) {
255+ unsafe {
256+ // SAFETY: we're being dropped
257+ let _ = CustomOwner :: from_raw ( ptr. byte_sub ( TAG_CUSTOM ) . cast :: < Custom > ( ) ) ;
258+ }
259+ }
260+ // The sum of the below code and the body of drop_custom is a
261+ // transclusion from decode_repr. This used to be a call to
262+ // decode_repr, but we need the amount of IR produced here to be the
263+ // smallest it can possibly be, as this destructor is ubiquitously
264+ // inlined into every io::Error destruction site. It has been confirmed
265+ // via visual inspection of the disassembly that the codegen for that
266+ // was absolutely atrocious. As such, the duplication of logic is
267+ // justified.
268+ if self . 0 . addr ( ) . get ( ) & TAG_MASK == TAG_CUSTOM {
269+ // SAFETY: as per decode_repr
270+ unsafe { drop_custom ( self . 0 ) } ;
238271 }
239272 }
240273}
241274
242- // Shared helper to decode a `Repr`'s internal pointer into an ErrorData.
243- //
244- // Safety: `ptr`'s bits should be encoded as described in the document at the
245- // top (it should `some_repr.0`)
275+ /// Shared helper to decode a [`Repr`]'s internal pointer into an [`ErrorData`].
276+ ///
277+ /// # Safety
278+ /// `ptr`'s bits should be encoded as described in the document at the
279+ /// top (it should be `some_repr.0`).
246280#[ inline]
247281unsafe fn decode_repr < C , F > ( ptr : NonNull < ( ) > , make_custom : F ) -> ErrorData < C >
248282where
0 commit comments