Skip to content

Potential optimization issues for match on integers #154851

@MauriceKayser

Description

@MauriceKayser

I'm trying to convert an integer to an enum variant. I couldn't find a builtin way to do it, so I tried implementing different TryFrom strategies manually, which I expected to generate the same assembly code, and was surprised to see how different the results were. I wonder if these are optimization issues?

Basic example with good results

These are 4 different attempts at implementing TryFrom<u8> for #[repr(u8)] Enum:

#[derive(Clone, Copy)]
#[repr(u8)]
enum Enum {
    Variant0,
    Variant1,
    Variant4 = 4,
    Variant5,
    Variant7 = 7,
}

impl Enum {
    fn const_copy(value: u8) -> Result<Self, u8> {
        const VARIANT_0: u8 = Enum::Variant0 as u8;
        const VARIANT_1: u8 = Enum::Variant1 as u8;
        const VARIANT_4: u8 = Enum::Variant4 as u8;
        const VARIANT_5: u8 = Enum::Variant5 as u8;
        const VARIANT_7: u8 = Enum::Variant7 as u8;
    
        match value {
            VARIANT_0 => Ok(Self::Variant0),
            VARIANT_1 => Ok(Self::Variant1),
            VARIANT_4 => Ok(Self::Variant4),
            VARIANT_5 => Ok(Self::Variant5),
            VARIANT_7 => Ok(Self::Variant7),
            _ => Err(value),
        }
    }

    fn const_transmute(value: u8) -> Result<Self, u8> {
        const VARIANT_0: u8 = Enum::Variant0 as u8;
        const VARIANT_1: u8 = Enum::Variant1 as u8;
        const VARIANT_4: u8 = Enum::Variant4 as u8;
        const VARIANT_5: u8 = Enum::Variant5 as u8;
        const VARIANT_7: u8 = Enum::Variant7 as u8;
    
        match value {
            VARIANT_0 |
            VARIANT_1 |
            VARIANT_4 |
            VARIANT_5 |
            VARIANT_7 => Ok(unsafe { ::core::mem::transmute(value) }),
            _ => Err(value),
        }
    }

    fn guard_copy(value: u8) -> Result<Self, u8> {
        match value {
            v if v == Self::Variant0 as u8 => Ok(Self::Variant0),
            v if v == Self::Variant1 as u8 => Ok(Self::Variant1),
            v if v == Self::Variant4 as u8 => Ok(Self::Variant4),
            v if v == Self::Variant5 as u8 => Ok(Self::Variant5),
            v if v == Self::Variant7 as u8 => Ok(Self::Variant7),
            _ => Err(value),
        }
    }

    fn guard_transmute(value: u8) -> Result<Self, u8> {
        match value {
            v if
                v == Self::Variant0 as u8 ||
                v == Self::Variant1 as u8 ||
                v == Self::Variant4 as u8 ||
                v == Self::Variant5 as u8 ||
                v == Self::Variant7 as u8 => Ok(unsafe { ::core::mem::transmute(v) }),
            _ => Err(value),
        }
    }
}

The following assembly code is generated on godbolt.org by rustc 1.94.0 x86_64 with -C opt-level=3:

example::Enum::const_copy:
    mov     ecx, edi
    cmp     cl, 8
    setae   dl
    mov     al, 76
    shr     al, cl
    or      al, dl
    mov     edx, ecx
    ret

example::Enum::guard_copy = example::Enum::const_copy
example::Enum::const_transmute = example::Enum::const_copy
example::Enum::guard_transmute = example::Enum::const_copy

const_copy looks pretty optimized, and the other 3 functions are deduplicated. So far, so good.

Easier test creation

The following macro simplifies test generation:

Rust code
macro_rules! int_to_enum {
    (impl TryFrom<$from:ident> for #[repr($repr:ident)] $name:ident { $($variant:ident $( = $discriminant:expr )?,)+ }) => {
        #[derive(Clone, Copy)]
        #[repr($repr)]
        pub enum $name {
            $($variant $( = $discriminant)?,)+
        }

        impl $name {
          // const_copy:
          //  const X = X;
          //  const Y = Y;
          //  ..
          //  match value {
          //      X => X,
          //      Y => Y,
          //      ..
          //  }
          int_to_enum!(#impl_consts: const_copy, $name, $from, { $($variant,)+ }, {
              $($variant => Ok(Self::$variant)),+
          });
          // const_transmute:
          //  const X = X;
          //  const Y = Y;
          //  ..
          //  match value {
          //      X | Y | .. => transmute(value as repr)
          //  }
          int_to_enum!(#impl_consts: const_transmute, $name, $from, { $($variant,)+ }, {
              $(value @ $variant)|+ => Ok(unsafe { ::core::mem::transmute(value as $repr) })
          });
          // guard_copy:
          //  match value {
          //      v if v == X => X,
          //      v if v == Y => Y,
          //      ..
          //  }
          int_to_enum!(#impl_fn: guard_copy, $from, {}, {
              $(v if v == Self::$variant as $from => Ok(Self::$variant)),+
          });
          // guard_transmute:
          //  match value {
          //      v if v == X || v == Y => transmute(v as repr)
          //  }
          int_to_enum!(#impl_fn: guard_transmute, $from, {}, {
              v if $(v == Self::$variant as $from)||+ => Ok(unsafe { ::core::mem::transmute(v as $repr) })
          });
        }
    };
    (#impl_consts: $fn_name:ident, $name:ident, $from:ident, { $($variant:ident,)+ }, { $($match_body:tt)+ }) => {
        int_to_enum!(#impl_fn: $fn_name, $from, {
            $(const $variant: $from = $name::$variant as $from;)+
        }, { $($match_body)+ });
    };
    (#impl_fn: $fn_name:ident, $from:ident, { $($fn_body:tt)* }, { $($match_body:tt)+ }) => {
        #[allow(non_upper_case_globals)]
        #[inline(never)]
        pub fn $fn_name(value: $from) -> Result<Self, $from> {
            $($fn_body)*

            match value {
                $($match_body)+,
                _ => Err(value),
            }
        }
    };
}

int_to_enum!(impl TryFrom<u8> for #[repr(u8)] Enum {
    Variant0,
    Variant1,
    Variant4 = 4,
    Variant5,
    Variant7 = 7,
});

Less ideal example 1

Generating a TryFrom<u16> instead results in the following assembly code on godbolt.org:

example::Enum::const_copy:
    mov     eax, edi
    cmp     ax, 7
    ja      .LBB0_1
    movzx   ecx, ax
    shl     ecx, 2
    lea     rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
    mov     edx, dword ptr [rcx + rdx]
    lea     rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
    mov     ecx, dword ptr [rcx + rsi]
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret
.LBB0_1:
    mov     ecx, 1
    xor     edx, edx
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret

example::Enum::guard_copy:
    mov     eax, edi
    cmp     ax, 7
    ja      .LBB1_1
    movzx   ecx, ax
    shl     ecx, 2
    lea     rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
    mov     edx, dword ptr [rcx + rdx]
    lea     rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
    mov     ecx, dword ptr [rcx + rsi]
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret
.LBB1_1:
    mov     ecx, 1
    xor     edx, edx
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret

example::Enum::const_transmute:
    mov     eax, edi
    cmp     ax, 7
    ja      .LBB2_1
    movzx   ecx, ax
    shl     ecx, 2
    lea     rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
    mov     edx, dword ptr [rcx + rdx]
    lea     rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
    mov     ecx, dword ptr [rcx + rsi]
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret
.LBB2_1:
    mov     ecx, 1
    xor     edx, edx
    shl     eax, 16
    or      eax, edx
    or      eax, ecx
    ret

.Lswitch.table.example::Enum::guard_transmute:
    .long   0
    .long   256
    .long   0
    .long   0
    .long   1024
    .long   1280
    .long   0
    .long   1792

.Lswitch.table.example::Enum::guard_transmute.4:
    .long   0
    .long   0
    .long   1
    .long   1
    .long   0
    .long   0
    .long   1
    .long   0

example::Enum::guard_transmute = example::Enum::const_transmute

3 seemingly identical implementations of a lookup table, only guard_transmute got deduplicated.

Less ideal example 2

Generating a TryFrom<u64> instead results in the following assembly code on godbolt.org:

example::Enum::const_copy:
    mov     rax, rdi
    cmp     rsi, 7
    ja      .LBB0_2
    lea     rcx, [rip + .LJTI0_0]
    movsxd  rdx, dword ptr [rcx + 4*rsi]
    add     rdx, rcx
    jmp     rdx
.LBB0_3:
    mov     byte ptr [rax + 1], 0
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LBB0_2:
    mov     qword ptr [rax + 8], rsi
    mov     cl, 1
    mov     byte ptr [rax], cl
    ret
.LBB0_5:
    mov     byte ptr [rax + 1], 4
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LBB0_4:
    mov     byte ptr [rax + 1], 1
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LBB0_6:
    mov     byte ptr [rax + 1], 5
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LBB0_7:
    mov     byte ptr [rax + 1], 7
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LJTI0_0:
    .long   .LBB0_3-.LJTI0_0
    .long   .LBB0_4-.LJTI0_0
    .long   .LBB0_2-.LJTI0_0
    .long   .LBB0_2-.LJTI0_0
    .long   .LBB0_5-.LJTI0_0
    .long   .LBB0_6-.LJTI0_0
    .long   .LBB0_2-.LJTI0_0
    .long   .LBB0_7-.LJTI0_0

example::Enum::const_transmute:
    mov     rax, rdi
    cmp     rsi, 7
    ja      .LBB1_2
    mov     ecx, 179
    bt      rcx, rsi
    jae     .LBB1_2
    mov     byte ptr [rax + 1], sil
    xor     ecx, ecx
    mov     byte ptr [rax], cl
    ret
.LBB1_2:
    mov     qword ptr [rax + 8], rsi
    mov     cl, 1
    mov     byte ptr [rax], cl
    ret

example::Enum::guard_transmute = example::Enum::const_transmute
example::Enum::guard_copy = example::Enum::const_copy

const_copy is implemented as a jump table, while const_transmute looks more similar to the optimal TryFrom<u8> implementation. The other two got deduplicated.

Complete data set

I generated a table of results for easier comparison:

from repr const_copy const_transmute guard_copy guard_transmute
u8 u8 optimal =const_copy =const_copy =const_copy
u16 u8 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u32 u8 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u64 u8 jump_tbl optimal =const_copy =const_transmute
u128 u8 jump_tbl optimal =const_copy =const_transmute
u8 u16 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u16 u16 lookup_tbl =const_copy lookup_tbl =const_copy
u32 u16 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u64 u16 jump_tbl optimal =const_copy =const_transmute
u128 u16 jump_tbl optimal =const_copy =const_transmute
u8 u32 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u16 u32 lookup_tbl lookup_tbl lookup_tbl =const_transmute
u32 u32 lookup_tbl =const_copy lookup_tbl =const_copy
u64 u32 jump_tbl optimal =const_copy =const_transmute
u128 u32 jump_tbl optimal =const_copy =const_transmute
u8 u64 jump_tbl optimal =const_copy =const_transmute
u16 u64 jump_tbl optimal =const_copy =const_transmute
u32 u64 jump_tbl optimal =const_copy =const_transmute
u64 u64 lookup_tbl =const_copy lookup_tbl =const_copy
u128 u64 jump_tbl optimal =const_copy =const_transmute
u8 u128 jump_tbl optimal =const_copy =const_transmute
u16 u128 jump_tbl optimal =const_copy =const_transmute
u32 u128 jump_tbl optimal =const_copy =const_transmute
u64 u128 jump_tbl optimal =const_copy =const_transmute
u128 u128 jump_tbl optimal =const_copy optimal

Ideally all const_copy entries in the table should generate the optimal assembly code similar to the TryFrom<u8> for #[repr(u8)] Enum code, and all others should be deduplicated (=const_copy).

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bugCategory: This is a bug.needs-triageThis issue may need triage. Remove it if it has been sufficiently triaged.

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions