I'm trying to convert an integer to an enum variant. I couldn't find a builtin way to do it, so I tried implementing different TryFrom strategies manually, which I expected to generate the same assembly code, and was surprised to see how different the results were. I wonder if these are optimization issues?
Basic example with good results
These are 4 different attempts at implementing TryFrom<u8> for #[repr(u8)] Enum:
#[derive(Clone, Copy)]
#[repr(u8)]
enum Enum {
Variant0,
Variant1,
Variant4 = 4,
Variant5,
Variant7 = 7,
}
impl Enum {
fn const_copy(value: u8) -> Result<Self, u8> {
const VARIANT_0: u8 = Enum::Variant0 as u8;
const VARIANT_1: u8 = Enum::Variant1 as u8;
const VARIANT_4: u8 = Enum::Variant4 as u8;
const VARIANT_5: u8 = Enum::Variant5 as u8;
const VARIANT_7: u8 = Enum::Variant7 as u8;
match value {
VARIANT_0 => Ok(Self::Variant0),
VARIANT_1 => Ok(Self::Variant1),
VARIANT_4 => Ok(Self::Variant4),
VARIANT_5 => Ok(Self::Variant5),
VARIANT_7 => Ok(Self::Variant7),
_ => Err(value),
}
}
fn const_transmute(value: u8) -> Result<Self, u8> {
const VARIANT_0: u8 = Enum::Variant0 as u8;
const VARIANT_1: u8 = Enum::Variant1 as u8;
const VARIANT_4: u8 = Enum::Variant4 as u8;
const VARIANT_5: u8 = Enum::Variant5 as u8;
const VARIANT_7: u8 = Enum::Variant7 as u8;
match value {
VARIANT_0 |
VARIANT_1 |
VARIANT_4 |
VARIANT_5 |
VARIANT_7 => Ok(unsafe { ::core::mem::transmute(value) }),
_ => Err(value),
}
}
fn guard_copy(value: u8) -> Result<Self, u8> {
match value {
v if v == Self::Variant0 as u8 => Ok(Self::Variant0),
v if v == Self::Variant1 as u8 => Ok(Self::Variant1),
v if v == Self::Variant4 as u8 => Ok(Self::Variant4),
v if v == Self::Variant5 as u8 => Ok(Self::Variant5),
v if v == Self::Variant7 as u8 => Ok(Self::Variant7),
_ => Err(value),
}
}
fn guard_transmute(value: u8) -> Result<Self, u8> {
match value {
v if
v == Self::Variant0 as u8 ||
v == Self::Variant1 as u8 ||
v == Self::Variant4 as u8 ||
v == Self::Variant5 as u8 ||
v == Self::Variant7 as u8 => Ok(unsafe { ::core::mem::transmute(v) }),
_ => Err(value),
}
}
}
The following assembly code is generated on godbolt.org by rustc 1.94.0 x86_64 with -C opt-level=3:
example::Enum::const_copy:
mov ecx, edi
cmp cl, 8
setae dl
mov al, 76
shr al, cl
or al, dl
mov edx, ecx
ret
example::Enum::guard_copy = example::Enum::const_copy
example::Enum::const_transmute = example::Enum::const_copy
example::Enum::guard_transmute = example::Enum::const_copy
const_copy looks pretty optimized, and the other 3 functions are deduplicated. So far, so good.
Easier test creation
The following macro simplifies test generation:
Rust code
macro_rules! int_to_enum {
(impl TryFrom<$from:ident> for #[repr($repr:ident)] $name:ident { $($variant:ident $( = $discriminant:expr )?,)+ }) => {
#[derive(Clone, Copy)]
#[repr($repr)]
pub enum $name {
$($variant $( = $discriminant)?,)+
}
impl $name {
// const_copy:
// const X = X;
// const Y = Y;
// ..
// match value {
// X => X,
// Y => Y,
// ..
// }
int_to_enum!(#impl_consts: const_copy, $name, $from, { $($variant,)+ }, {
$($variant => Ok(Self::$variant)),+
});
// const_transmute:
// const X = X;
// const Y = Y;
// ..
// match value {
// X | Y | .. => transmute(value as repr)
// }
int_to_enum!(#impl_consts: const_transmute, $name, $from, { $($variant,)+ }, {
$(value @ $variant)|+ => Ok(unsafe { ::core::mem::transmute(value as $repr) })
});
// guard_copy:
// match value {
// v if v == X => X,
// v if v == Y => Y,
// ..
// }
int_to_enum!(#impl_fn: guard_copy, $from, {}, {
$(v if v == Self::$variant as $from => Ok(Self::$variant)),+
});
// guard_transmute:
// match value {
// v if v == X || v == Y => transmute(v as repr)
// }
int_to_enum!(#impl_fn: guard_transmute, $from, {}, {
v if $(v == Self::$variant as $from)||+ => Ok(unsafe { ::core::mem::transmute(v as $repr) })
});
}
};
(#impl_consts: $fn_name:ident, $name:ident, $from:ident, { $($variant:ident,)+ }, { $($match_body:tt)+ }) => {
int_to_enum!(#impl_fn: $fn_name, $from, {
$(const $variant: $from = $name::$variant as $from;)+
}, { $($match_body)+ });
};
(#impl_fn: $fn_name:ident, $from:ident, { $($fn_body:tt)* }, { $($match_body:tt)+ }) => {
#[allow(non_upper_case_globals)]
#[inline(never)]
pub fn $fn_name(value: $from) -> Result<Self, $from> {
$($fn_body)*
match value {
$($match_body)+,
_ => Err(value),
}
}
};
}
int_to_enum!(impl TryFrom<u8> for #[repr(u8)] Enum {
Variant0,
Variant1,
Variant4 = 4,
Variant5,
Variant7 = 7,
});
Less ideal example 1
Generating a TryFrom<u16> instead results in the following assembly code on godbolt.org:
example::Enum::const_copy:
mov eax, edi
cmp ax, 7
ja .LBB0_1
movzx ecx, ax
shl ecx, 2
lea rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
mov edx, dword ptr [rcx + rdx]
lea rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
mov ecx, dword ptr [rcx + rsi]
shl eax, 16
or eax, edx
or eax, ecx
ret
.LBB0_1:
mov ecx, 1
xor edx, edx
shl eax, 16
or eax, edx
or eax, ecx
ret
example::Enum::guard_copy:
mov eax, edi
cmp ax, 7
ja .LBB1_1
movzx ecx, ax
shl ecx, 2
lea rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
mov edx, dword ptr [rcx + rdx]
lea rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
mov ecx, dword ptr [rcx + rsi]
shl eax, 16
or eax, edx
or eax, ecx
ret
.LBB1_1:
mov ecx, 1
xor edx, edx
shl eax, 16
or eax, edx
or eax, ecx
ret
example::Enum::const_transmute:
mov eax, edi
cmp ax, 7
ja .LBB2_1
movzx ecx, ax
shl ecx, 2
lea rdx, [rip + .Lswitch.table.example::Enum::guard_transmute]
mov edx, dword ptr [rcx + rdx]
lea rsi, [rip + .Lswitch.table.example::Enum::guard_transmute.4]
mov ecx, dword ptr [rcx + rsi]
shl eax, 16
or eax, edx
or eax, ecx
ret
.LBB2_1:
mov ecx, 1
xor edx, edx
shl eax, 16
or eax, edx
or eax, ecx
ret
.Lswitch.table.example::Enum::guard_transmute:
.long 0
.long 256
.long 0
.long 0
.long 1024
.long 1280
.long 0
.long 1792
.Lswitch.table.example::Enum::guard_transmute.4:
.long 0
.long 0
.long 1
.long 1
.long 0
.long 0
.long 1
.long 0
example::Enum::guard_transmute = example::Enum::const_transmute
3 seemingly identical implementations of a lookup table, only guard_transmute got deduplicated.
Less ideal example 2
Generating a TryFrom<u64> instead results in the following assembly code on godbolt.org:
example::Enum::const_copy:
mov rax, rdi
cmp rsi, 7
ja .LBB0_2
lea rcx, [rip + .LJTI0_0]
movsxd rdx, dword ptr [rcx + 4*rsi]
add rdx, rcx
jmp rdx
.LBB0_3:
mov byte ptr [rax + 1], 0
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LBB0_2:
mov qword ptr [rax + 8], rsi
mov cl, 1
mov byte ptr [rax], cl
ret
.LBB0_5:
mov byte ptr [rax + 1], 4
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LBB0_4:
mov byte ptr [rax + 1], 1
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LBB0_6:
mov byte ptr [rax + 1], 5
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LBB0_7:
mov byte ptr [rax + 1], 7
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LJTI0_0:
.long .LBB0_3-.LJTI0_0
.long .LBB0_4-.LJTI0_0
.long .LBB0_2-.LJTI0_0
.long .LBB0_2-.LJTI0_0
.long .LBB0_5-.LJTI0_0
.long .LBB0_6-.LJTI0_0
.long .LBB0_2-.LJTI0_0
.long .LBB0_7-.LJTI0_0
example::Enum::const_transmute:
mov rax, rdi
cmp rsi, 7
ja .LBB1_2
mov ecx, 179
bt rcx, rsi
jae .LBB1_2
mov byte ptr [rax + 1], sil
xor ecx, ecx
mov byte ptr [rax], cl
ret
.LBB1_2:
mov qword ptr [rax + 8], rsi
mov cl, 1
mov byte ptr [rax], cl
ret
example::Enum::guard_transmute = example::Enum::const_transmute
example::Enum::guard_copy = example::Enum::const_copy
const_copy is implemented as a jump table, while const_transmute looks more similar to the optimal TryFrom<u8> implementation. The other two got deduplicated.
Complete data set
I generated a table of results for easier comparison:
| from |
repr |
const_copy |
const_transmute |
guard_copy |
guard_transmute |
| u8 |
u8 |
optimal |
=const_copy |
=const_copy |
=const_copy |
| u16 |
u8 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u32 |
u8 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u64 |
u8 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u128 |
u8 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
|
|
|
|
|
|
| u8 |
u16 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u16 |
u16 |
lookup_tbl |
=const_copy |
lookup_tbl |
=const_copy |
| u32 |
u16 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u64 |
u16 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u128 |
u16 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
|
|
|
|
|
|
| u8 |
u32 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u16 |
u32 |
lookup_tbl |
lookup_tbl |
lookup_tbl |
=const_transmute |
| u32 |
u32 |
lookup_tbl |
=const_copy |
lookup_tbl |
=const_copy |
| u64 |
u32 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u128 |
u32 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
|
|
|
|
|
|
| u8 |
u64 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u16 |
u64 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u32 |
u64 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u64 |
u64 |
lookup_tbl |
=const_copy |
lookup_tbl |
=const_copy |
| u128 |
u64 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
|
|
|
|
|
|
| u8 |
u128 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u16 |
u128 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u32 |
u128 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u64 |
u128 |
jump_tbl |
optimal |
=const_copy |
=const_transmute |
| u128 |
u128 |
jump_tbl |
optimal |
=const_copy |
optimal |
Ideally all const_copy entries in the table should generate the optimal assembly code similar to the TryFrom<u8> for #[repr(u8)] Enum code, and all others should be deduplicated (=const_copy).
I'm trying to convert an integer to an enum variant. I couldn't find a builtin way to do it, so I tried implementing different
TryFromstrategies manually, which I expected to generate the same assembly code, and was surprised to see how different the results were. I wonder if these are optimization issues?Basic example with good results
These are 4 different attempts at implementing
TryFrom<u8> for #[repr(u8)] Enum:The following assembly code is generated on godbolt.org by rustc 1.94.0 x86_64 with
-C opt-level=3:const_copylooks pretty optimized, and the other 3 functions are deduplicated. So far, so good.Easier test creation
The following macro simplifies test generation:
Rust code
Less ideal example 1
Generating a
TryFrom<u16>instead results in the following assembly code on godbolt.org:3 seemingly identical implementations of a lookup table, only
guard_transmutegot deduplicated.Less ideal example 2
Generating a
TryFrom<u64>instead results in the following assembly code on godbolt.org:const_copyis implemented as a jump table, whileconst_transmutelooks more similar to the optimalTryFrom<u8>implementation. The other two got deduplicated.Complete data set
I generated a table of results for easier comparison:
Ideally all
const_copyentries in the table should generate theoptimalassembly code similar to theTryFrom<u8> for #[repr(u8)] Enumcode, and all others should be deduplicated (=const_copy).