@@ -797,33 +797,12 @@ impl Assembler {
797797 * left = split_memory_read ( asm, * left, SCRATCH0_OPND ) ;
798798 * right = split_memory_read ( asm, * right, SCRATCH1_OPND ) ;
799799 let mem_out = split_memory_write ( out, SCRATCH0_OPND ) ;
800- let reg_out = out. clone ( ) ;
801-
802- let has_jo_mul = idx + 1 < linearized_insns. len ( ) && matches ! ( linearized_insns[ idx + 1 ] , Insn :: JoMul ( _) ) ;
803800
804801 asm. push_insn ( insn) ;
805802
806- // When JoMul follows, the emit pass needs Mul → RShift → JoMul
807- // to be contiguous so it can pair smulh+mul+asr+cmp. The spill
808- // Store must NOT be between Mul and RShift. Instead, we record
809- // the spill destination in the RShift and have the emit pass
810- // emit the store between mul and asr (before asr clobbers the
811- // mul output register).
812- if has_jo_mul {
813- // Emit RShift immediately after Mul (before any Store)
814- asm. push_insn ( Insn :: RShift { out : SCRATCH0_OPND , opnd : reg_out, shift : Opnd :: UImm ( 63 ) } ) ;
815- // Emit spill Store after RShift. The emit pass will
816- // skip it along with the RShift, and emit the spill
817- // at the right point (between mul and asr).
818- if let Some ( mem_out) = mem_out {
819- let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
820- asm. store ( mem_out, reg_out) ;
821- }
822- } else {
823- if let Some ( mem_out) = mem_out {
824- let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
825- asm. store ( mem_out, SCRATCH0_OPND ) ;
826- }
803+ if let Some ( mem_out) = mem_out {
804+ let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
805+ asm. store ( mem_out, SCRATCH0_OPND ) ;
827806 }
828807 }
829808 Insn :: LShift { opnd, out, .. } |
@@ -928,6 +907,10 @@ impl Assembler {
928907 }
929908 }
930909 }
910+ Insn :: JoMul ( opnd, _) => {
911+ * opnd = split_memory_read ( asm, * opnd, SCRATCH0_OPND ) ;
912+ asm. push_insn ( insn) ;
913+ }
931914 & mut Insn :: PatchPoint { ref target, invariant, version } => {
932915 split_patch_point ( asm, target, invariant, version) ;
933916 }
@@ -1252,49 +1235,12 @@ impl Assembler {
12521235 }
12531236 } ,
12541237 Insn :: Mul { left, right, out } => {
1255- // Look for the RShift+JoMul overflow check sequence inserted
1256- // by arm64_scratch_split. When the Mul output is spilled,
1257- // scratch_split emits [Mul, RShift, Store, JoMul] with the
1258- // Store after the RShift. Without a spill, it's just
1259- // [Mul, RShift, JoMul].
1260- let rshift_insn = match ( insns. get ( insn_idx + 1 ) , insns. get ( insn_idx + 2 ) , insns. get ( insn_idx + 3 ) ) {
1261- ( Some ( & Insn :: RShift { out : out_sign, opnd : out_opnd, shift : out_shift } ) , Some ( & Insn :: Store { dest : spill_dest, src : spill_src } ) , Some ( Insn :: JoMul ( _) ) ) => {
1262- Some ( ( out_sign, out_opnd, out_shift, Some ( ( spill_dest, spill_src) ) ) )
1263- }
1264- ( Some ( & Insn :: RShift { out : out_sign, opnd : out_opnd, shift : out_shift } ) , Some ( Insn :: JoMul ( _) ) , _) => {
1265- Some ( ( out_sign, out_opnd, out_shift, None ) )
1266- }
1267- _ => None ,
1268- } ;
1269-
1270- if let Some ( ( out_sign, out_opnd, out_shift, spill) ) = rshift_insn {
1271- // Compute the high 64 bits into EMIT_OPND (X16)
1272- smulh ( cb, Self :: EMIT_OPND , left. into ( ) , right. into ( ) ) ;
1273-
1274- // Compute the low 64 bits into `out` (may clobber inputs,
1275- // so this must come after smulh)
1276- mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
1277-
1278- // If the mul result was spilled, emit the store now
1279- // BEFORE asr clobbers the output register with the sign
1280- // bit. The spill source is always a register (SCRATCH0),
1281- // not EMIT_OPND (X16), so the smulh result is preserved.
1282- if let Some ( ( spill_dest, spill_src) ) = spill {
1283- stur ( cb, spill_src. into ( ) , spill_dest. into ( ) ) ;
1284- insn_idx += 1 ; // will skip the Store insn
1285- }
1286-
1287- // Shift to extract the sign bit of the 64-bit mul result
1288- asr ( cb, out_sign. into ( ) , out_opnd. into ( ) , out_shift. into ( ) ) ;
1289- insn_idx += 1 ; // skip the RShift
1290-
1291- // If the high 64-bits are not all zeros or all ones,
1292- // matching the sign bit, then we have an overflow
1293- cmp ( cb, Self :: EMIT_OPND , out_sign. into ( ) ) ;
1294- // JoMul will emit_conditional_jump::<{Condition::NE}>
1295- } else {
1296- mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
1297- }
1238+ // Speculatively emit smulh into EMIT_OPND (X16) for a
1239+ // potential following JoMul. If no JoMul follows, X16 is
1240+ // simply overwritten later. Must come before mul since mul
1241+ // may clobber an input register.
1242+ smulh ( cb, Self :: EMIT_OPND , left. into ( ) , right. into ( ) ) ;
1243+ mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
12981244 } ,
12991245 Insn :: And { left, right, out } => {
13001246 and ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
@@ -1558,7 +1504,14 @@ impl Assembler {
15581504 Insn :: Je ( target) | Insn :: Jz ( target) => {
15591505 emit_conditional_jump :: < { Condition :: EQ } > ( self , cb, target. clone ( ) ) ;
15601506 } ,
1561- Insn :: Jne ( target) | Insn :: Jnz ( target) | Insn :: JoMul ( target) => {
1507+ Insn :: Jne ( target) | Insn :: Jnz ( target) => {
1508+ emit_conditional_jump :: < { Condition :: NE } > ( self , cb, target. clone ( ) ) ;
1509+ } ,
1510+ Insn :: JoMul ( val, target) => {
1511+ // Compare smulh result (in EMIT_OPND/X16 from preceding Mul)
1512+ // with the mul output sign-extended from bit 62. Uses the
1513+ // barrel shifter built into CMP for a single instruction.
1514+ cmp_shifted ( cb, Self :: EMIT_OPND , val. into ( ) , 0b10 , 62 ) ; // ASR #62
15621515 emit_conditional_jump :: < { Condition :: NE } > ( self , cb, target. clone ( ) ) ;
15631516 } ,
15641517 Insn :: Jl ( target) => {
@@ -1809,11 +1762,12 @@ mod tests {
18091762 asm. compile_with_num_regs ( & mut cb, 2 ) ;
18101763
18111764 assert_disasm_snapshot ! ( cb. disasm( ) , @"
1812- 0x0: mov x0, #3
1813- 0x4: mul x0, x9, x0
1814- 0x8: mov x1, x0
1765+ 0x0: mov x0, #3
1766+ 0x4: smulh x16, x9, x0
1767+ 0x8: mul x0, x9, x0
1768+ 0xc: mov x1, x0
18151769 " ) ;
1816- assert_snapshot ! ( cb. hexdump( ) , @"600080d2207d009be10300aa " ) ;
1770+ assert_snapshot ! ( cb. hexdump( ) , @"600080d2307d409b207d009be10300aa " ) ;
18171771 }
18181772
18191773 #[ test]
0 commit comments