@@ -799,31 +799,18 @@ impl Assembler {
799799 let mem_out = split_memory_write ( out, SCRATCH0_OPND ) ;
800800 let reg_out = out. clone ( ) ;
801801
802- let has_jo_mul = idx + 1 < linearized_insns. len ( ) && matches ! ( linearized_insns[ idx + 1 ] , Insn :: JoMul ( _) ) ;
803-
804802 asm. push_insn ( insn) ;
805803
806- // When JoMul follows, the emit pass needs Mul → RShift → JoMul
807- // to be contiguous so it can pair smulh+mul+asr+cmp. The spill
808- // Store must NOT be between Mul and RShift. Instead, we record
809- // the spill destination in the RShift and have the emit pass
810- // emit the store between mul and asr (before asr clobbers the
811- // mul output register).
812- if has_jo_mul {
813- // Emit RShift immediately after Mul (before any Store)
804+ if let Some ( mem_out) = mem_out {
805+ let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
806+ asm. store ( mem_out, SCRATCH0_OPND ) ;
807+ } ;
808+
809+ // If the next instruction is JoMul
810+ if idx + 1 < linearized_insns. len ( ) && matches ! ( linearized_insns[ idx + 1 ] , Insn :: JoMul ( _) ) {
811+ // Produce a register that is all zeros or all ones
812+ // Based on the sign bit of the 64-bit mul result
814813 asm. push_insn ( Insn :: RShift { out : SCRATCH0_OPND , opnd : reg_out, shift : Opnd :: UImm ( 63 ) } ) ;
815- // Emit spill Store after RShift. The emit pass will
816- // skip it along with the RShift, and emit the spill
817- // at the right point (between mul and asr).
818- if let Some ( mem_out) = mem_out {
819- let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
820- asm. store ( mem_out, reg_out) ;
821- }
822- } else {
823- if let Some ( mem_out) = mem_out {
824- let mem_out = split_large_disp ( asm, mem_out, SCRATCH1_OPND ) ;
825- asm. store ( mem_out, SCRATCH0_OPND ) ;
826- }
827814 }
828815 }
829816 Insn :: LShift { opnd, out, .. } |
@@ -1252,48 +1239,30 @@ impl Assembler {
12521239 }
12531240 } ,
12541241 Insn :: Mul { left, right, out } => {
1255- // Look for the RShift+JoMul overflow check sequence inserted
1256- // by arm64_scratch_split. When the Mul output is spilled,
1257- // scratch_split emits [Mul, RShift, Store, JoMul] with the
1258- // Store after the RShift. Without a spill, it's just
1259- // [Mul, RShift, JoMul].
1260- let rshift_insn = match ( insns. get ( insn_idx + 1 ) , insns. get ( insn_idx + 2 ) , insns. get ( insn_idx + 3 ) ) {
1261- ( Some ( & Insn :: RShift { out : out_sign, opnd : out_opnd, shift : out_shift } ) , Some ( & Insn :: Store { dest : spill_dest, src : spill_src } ) , Some ( Insn :: JoMul ( _) ) ) => {
1262- Some ( ( out_sign, out_opnd, out_shift, Some ( ( spill_dest, spill_src) ) ) )
1263- }
1264- ( Some ( & Insn :: RShift { out : out_sign, opnd : out_opnd, shift : out_shift } ) , Some ( Insn :: JoMul ( _) ) , _) => {
1265- Some ( ( out_sign, out_opnd, out_shift, None ) )
1242+ // If the next two instructions are the RShift created by arm64_scratch_split followed by JoMul
1243+ match ( insns. get ( insn_idx + 1 ) , insns. get ( insn_idx + 2 ) ) {
1244+ ( Some ( Insn :: RShift { out : out_sign, opnd : out_opnd, shift : out_shift } ) , Some ( Insn :: JoMul ( _) ) ) => {
1245+ // Compute the high 64 bits
1246+ smulh ( cb, Self :: EMIT_OPND , left. into ( ) , right. into ( ) ) ;
1247+
1248+ // Compute the low 64 bits
1249+ // This may clobber one of the input registers,
1250+ // so we do it after smulh
1251+ mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
1252+
1253+ // Emit the shift instruction created by arm64_scratch_split, which fills
1254+ // a register with the sign bit of the low 64 bits of the mul result.
1255+ asr ( cb, out_sign. into ( ) , out_opnd. into ( ) , out_shift. into ( ) ) ;
1256+ insn_idx += 1 ; // skip the next Insn::RShift
1257+
1258+ // If the high 64-bits are not all zeros or all ones,
1259+ // matching the sign bit, then we have an overflow
1260+ cmp ( cb, Self :: EMIT_OPND , out_sign. into ( ) ) ;
1261+ // Insn::JoMul will emit_conditional_jump::<{Condition::NE}>
12661262 }
1267- _ => None ,
1268- } ;
1269-
1270- if let Some ( ( out_sign, out_opnd, out_shift, spill) ) = rshift_insn {
1271- // Compute the high 64 bits into EMIT_OPND (X16)
1272- smulh ( cb, Self :: EMIT_OPND , left. into ( ) , right. into ( ) ) ;
1273-
1274- // Compute the low 64 bits into `out` (may clobber inputs,
1275- // so this must come after smulh)
1276- mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
1277-
1278- // If the mul result was spilled, emit the store now
1279- // BEFORE asr clobbers the output register with the sign
1280- // bit. The spill source is always a register (SCRATCH0),
1281- // not EMIT_OPND (X16), so the smulh result is preserved.
1282- if let Some ( ( spill_dest, spill_src) ) = spill {
1283- stur ( cb, spill_src. into ( ) , spill_dest. into ( ) ) ;
1284- insn_idx += 1 ; // will skip the Store insn
1263+ _ => {
1264+ mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
12851265 }
1286-
1287- // Shift to extract the sign bit of the 64-bit mul result
1288- asr ( cb, out_sign. into ( ) , out_opnd. into ( ) , out_shift. into ( ) ) ;
1289- insn_idx += 1 ; // skip the RShift
1290-
1291- // If the high 64-bits are not all zeros or all ones,
1292- // matching the sign bit, then we have an overflow
1293- cmp ( cb, Self :: EMIT_OPND , out_sign. into ( ) ) ;
1294- // JoMul will emit_conditional_jump::<{Condition::NE}>
1295- } else {
1296- mul ( cb, out. into ( ) , left. into ( ) , right. into ( ) ) ;
12971266 }
12981267 } ,
12991268 Insn :: And { left, right, out } => {
0 commit comments