opt: 新增递归倍加模乘惯用法识别，折叠为 64 位宽乘取模

Arctitico · Arctitico · commit f8b7f03655ed · 2026-06-13T16:36:24.000+08:00
新增: ModMulIdiom pass

实现：新增专用指令 MulModInst（操作数 a、b，外加模数字段，结果类型为 i32）；
toString 展开为合法的 LLVM IR（sext/mul/srem/trunc，中间运算为 i64），以兼容 llvmir
测试模式；后端 translate_mulmod 用 64 位 mul + li/rem 完成 lowering；识别 pass
采用 def-use 驱动匹配，注册在 Mem2Reg 之后、GVN 之前。
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -182,6 +182,8 @@ set(IR_SRCS
 	ir/Instructions/StoreInst.cpp
 	ir/Instructions/BinaryInst.h
 	ir/Instructions/BinaryInst.cpp
+	ir/Instructions/MulModInst.h
+	ir/Instructions/MulModInst.cpp
 	ir/Instructions/FCmpInst.h
 	ir/Instructions/FCmpInst.cpp
 	ir/Instructions/ICmpInst.h
@@ -297,6 +299,8 @@ set(IR_SRCS
 	ir/passes/modulePass/SmallFunctionInline.cpp
 	ir/passes/functionPass/TailRecursionElim.h
 	ir/passes/functionPass/TailRecursionElim.cpp
+	ir/passes/functionPass/ModMulIdiom.h
+	ir/passes/functionPass/ModMulIdiom.cpp
 	ir/passes/functionPass/LateLoopCFGCleanup.h
 	ir/passes/functionPass/LateLoopCFGCleanup.cpp
 	ir/passes/functionPass/PhiLowering.h
diff --git a/backend/riscv64/InstSelectorRiscV64.cpp b/backend/riscv64/InstSelectorRiscV64.cpp
@@ -24,6 +24,7 @@
 #include "AllocaInst.h"
 #include "BasicBlock.h"
 #include "BinaryInst.h"
+#include "MulModInst.h"
 #include "BranchInst.h"
 #include "CallInst.h"
 #include "ConstFloat.h"
@@ -504,6 +505,7 @@ InstSelectorRiscV64::InstSelectorRiscV64(
 	translatorHandlers[IRInstOperator::IRINST_OP_MUL_I] = &InstSelectorRiscV64::translate_mul;
 	translatorHandlers[IRInstOperator::IRINST_OP_DIV_I] = &InstSelectorRiscV64::translate_div;
 	translatorHandlers[IRInstOperator::IRINST_OP_MOD_I] = &InstSelectorRiscV64::translate_mod;
+	translatorHandlers[IRInstOperator::IRINST_OP_MULMOD_I] = &InstSelectorRiscV64::translate_mulmod;
 	translatorHandlers[IRInstOperator::IRINST_OP_SHL_I] = &InstSelectorRiscV64::translate_shl;
 	translatorHandlers[IRInstOperator::IRINST_OP_ASHR_I] = &InstSelectorRiscV64::translate_ashr;
 	translatorHandlers[IRInstOperator::IRINST_OP_LSHR_I] = &InstSelectorRiscV64::translate_lshr;
@@ -1410,6 +1412,51 @@ void InstSelectorRiscV64::translate_mod(Instruction * inst)
 	translate_binary(inst, "remw");
 }
 
+/// @brief Lower the widening multiply-modulo instruction, (i64)a * b % m.
+///
+/// Emits a 64-bit `mul` of the two operands followed by a 64-bit `rem` by the constant modulus.
+/// NOTE(review): assumes both i32 operands sit sign-extended in 64-bit registers and that the idiom
+/// pass guards 0 <= a < m, b >= 0 so the result lies in [0, m) (guard not visible here) — confirm.
+void InstSelectorRiscV64::translate_mulmod(Instruction * inst)
+{
+	auto * mulmod = dynamic_cast<MulModInst *>(inst);
+	if (mulmod == nullptr) {
+		return; // not a MulModInst: nothing is emitted
+	}
+	const int32_t modulus = mulmod->getModulus();
+
+	int dstReg = getResultReg(inst);
+	LocalTempManager::Lease dstLease;
+	if (dstReg < 0) { // negative presumably means no register is assigned: borrow a temporary
+		dstLease = tempMgr.borrow(inst);
+		dstReg = dstLease.reg();
+	}
+
+	OperandReg lhs = loadOperand(mulmod->getA(), inst, dstReg);
+	const int rhsPreferredReg = lhs.reg != dstReg ? dstReg : -1; // -1 when lhs already occupies dstReg
+	OperandReg rhs = loadOperand(mulmod->getB(), inst, rhsPreferredReg < 0 ? dstReg : -1, rhsPreferredReg);
+
+	// Full 64-bit multiply (no 32-bit truncation); the product is written to dstReg
+	iloc.inst("mul",
+	          PlatformRiscV64::regName[dstReg],
+	          PlatformRiscV64::regName[lhs.reg],
+	          PlatformRiscV64::regName[rhs.reg]);
+
+	releaseOperand(rhs);
+	releaseOperand(lhs);
+
+	// Signed 64-bit remainder by the constant modulus, held in a scratch register other than dstReg
+	auto modTmp = tempMgr.borrowExcluding(inst, {dstReg});
+	iloc.load_imm(modTmp.reg(), modulus);
+	iloc.inst("rem",
+	          PlatformRiscV64::regName[dstReg],
+	          PlatformRiscV64::regName[dstReg],
+	          PlatformRiscV64::regName[modTmp.reg()]);
+	modTmp.release();
+
+	storeResult(inst, dstReg, inst);
+}
+
 /// @brief 翻译逻辑左移指令（shl）
 void InstSelectorRiscV64::translate_shl(Instruction * inst)
 {
diff --git a/backend/riscv64/InstSelectorRiscV64.h b/backend/riscv64/InstSelectorRiscV64.h
@@ -130,6 +130,8 @@ class InstSelectorRiscV64 {
 	void translate_div(Instruction * inst);
 	/// @brief 翻译mod指令（取模）
 	void translate_mod(Instruction * inst);
+	/// @brief Translate the widening multiply-modulo instruction ((i64)a * b % m: 64-bit multiply, then remainder by a constant)
+	void translate_mulmod(Instruction * inst);
 	/// @brief 翻译逻辑左移指令（shl）
 	void translate_shl(Instruction * inst);
 	/// @brief 翻译算术右移指令（ashr，保留符号位）
diff --git a/ir/Instruction.h b/ir/Instruction.h
@@ -19,6 +19,7 @@ enum class IRInstOperator : std::int8_t {
     IRINST_OP_MUL_I,
     IRINST_OP_DIV_I,
     IRINST_OP_MOD_I,
+    IRINST_OP_MULMOD_I,  ///< 64 位宽乘后对常量取模：(i64)a * (i64)b % m，结果为 i32
     IRINST_OP_SHL_I,   ///< 逻辑左移
     IRINST_OP_ASHR_I,  ///< 算术右移（保留符号位）
     IRINST_OP_LSHR_I,  ///< 逻辑右移（高位补 0）
diff --git a/ir/Instructions/MulModInst.cpp b/ir/Instructions/MulModInst.cpp
@@ -0,0 +1,47 @@
+///
+/// @file MulModInst.cpp
+/// @brief 宽乘取模指令实现
+///
+
+#include "MulModInst.h"
+
+#include <string>
+
+#include "Function.h"
+#include "IntegerType.h"
+#include "Value.h"
+
+MulModInst::MulModInst(Function * func, Value * a, Value * b, int32_t m)
+    : Instruction(func, IRInstOperator::IRINST_OP_MULMOD_I, IntegerType::getTypeInt32()), modulus(m)
+{ // Result type is i32; the modulus is kept as a field rather than as an operand, and m is not validated here.
+    addOperand(a); // read back by getA() as operand 0
+    addOperand(b); // read back by getB() as operand 1
+}
+
+Value * MulModInst::getA()
+{
+    return getOperand(0); // multiplicand a: the first operand added by the constructor
+}
+
+Value * MulModInst::getB()
+{
+    return getOperand(1); // multiplier b: the second operand added by the constructor
+}
+
+void MulModInst::toString(std::string & str)
+{
+    // Expand into plain LLVM IR: sign-extend both operands to i64, multiply, take the signed remainder by the modulus, truncate to i32.
+    // Temporaries are named after this instruction's result (relied on for uniqueness); lines after the first carry a two-space indent.
+    const std::string dst = getIRName();
+    const std::string sa = dst + ".sea";
+    const std::string sb = dst + ".seb";
+    const std::string prod = dst + ".w64";
+    const std::string rem = dst + ".r64";
+    const std::string m = std::to_string(modulus);
+
+    str = sa + " = sext i32 " + getA()->getIRName() + " to i64\n";
+    str += "  " + sb + " = sext i32 " + getB()->getIRName() + " to i64\n";
+    str += "  " + prod + " = mul i64 " + sa + ", " + sb + "\n";
+    str += "  " + rem + " = srem i64 " + prod + ", " + m + "\n";
+    str += "  " + dst + " = trunc i64 " + rem + " to i32";
+}
diff --git a/ir/Instructions/MulModInst.h b/ir/Instructions/MulModInst.h
@@ -0,0 +1,46 @@
+///
+/// @file MulModInst.h
+/// @brief 宽乘取模指令 (i64)a * (i64)b % m
+///
+/// 表示对两个 i32 操作数做 64 位无截断乘法后，再对编译期正常量 m 取模，
+/// 结果回落到 i32。用于把"递归倍加模乘"惯用法折叠成 O(1) 的单条宽乘加取模，
+/// 避免 32 位乘法溢出。语义上 a、b 均按有符号扩展到 64 位参与运算
+///
+
+#pragma once
+
+#include <cstdint>
+
+#include "Instruction.h"
+
+class Value;
+class Function;
+
+class MulModInst final : public Instruction {
+
+public:
+    /// @brief Construct a widening multiply-modulo instruction
+    /// @param func Function the instruction belongs to
+    /// @param a    Multiplicand (i32)
+    /// @param b    Multiplier (i32)
+    /// @param m    Constant modulus known at compile time; expected to be positive
+    MulModInst(Function * func, Value * a, Value * b, int32_t m);
+
+    /// @brief Get the multiplicand
+    Value * getA();
+
+    /// @brief Get the multiplier
+    Value * getB();
+
+    /// @brief Get the constant modulus
+    [[nodiscard]] int32_t getModulus() const
+    {
+        return modulus;
+    }
+
+    /// @brief Serialize as equivalent LLVM IR text (a sext/mul/srem/trunc expansion)
+    void toString(std::string & str) override;
+
+private:
+    int32_t modulus = 0;
+};
diff --git a/ir/passes/PassManager.cpp b/ir/passes/PassManager.cpp
@@ -36,6 +36,7 @@
 #include "functionPass/LateLoopCFGCleanup.h"
 #include "functionPass/LoopRotate.h"
 #include "functionPass/Mem2Reg.h"
+#include "functionPass/ModMulIdiom.h"
 #include "functionPass/PhiToSelect.h"
 #include "functionPass/PhiLowering.h"
 #include "functionPass/PureCallCSE.h"
@@ -104,6 +105,13 @@ void PassManager::registerDefaultOptimizationPipeline(int32_t optLevel, bool ena
         return false;
     });
 
+    // Recognize the recursive double-and-add modular-multiply idiom. Per the author: must run after Mem2Reg (relies on
+    // the SSA branch shape) and before GVN/InstCombine (so srem/sdiv are not rewritten in ways that break matching)
+    registerFunctionPass("ModMulIdiom", [this](Function * func) {
+        ModMulIdiom pass(func, module);
+        return pass.run();
+    });
+
     registerFunctionPass("GVN", [this](Function * func) {
         GVN pass(func, module);
         return pass.run();
diff --git a/ir/passes/functionPass/ModMulIdiom.cpp b/ir/passes/functionPass/ModMulIdiom.cpp
diff --git a/ir/passes/functionPass/ModMulIdiom.h b/ir/passes/functionPass/ModMulIdiom.h