pnnx ncnn handle optional sdpa scale param (#6000)

nihui · web-flow · commit 0be3a34ff98c · 2025-04-18T10:24:11.000+08:00
diff --git a/tools/pnnx/src/pass_ncnn/F_scaled_dot_product_attention.cpp b/tools/pnnx/src/pass_ncnn/F_scaled_dot_product_attention.cpp
@@ -36,7 +36,7 @@ Tensor.reshape          op_5        1 1 v 14 shape=(%batch,%size,%num_heads,%fea
 Tensor.permute          op_6        1 1 10 16 dims=(0,2,1,3)
 Tensor.permute          op_7        1 1 12 17 dims=(0,2,1,3)
 Tensor.permute          op_8        1 1 14 18 dims=(0,2,1,3)
-F.scaled_dot_product_attention op_9 4 1 16 17 18 attn_mask 19 dropout_p=0.0 is_causal=False scale=%scale
+F.scaled_dot_product_attention sdpa 4 1 16 17 18 attn_mask 19 %*=%*
 Tensor.permute          op_10       1 1 19 20 dims=(0,2,1,3)
 Tensor.reshape          op_11       1 1 20 21 shape=(%batch,%size,%embed_dim)
 nn.Linear               out_proj    1 1 21 out bias=%outbias in_features=%embed_dim out_features=%qdim @bias @weight
@@ -54,6 +54,23 @@ pnnx.Output             output      1 0 out
         return "sdpa_attention";
     }
 
+    bool match(const std::map<std::string, Parameter>& captured_params) const
+    {
+        // accept the capture only if dropout_p / is_causal are absent or hold (type 3, f == 0) / (type 1, b == false)
+        const auto dropout_it = captured_params.find("sdpa.dropout_p");
+        if (dropout_it != captured_params.end())
+        {
+            const Parameter& dropout = dropout_it->second;
+            if (!(dropout.type == 3 && dropout.f == 0.f)) // type tag 3 is the one read through .f here
+                return false;
+        }
+
+        const auto causal_it = captured_params.find("sdpa.is_causal");
+        if (causal_it != captured_params.end() && !(causal_it->second.type == 1 && causal_it->second.b == false))
+            return false;
+        return true;
+    }
+
     void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& captured_attrs) const
     {
         op->params["0"] = captured_params.at("embed_dim");
@@ -68,7 +85,8 @@ pnnx.Output             output      1 0 out
         op->params["3"] = kdim;
         op->params["4"] = vdim;
         op->params["5"] = 1;
-        op->params["6"] = captured_params.at("scale");
+        if (captured_params.find("sdpa.scale") != captured_params.end())
+            op->params["6"] = captured_params.at("sdpa.scale");
 
         op->attrs["0"] = Attribute();
         op->attrs["0"].data = {0, 0, 0, 0};
@@ -138,7 +156,7 @@ Tensor.reshape          op_5        1 1 v 14 shape=(%batch,%size,%num_heads,%fea
 Tensor.permute          op_6        1 1 10 16 dims=(0,2,1,3)
 Tensor.permute          op_7        1 1 12 17 dims=(0,2,1,3)
 Tensor.permute          op_8        1 1 14 18 dims=(0,2,1,3)
-F.scaled_dot_product_attention op_9 4 1 16 17 18 attn_mask 19 dropout_p=0.0 is_causal=False scale=%scale
+F.scaled_dot_product_attention sdpa 4 1 16 17 18 attn_mask 19 %*=%*
 Tensor.permute          op_10       1 1 19 20 dims=(0,2,1,3)
 Tensor.reshape          op_11       1 1 20 21 shape=(%batch,%qsize,%embed_dim)
 nn.Linear               out_proj    1 1 21 out bias=%outbias in_features=%embed_dim out_features=%qdim @bias @weight
@@ -166,7 +184,7 @@ Tensor.reshape          op_5        1 1 v 14 shape=(%batch,%size,%num_heads,%fea
 Tensor.permute          op_6        1 1 10 16 dims=(0,2,1,3)
 Tensor.permute          op_7        1 1 12 17 dims=(0,2,1,3)
 Tensor.permute          op_8        1 1 14 18 dims=(0,2,1,3)
-F.scaled_dot_product_attention op_9 3 1 16 17 18 19 dropout_p=0.0 is_causal=False attn_mask=None scale=%scale
+F.scaled_dot_product_attention sdpa 3 1 16 17 18 19 %*=%*
 Tensor.permute          op_10       1 1 19 20 dims=(0,2,1,3)
 Tensor.reshape          op_11       1 1 20 21 shape=(%batch,%size,%embed_dim)
 nn.Linear               out_proj    1 1 21 out bias=%outbias in_features=%embed_dim out_features=%qdim @bias @weight
@@ -201,7 +219,7 @@ Tensor.reshape          op_5        1 1 v 14 shape=(%batch,%size,%num_heads,%fea
 Tensor.permute          op_6        1 1 10 16 dims=(0,2,1,3)
 Tensor.permute          op_7        1 1 12 17 dims=(0,2,1,3)
 Tensor.permute          op_8        1 1 14 18 dims=(0,2,1,3)
-F.scaled_dot_product_attention op_9 3 1 16 17 18 19 dropout_p=0.0 is_causal=False attn_mask=None scale=%scale
+F.scaled_dot_product_attention sdpa 3 1 16 17 18 19 %*=%*
 Tensor.permute          op_10       1 1 19 20 dims=(0,2,1,3)
 Tensor.reshape          op_11       1 1 20 21 shape=(%batch,%qsize,%embed_dim)
 nn.Linear               out_proj    1 1 21 out bias=%outbias in_features=%embed_dim out_features=%qdim @bias @weight
@@ -218,6 +236,40 @@ pnnx.Output             output      1 0 out
 
 REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_scaled_dot_product_attention_3, 10)
 
+class F_scaled_dot_product_attention_4 : public F_scaled_dot_product_attention // variant matching the Tensor.view + torch.transpose graph around a 3-input sdpa
+{
+public:
+    const char* match_pattern_graph() const
+    { // pattern: one input -> q/k/v nn.Linear -> Tensor.view -> torch.transpose(1,2) -> sdpa (all params captured via %*=%*) -> transpose -> reshape -> out_proj
+        return R"PNNXIR(7767517
+15 14
+pnnx.Input              input       0 1 input
+nn.Linear               op_0        1 1 input q bias=%qbias in_features=%qdim out_features=%embed_dim @bias @weight
+nn.Linear               op_1        1 1 input k bias=%kbias in_features=%kdim out_features=%embed_dim @bias @weight
+nn.Linear               op_2        1 1 input v bias=%vbias in_features=%vdim out_features=%embed_dim @bias @weight
+Tensor.view             op_3        1 1 q 10 shape=(%batch,%size,%num_heads,%feat_per_head)
+Tensor.view             op_4        1 1 k 12 shape=(%batch,%size,%num_heads,%feat_per_head)
+Tensor.view             op_5        1 1 v 14 shape=(%batch,%size,%num_heads,%feat_per_head)
+torch.transpose         op_6        1 1 10 16 dim0=1 dim1=2
+torch.transpose         op_7        1 1 12 17 dim0=1 dim1=2
+torch.transpose         op_8        1 1 14 18 dim0=1 dim1=2
+F.scaled_dot_product_attention sdpa 3 1 16 17 18 19 %*=%*
+torch.transpose         op_10       1 1 19 20 dim0=1 dim1=2
+Tensor.reshape          op_11       1 1 20 21 shape=(%batch,%size,%embed_dim)
+nn.Linear               out_proj    1 1 21 out bias=%outbias in_features=%embed_dim out_features=%qdim @bias @weight
+pnnx.Output             output      1 0 out
+)PNNXIR";
+    }
+
+    void write(Operator* op, const std::map<std::string, Parameter>& captured_params, const std::map<std::string, Attribute>& captured_attrs) const
+    { // delegate to the base class conversion, then override param "5"
+        F_scaled_dot_product_attention::write(op, captured_params, captured_attrs);
+        op->params["5"] = 0; // the write() earlier in this patch stores 1 here; presumably the attn_mask flag, cleared since this pattern has no mask input -- TODO confirm
+    }
+};
+
+REGISTER_GLOBAL_PNNX_NCNN_GRAPH_REWRITER_PASS(F_scaled_dot_product_attention_4, 10) // second argument 10 matches the _3 registration above; presumably the pass priority -- confirm
+
 } // namespace ncnn
 
 } // namespace pnnx