PaddlePaddle · EmmonsCurse · May 20, 2026 · May 20, 2026 · May 20, 2026 · PaddlePaddle-bot
diff --git a/.github/workflows/Codestyle-Check.yml b/.github/workflows/Codestyle-Check.yml
@@ -4,7 +4,7 @@ on:
   pull_request:
     branches:
       - develop
-      - 'release/*'
+      - 'release/**'
 
 jobs:
   pre-commit:

diff --git a/.github/workflows/check-bypass.yml b/.github/workflows/check-bypass.yml
@@ -22,7 +22,7 @@ jobs:
     permissions:
       contents: read
     env:
-      CI_TEAM_MEMBERS: '["yuanlehome","YuanRisheng","Jiang-Jia-Jun","DDDivano","XieYunshen","EmmonsCurse","CSWYF3634076","plusNew001"]'
+      CI_TEAM_MEMBERS: '["yuanlehome","YuanRisheng","Jiang-Jia-Jun","DDDivano","XieYunshen","EmmonsCurse","CSWYF3634076","plusNew001","freeliuzc"]'
     outputs:
       can-skip: ${{ steps.final-output.outputs.can-skip }}
       can-skip-docs: ${{ steps.final-output.outputs.can-skip-docs }}

diff --git a/.github/workflows/ci_iluvatar.yml b/.github/workflows/ci_iluvatar.yml
@@ -2,7 +2,7 @@ name: ILUVATAR-CI
 on:
   pull_request:
     types: [opened, synchronize]
-    branches: [develop, release/**]
+    branches: [develop, release/*]
 permissions: read-all
 
 concurrency:

diff --git a/.github/workflows/ci_metax.yml b/.github/workflows/ci_metax.yml
@@ -7,7 +7,7 @@ on:
       - synchronize
     branches:
       - develop
-      - release/**
+      - release/*
 
 permissions:
   contents: read

diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml
@@ -3,7 +3,7 @@ name: CI_XPU
 on:
   pull_request:
     types: [opened, synchronize]
-    branches: [develop, release/**]
+    branches: [develop, release/*]
 permissions: read-all
 
 concurrency:

diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
@@ -175,7 +175,7 @@ def apply_ep_prefill(
                 if fastdeploy.envs.FD_MOE_PROB_IN_ADVANCE:
                     out = paddlefleet_ops.fused_swiglu_scale(out, dst_weights)
                 else:
-                    out = paddle.incubate.nn.functional.swiglu(out)
+                    out = paddle.nn.functional.swiglu(out)
                 ffn_out = paddle.incubate.nn.functional.batched_gemm(
                     out,
                     getattr(layer, self.added_weight_attrs[1]),
@@ -335,7 +335,6 @@ def apply_tp(
                     layer.routed_scaling_factor,
                     layer.gate_correction_bias,
                     getattr(layer, "renormalize", True),
-                    use_fused_cast=use_fused,
                 )
             else:
                 gate_out = gate_out.cast("float32")
@@ -406,7 +405,6 @@ def apply_tp(
                 layer.gate_correction_bias,
                 getattr(layer, "renormalize", True),
                 topk_reduce_func=getattr(layer, "topk_reduce_func", None),
-                use_fused_cast=use_fused,
             )
 
             (

diff --git a/tests/input/test_process_stop_token_ids.py b/tests/input/test_process_stop_token_ids.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Tests for process_stop_token_ids in fastdeploy.input.utils.common."""
+"""Tests for process_stop_token_ids in fastdeploy.input.utils."""
 
-from fastdeploy.input.utils.common import process_stop_token_ids
+from fastdeploy.input.utils import process_stop_token_ids
 
 
 def _mock_update_stop_seq_fn(stop_sequences):
@@ -115,4 +115,4 @@ def test_stop_token_ids_empty_list():
     test_empty_request()
     test_stop_token_ids_none()
     test_stop_token_ids_empty_list()
-    print("All tests passed.")
+    print("All tests passed.")
diff --git a/tests/layers/test_speculative_sampler.py b/tests/layers/test_speculative_sampler.py
@@ -78,7 +78,7 @@ def _create_default_sampling_metadata(
     fake_sampling_metadata = SamplingMetadata(
         temperature=paddle.full(shape=[batch_size, 1], fill_value=0.9, dtype="float32"),
         top_p=paddle.full(shape=[batch_size, 1], fill_value=0.7, dtype="float32"),
-        top_k=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int32"),
+        top_k=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"),
         prompt_lens=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"),
         step_idx=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"),
         token_ids_all=_create_tokens_tensor(batch_size, max_seq_len),

diff --git a/tests/model_executor/test_ep.py b/tests/model_executor/test_ep.py
@@ -419,6 +419,9 @@ def fake_get_moe_scores(*_args, **_kwargs):
         routed_scaling_factor=1.0,
         gate_correction_bias=None,
         renormalize=False,
+        fd_config=SimpleNamespace(
+            scheduler_config=SimpleNamespace(enable_moe_scores_elementwise_fuse=False),
+        ),
     )
     gate_out = paddle.randn([1, 4], dtype="float32")