diff --git a/.github/workflows/Codestyle-Check.yml b/.github/workflows/Codestyle-Check.yml index 6811e3fb38d..21dfd480212 100644 --- a/.github/workflows/Codestyle-Check.yml +++ b/.github/workflows/Codestyle-Check.yml @@ -4,7 +4,7 @@ on: pull_request: branches: - develop - - 'release/*' + - 'release/**' jobs: pre-commit: diff --git a/.github/workflows/check-bypass.yml b/.github/workflows/check-bypass.yml index c9256e7a6cf..804c85f89a4 100644 --- a/.github/workflows/check-bypass.yml +++ b/.github/workflows/check-bypass.yml @@ -22,7 +22,7 @@ jobs: permissions: contents: read env: - CI_TEAM_MEMBERS: '["yuanlehome","YuanRisheng","Jiang-Jia-Jun","DDDivano","XieYunshen","EmmonsCurse","CSWYF3634076","plusNew001"]' + CI_TEAM_MEMBERS: '["yuanlehome","YuanRisheng","Jiang-Jia-Jun","DDDivano","XieYunshen","EmmonsCurse","CSWYF3634076","plusNew001","freeliuzc"]' outputs: can-skip: ${{ steps.final-output.outputs.can-skip }} can-skip-docs: ${{ steps.final-output.outputs.can-skip-docs }} diff --git a/.github/workflows/ci_iluvatar.yml b/.github/workflows/ci_iluvatar.yml index 3c46306ba92..1b452ddcf37 100644 --- a/.github/workflows/ci_iluvatar.yml +++ b/.github/workflows/ci_iluvatar.yml @@ -2,7 +2,7 @@ name: ILUVATAR-CI on: pull_request: types: [opened, synchronize] - branches: [develop, release/**] + branches: [develop, release/*] permissions: read-all concurrency: diff --git a/.github/workflows/ci_metax.yml b/.github/workflows/ci_metax.yml index 5584147eb8c..f8eb9e9ee0f 100644 --- a/.github/workflows/ci_metax.yml +++ b/.github/workflows/ci_metax.yml @@ -7,7 +7,7 @@ on: - synchronize branches: - develop - - release/** + - release/* permissions: contents: read diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml index cf67385c24f..114a9ca79fc 100644 --- a/.github/workflows/ci_xpu.yml +++ b/.github/workflows/ci_xpu.yml @@ -3,7 +3,7 @@ name: CI_XPU on: pull_request: types: [opened, synchronize] - branches: [develop, release/**] + branches: [develop, release/*] permissions: read-all concurrency: diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py index 06e1ed1e18a..69ee6fae44e 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py @@ -175,7 +175,7 @@ def apply_ep_prefill( if fastdeploy.envs.FD_MOE_PROB_IN_ADVANCE: out = paddlefleet_ops.fused_swiglu_scale(out, dst_weights) else: - out = paddle.incubate.nn.functional.swiglu(out) + out = paddle.nn.functional.swiglu(out) ffn_out = paddle.incubate.nn.functional.batched_gemm( out, getattr(layer, self.added_weight_attrs[1]), @@ -335,7 +335,6 @@ def apply_tp( layer.routed_scaling_factor, layer.gate_correction_bias, getattr(layer, "renormalize", True), - use_fused_cast=use_fused, ) else: gate_out = gate_out.cast("float32") @@ -406,7 +405,6 @@ def apply_tp( layer.gate_correction_bias, getattr(layer, "renormalize", True), topk_reduce_func=getattr(layer, "topk_reduce_func", None), - use_fused_cast=use_fused, ) ( diff --git a/tests/input/test_process_stop_token_ids.py b/tests/input/test_process_stop_token_ids.py index 7aed4546919..171896a19ba 100644 --- a/tests/input/test_process_stop_token_ids.py +++ b/tests/input/test_process_stop_token_ids.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Tests for process_stop_token_ids in fastdeploy.input.utils.common.""" +"""Tests for process_stop_token_ids in fastdeploy.input.utils.""" -from fastdeploy.input.utils.common import process_stop_token_ids +from fastdeploy.input.utils import process_stop_token_ids def _mock_update_stop_seq_fn(stop_sequences): @@ -115,4 +115,4 @@ def test_stop_token_ids_empty_list(): test_empty_request() test_stop_token_ids_none() test_stop_token_ids_empty_list() - print("All tests passed.") \ No newline at end of file + print("All tests passed.") diff --git a/tests/layers/test_speculative_sampler.py b/tests/layers/test_speculative_sampler.py index ef75fe5d4e8..bc635755ffa 100644 --- a/tests/layers/test_speculative_sampler.py +++ b/tests/layers/test_speculative_sampler.py @@ -78,7 +78,7 @@ def _create_default_sampling_metadata( fake_sampling_metadata = SamplingMetadata( temperature=paddle.full(shape=[batch_size, 1], fill_value=0.9, dtype="float32"), top_p=paddle.full(shape=[batch_size, 1], fill_value=0.7, dtype="float32"), - top_k=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int32"), + top_k=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"), prompt_lens=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"), step_idx=paddle.full(shape=[batch_size, 1], fill_value=0, dtype="int64"), token_ids_all=_create_tokens_tensor(batch_size, max_seq_len), diff --git a/tests/model_executor/test_ep.py b/tests/model_executor/test_ep.py index 373e8899396..950314a5e57 100644 --- a/tests/model_executor/test_ep.py +++ b/tests/model_executor/test_ep.py @@ -419,6 +419,9 @@ def fake_get_moe_scores(*_args, **_kwargs): routed_scaling_factor=1.0, gate_correction_bias=None, renormalize=False, + fd_config=SimpleNamespace( + scheduler_config=SimpleNamespace(enable_moe_scores_elementwise_fuse=False), + ), ) gate_out = paddle.randn([1, 4], dtype="float32")