File tree: 1 file changed (+2 -2 lines changed)
fastdeploy/model_executor/layers/moe: 1 file changed (+2 -2 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -120,7 +120,7 @@ def m_grouped_fp8_gemm_nt_contiguous_custom_python_op(
120120 # down_proj
121121 if not fastdeploy .envs .FD_USE_PHI_FP8_QUANT :
122122 ffn_in_x , ffn_in_x_scale_tensor = fastdeploy .model_executor .ops .gpu .per_token_quant (
123- ffn_out , quant_config_weight_block_size_0 , not disable_ue8m0_cast
123+ ffn_out , quant_config_weight_block_size_0
124124 )
125125
126126 ffn_in_x_scale_tensor = ffn_in_x_scale_tensor .transpose ([1 , 0 ]).contiguous ()
@@ -397,7 +397,7 @@ def apply_ep_prefill(
397397 # down_proj
398398 if not fastdeploy .envs .FD_USE_PHI_FP8_QUANT :
399399 ffn_in_x , ffn_in_x_scale_tensor = fastdeploy .model_executor .ops .gpu .per_token_quant (
400- ffn_out , self .quant_config .weight_block_size [0 ], self . quant_config . deepgemm_scale_ue8m0
400+ ffn_out , self .quant_config .weight_block_size [0 ]
401401 )
402402 ffn_in_x_scale_tensor = ffn_in_x_scale_tensor .transpose ([1 , 0 ]).contiguous ().transpose ([1 , 0 ])
403403 else :
You can’t perform that action at this time.
0 commit comments