@@ -727,7 +727,7 @@ index 77e3537124..8f6022bc76 100644
727727 template <typename T>
728728 struct SumOp {
729729diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
730- index 9d4bb18d55..78bf0ad1b9 100644
730+ index a28047c624..30832164f4 100644
731731--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
732732+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
733733@@ -24,11 +24,7 @@ namespace fusion {
@@ -754,7 +754,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
754754- __shared__ U shared_var[32];
755755- #endif
756756
757- phi:: funcs::ReluFunctor<T> relu;
757+ funcs::ReluFunctor<T> relu;
758758 U mean_val = 0;
759759@@ -352,13 +343,8 @@ __global__ void FusedLayernormResidualDropoutBiasInfer(
760760
@@ -768,7 +768,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
768768- __shared__ U shared_var[32];
769769- #endif
770770
771- phi:: funcs::ReluFunctor<T> relu;
771+ funcs::ReluFunctor<T> relu;
772772 U mean_val = 0;
773773@@ -638,9 +624,6 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel(
774774 RandVec<VecSize>(&state, rand);
@@ -780,6 +780,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
780780 mask_vec[it][jt] = static_cast<MaskType>(rand[jt] >= dropout_prob);
781781 }
782782 }
783+
783784diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h
784785index 411ee4510c..36c2f8fba7 100644
785786--- a/paddle/phi/kernels/gpu/elementwise_grad.h
0 commit comments