Skip to content

Commit deb6931

Browse files
YqGe585 and duqimeng authored
Update Paddle to 1223 and fix dense_tensor_iterator (#2299)
* add fused_rms_norm_ext and fused_rms_norm_ext_grad

Co-authored-by: duqimeng <77875733+duqimeng@users.noreply.github.com>
1 parent 06a3785 commit deb6931

18 files changed

Lines changed: 45 additions & 37 deletions

File tree

Paddle

Submodule Paddle updated 563 files

backends/iluvatar_gpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ file(
118118
${PADDLE_SOURCE_DIR}/paddle/phi/core/platform/device/gpu/gpu_info.cc
119119
# kernels/funcs
120120
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/*.cu
121+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/dense_tensor_iterator.cc
121122
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math/*.cu
122123
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/eigen/*.cu
123124
# cudnn/cublas

backends/iluvatar_gpu/kernels/ernie_core/layer_norm_cuda_kernel.cu

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,16 @@ void RMSLnBwd(const Context &ctx,
8181

8282
} // namespace phi
8383

84-
PD_REGISTER_PLUGIN_KERNEL(
85-
fused_rms_norm_ext, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnFwd, float) {}
84+
PD_REGISTER_PLUGIN_KERNEL(fused_rms_norm_ext,
85+
iluvatar_gpu,
86+
ALL_LAYOUT,
87+
phi::RMSLnFwd,
88+
float,
89+
phi::bfloat16) {}
8690

87-
PD_REGISTER_PLUGIN_KERNEL(
88-
fused_rms_norm_ext_grad, iluvatar_gpu, ALL_LAYOUT, phi::RMSLnBwd, float) {}
91+
PD_REGISTER_PLUGIN_KERNEL(fused_rms_norm_ext_grad,
92+
iluvatar_gpu,
93+
ALL_LAYOUT,
94+
phi::RMSLnBwd,
95+
float,
96+
phi::bfloat16) {}

backends/iluvatar_gpu/patches/paddle-corex.patch

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ index 77e3537124..8f6022bc76 100644
727727
template <typename T>
728728
struct SumOp {
729729
diff --git a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
730-
index 9d4bb18d55..78bf0ad1b9 100644
730+
index a28047c624..30832164f4 100644
731731
--- a/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
732732
+++ b/paddle/phi/kernels/fusion/gpu/fused_layernorm_residual_dropout_bias.h
733733
@@ -24,11 +24,7 @@ namespace fusion {
@@ -754,7 +754,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
754754
- __shared__ U shared_var[32];
755755
-#endif
756756

757-
phi::funcs::ReluFunctor<T> relu;
757+
funcs::ReluFunctor<T> relu;
758758
U mean_val = 0;
759759
@@ -352,13 +343,8 @@ __global__ void FusedLayernormResidualDropoutBiasInfer(
760760

@@ -768,7 +768,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
768768
- __shared__ U shared_var[32];
769769
-#endif
770770

771-
phi::funcs::ReluFunctor<T> relu;
771+
funcs::ReluFunctor<T> relu;
772772
U mean_val = 0;
773773
@@ -638,9 +624,6 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_fast_ln_fwd_kernel(
774774
RandVec<VecSize>(&state, rand);
@@ -780,6 +780,7 @@ index 9d4bb18d55..78bf0ad1b9 100644
780780
mask_vec[it][jt] = static_cast<MaskType>(rand[jt] >= dropout_prob);
781781
}
782782
}
783+
783784
diff --git a/paddle/phi/kernels/gpu/elementwise_grad.h b/paddle/phi/kernels/gpu/elementwise_grad.h
784785
index 411ee4510c..36c2f8fba7 100644
785786
--- a/paddle/phi/kernels/gpu/elementwise_grad.h

backends/iluvatar_gpu/tests/disabled_test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,3 +556,4 @@ test_rms_norm_op.py
556556
test_batched_gemm.py
557557
test_match_matrix_tensor_op.py
558558
test_tensor.py
559+
test_rms_norm.py

backends/metax_gpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ file(
121121
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cusparse.cc
122122
# kernels/Funcs
123123
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/*.cu
124+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/dense_tensor_iterator.cc
124125
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math/*.cu
125126
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/eigen/*.cu
126127
# kernels/gpu
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../../../Paddle/paddle/phi/backends/dynload/cuda_driver.h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../../../Paddle/paddle/phi/backends/dynload/cudnn_frontend.h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../../../Paddle/paddle/phi/backends/dynload/curand.h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../../../Paddle/paddle/phi/backends/dynload/cusparseLt.h

0 commit comments

Comments
 (0)