Skip to content

Commit 06a3785

Browse files
metax666 and duqimeng authored
[Metax] Fix CI error (#201) (#2291)
* [Metax] Fix CI error (#201)
* [Metax] Fix CI error
* [Metax] Fix name bug
* [Metax] Fix swiglu ut in metax
* [Metax] Fix swiglu ut in metax
* [Metax] Fix RmsNormQuantKernel (#202)
* [Metax] Fix CI error
* [Metax] Fix name bug
* [Metax] Fix swiglu ut in metax
* [Metax] Fix swiglu ut in metax
* [Metax] Fix RmsNormQuantKernel
* [Metax] Fix softmax
---------
Co-authored-by: duqimeng <77875733+duqimeng@users.noreply.github.com>
1 parent a36d51e commit 06a3785

3 files changed

Lines changed: 29 additions & 27 deletions

File tree

backends/metax_gpu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -698,6 +698,8 @@ file(
698698
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_sum_kernel.cu
699699
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
700700
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_with_scaled_gradient_grad_kernel.cu
701+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fused_rms_norm_quant_kernel.cu
702+
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fused_rms_norm_quant_grad_kernel.cu
701703
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/gpu_info.cc
702704
# ############################################################################
703705
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu

backends/metax_gpu/compile.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ export PATH=${PATH}:${CUCC_PATH}/tools:${CUCC_PATH}/bin
2323
export PATH=${MACA_PATH}/bin:${PATH}
2424
export LD_LIBRARY_PATH=${MACA_PATH}/lib:${MACA_PATH}/mxgpu_llvm/lib:${LD_LIBRARY_PATH}
2525
export PADDLE_VERSION="3.3.0.dev$(date +%Y%m%d)"
26+
export MACA_AI_VERSION=$(cat /opt/maca/Version.txt | cut -d':' -f2)
2627
if [ ! -d build ]; then
2728
echo "build directory not found, creating..."
2829
mkdir build
@@ -35,6 +36,6 @@ make_maca -j18 VERBOSE=1
3536

3637

3738
echo "install whl"
38-
pip install dist/paddle_metax_gpu-${PADDLE_VERSION}*.whl --force-reinstall
39+
pip install dist/paddle_metax_gpu-${PADDLE_VERSION}+maca${MACA_AI_VERSION}*.whl --force-reinstall
3940
cd ..
4041
echo "Done!"

backends/metax_gpu/kernels/funcs/softmax.cu

Lines changed: 25 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,11 @@ limitations under the License. */
2020
#include "paddle/phi/kernels/funcs/math_function.h"
2121
#include "paddle/phi/kernels/funcs/softmax.h"
2222
#include "paddle/phi/kernels/funcs/softmax_impl.h"
23+
2324
namespace phi {
2425
namespace funcs {
2526

2627
using ScopedTensorDescriptor = phi::backends::gpu::ScopedTensorDescriptor;
27-
using DataLayout = phi::backends::gpu::DataLayout;
2828
template <typename T>
2929
using CudnnDataType = phi::backends::gpu::CudnnDataType<T>;
3030

@@ -37,10 +37,9 @@ void SoftmaxCUDNNFunctor<T, DeviceContext>::operator()(
3737
ScopedTensorDescriptor xDesc;
3838
ScopedTensorDescriptor yDesc;
3939
std::vector<int> cudnn_tensor_dims = common::vectorize<int>(X->dims());
40-
DataLayout layout = DataLayout::kNCHW;
41-
VLOG(0) << "Enter softmax Kernel22.";
40+
DataLayout layout = DataLayout::NCHW;
4241
if (cudnn_tensor_dims.size() == 5) {
43-
layout = DataLayout::kNCDHW;
42+
layout = DataLayout::NCDHW;
4443
}
4544
// NOTE(*) : cudnn softmax only support >= 4D phi::DenseTensor,
4645
// fill 1 at unused dims
@@ -91,9 +90,9 @@ void SoftmaxGradCUDNNFunctor<T, DeviceContext>::operator()(
9190
ScopedTensorDescriptor dyDesc;
9291
ScopedTensorDescriptor dxDesc;
9392
std::vector<int> cudnn_tensor_dims = common::vectorize<int>(Y->dims());
94-
DataLayout layout = DataLayout::kNCHW;
93+
DataLayout layout = DataLayout::NCHW;
9594
if (cudnn_tensor_dims.size() == 5) {
96-
layout = DataLayout::kNCDHW;
95+
layout = DataLayout::NCDHW;
9796
}
9897
// NOTE(*) : cudnn softmax only support >= 4D phi::DenseTensor,
9998
// fill 1 at unused dims
@@ -107,18 +106,18 @@ void SoftmaxGradCUDNNFunctor<T, DeviceContext>::operator()(
107106
dxDesc.descriptor<T>(layout, cudnn_tensor_dims);
108107
miopenTensorDescriptor_t cudnn_ygrad_desc =
109108
dyDesc.descriptor<T>(layout, cudnn_tensor_dims);
110-
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2(
111-
GetDnnHandle(dev_ctx.stream(), dev_ctx.GetPlace()),
112-
CudnnDataType<T>::kOne(),
113-
cudnn_y_desc,
114-
Y->data<T>(),
115-
cudnn_ygrad_desc,
116-
YGrad->data<T>(),
117-
CudnnDataType<T>::kZero(),
118-
cudnn_xgrad_desc,
119-
dev_ctx.template Alloc<T>(XGrad),
120-
MIOPEN_SOFTMAX_ACCURATE,
121-
MIOPEN_SOFTMAX_MODE_INSTANCE));
109+
PADDLE_ENFORCE_GPU_SUCCESS(
110+
phi::dynload::miopenSoftmaxBackward_V2(dev_ctx.cudnn_handle(),
111+
CudnnDataType<T>::kOne(),
112+
cudnn_y_desc,
113+
Y->data<T>(),
114+
cudnn_ygrad_desc,
115+
YGrad->data<T>(),
116+
CudnnDataType<T>::kZero(),
117+
cudnn_xgrad_desc,
118+
dev_ctx.template Alloc<T>(XGrad),
119+
MIOPEN_SOFTMAX_ACCURATE,
120+
MIOPEN_SOFTMAX_MODE_INSTANCE));
122121
#else
123122
cudnnTensorDescriptor_t cudnn_y_desc =
124123
yDesc.descriptor<T>(layout, cudnn_tensor_dims);
@@ -142,12 +141,12 @@ void SoftmaxGradCUDNNFunctor<T, DeviceContext>::operator()(
142141
}
143142

144143
template class SoftmaxCUDNNFunctor<float, phi::GPUContext>;
145-
template class SoftmaxCUDNNFunctor<phi::dtype::float16, phi::GPUContext>;
144+
template class SoftmaxCUDNNFunctor<phi::float16, phi::GPUContext>;
146145
template class SoftmaxGradCUDNNFunctor<float, phi::GPUContext>;
147-
template class SoftmaxGradCUDNNFunctor<phi::dtype::float16, phi::GPUContext>;
146+
template class SoftmaxGradCUDNNFunctor<phi::float16, phi::GPUContext>;
148147
#if CUDNN_VERSION_MIN(8, 1, 0)
149-
template class SoftmaxCUDNNFunctor<phi::dtype::bfloat16, phi::GPUContext>;
150-
template class SoftmaxGradCUDNNFunctor<phi::dtype::bfloat16, phi::GPUContext>;
148+
template class SoftmaxCUDNNFunctor<phi::bfloat16, phi::GPUContext>;
149+
template class SoftmaxGradCUDNNFunctor<phi::bfloat16, phi::GPUContext>;
151150
#endif
152151

153152
// MIOPEN do not support double
@@ -156,14 +155,14 @@ template class SoftmaxCUDNNFunctor<double, phi::GPUContext>;
156155
template class SoftmaxGradCUDNNFunctor<double, phi::GPUContext>;
157156
#endif
158157

159-
template class SoftmaxFunctor<phi::GPUContext, phi::dtype::float16>;
160-
template class SoftmaxFunctor<phi::GPUContext, phi::dtype::bfloat16>;
158+
template class SoftmaxFunctor<phi::GPUContext, phi::float16>;
159+
template class SoftmaxFunctor<phi::GPUContext, phi::bfloat16>;
161160
template class SoftmaxFunctor<phi::GPUContext, float>;
162161
template class SoftmaxFunctor<phi::GPUContext, double>;
163162
template class SoftmaxGradFunctor<phi::GPUContext, float>;
164163
template class SoftmaxGradFunctor<phi::GPUContext, double>;
165-
template class SoftmaxGradFunctor<phi::GPUContext, phi::dtype::float16>;
166-
template class SoftmaxGradFunctor<phi::GPUContext, phi::dtype::bfloat16>;
164+
template class SoftmaxGradFunctor<phi::GPUContext, phi::float16>;
165+
template class SoftmaxGradFunctor<phi::GPUContext, phi::bfloat16>;
167166

168167
} // namespace funcs
169168
} // namespace phi

0 commit comments

Comments
 (0)