Skip to content

Commit e9fd1a4

Browse files
committed
Recover CINN-irrelevant code.
1 parent 878c9ef · commit e9fd1a4

8 files changed

Lines changed: 17 additions & 30 deletions

File tree

backends/metax_gpu/build.sh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,12 @@
1818
set -e
1919

2020
# install requirement.txt
21-
# pip install -r requirement.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
21+
pip install -r requirement.txt -i https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
2222

2323
# uninstall paddle
24-
# pip uninstall paddlepaddle -y
24+
pip uninstall paddlepaddle -y
2525

26-
27-
# python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
26+
python -m pip install --pre paddlepaddle -i https://www.paddlepaddle.org.cn/packages/nightly/cpu/
2827

2928

3029
# apply patch

backends/metax_gpu/change_patch.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ cp -r patch/eigen3/ ../../Paddle/third_party/eigen3
2424
rm -r patch/eigen3
2525
# cp patch/tmp/mixed_vector* ../../Paddle/paddle/phi/core
2626
cd ../../Paddle/
27-
git apply --verbose /home/sw/Baidu-xuyuhan/PaddleCustomDevice/backends/metax_gpu/patch/paddle.patch
27+
git apply --verbose ../backends/metax_gpu/patch/paddle.patch
2828
cd -
2929
# cp -r patch/intrinsics.cuh ../../Paddle/third_party/warpctc/include/contrib/moderngpu/include/device/

backends/metax_gpu/compile.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export LD_LIBRARY_PATH=${MACA_PATH}/lib:${MACA_PATH}/mxgpu_llvm/lib:${LD_LIBRARY
2828
export PADDLE_VERSION="3.3.0.dev$(date +%Y%m%d)"
2929
export MACA_AI_VERSION=$(cat /opt/maca/Version.txt | cut -d':' -f2)
3030
if [ ! -d build ]; then
31-
echo "build directory not found, creating..."
31+
echo "build directory not found, creating..."
3232
mkdir build
3333
fi
3434

backends/metax_gpu/kernels/impl/conv_grad_kernel_impl.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ void ConvGradKernel(const Context& dev_ctx,
160160
if (is_expand) {
161161
set_zero(dev_ctx, &transformed_input_grad, static_cast<T>(0));
162162
}
163-
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::CFO, Context, T> col2im;
163+
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::kCFO, Context, T> col2im;
164164
phi::funcs::Col2VolFunctor<Context, T> col2vol;
165165

166166
for (int i = 0; i < batch_size; i++) {
@@ -214,7 +214,7 @@ void ConvGradKernel(const Context& dev_ctx,
214214
Tensor filter_grad_ = *filter_grad;
215215
filter_grad_.Resize(filter_matrix_shape);
216216
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
217-
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::CFO, Context, T> im2col;
217+
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::kCFO, Context, T> im2col;
218218
phi::funcs::Vol2ColFunctor<Context, T> vol2col;
219219
for (int i = 0; i < batch_size; i++) {
220220
DenseTensor out_grad_batch =
@@ -391,7 +391,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
391391
if (is_expand) {
392392
set_zero(dev_ctx, &transformed_dX, static_cast<T>(0));
393393
}
394-
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::CFO, Context, T> col2im;
394+
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::kCFO, Context, T> col2im;
395395
phi::funcs::Col2VolFunctor<Context, T> col2vol;
396396

397397
for (int i = 0; i < batch_size; i++) {
@@ -436,7 +436,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
436436
set_zero(dev_ctx, dW, static_cast<T>(0));
437437
DenseTensor dW_arr = *dW;
438438
dW_arr.Resize(filter_matrix_shape);
439-
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::CFO, Context, T> im2col;
439+
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::kCFO, Context, T> im2col;
440440
phi::funcs::Vol2ColFunctor<Context, T> vol2col;
441441
for (int i = 0; i < batch_size; ++i) {
442442
DenseTensor dy_batch =
@@ -483,7 +483,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
483483
}
484484

485485
set_zero(dev_ctx, &transformed_ddY, static_cast<T>(0));
486-
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::CFO, Context, T> im2col;
486+
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::kCFO, Context, T> im2col;
487487
phi::funcs::Vol2ColFunctor<Context, T> vol2col;
488488
for (int i = 0; i < batch_size; ++i) {
489489
DenseTensor ddy_batch =

backends/metax_gpu/kernels/impl/conv_kernel_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ void ConvKernelImpl(const Context& dev_ctx,
140140
int in_step = static_cast<int>(transformed_input.dims()[1]) / groups;
141141
int out_step = static_cast<int>(transformed_output.dims()[1]) / groups;
142142

143-
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::CFO, Context, T> im2col;
143+
phi::funcs::Im2ColFunctor<phi::funcs::ColFormat::kCFO, Context, T> im2col;
144144
phi::funcs::Vol2ColFunctor<Context, T> vol2col;
145145

146146
auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);

backends/metax_gpu/kernels/impl/conv_transpose_kernel_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ void ConvTransposeRawKernel(const Context& dev_ctx,
142142
(data_layout != DataLayout::kNHWC
143143
? static_cast<int>(out_dims[1]) / groups
144144
: static_cast<int>(out_dims[out_dims.size() - 1]) / groups);
145-
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::CFO, Context, T> col2im;
145+
phi::funcs::Col2ImFunctor<phi::funcs::ColFormat::kCFO, Context, T> col2im;
146146
phi::funcs::Col2VolFunctor<Context, T> col2vol;
147147
funcs::ConcatFunctor<Context, T> concat_functor;
148148

backends/metax_gpu/tests/run_test.sh

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,6 @@ TEST_PATH2="${SCRIPT_DIR}/../../../python/tests"
2323
export PYTHONPATH="${LEGACY_TEST_PATH}:${PYTHONPATH}:${TEST_PATH1}:${TEST_PATH2}"
2424
export PADDLE_XCCL_BACKEND=metax_gpu
2525
export CUDA_VISIBLE_DEVICES=0
26-
27-
PYTHONUNBUFFERED=1
28-
# 以下三条为运行CINN必开
29-
FLAGS_prim_all=true
30-
FLAGS_prim_enable_dynamic=true
31-
FLAGS_use_cinn=true
32-
# 关闭多线程编译,调试时用
33-
FLAGS_enable_cinn_compile_cache=false
34-
# 打印log,调试时用
35-
FLAGS_print_ir=true
36-
GLOG_v=1
37-
3826
# export
3927
# sleep 1000000
4028

@@ -93,8 +81,8 @@ done
9381
export GLOG_v=$TEST_LOG_LEVEL
9482

9583

96-
cmake .. -DTEST_LIST_FILE=$TEST_LIST_FILE -DLOG_OUTPUT_DIR=$TEST_LOG_OUTPUT_DIR -DIGNORE_BLOCKS="$IGNORE_BLOCKS" -DWITH_CINN=ON
84+
cmake .. -DTEST_LIST_FILE=$TEST_LIST_FILE -DLOG_OUTPUT_DIR=$TEST_LOG_OUTPUT_DIR -DIGNORE_BLOCKS="$IGNORE_BLOCKS"
9785

9886
cmake --build .
9987

100-
GLOG_v=1 FLAGS_print_ir=1 ctest -j$TEST_PARALLEL_NUM --output-on-failure
88+
ctest -j$TEST_PARALLEL_NUM --output-on-failure

backends/metax_gpu/tests/tmp_save/gpudnn/conv_cudnn_v7.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ struct SearchAlgorithmBase<ConvKind::kForward> {
227227

228228
// auto workspace_handle = dev_ctx.cudnn_workspace_handle();
229229
auto workspace_handle = GetDnnWorkspace(
230-
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream(), dev_ctx.GetPlace());
230+
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream());
231231

232232
// auto handle = GetDnnHandle(dev_ctx.stream(), dev_ctx.GetPlace());
233233

@@ -416,7 +416,7 @@ struct SearchAlgorithmBase<ConvKind::kBackwardData> {
416416

417417
// auto workspace_handle = dev_ctx.cudnn_workspace_handle();
418418
auto workspace_handle = GetDnnWorkspace(
419-
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream(), dev_ctx.GetPlace());
419+
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream());
420420
workspace_handle.RunFuncSync(
421421
cudnn_find_func, max_workspace_size, UseFixedWorkspace());
422422

@@ -569,7 +569,7 @@ struct SearchAlgorithmBase<ConvKind::kBackwardFilter> {
569569
CalcWorkspaceLimitInBytes(UseFixedWorkspace());
570570
// auto workspace_handle = dev_ctx.cudnn_workspace_handle();
571571
auto workspace_handle = GetDnnWorkspace(
572-
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream(), dev_ctx.GetPlace());
572+
const_cast<Allocator*>(&(dev_ctx.GetAllocator())), dev_ctx.stream());
573573
if (phi::backends::gpu::CudnnDataType<T>::type != CUDNN_DATA_HALF) {
574574
size_t max_workspace_size =
575575
GetMaxWorkspaceSize(args, workspace_size_limit);

0 commit comments

Comments (0)