From f4c8fc12c67add99015550420e443b7009f4ca31 Mon Sep 17 00:00:00 2001 From: co63oc <4617245+co63oc@users.noreply.github.com> Date: Tue, 21 Apr 2026 09:09:42 +0800 Subject: [PATCH 1/4] update mlu kernels --- backends/mlu/kernels/abs_kernel.cc | 10 +- backends/mlu/kernels/accuracy_kernel.cc | 12 +- backends/mlu/kernels/activation_kernel.cc | 272 +++++++++--------- backends/mlu/kernels/adam_kernel.cc | 198 +++++++------ backends/mlu/kernels/add_n_kernel.cc | 4 +- backends/mlu/kernels/arange_kernel.cc | 18 +- backends/mlu/kernels/arg_max_kernel.cc | 8 +- backends/mlu/kernels/argsort_kernel.cc | 14 +- backends/mlu/kernels/assign_kernel.cc | 24 +- backends/mlu/kernels/batch_norm_kernel.cc | 95 +++--- backends/mlu/kernels/bce_loss_kernel.cc | 14 +- backends/mlu/kernels/bitwise_kernel.cc | 42 +-- backends/mlu/kernels/cast_kernel.cc | 4 +- .../check_finite_and_unscale_kernel.cc | 8 +- backends/mlu/kernels/clip_kernel.cc | 18 +- .../mlu/kernels/coalesce_tensor_kernel.cc | 43 ++- backends/mlu/kernels/compare_kernel.cc | 72 ++--- backends/mlu/kernels/concat_kernel.cc | 14 +- backends/mlu/kernels/contiguous_kernel.cc | 6 +- backends/mlu/kernels/conv_kernel.cc | 72 ++--- backends/mlu/kernels/conv_transpose_kernel.cc | 25 +- backends/mlu/kernels/cross_entropy_kernel.cc | 61 ++-- backends/mlu/kernels/cumsum_kernel.cc | 6 +- backends/mlu/kernels/data_kernel.cc | 6 +- .../mlu/kernels/deformable_conv_kernel.cc | 28 +- backends/mlu/kernels/dropout_kernel.cc | 20 +- .../mlu/kernels/elementwise_add_kernel.cc | 22 +- .../mlu/kernels/elementwise_div_kernel.cc | 24 +- .../mlu/kernels/elementwise_max_kernel.cc | 22 +- .../mlu/kernels/elementwise_min_kernel.cc | 22 +- .../mlu/kernels/elementwise_mul_kernel.cc | 22 +- .../mlu/kernels/elementwise_pow_kernel.cc | 22 +- .../mlu/kernels/elementwise_sub_kernel.cc | 22 +- backends/mlu/kernels/expand_as_kernel.cc | 8 +- backends/mlu/kernels/expand_kernel.cc | 12 +- backends/mlu/kernels/fill_kernel.cc | 6 +- backends/mlu/kernels/flash_attn_kernel.cc | 132 +++++---- backends/mlu/kernels/flip_kernel.cc | 4 +- backends/mlu/kernels/full_kernel.cc | 22 +- .../mlu/kernels/funcs/elementwise_utils.h | 26 +- backends/mlu/kernels/funcs/logic_op.h | 6 +- backends/mlu/kernels/funcs/mlu_baseop.h | 3 + backends/mlu/kernels/funcs/mlu_funcs.h | 32 +-- backends/mlu/kernels/funcs/range_op.h | 2 +- backends/mlu/kernels/funcs/reduce_op.h | 4 +- backends/mlu/kernels/gather_kernel.cc | 18 +- backends/mlu/kernels/gather_nd_kernel.cc | 24 +- backends/mlu/kernels/gaussian_kernel.cc | 2 +- .../mlu/kernels/generate_proposals_kernel.cc | 16 +- backends/mlu/kernels/grid_sample_kernel.cc | 16 +- backends/mlu/kernels/huber_loss_kernel.cc | 16 +- backends/mlu/kernels/index_sample_kernel.cc | 30 +- backends/mlu/kernels/index_select_kernel.cc | 24 +- backends/mlu/kernels/interpolate_kernel.cc | 72 ++--- backends/mlu/kernels/kldiv_loss_kernel.cc | 28 +- backends/mlu/kernels/label_smooth_kernel.cc | 6 +- backends/mlu/kernels/layer_norm_kernel.cc | 30 +- backends/mlu/kernels/log_softmax_kernel.cc | 10 +- backends/mlu/kernels/logical_kernel.cc | 22 +- .../mlu/kernels/lookup_table_v2_op_kernel.cc | 14 +- backends/mlu/kernels/masked_select_kernel.cc | 14 +- backends/mlu/kernels/matmul_kernel.cc | 80 +++--- backends/mlu/kernels/mean_all_kernel.cc | 10 +- backends/mlu/kernels/memcpy_kernel.cc | 16 +- backends/mlu/kernels/meshgrid_kernel.cc | 10 +- backends/mlu/kernels/momentum_kernel.cc | 36 +-- backends/mlu/kernels/multinomial_kernel.cc | 6 +- backends/mlu/kernels/nonzero_kernel.cc | 4 +- 
backends/mlu/kernels/numel_kernel.cc | 4 +- backends/mlu/kernels/one_hot_kernel.cc | 14 +- backends/mlu/kernels/p_norm_kernel.cc | 34 +-- backends/mlu/kernels/pool2d_kernel.cc | 30 +- backends/mlu/kernels/prior_box_kernel.cc | 16 +- backends/mlu/kernels/prod_kernel.cc | 8 +- backends/mlu/kernels/randperm_kernel.cc | 6 +- backends/mlu/kernels/reduce_all_kernel.cc | 4 +- backends/mlu/kernels/reduce_any_kernel.cc | 4 +- backends/mlu/kernels/reduce_max_kernel.cc | 34 ++- backends/mlu/kernels/reduce_mean_kernel.cc | 14 +- backends/mlu/kernels/reduce_min_kernel.cc | 8 +- backends/mlu/kernels/reduce_sum_kernel.cc | 16 +- backends/mlu/kernels/rnn_kernel.cc | 40 +-- backends/mlu/kernels/roi_align_kernel.cc | 22 +- backends/mlu/kernels/roll_kernel.cc | 10 +- backends/mlu/kernels/scale_kernel.cc | 14 +- backends/mlu/kernels/scatter_kernel.cc | 16 +- backends/mlu/kernels/set_value_kernel.cc | 28 +- backends/mlu/kernels/sgd_kernel.cc | 12 +- .../sigmoid_cross_with_logits_kernel.cc | 18 +- backends/mlu/kernels/slice_kernel.cc | 36 +-- backends/mlu/kernels/softmax_kernel.cc | 10 +- backends/mlu/kernels/split_kernel.cc | 12 +- .../mlu/kernels/squared_l2_norm_kernel.cc | 18 +- backends/mlu/kernels/squeeze_kernel.cc | 22 +- backends/mlu/kernels/stack_kernel.cc | 4 +- backends/mlu/kernels/strided_copy_kernel.cc | 6 +- backends/mlu/kernels/strided_slice_kernel.cc | 44 +-- .../mlu/kernels/sync_batch_norm_kernel.cc | 59 ++-- backends/mlu/kernels/tile_kernel.cc | 20 +- backends/mlu/kernels/top_k_kernel.cc | 15 +- backends/mlu/kernels/transpose_kernel.cc | 8 +- backends/mlu/kernels/tril_triu_op_mlu.cc | 24 +- .../truncated_gaussian_random_kernel.cc | 6 +- backends/mlu/kernels/uniform_kernel.cc | 16 +- backends/mlu/kernels/unsqueeze_kernel.cc | 20 +- backends/mlu/kernels/unstack_kernel.cc | 8 +- backends/mlu/kernels/where_kernel.cc | 20 +- backends/mlu/kernels/yolo_box_kernel.cc | 12 +- 108 files changed, 1348 insertions(+), 1375 deletions(-) mode change 100755 => 100644 backends/mlu/kernels/top_k_kernel.cc diff --git a/backends/mlu/kernels/abs_kernel.cc b/backends/mlu/kernels/abs_kernel.cc index 570edb639df..9eb11adb76f 100644 --- a/backends/mlu/kernels/abs_kernel.cc +++ b/backends/mlu/kernels/abs_kernel.cc @@ -18,9 +18,7 @@ namespace custom_kernel { template -void AbsKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void AbsKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -34,9 +32,9 @@ void AbsKernel(const Context& dev_ctx, template void AbsGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc input_desc(x); diff --git a/backends/mlu/kernels/accuracy_kernel.cc b/backends/mlu/kernels/accuracy_kernel.cc index 646acee3c74..51317c5a9e6 100644 --- a/backends/mlu/kernels/accuracy_kernel.cc +++ b/backends/mlu/kernels/accuracy_kernel.cc @@ -18,12 +18,12 @@ namespace custom_kernel { template void AccuracyRawKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& indices, - const phi::DenseTensor& label, - phi::DenseTensor* accuracy, - phi::DenseTensor* correct, - phi::DenseTensor* total) { + const DenseTensor& out, + const DenseTensor& indices, + const DenseTensor& label, + DenseTensor* accuracy, + DenseTensor* correct, + DenseTensor* total) { int num_samples = indices.dims()[0]; 
if (num_samples == 0) { return; diff --git a/backends/mlu/kernels/activation_kernel.cc b/backends/mlu/kernels/activation_kernel.cc index 38bb3592948..a25afc70b46 100644 --- a/backends/mlu/kernels/activation_kernel.cc +++ b/backends/mlu/kernels/activation_kernel.cc @@ -20,10 +20,10 @@ namespace custom_kernel { template void ActivationKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, float alpha, cnnlActivationMode_t act_mode, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlActivationDesc act_desc(act_mode, alpha); @@ -40,11 +40,11 @@ void ActivationKernel(const Context& dev_ctx, template void ActivationGradKernelV1(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& dout, float alpha, cnnlActivationMode_t act_mode, - phi::DenseTensor* dx) { + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc x_desc(x); @@ -67,10 +67,10 @@ void ActivationGradKernelV1(const Context& dev_ctx, template void ActivationGradKernelV2(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, + const DenseTensor& out, + const DenseTensor& dout, cnnlActivationMode_t act_mode, - phi::DenseTensor* dx) { + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc out_desc(out); @@ -93,10 +93,10 @@ void ActivationGradKernelV2(const Context& dev_ctx, template void ActivationGradKernelV3(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, + const DenseTensor& out, + const DenseTensor& dout, cnnlActivationMode_t act_mode, - phi::DenseTensor* dx) { + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc out_desc(out); @@ -119,91 +119,91 @@ void ActivationGradKernelV3(const Context& dev_ctx, template void ReluKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_RELU, out); } template void ReluGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { ActivationGradKernelV3( dev_ctx, out, dout, CNNL_ACTIVATION_RELU, dx); } template void Relu6RawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, float threshold, - phi::DenseTensor* out) { + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_RELU6, out); } template void Relu6Kernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { custom_kernel::Relu6RawKernel(dev_ctx, x, 6.0, out); } template void Relu6GradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { ActivationGradKernelV3( dev_ctx, out, dout, CNNL_ACTIVATION_RELU6, dx); } template void SigmoidKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_SIGMOID, out); } template void SigmoidGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { ActivationGradKernelV2( dev_ctx, out, dout, 
CNNL_ACTIVATION_SIGMOID, dx); } template void TanhKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_TANH, out); } template void TanhGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& out_grad, - phi::DenseTensor* x_grad) { + const DenseTensor& out, + const DenseTensor& out_grad, + DenseTensor* x_grad) { ActivationGradKernelV2( dev_ctx, out, out_grad, CNNL_ACTIVATION_TANH, x_grad); } template void LeakyReluKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, double alpha, - phi::DenseTensor* out) { + DenseTensor* out) { float alp = static_cast(alpha); ActivationKernel(dev_ctx, x, alp, CNNL_ACTIVATION_LEAKYRELU, out); } template void LeakyReluGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& dout, double alpha, - phi::DenseTensor* dx) { + DenseTensor* dx) { float alp = static_cast(alpha); ActivationGradKernelV1( dev_ctx, x, dout, alp, CNNL_ACTIVATION_LEAKYRELU, dx); @@ -211,43 +211,43 @@ void LeakyReluGradKernel(const Context& dev_ctx, template void GeluKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, bool approximate, - phi::DenseTensor* out) { + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_GELU, out); } template void GeluGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& out_grad, bool approximate, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { ActivationGradKernelV1( dev_ctx, x, out_grad, 1.0, CNNL_ACTIVATION_GELU, x_grad); } template void SiluKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { ActivationKernel(dev_ctx, x, 1.0, CNNL_ACTIVATION_SILU, out); } template void SiluGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { ActivationGradKernelV3( dev_ctx, x, dout, CNNL_ACTIVATION_SILU, dx); } template void SquareKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -262,9 +262,9 @@ void SquareKernel(const Context& dev_ctx, template void SquareGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); auto factor_1 = static_cast(1.0); auto factor_2 = static_cast(2.0); @@ -291,12 +291,12 @@ void SquareGradKernel(const Context& dev_ctx, template void PowKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& factor_scalar, - phi::DenseTensor* out) { + const DenseTensor& x, + const Scalar& factor_scalar, + DenseTensor* out) { auto factor = factor_scalar.to(); dev_ctx.template Alloc(out); - phi::DenseTensor factor_tensor; + DenseTensor factor_tensor; factor_tensor.Resize(x.dims()); dev_ctx.template Alloc(&factor_tensor); MLUCnnlTensorDesc factor_desc(factor_tensor); @@ -322,20 +322,20 @@ void PowKernel(const Context& dev_ctx, // dx = dout * 
factor * x.pow(factor-1) template void PowGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - const phi::Scalar& factor_scalar, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + const Scalar& factor_scalar, + DenseTensor* dx) { auto factor = factor_scalar.to(); // Step1: Compute x_pow = x.pow(factor-1) - phi::DenseTensor x_pow; + DenseTensor x_pow; x_pow.Resize(x.dims()); - auto factor_1 = phi::Scalar(factor - static_cast(1.0)); + auto factor_1 = Scalar(factor - static_cast(1.0)); custom_kernel::PowKernel(dev_ctx, x, factor_1, &x_pow); // Step 2: Construct a broadcast factor, which has the same shape with x. - phi::DenseTensor factor_tensor; + DenseTensor factor_tensor; factor_tensor.Resize(x.dims()); dev_ctx.template Alloc(&factor_tensor); MLUCnnlTensorDesc factor_desc(factor_tensor); @@ -346,7 +346,7 @@ void PowGradKernel(const Context& dev_ctx, GetBasePtr(&factor_tensor)); // Step 3: Compute x_power_mul_factor = factor * x_pow - phi::DenseTensor x_power_mul_factor; + DenseTensor x_power_mul_factor; x_power_mul_factor.Resize(x.dims()); dev_ctx.template Alloc(&x_power_mul_factor); MLUOpTensorKernel(dev_ctx, @@ -364,8 +364,8 @@ void PowGradKernel(const Context& dev_ctx, template void AtanKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); cnnlComputationPreference_t prefer = CNNL_COMPUTATION_FAST; MLUCnnlTrigonDesc trigon_desc(CNNL_TRIGON_ATAN, prefer); @@ -382,17 +382,17 @@ void AtanKernel(const Context& dev_ctx, // dx = dout * 1 / (1 + x.pow(2)) template void AtanGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { // Step1: Compute x_pow = x.pow(2) - phi::DenseTensor x_pow; + DenseTensor x_pow; x_pow.Resize(x.dims()); - auto factor = phi::Scalar(static_cast(2.0)); + auto factor = Scalar(static_cast(2.0)); custom_kernel::PowKernel(dev_ctx, x, factor, &x_pow); // Step2: x_pow_1 = x_pow + 1 - phi::DenseTensor factor_tensor, x_pow_1; + DenseTensor factor_tensor, x_pow_1; factor_tensor.Resize(x.dims()); x_pow_1.Resize(x.dims()); dev_ctx.template Alloc(&x_pow_1); @@ -415,8 +415,8 @@ void AtanGradKernel(const Context& dev_ctx, template void ReciprocalKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x); MLUCnnlTensorDesc out_desc(*out); @@ -426,9 +426,9 @@ void ReciprocalKernel(const Context& dev_ctx, template void ReciprocalGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); Tensor square_out; square_out.Resize(out.dims()); @@ -466,8 +466,8 @@ void ReciprocalGradKernel(const Context& dev_ctx, template void SqrtKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -484,9 +484,9 @@ void SqrtKernel(const Context& dev_ctx, template void SqrtGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* 
dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc data_desc(out); @@ -499,8 +499,8 @@ void SqrtGradKernel(const Context& dev_ctx, template void RsqrtKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -517,9 +517,9 @@ void RsqrtKernel(const Context& dev_ctx, template void RsqrtGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc data_desc(out); @@ -531,9 +531,7 @@ void RsqrtGradKernel(const Context& dev_ctx, } template -void CosKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void CosKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -550,12 +548,12 @@ void CosKernel(const Context& dev_ctx, template void CosGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); - phi::DenseTensor sin_out; + DenseTensor sin_out; sin_out.Resize(x.dims()); dev_ctx.template Alloc(&sin_out); @@ -592,9 +590,9 @@ void CosGradKernel(const Context& dev_ctx, // CNNL_LOG_10 = 2, template void LogMLUKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, cnnlLogBase_t log_base, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -611,30 +609,26 @@ void LogMLUKernel(const Context& dev_ctx, } template -void LogKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void LogKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { LogMLUKernel(dev_ctx, x, CNNL_LOG_E, out); } template void Log2Kernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { LogMLUKernel(dev_ctx, x, CNNL_LOG_2, out); } template void Log10Kernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { LogMLUKernel(dev_ctx, x, CNNL_LOG_10, out); } template -void ExpKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void ExpKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -651,9 +645,9 @@ void ExpKernel(const Context& dev_ctx, template void ExpGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc dout_desc(dout); @@ -675,9 +669,7 @@ void ExpGradKernel(const Context& dev_ctx, } template -void SinKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void SinKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); MLUCnnlTensorDesc output_desc(*out); @@ -692,12 +684,12 @@ void SinKernel(const Context& dev_ctx, template void SinGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - 
phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); - phi::DenseTensor cos_out; - phi::DenseTensorMeta meta = {x.dtype(), x.dims()}; + DenseTensor cos_out; + DenseTensorMeta meta = {x.dtype(), x.dims()}; cos_out.set_meta(meta); dev_ctx.template Alloc(&cos_out); @@ -729,8 +721,8 @@ void SinGradKernel(const Context& dev_ctx, template void HardSwishKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { float threshold = 6; float scale = 6; float offset = 3; @@ -763,26 +755,26 @@ void HardSwishKernel(const Context& dev_ctx, template void SwishKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { ActivationKernel( dev_ctx, x, 1.0 /* coef */, CNNL_ACTIVATION_SILU, out); } template void SwishGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { ActivationGradKernelV3( dev_ctx, x, dout, CNNL_ACTIVATION_SILU, dx); } template void HardSwishGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { float threshold = 6; float scale = 6; float offset = 3; @@ -822,10 +814,10 @@ void HardSwishGradKernel(const Context& dev_ctx, template void HardSigmoidKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, float slope, float offset, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSIGMOID, @@ -846,11 +838,11 @@ void HardSigmoidKernel(const Context& dev_ctx, template void HardSigmoidGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, + const DenseTensor& out, + const DenseTensor& dout, float slope, float offset, - phi::DenseTensor* dx) { + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSIGMOID, 1.0f /*coef useless*/, @@ -876,8 +868,8 @@ void HardSigmoidGradKernel(const Context& dev_ctx, template void FloorKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -891,10 +883,10 @@ void FloorKernel(const Context& dev_ctx, template void RoundKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const int decimals UNUSED, // add decimals; for now it's just for CI - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_desc(x); @@ -908,8 +900,8 @@ void RoundKernel(const Context& dev_ctx, template void RoundGradKernel(const Context& dev_ctx, - const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); FillMLUTensorWithHostValue(dev_ctx, static_cast(0), dx); } diff --git a/backends/mlu/kernels/adam_kernel.cc b/backends/mlu/kernels/adam_kernel.cc index 86a68a12acd..5dfc2ca78fc 100644 --- a/backends/mlu/kernels/adam_kernel.cc +++ b/backends/mlu/kernels/adam_kernel.cc @@ -18,33 +18,32 @@ namespace custom_kernel { template -void AdamKernel( - const Context& dev_ctx, - const phi::DenseTensor& param, - const phi::DenseTensor& grad, - const 
phi::DenseTensor& learning_rate, - const phi::DenseTensor& moment1, - const phi::DenseTensor& moment2, - const paddle::optional& moment2_max, // UNUSED - const phi::DenseTensor& beta1_pow_in, - const phi::DenseTensor& beta2_pow_in, - const paddle::optional& master_param, - const paddle::optional& skip_update, - const phi::Scalar& beta1_in, - const phi::Scalar& beta2_in, - const phi::Scalar& epsilon_in, - bool lazy_mode, - int64_t min_row_size_to_use_multithread, - bool multi_precision, - bool use_global_beta_pow, - bool amsgrad, // UNUSED - phi::DenseTensor* param_out, - phi::DenseTensor* moment1_out, - phi::DenseTensor* moment2_out, - phi::DenseTensor* moment2_max_out, // UNUSED - phi::DenseTensor* beta1_pow_out, - phi::DenseTensor* beta2_pow_out, - phi::DenseTensor* master_param_out) { +void AdamKernel(const Context& dev_ctx, + const DenseTensor& param, + const DenseTensor& grad, + const DenseTensor& learning_rate, + const DenseTensor& moment1, + const DenseTensor& moment2, + const paddle::optional& moment2_max, // UNUSED + const DenseTensor& beta1_pow_in, + const DenseTensor& beta2_pow_in, + const paddle::optional& master_param, + const paddle::optional& skip_update, + const Scalar& beta1_in, + const Scalar& beta2_in, + const Scalar& epsilon_in, + bool lazy_mode, + int64_t min_row_size_to_use_multithread, + bool multi_precision, + bool use_global_beta_pow, + bool amsgrad, // UNUSED + DenseTensor* param_out, + DenseTensor* moment1_out, + DenseTensor* moment2_out, + DenseTensor* moment2_max_out, // UNUSED + DenseTensor* beta1_pow_out, + DenseTensor* beta2_pow_out, + DenseTensor* master_param_out) { PADDLE_ENFORCE_NE( amsgrad, true, @@ -78,8 +77,8 @@ void AdamKernel( return; } - phi::DenseTensor* beta1_pow = const_cast(&beta1_pow_in); - phi::DenseTensor* beta2_pow = const_cast(&beta2_pow_in); + DenseTensor* beta1_pow = const_cast(&beta1_pow_in); + DenseTensor* beta2_pow = const_cast(&beta2_pow_in); VLOG(4) << "use_global_beta_pow:" << use_global_beta_pow; @@ -88,8 +87,8 @@ void AdamKernel( *moment1_out = moment1; *moment2_out = moment2; - phi::DenseTensor beta1_pow_tmp; - phi::DenseTensor beta2_pow_tmp; + DenseTensor beta1_pow_tmp; + DenseTensor beta2_pow_tmp; if (beta1_pow->place().GetType() == phi::AllocationType::CPU) { MPDType beta1 = *beta1_pow->data(); beta1_pow_tmp.Resize({1}); @@ -123,14 +122,14 @@ void AdamKernel( "value is:%d.", beta2_pow_out->numel())); - const phi::DenseTensor* beta1_tensor = nullptr; - const phi::DenseTensor* beta2_tensor = nullptr; - const phi::DenseTensor* epsilon_tensor = nullptr; + const DenseTensor* beta1_tensor = nullptr; + const DenseTensor* beta2_tensor = nullptr; + const DenseTensor* epsilon_tensor = nullptr; - phi::DenseTensor beta1_tmp; - phi::DenseTensor beta2_tmp; - phi::DenseTensor epsilon_tmp; - phi::DenseTensorMeta meta = {phi::DataType::FLOAT32, {1}}; + DenseTensor beta1_tmp; + DenseTensor beta2_tmp; + DenseTensor epsilon_tmp; + DenseTensorMeta meta = {phi::DataType::FLOAT32, {1}}; beta1_tmp.Resize({1}); beta2_tmp.Resize({1}); epsilon_tmp.Resize({1}); @@ -215,7 +214,7 @@ void AdamKernel( if (param.dtype() != phi::DataType::FLOAT32) { // 1. cast param_in_out(MPDType) to param_out(T) anyway. 
- phi::DenseTensorMeta meta = {param.dtype(), param.dims()}; + DenseTensorMeta meta = {param.dtype(), param.dims()}; param_out->set_meta(meta); dev_ctx.template Alloc(param_out); MLUCnnlTensorDesc param_out_desc(*param_out); @@ -272,36 +271,35 @@ void AdamKernel( } template -void AdamWKernel( - const Context& dev_ctx, - const phi::DenseTensor& param, - const phi::DenseTensor& grad, - const phi::DenseTensor& learning_rate, - const phi::DenseTensor& moment1, - const phi::DenseTensor& moment2, - const paddle::optional& moment2_max, // UNUSED - const phi::DenseTensor& beta1_pow, - const phi::DenseTensor& beta2_pow, - const paddle::optional& master_param, - const paddle::optional& skip_update, - const phi::Scalar& beta1, - const phi::Scalar& beta2, - const phi::Scalar& epsilon, - float lr_ratio, - float coeff, - bool with_decay, - bool lazy_mode, - int64_t min_row_size_to_use_multithread, - bool multi_precision, - bool use_global_beta_pow, - bool amsgrad, // UNUSED - phi::DenseTensor* param_out, - phi::DenseTensor* moment1_out, - phi::DenseTensor* moment2_out, - phi::DenseTensor* moment2_max_out, // UNUSED - phi::DenseTensor* beta1_pow_out, - phi::DenseTensor* beta2_pow_out, - phi::DenseTensor* master_param_outs) { +void AdamWKernel(const Context& dev_ctx, + const DenseTensor& param, + const DenseTensor& grad, + const DenseTensor& learning_rate, + const DenseTensor& moment1, + const DenseTensor& moment2, + const paddle::optional& moment2_max, // UNUSED + const DenseTensor& beta1_pow, + const DenseTensor& beta2_pow, + const paddle::optional& master_param, + const paddle::optional& skip_update, + const Scalar& beta1, + const Scalar& beta2, + const Scalar& epsilon, + float lr_ratio, + float coeff, + bool with_decay, + bool lazy_mode, + int64_t min_row_size_to_use_multithread, + bool multi_precision, + bool use_global_beta_pow, + bool amsgrad, // UNUSED + DenseTensor* param_out, + DenseTensor* moment1_out, + DenseTensor* moment2_out, + DenseTensor* moment2_max_out, // UNUSED + DenseTensor* beta1_pow_out, + DenseTensor* beta2_pow_out, + DenseTensor* master_param_outs) { PADDLE_ENFORCE_NE( amsgrad, true, @@ -416,28 +414,28 @@ void AdamWKernel( template void MergedAdamKernel( const Context& dev_ctx, - const std::vector& param, - const std::vector& grad, - const std::vector& learning_rate, - const std::vector& moment1, - const std::vector& moment2, - const paddle::optional>& moment2_max, - const std::vector& beta1_pow, - const std::vector& beta2_pow, - const paddle::optional>& master_param, - const phi::Scalar& beta1, - const phi::Scalar& beta2, - const phi::Scalar& epsilon, + const std::vector& param, + const std::vector& grad, + const std::vector& learning_rate, + const std::vector& moment1, + const std::vector& moment2, + const paddle::optional>& moment2_max, + const std::vector& beta1_pow, + const std::vector& beta2_pow, + const paddle::optional>& master_param, + const Scalar& beta1, + const Scalar& beta2, + const Scalar& epsilon, bool multi_precision, bool use_global_beta_pow, bool amsgrad, - std::vector param_out, - std::vector moment1_out, - std::vector moment2_out, - std::vector moment2_max_out, - std::vector beta1_pow_out, - std::vector beta2_pow_out, - std::vector master_param_out) { + std::vector param_out, + std::vector moment1_out, + std::vector moment2_out, + std::vector moment2_max_out, + std::vector beta1_pow_out, + std::vector beta2_pow_out, + std::vector master_param_out) { size_t param_num = param.size(); PADDLE_ENFORCE_EQ(param_num, grad.size(), @@ -495,14 +493,14 @@ void 
MergedAdamKernel( true, phi::errors::Unimplemented("Operation amsgrad is not supported yet.")); - const phi::DenseTensor* beta1_tensor = nullptr; - const phi::DenseTensor* beta2_tensor = nullptr; - const phi::DenseTensor* epsilon_tensor = nullptr; + const DenseTensor* beta1_tensor = nullptr; + const DenseTensor* beta2_tensor = nullptr; + const DenseTensor* epsilon_tensor = nullptr; - phi::DenseTensor beta1_tmp; - phi::DenseTensor beta2_tmp; - phi::DenseTensor epsilon_tmp; - phi::DenseTensorMeta meta = {phi::DataType::FLOAT32, {1}}; + DenseTensor beta1_tmp; + DenseTensor beta2_tmp; + DenseTensor epsilon_tmp; + DenseTensorMeta meta = {phi::DataType::FLOAT32, {1}}; beta1_tmp.set_meta(meta); beta2_tmp.set_meta(meta); epsilon_tmp.set_meta(meta); @@ -528,12 +526,10 @@ void MergedAdamKernel( *moment1_out[idx] = *moment1[idx]; *moment2_out[idx] = *moment2[idx]; - phi::DenseTensor* beta1_pow_tensor = - const_cast(beta1_pow[idx]); - phi::DenseTensor* beta2_pow_tensor = - const_cast(beta2_pow[idx]); - phi::DenseTensor beta1_pow_tmp; - phi::DenseTensor beta2_pow_tmp; + DenseTensor* beta1_pow_tensor = const_cast(beta1_pow[idx]); + DenseTensor* beta2_pow_tensor = const_cast(beta2_pow[idx]); + DenseTensor beta1_pow_tmp; + DenseTensor beta2_pow_tmp; if (beta1_pow_tensor->place().GetType() == phi::AllocationType::CPU) { T beta1_pow_ = *beta1_pow_tensor->data(); beta1_pow_tmp.Resize({1}); diff --git a/backends/mlu/kernels/add_n_kernel.cc b/backends/mlu/kernels/add_n_kernel.cc index efb8c030bd1..55f76e47d2a 100644 --- a/backends/mlu/kernels/add_n_kernel.cc +++ b/backends/mlu/kernels/add_n_kernel.cc @@ -19,8 +19,8 @@ namespace custom_kernel { template void AddNKernel(const Context& dev_ctx, - const std::vector& x, - phi::DenseTensor* out) { + const std::vector& x, + DenseTensor* out) { dev_ctx.template Alloc(out); auto stream = dev_ctx.stream(); diff --git a/backends/mlu/kernels/arange_kernel.cc b/backends/mlu/kernels/arange_kernel.cc index 7cba83bf3e8..01deefcd501 100644 --- a/backends/mlu/kernels/arange_kernel.cc +++ b/backends/mlu/kernels/arange_kernel.cc @@ -18,14 +18,12 @@ namespace custom_kernel { -using phi::Scalar; - template void ArangeKernel(const Context& dev_ctx, - const phi::Scalar& start, - const phi::Scalar& end, - const phi::Scalar& step, - phi::DenseTensor* out) { + const Scalar& start, + const Scalar& end, + const Scalar& step, + DenseTensor* out) { T start_value = start.to(); T end_value = end.to(); T step_value = step.to(); @@ -35,10 +33,10 @@ void ArangeKernel(const Context& dev_ctx, template void ArangeTensorKernel(const Context& dev_ctx, - const phi::DenseTensor& start_t, - const phi::DenseTensor& end_t, - const phi::DenseTensor& step_t, - phi::DenseTensor* out) { + const DenseTensor& start_t, + const DenseTensor& end_t, + const DenseTensor& step_t, + DenseTensor* out) { custom_kernel::ArangeKernel( dev_ctx, Scalar(start_t), Scalar(end_t), Scalar(step_t), out); } diff --git a/backends/mlu/kernels/arg_max_kernel.cc b/backends/mlu/kernels/arg_max_kernel.cc index a46e5b68dad..8862b33cb67 100644 --- a/backends/mlu/kernels/arg_max_kernel.cc +++ b/backends/mlu/kernels/arg_max_kernel.cc @@ -18,12 +18,12 @@ namespace custom_kernel { template void ArgMaxKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& axis, + const DenseTensor& x, + const Scalar& axis, bool keepdims, bool flatten, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { auto arg_max_axis = axis.to(); if (x.numel() == 0) return; PADDLE_ENFORCE_EQ( @@ -37,7 +37,7 @@ void 
ArgMaxKernel(const Context& dev_ctx, dtype)); if (arg_max_axis < 0) { - phi::DDim x_dims; + DDim x_dims; x_dims = x.dims(); arg_max_axis += x_dims.size(); } diff --git a/backends/mlu/kernels/argsort_kernel.cc b/backends/mlu/kernels/argsort_kernel.cc index be3d7f0d2ac..b2c700af9ba 100755 --- a/backends/mlu/kernels/argsort_kernel.cc +++ b/backends/mlu/kernels/argsort_kernel.cc @@ -19,12 +19,12 @@ namespace custom_kernel { template void ArgsortKernel(const Context& dev_ctx, - const phi::DenseTensor& in, + const DenseTensor& in, int axis, bool descending, bool stable, - phi::DenseTensor* output, - phi::DenseTensor* indices) { + DenseTensor* output, + DenseTensor* indices) { const auto& sorted = true; // axis < 0, calculate the real axis if (axis < 0) { @@ -79,13 +79,13 @@ void ArgsortKernel(const Context& dev_ctx, template void ArgsortGradKernel(const Context& dev_ctx, - const phi::DenseTensor& indices, - const phi::DenseTensor& input, - const phi::DenseTensor& out_grad, + const DenseTensor& indices, + const DenseTensor& input, + const DenseTensor& out_grad, int axis, bool descending, bool stable, - phi::DenseTensor* in_grad) { + DenseTensor* in_grad) { dev_ctx.template Alloc(in_grad); auto in_dims = indices.dims(); diff --git a/backends/mlu/kernels/assign_kernel.cc b/backends/mlu/kernels/assign_kernel.cc index d796b4b0116..8a5c5418dfa 100644 --- a/backends/mlu/kernels/assign_kernel.cc +++ b/backends/mlu/kernels/assign_kernel.cc @@ -18,16 +18,16 @@ namespace custom_kernel { template void AssignKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); TensorCopy(dev_ctx, x, false, out); } template void AssignRawKernel(const Context& dev_ctx, - const paddle::optional& x, - phi::DenseTensor* out) { + const paddle::optional& x, + DenseTensor* out) { if (x) { if (!x->initialized()) { return; } @@ -39,8 +39,8 @@ void AssignRawKernel(const Context& dev_ctx, template void AssignArrayKernel(const Context& dev_ctx, - const std::vector& x, - std::vector out) { + const std::vector& x, + std::vector out) { for (size_t i = 0; i < x.size(); ++i) { custom_kernel::AssignKernel(dev_ctx, *x[i], out.at(i)); } @@ -49,8 +49,8 @@ void AssignArrayKernel(const Context& dev_ctx, template typename std::enable_if::value>::type CopyVectorToTensor( const Context& dev_ctx, - const std::vector& values, - phi::DenseTensor* out) { + const std::vector& values, + DenseTensor* out) { // If attribute value dtype is vector<bool>, it will be converted to // vector<int>. At the same time, we cannot use vector<bool> to hold // the value, because C++ stores each element as a bit rather than a byte. 
@@ -75,8 +75,8 @@ typename std::enable_if::value>::type CopyVectorToTensor( template typename std::enable_if::value>::type CopyVectorToTensor( const Context& dev_ctx, - const std::vector& values, - phi::DenseTensor* out) { + const std::vector& values, + DenseTensor* out) { std::vector assign_values; assign_values.reserve(values.size()); for (const auto& val : values) { @@ -90,8 +90,8 @@ template void AssignValueKernel(const Context& dev_ctx, const std::vector& shape, phi::DataType dtype, - const std::vector& values, - phi::DenseTensor* out) { + const std::vector& values, + DenseTensor* out) { auto template_dtype = phi::CppTypeToDataType::Type(); PADDLE_ENFORCE_EQ( dtype, diff --git a/backends/mlu/kernels/batch_norm_kernel.cc b/backends/mlu/kernels/batch_norm_kernel.cc index dccfba0b1d4..67096520734 100644 --- a/backends/mlu/kernels/batch_norm_kernel.cc +++ b/backends/mlu/kernels/batch_norm_kernel.cc @@ -19,23 +19,23 @@ namespace custom_kernel { template void BatchNormKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& running_mean, - const phi::DenseTensor& running_var, - const paddle::optional& scale, - const paddle::optional& bias, + const DenseTensor& x, + const DenseTensor& running_mean, + const DenseTensor& running_var, + const paddle::optional& scale, + const paddle::optional& bias, bool is_test, float momentum, float epsilon, const std::string& data_layout_str, bool use_global_stats, bool trainable_stats, - phi::DenseTensor* y, - phi::DenseTensor* mean_out, - phi::DenseTensor* variance_out, - phi::DenseTensor* saved_mean, - phi::DenseTensor* saved_variance, - phi::DenseTensor* reserve_space) { + DenseTensor* y, + DenseTensor* mean_out, + DenseTensor* variance_out, + DenseTensor* saved_mean, + DenseTensor* saved_variance, + DenseTensor* reserve_space) { bool test_mode = is_test && (!trainable_stats); bool global_stats = test_mode || use_global_stats; @@ -64,7 +64,7 @@ void BatchNormKernel(const Context& dev_ctx, auto* Scale = scale.get_ptr(); auto* Bias = bias.get_ptr(); - phi::DenseTensor new_scale, new_bias; + DenseTensor new_scale, new_bias; if (Scale) { new_scale = scale.get(); } else { @@ -104,9 +104,9 @@ void BatchNormKernel(const Context& dev_ctx, bool need_transpose = (data_layout == DataLayout::kNCHW && x_dims.size() != 2); if (need_transpose) { - transformed_x.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_x.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_x); - transformed_y.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_y.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_y); const std::vector perm = {0, 2, 3, 1}; @@ -172,26 +172,25 @@ void BatchNormKernel(const Context& dev_ctx, } template -void BatchNormGradKernel( - const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& scale, - const paddle::optional& bias, - const paddle::optional& mean, - const paddle::optional& variance, - const phi::DenseTensor& saved_mean, - const phi::DenseTensor& saved_inv_variance, - const paddle::optional& reserve_space, - const phi::DenseTensor& d_y, - float momentum, - float epsilon, - const std::string& data_layout_str, - bool is_test, - bool use_global_stats, - bool trainable_statistics, - phi::DenseTensor* d_x, - phi::DenseTensor* d_scale, - phi::DenseTensor* d_bias) { +void BatchNormGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const paddle::optional& scale, + const 
paddle::optional& bias, + const paddle::optional& mean, + const paddle::optional& variance, + const DenseTensor& saved_mean, + const DenseTensor& saved_inv_variance, + const paddle::optional& reserve_space, + const DenseTensor& d_y, + float momentum, + float epsilon, + const std::string& data_layout_str, + bool is_test, + bool use_global_stats, + bool trainable_statistics, + DenseTensor* d_x, + DenseTensor* d_scale, + DenseTensor* d_bias) { const auto& x_dims = x.dims(); PADDLE_ENFORCE_GE( x_dims.size(), @@ -217,7 +216,7 @@ void BatchNormGradKernel( auto* Scale = scale.get_ptr(); auto* Bias = bias.get_ptr(); - phi::DenseTensor new_scale, new_bias; + DenseTensor new_scale, new_bias; if (Scale) { new_scale = scale.get(); } else { @@ -272,11 +271,11 @@ void BatchNormGradKernel( bool need_transpose = (data_layout == DataLayout::kNCHW && x_dims.size() != 2); if (need_transpose) { - transformed_d_y.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_d_y.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_d_y); - transformed_x.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_x.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_x); - transformed_d_x.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_d_x.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_d_x); const int org_reshaped[] = {N, C, sample_size, 1}; @@ -372,17 +371,17 @@ void BatchNormGradKernel( template void BatchNormInferKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& mean, - const phi::DenseTensor& variance, - const phi::DenseTensor& scale, - const phi::DenseTensor& bias, + const DenseTensor& x, + const DenseTensor& mean, + const DenseTensor& variance, + const DenseTensor& scale, + const DenseTensor& bias, float momentum, float epsilon, const std::string& data_layout_str, - phi::DenseTensor* y, - phi::DenseTensor* mean_out, - phi::DenseTensor* variance_out) { + DenseTensor* y, + DenseTensor* mean_out, + DenseTensor* variance_out) { DataLayout data_layout = StringToDataLayout(data_layout_str); const auto& x_dims = x.dims(); @@ -426,9 +425,9 @@ void BatchNormInferKernel(const Context& dev_ctx, bool need_transpose = (data_layout == DataLayout::kNCHW && x_dims.size() != 2); if (need_transpose) { - transformed_x.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_x.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_x); - transformed_y.Resize(phi::DDim(transformed_shape, transformed_dim_size)); + transformed_y.Resize(DDim(transformed_shape, transformed_dim_size)); dev_ctx.template Alloc(&transformed_y); const std::vector perm = {0, 2, 3, 1}; diff --git a/backends/mlu/kernels/bce_loss_kernel.cc b/backends/mlu/kernels/bce_loss_kernel.cc index ee9b815d3f7..21adb8ac3de 100644 --- a/backends/mlu/kernels/bce_loss_kernel.cc +++ b/backends/mlu/kernels/bce_loss_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void BCELossKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& labels, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& labels, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x); @@ -40,10 +40,10 @@ void BCELossKernel(const Context& dev_ctx, template void BCELossGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& labels, - 
const phi::DenseTensor& dout, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& labels, + const DenseTensor& dout, + DenseTensor* dx) { dev_ctx.template Alloc(dx); MLUCnnlTensorDesc x_desc(x); diff --git a/backends/mlu/kernels/bitwise_kernel.cc b/backends/mlu/kernels/bitwise_kernel.cc index 22c0d31f83a..6aa1b593f43 100644 --- a/backends/mlu/kernels/bitwise_kernel.cc +++ b/backends/mlu/kernels/bitwise_kernel.cc @@ -16,25 +16,25 @@ namespace custom_kernel { -#define DEFINE_BITWISE_KERNEL(op_type) \ - template \ - void Bitwise##op_type##Kernel(const Context& dev_ctx, \ - const phi::DenseTensor& x, \ - const phi::DenseTensor& y, \ - phi::DenseTensor* out) { \ - dev_ctx.template Alloc(out); \ - MLUCnnlTensorDesc x_desc(x); \ - MLUCnnlTensorDesc y_desc(y); \ - MLUCnnlTensorDesc out_desc(*out); \ - cnnlBitComputeOp_t type = CNNL_CYCLE_B##op_type##_OP; \ - MLUCnnl::BitWise(dev_ctx, \ - type, \ - x_desc.get(), \ - GetBasePtr(&x), \ - y_desc.get(), \ - GetBasePtr(&y), \ - out_desc.get(), \ - GetBasePtr(out)); \ +#define DEFINE_BITWISE_KERNEL(op_type) \ + template \ + void Bitwise##op_type##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + DenseTensor* out) { \ + dev_ctx.template Alloc(out); \ + MLUCnnlTensorDesc x_desc(x); \ + MLUCnnlTensorDesc y_desc(y); \ + MLUCnnlTensorDesc out_desc(*out); \ + cnnlBitComputeOp_t type = CNNL_CYCLE_B##op_type##_OP; \ + MLUCnnl::BitWise(dev_ctx, \ + type, \ + x_desc.get(), \ + GetBasePtr(&x), \ + y_desc.get(), \ + GetBasePtr(&y), \ + out_desc.get(), \ + GetBasePtr(out)); \ } DEFINE_BITWISE_KERNEL(AND) @@ -44,8 +44,8 @@ DEFINE_BITWISE_KERNEL(XOR) template void BitwiseNOTKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x); MLUCnnlTensorDesc out_desc(*out); diff --git a/backends/mlu/kernels/cast_kernel.cc b/backends/mlu/kernels/cast_kernel.cc index f066e59709c..b07fe007522 100644 --- a/backends/mlu/kernels/cast_kernel.cc +++ b/backends/mlu/kernels/cast_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void CastKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { if (x.dtype() == dtype) { *out = x; return; diff --git a/backends/mlu/kernels/check_finite_and_unscale_kernel.cc b/backends/mlu/kernels/check_finite_and_unscale_kernel.cc index 5f2d48a8ad1..03bfcf1e333 100644 --- a/backends/mlu/kernels/check_finite_and_unscale_kernel.cc +++ b/backends/mlu/kernels/check_finite_and_unscale_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void CheckFiniteAndUnscale(const Context& dev_ctx, - const std::vector& xs, - const phi::DenseTensor& t_scale, - std::vector outs, - phi::DenseTensor* found_inf) { + const std::vector& xs, + const DenseTensor& t_scale, + std::vector outs, + DenseTensor* found_inf) { using MPDType = typename MPTypeTrait::Type; dev_ctx.template Alloc(found_inf); diff --git a/backends/mlu/kernels/clip_kernel.cc b/backends/mlu/kernels/clip_kernel.cc index 6c519ca65e5..6b626997730 100644 --- a/backends/mlu/kernels/clip_kernel.cc +++ b/backends/mlu/kernels/clip_kernel.cc @@ -18,10 +18,10 @@ namespace custom_kernel { template void ClipKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& min, - const phi::Scalar& max, - phi::DenseTensor* out) { + const DenseTensor& x, + const Scalar& min, + const Scalar& max, + DenseTensor* 
out) { dev_ctx.template Alloc(out); auto min_val = min.to(); @@ -40,11 +40,11 @@ void ClipKernel(const Context& dev_ctx, template void ClipGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& dout, - const phi::Scalar& min, - const phi::Scalar& max, - phi::DenseTensor* dx) { + const DenseTensor& x, + const DenseTensor& dout, + const Scalar& min, + const Scalar& max, + DenseTensor* dx) { dev_ctx.template Alloc(dx); auto min_val = min.to(); diff --git a/backends/mlu/kernels/coalesce_tensor_kernel.cc b/backends/mlu/kernels/coalesce_tensor_kernel.cc index 62af8c1e916..bebcf6c4fda 100644 --- a/backends/mlu/kernels/coalesce_tensor_kernel.cc +++ b/backends/mlu/kernels/coalesce_tensor_kernel.cc @@ -19,9 +19,9 @@ #include "kernels/funcs/mlu_funcs.h" namespace custom_kernel { -inline phi::DenseTensor Slice(const phi::DenseTensor &src, - int64_t begin_idx, - int64_t end_idx) { +inline DenseTensor Slice(const DenseTensor &src, + int64_t begin_idx, + int64_t end_idx) { auto meta = src.meta(); PADDLE_ENFORCE_GE( begin_idx, @@ -47,13 +47,12 @@ inline phi::DenseTensor Slice(const phi::DenseTensor &src, return src; } else { size_t base = src.numel() / meta.dims[0]; - phi::DenseTensor dst(src); - phi::DDim dst_dims = meta.dims; + DenseTensor dst(src); + DDim dst_dims = meta.dims; dst_dims[0] = end_idx - begin_idx; size_t dst_offset = meta.offset + begin_idx * base * phi::SizeOf(meta.dtype); - phi::DenseTensorMeta dst_meta = { - meta.dtype, dst_dims, meta.layout, dst_offset}; + DenseTensorMeta dst_meta = {meta.dtype, dst_dims, meta.layout, dst_offset}; dst.set_meta(dst_meta); return dst; } @@ -73,7 +72,7 @@ size_t Alignment(size_t size, const phi::Place &place, int align_size) { template struct FillConstantVisitor { FillConstantVisitor(const Context &dev_ctx, - phi::DenseTensor *tensor, + DenseTensor *tensor, const float value, phi::DataType dtype) : dev_ctx_(dev_ctx), tensor_(tensor), value_(value), dtype_(dtype) {} @@ -90,8 +89,8 @@ struct FillConstantVisitor { void apply(typename std::enable_if::value || std::is_same::value)>::type * = nullptr) const { - phi::DenseTensor tensor_tmp; - phi::DenseTensorMeta meta = {dtype_, {1}}; + DenseTensor tensor_tmp; + DenseTensorMeta meta = {dtype_, {1}}; tensor_tmp.set_meta(meta); dev_ctx_.template Alloc(&tensor_tmp); FillMLUTensorWithHostValue( @@ -113,18 +112,17 @@ struct FillConstantVisitor { } const Context &dev_ctx_; - phi::DenseTensor *tensor_; + DenseTensor *tensor_; float value_; phi::DataType dtype_; }; -void GetMemSizeAndDtype( - const std::vector &dense_tensors, - size_t *numel, - const size_t &size_of_dtype, - const phi::Place &place, - const bool use_align = true, - const int align_size = -1) { +void GetMemSizeAndDtype(const std::vector &dense_tensors, + size_t *numel, + const size_t &size_of_dtype, + const phi::Place &place, + const bool use_align = true, + const int align_size = -1) { *numel = 0; std::stringstream ss; ss << "alloc_space_for_vars: "; @@ -153,7 +151,7 @@ void GetMemSizeAndDtype( template void CoalesceTensorKernel(const Context &dev_ctx, - const std::vector &input, + const std::vector &input, phi::DataType dtype, bool copy_data, bool set_constant, @@ -164,8 +162,8 @@ void CoalesceTensorKernel(const Context &dev_ctx, int size_of_dtype, const std::vector &concated_shapes, const std::vector &concated_ranks, - std::vector output, - phi::DenseTensor *fused_output) { + std::vector output, + DenseTensor *fused_output) { PADDLE_ENFORCE_GT(input.size(), static_cast(0), phi::errors::InvalidArgument( @@ -203,8 
+201,7 @@ void CoalesceTensorKernel(const Context &dev_ctx, "equal to the output tensor number.")); int64_t accumulated_ranks = 0; for (size_t i = 0; i < input.size(); ++i) { - phi::DDim dims(concated_shapes.data() + accumulated_ranks, - concated_ranks[i]); + DDim dims(concated_shapes.data() + accumulated_ranks, concated_ranks[i]); if (!input[i]->initialized()) { PADDLE_ENFORCE_EQ( input[i], diff --git a/backends/mlu/kernels/compare_kernel.cc b/backends/mlu/kernels/compare_kernel.cc index efe0ac428c7..1d07363f15b 100644 --- a/backends/mlu/kernels/compare_kernel.cc +++ b/backends/mlu/kernels/compare_kernel.cc @@ -18,103 +18,103 @@ namespace custom_kernel { template void EqualRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "equal", out); } template void EqualKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::EqualRawKernel(dev_ctx, x, y, -1, out); } template void NotEqualRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "not_equal", out); } template void NotEqualKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::NotEqualRawKernel(dev_ctx, x, y, -1, out); } template void LessThanRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "less_than", out); } template void LessThanKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::LessThanRawKernel(dev_ctx, x, y, -1, out); } template void LessEqualRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "less_equal", out); } template void LessEqualKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::LessEqualRawKernel(dev_ctx, x, y, -1, out); } template void GreaterThanRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "greater_than", out); } template void GreaterThanKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::GreaterThanRawKernel(dev_ctx, x, y, -1, out); } template void GreaterEqualRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + 
DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "greater_equal", out); } template void GreaterEqualKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { custom_kernel::GreaterEqualRawKernel(dev_ctx, x, y, -1, out); } diff --git a/backends/mlu/kernels/concat_kernel.cc b/backends/mlu/kernels/concat_kernel.cc index 7e30912cb8a..fb101e57c79 100644 --- a/backends/mlu/kernels/concat_kernel.cc +++ b/backends/mlu/kernels/concat_kernel.cc @@ -33,9 +33,9 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) { template void ConcatKernel(const Context& dev_ctx, - const std::vector& ins, - const phi::Scalar& axis_scalar, - phi::DenseTensor* out) { + const std::vector& ins, + const Scalar& axis_scalar, + DenseTensor* out) { dev_ctx.template Alloc(out); auto axis = axis_scalar.to(); auto ins_size = ins.size(); @@ -76,10 +76,10 @@ void ConcatKernel(const Context& dev_ctx, template void ConcatGradKernel(const Context& dev_ctx, - const std::vector& ins, - const phi::DenseTensor& dout, - const phi::Scalar& axis_scalar, - std::vector outs) { + const std::vector& ins, + const DenseTensor& dout, + const Scalar& axis_scalar, + std::vector outs) { auto axis = axis_scalar.to(); int split_num = ins.size(); diff --git a/backends/mlu/kernels/contiguous_kernel.cc b/backends/mlu/kernels/contiguous_kernel.cc index 576b90d043b..c8f598af2c6 100644 --- a/backends/mlu/kernels/contiguous_kernel.cc +++ b/backends/mlu/kernels/contiguous_kernel.cc @@ -19,9 +19,9 @@ namespace custom_kernel { template void ContiguousKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - phi::DenseTensor* out) { - phi::DenseTensorMeta meta = input.meta(); + const DenseTensor& input, + DenseTensor* out) { + DenseTensorMeta meta = input.meta(); meta.strides = meta.calc_strides(meta.dims); meta.offset = 0; out->set_meta(meta); diff --git a/backends/mlu/kernels/conv_kernel.cc b/backends/mlu/kernels/conv_kernel.cc index 6b502145666..e5a9a051162 100644 --- a/backends/mlu/kernels/conv_kernel.cc +++ b/backends/mlu/kernels/conv_kernel.cc @@ -18,15 +18,15 @@ namespace custom_kernel { template void Conv2dKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, + const DenseTensor& input, + const DenseTensor& filter, const std::vector& strides_t, const std::vector& paddings_t, const std::string& padding_algorithm, const std::vector& dilations_t, int groups, const std::string& data_format, - phi::DenseTensor* output) { + DenseTensor* output) { dev_ctx.template Alloc(output); auto strides = strides_t; auto paddings = paddings_t; @@ -37,8 +37,8 @@ void Conv2dKernel(const Context& dev_ctx, auto in_dims = input.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -118,17 +118,17 @@ void Conv2dKernel(const Context& dev_ctx, template void Conv2dGradKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, - const phi::DenseTensor& output_grad, + const DenseTensor& input, + const DenseTensor& filter, + const DenseTensor& output_grad, const std::vector& strides_t, const std::vector& paddings_t, const std::string& padding_algorithm, const std::vector& dilations_t, int groups, const std::string& data_format, - phi::DenseTensor* 
input_grad, - phi::DenseTensor* filter_grad) { + DenseTensor* input_grad, + DenseTensor* filter_grad) { auto strides = strides_t; auto paddings = paddings_t; auto dilations = dilations_t; @@ -137,8 +137,8 @@ void Conv2dGradKernel(const Context& dev_ctx, auto in_dims = input.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -274,15 +274,15 @@ void Conv2dGradKernel(const Context& dev_ctx, template void DepthwiseConv2dKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, + const DenseTensor& input, + const DenseTensor& filter, const std::vector& stride, const std::vector& paddings_in, const std::string& padding_algorithm, int groups, const std::vector& dilations_in, const std::string& data_format, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); std::vector strides = stride; @@ -294,8 +294,8 @@ void DepthwiseConv2dKernel(const Context& dev_ctx, auto in_dims = input.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -377,17 +377,17 @@ void DepthwiseConv2dKernel(const Context& dev_ctx, template void DepthwiseConv2dGradKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, - const phi::DenseTensor& out_grad, + const DenseTensor& input, + const DenseTensor& filter, + const DenseTensor& out_grad, const std::vector& stride, const std::vector& paddings_in, const std::string& padding_algorithm, int groups, const std::vector& dilations_in, const std::string& data_format, - phi::DenseTensor* input_grad, - phi::DenseTensor* filter_grad) { + DenseTensor* input_grad, + DenseTensor* filter_grad) { std::vector strides = stride; std::vector paddings = paddings_in; std::vector dilations = dilations_in; @@ -397,8 +397,8 @@ void DepthwiseConv2dGradKernel(const Context& dev_ctx, auto in_dims = input.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -539,15 +539,15 @@ void DepthwiseConv2dGradKernel(const Context& dev_ctx, template void Conv3dKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, + const DenseTensor& input, + const DenseTensor& filter, const std::vector& strides_t, const std::vector& paddings_t, const std::string& padding_algorithm, int groups, const std::vector& dilations_t, const std::string& data_format, - phi::DenseTensor* output) { + DenseTensor* output) { dev_ctx.template Alloc(output); auto strides = strides_t; auto paddings = paddings_t; @@ -557,8 +557,8 @@ void Conv3dKernel(const Context& dev_ctx, auto in_dims = input.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { // NDHWC -> DHW in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -641,17 +641,17 @@ void Conv3dKernel(const Context& dev_ctx, template void Conv3dGradKernel(const 
Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& filter, - const phi::DenseTensor& out_grad, + const DenseTensor& input, + const DenseTensor& filter, + const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, int groups, const std::vector& dilations, const std::string& data_format, - phi::DenseTensor* input_grad, - phi::DenseTensor* filter_grad) { + DenseTensor* input_grad, + DenseTensor* filter_grad) { const bool channel_last = data_format == "NDHWC"; auto in_dims = input.dims(); auto filter_dims = filter.dims(); @@ -659,8 +659,8 @@ void Conv3dGradKernel(const Context& dev_ctx, auto updated_paddings = paddings; auto updated_dilations = dilations; - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); } else { diff --git a/backends/mlu/kernels/conv_transpose_kernel.cc b/backends/mlu/kernels/conv_transpose_kernel.cc index 85cbefca902..59c8519f9a3 100644 --- a/backends/mlu/kernels/conv_transpose_kernel.cc +++ b/backends/mlu/kernels/conv_transpose_kernel.cc @@ -19,8 +19,8 @@ namespace custom_kernel { template void Conv2dTransposeKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& filter, + const DenseTensor& x, + const DenseTensor& filter, const std::vector& strides, const std::vector& padding, const std::vector& out_padding, @@ -29,7 +29,7 @@ void Conv2dTransposeKernel(const Context& dev_ctx, int groups, const std::vector& dilation, const std::string& data_format, - phi::DenseTensor* out) { + DenseTensor* out) { auto paddings = padding; auto dilations = dilation; auto output_padding = out_padding; @@ -39,8 +39,8 @@ void Conv2dTransposeKernel(const Context& dev_ctx, auto in_dims = x.dims(); auto filter_dims = filter.dims(); auto in_dims_size = in_dims.size(); - phi::DDim in_data_dims; - phi::DDim filter_data_dims; + DDim in_data_dims; + DDim filter_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); @@ -117,9 +117,9 @@ void Conv2dTransposeKernel(const Context& dev_ctx, template void Conv2dTransposeGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& filter, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& filter, + const DenseTensor& dout, const std::vector& strides, const std::vector& padding, const std::vector& output_padding, @@ -128,8 +128,8 @@ void Conv2dTransposeGradKernel(const Context& dev_ctx, int groups, const std::vector& dilation, const std::string& data_format, - phi::DenseTensor* dx, - phi::DenseTensor* dfilter) { + DenseTensor* dx, + DenseTensor* dfilter) { auto paddings = padding; auto dilations = dilation; if ((!dx) && (!dfilter)) return; @@ -141,14 +141,13 @@ void Conv2dTransposeGradKernel(const Context& dev_ctx, const bool channel_last = (data_layout == DataLayout::kNHWC); - phi::DDim in_data_dims; + DDim in_data_dims; if (channel_last) { in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); } else { in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); } - phi::DDim filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); + DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); std::vector ksize = phi::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); diff --git 
a/backends/mlu/kernels/cross_entropy_kernel.cc b/backends/mlu/kernels/cross_entropy_kernel.cc index 1f18edfb062..49e470463c4 100644 --- a/backends/mlu/kernels/cross_entropy_kernel.cc +++ b/backends/mlu/kernels/cross_entropy_kernel.cc @@ -19,16 +19,16 @@ namespace custom_kernel { template void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, - const phi::DenseTensor& logits, - const phi::DenseTensor& labels, + const DenseTensor& logits, + const DenseTensor& labels, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis, - phi::DenseTensor* softmax, - phi::DenseTensor* loss) { - phi::DenseTensor backprop; + DenseTensor* softmax, + DenseTensor* loss) { + DenseTensor backprop; PADDLE_ENFORCE_EQ(use_softmax, true, phi::errors::InvalidArgument( @@ -59,7 +59,7 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, regard_labels_shape = {d1, d2_labels, d3}; regard_loss_shape = {d1, 1, d3}; } - phi::DenseTensorMeta meta = {logits.dtype(), {softmax->dims()}}; + DenseTensorMeta meta = {logits.dtype(), {softmax->dims()}}; backprop.set_meta(meta); dev_ctx.template Alloc(&backprop); @@ -153,7 +153,7 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, Tensor ignore_idx_tensor, mask_tensor; std::vector ignore_dim_vec(trans_labels.dims().size(), 1); ignore_idx_tensor.Resize( - phi::DDim(ignore_dim_vec.data(), ignore_dim_vec.size())); + DDim(ignore_dim_vec.data(), ignore_dim_vec.size())); mask_tensor.Resize(trans_labels.dims()); dev_ctx.template Alloc(&ignore_idx_tensor); dev_ctx.template Alloc(&mask_tensor); @@ -184,8 +184,8 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, // 3. mask: if mask = True, set 0 float fill_value = 0.0f; - phi::DenseTensor t_fill_value; - phi::DenseTensorMeta fill_value_meta = {trans_loss.dtype(), {1}}; + DenseTensor t_fill_value; + DenseTensorMeta fill_value_meta = {trans_loss.dtype(), {1}}; t_fill_value.set_meta(fill_value_meta); dev_ctx.template Alloc(&t_fill_value); FillMLUTensorWithHostValue( @@ -209,7 +209,7 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, VLOG(5) << "[cross_entropy] d3 != 1, transpose loss back." 
<< " [d1, d3, 1] -> [d1, 1, d3] -> original shape"; std::vector perm{0, 2, 1}; - phi::DDim original_loss_dim = loss->dims(); + DDim original_loss_dim = loss->dims(); loss->Resize({d1, 1, d3}); TransposeFromMLUTensor(dev_ctx, perm, &trans_loss, loss, false); loss->Resize(original_loss_dim); @@ -219,19 +219,19 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, template void CrossEntropyWithSoftmaxGradCPUKernel(const Context& dev_ctx, - const phi::DenseTensor& label, - const phi::DenseTensor& softmax, - const phi::DenseTensor& loss_grad, + const DenseTensor& label, + const DenseTensor& softmax, + const DenseTensor& loss_grad, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis, - phi::DenseTensor* logits_grad) { + DenseTensor* logits_grad) { dev_ctx.Wait(); auto logits_grad_dims = logits_grad->dims(); dev_ctx.template Alloc(logits_grad); - phi::DenseTensor cpu_logits_grad_tensor; + DenseTensor cpu_logits_grad_tensor; cpu_logits_grad_tensor.Resize(logits_grad_dims); auto cpu_logits_grad_data = dev_ctx.template HostAlloc(&cpu_logits_grad_tensor); @@ -358,15 +358,15 @@ void CrossEntropyWithSoftmaxGradCPUKernel(const Context& dev_ctx, template void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, - const phi::DenseTensor& labels, - const phi::DenseTensor& softmax, - const phi::DenseTensor& loss_grad, + const DenseTensor& labels, + const DenseTensor& softmax, + const DenseTensor& loss_grad, bool soft_label, bool use_softmax, bool numeric_stable_mode, int ignore_index, int axis, - phi::DenseTensor* logits_grad) { + DenseTensor* logits_grad) { auto logits_grad_dims = logits_grad->dims(); const int rank = logits_grad_dims.size(); const int use_axis = axis < 0 ? axis + rank : axis; @@ -377,16 +377,16 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, if ((!soft_label && labels.numel() == n && (ignore_index == -1 || ignore_index == 255 || ignore_index == -100)) || soft_label) { - phi::DenseTensor last_labels; + DenseTensor last_labels; if (!soft_label) { // hard label: last_labels = onehot(labels), onehot only supports // dtype-int32 int cls_num = softmax.dims()[softmax.dims().size() - 1]; // cast label from int64/int32 to int32 for OneHotD - phi::DenseTensor casted_labels; + DenseTensor casted_labels; if (labels.dtype() != phi::DataType::INT32) { - phi::DenseTensorMeta casted_labels_meta = {phi::DataType::INT32, - labels.dims()}; + DenseTensorMeta casted_labels_meta = {phi::DataType::INT32, + labels.dims()}; casted_labels.set_meta(casted_labels_meta); dev_ctx.template Alloc(&casted_labels); MLUCnnlTensorDesc labels_desc(labels); @@ -403,8 +403,8 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, casted_labels = labels; } // on and off - phi::DenseTensor on_tensor, off_tensor; - phi::DenseTensorMeta on_off_meta = {phi::DataType::INT32, {1}}; + DenseTensor on_tensor, off_tensor; + DenseTensorMeta on_off_meta = {phi::DataType::INT32, {1}}; on_tensor.set_meta(on_off_meta); off_tensor.set_meta(on_off_meta); dev_ctx.template Alloc(&on_tensor); @@ -412,9 +412,8 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, FillMLUTensorWithHostValue(dev_ctx, static_cast(1), &on_tensor); FillMLUTensorWithHostValue(dev_ctx, static_cast(0), &off_tensor); // one_hot - phi::DenseTensor tmp_onehot; - phi::DenseTensorMeta tmp_onehot_meta = {on_tensor.dtype(), - softmax.dims()}; + DenseTensor tmp_onehot; + DenseTensorMeta tmp_onehot_meta = {on_tensor.dtype(), softmax.dims()}; tmp_onehot.set_meta(tmp_onehot_meta); 
dev_ctx.template Alloc(&tmp_onehot); MLUCnnlTensorDesc casted_labels_desc(casted_labels); @@ -431,7 +430,7 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, GetBasePtr(&tmp_onehot)); // cast one_hot from int32 to T if (softmax.dtype() != phi::DataType::INT32) { - phi::DenseTensorMeta onehot_meta = {softmax.dtype(), tmp_onehot.dims()}; + DenseTensorMeta onehot_meta = {softmax.dtype(), tmp_onehot.dims()}; last_labels.set_meta(onehot_meta); dev_ctx.template Alloc(&last_labels); cnnlCastDataType_t cast_type = @@ -452,8 +451,8 @@ void CrossEntropyWithSoftmaxGradKernel(const Context& dev_ctx, } // sub - phi::DenseTensor tmp_sub; - phi::DenseTensorMeta tmp_sub_meta = {softmax.dtype(), softmax.dims()}; + DenseTensor tmp_sub; + DenseTensorMeta tmp_sub_meta = {softmax.dtype(), softmax.dims()}; tmp_sub.set_meta(tmp_sub_meta); dev_ctx.template Alloc(&tmp_sub); MLUCnnlOpTensorDesc mul_sub_op_desc( diff --git a/backends/mlu/kernels/cumsum_kernel.cc b/backends/mlu/kernels/cumsum_kernel.cc index a16a646636d..c5999175f6a 100644 --- a/backends/mlu/kernels/cumsum_kernel.cc +++ b/backends/mlu/kernels/cumsum_kernel.cc @@ -18,12 +18,12 @@ namespace custom_kernel { template void CumsumKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& axis_scalar, + const DenseTensor& x, + const Scalar& axis_scalar, bool flatten, bool exclusive, bool reverse, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); auto axis = axis_scalar.to(); diff --git a/backends/mlu/kernels/data_kernel.cc b/backends/mlu/kernels/data_kernel.cc index d253bb70237..9bc1673e8fe 100644 --- a/backends/mlu/kernels/data_kernel.cc +++ b/backends/mlu/kernels/data_kernel.cc @@ -23,7 +23,7 @@ const char kBackward[] = "BACKWARD"; template void PrintKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, int first_n, const std::string& message, int summarize, @@ -34,9 +34,9 @@ void PrintKernel(const Context& dev_ctx, bool print_tensor_lod, const std::string& print_phase, bool is_forward, - phi::DenseTensor* out) { + DenseTensor* out) { TensorCopy(dev_ctx, x, false, out); - phi::DenseTensorMeta meta = {x.dtype(), x.dims()}; + DenseTensorMeta meta = {x.dtype(), x.dims()}; out->set_meta(meta); if ((is_forward && print_phase == kBackward) || diff --git a/backends/mlu/kernels/deformable_conv_kernel.cc b/backends/mlu/kernels/deformable_conv_kernel.cc index 0febe1c10b8..6bd87aac21a 100644 --- a/backends/mlu/kernels/deformable_conv_kernel.cc +++ b/backends/mlu/kernels/deformable_conv_kernel.cc @@ -18,17 +18,17 @@ namespace custom_kernel { template void DeformableConvKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& offset, - const phi::DenseTensor& filter, - const paddle::optional& mask, + const DenseTensor& x, + const DenseTensor& offset, + const DenseTensor& filter, + const paddle::optional& mask, const std::vector& strides, const std::vector& paddings, const std::vector& dilations, int deformable_groups, int groups, int im2col_step, - phi::DenseTensor* out) { + DenseTensor* out) { // TODO(fwg): Remove this check when cnnl fix the bug that groups > 1. 
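// Until that cnnl fix lands, fail fast on groups != 1 instead of exercising
// the buggy path: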
PADDLE_ENFORCE_EQ( groups == 1, @@ -115,21 +115,21 @@ void DeformableConvKernel(const Context& dev_ctx, template void DeformableConvGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& offset, - const phi::DenseTensor& filter, - const paddle::optional& mask, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& offset, + const DenseTensor& filter, + const paddle::optional& mask, + const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::vector& dilations, int deformable_groups, int groups, int im2col_step, - phi::DenseTensor* dx, - phi::DenseTensor* offset_grad, - phi::DenseTensor* filter_grad, - phi::DenseTensor* mask_grad) { + DenseTensor* dx, + DenseTensor* offset_grad, + DenseTensor* filter_grad, + DenseTensor* mask_grad) { // TODO(fwg): Remove this check when cnnl fix the bug that groups > 1. PADDLE_ENFORCE_EQ(groups == 1, true, diff --git a/backends/mlu/kernels/dropout_kernel.cc b/backends/mlu/kernels/dropout_kernel.cc index a20ad0d3ef5..b0570fca612 100644 --- a/backends/mlu/kernels/dropout_kernel.cc +++ b/backends/mlu/kernels/dropout_kernel.cc @@ -18,7 +18,7 @@ namespace custom_kernel { void GetSeed(const phi::DeviceContext& dev_ctx, - const paddle::optional& seed_tensor, + const paddle::optional& seed_tensor, int seed, bool fix_seed, int* seed_out) { @@ -34,15 +34,15 @@ void GetSeed(const phi::DeviceContext& dev_ctx, template void DropoutRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& seed_tensor, - const phi::Scalar& p, + const DenseTensor& x, + const paddle::optional& seed_tensor, + const Scalar& p, bool is_test, const std::string& mode, int seed, bool fix_seed, - phi::DenseTensor* out, - phi::DenseTensor* mask) { + DenseTensor* out, + DenseTensor* mask) { dev_ctx.template Alloc(out); auto dropout_prob = p.to(); @@ -126,12 +126,12 @@ void DropoutRawKernel(const Context& dev_ctx, template void DropoutGradRawKernel(const Context& dev_ctx, - const phi::DenseTensor& mask, - const phi::DenseTensor& dout, - const phi::Scalar& p, + const DenseTensor& mask, + const DenseTensor& dout, + const Scalar& p, bool is_test, const std::string& mode, - phi::DenseTensor* dx) { + DenseTensor* dx) { PADDLE_ENFORCE_EQ(is_test, false, phi::errors::InvalidArgument( diff --git a/backends/mlu/kernels/elementwise_add_kernel.cc b/backends/mlu/kernels/elementwise_add_kernel.cc index 8326c5bc1be..1beb32a0067 100644 --- a/backends/mlu/kernels/elementwise_add_kernel.cc +++ b/backends/mlu/kernels/elementwise_add_kernel.cc @@ -19,30 +19,30 @@ namespace custom_kernel { template void AddRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUOpTensorKernel(dev_ctx, x, y, axis, CNNL_OP_TENSOR_ADD, out); } template void AddKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::AddRawKernel(dev_ctx, x, y, axis, out); } template void AddGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { axis = (axis == -1 ? 
std::abs(x.dims().size() - y.dims().size()) : axis); MLUCnnlTensorDesc dout_desc(dout); if (dx) { diff --git a/backends/mlu/kernels/elementwise_div_kernel.cc b/backends/mlu/kernels/elementwise_div_kernel.cc index 0e553dde956..579ac85eeeb 100644 --- a/backends/mlu/kernels/elementwise_div_kernel.cc +++ b/backends/mlu/kernels/elementwise_div_kernel.cc @@ -18,31 +18,31 @@ namespace custom_kernel { template void DivideRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUBinaryOp(dev_ctx, x, y, axis, out); } template void DivideKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::DivideRawKernel(dev_ctx, x, y, axis, out); } template void DivideGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { Tensor x_t, y_t; x_t = x; y_t = y; diff --git a/backends/mlu/kernels/elementwise_max_kernel.cc b/backends/mlu/kernels/elementwise_max_kernel.cc index f16028a518c..8d437f8fd10 100644 --- a/backends/mlu/kernels/elementwise_max_kernel.cc +++ b/backends/mlu/kernels/elementwise_max_kernel.cc @@ -18,29 +18,29 @@ namespace custom_kernel { template void MaximumRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUBinaryOp(dev_ctx, x, y, axis, out); } template void MaximumKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::MaximumRawKernel(dev_ctx, x, y, axis, out); } template void MaximumGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy) { int axis = -1; MLUMinMaxGradHelper(dev_ctx, x, y, dout, axis, dx, dy); } diff --git a/backends/mlu/kernels/elementwise_min_kernel.cc b/backends/mlu/kernels/elementwise_min_kernel.cc index 9784bfd5e8e..bce91ead46b 100644 --- a/backends/mlu/kernels/elementwise_min_kernel.cc +++ b/backends/mlu/kernels/elementwise_min_kernel.cc @@ -18,29 +18,29 @@ namespace custom_kernel { template void MinimumRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUBinaryOp(dev_ctx, x, y, axis, out); } template void MinimumKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::MinimumRawKernel(dev_ctx, x, y, axis, out); } template void MinimumGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const 
phi::DenseTensor& dout, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy) { int axis = -1; MLUMinMaxGradHelper(dev_ctx, x, y, dout, axis, dx, dy); } diff --git a/backends/mlu/kernels/elementwise_mul_kernel.cc b/backends/mlu/kernels/elementwise_mul_kernel.cc index 491471db695..5947da8c07f 100644 --- a/backends/mlu/kernels/elementwise_mul_kernel.cc +++ b/backends/mlu/kernels/elementwise_mul_kernel.cc @@ -18,30 +18,30 @@ namespace custom_kernel { template void MultiplyRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUOpTensorKernel(dev_ctx, x, y, axis, CNNL_OP_TENSOR_MUL, out); } template void MultiplyKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::MultiplyRawKernel(dev_ctx, x, y, axis, out); } template void MultiplyGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { const auto& x_dims = x.dims(); const auto& y_dims = y.dims(); axis = diff --git a/backends/mlu/kernels/elementwise_pow_kernel.cc b/backends/mlu/kernels/elementwise_pow_kernel.cc index f6cc4f56b69..4daa488f37a 100644 --- a/backends/mlu/kernels/elementwise_pow_kernel.cc +++ b/backends/mlu/kernels/elementwise_pow_kernel.cc @@ -18,29 +18,29 @@ namespace custom_kernel { template void ElementwisePowRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUBinaryOp(dev_ctx, x, y, axis, out); } template void ElementwisePowKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; custom_kernel::ElementwisePowRawKernel(dev_ctx, x, y, axis, out); } template void ElementwisePowGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy) { int axis = -1; auto x_dims = x.dims(); auto y_dims = y.dims(); diff --git a/backends/mlu/kernels/elementwise_sub_kernel.cc b/backends/mlu/kernels/elementwise_sub_kernel.cc index 96d19997259..456bd7b5997 100644 --- a/backends/mlu/kernels/elementwise_sub_kernel.cc +++ b/backends/mlu/kernels/elementwise_sub_kernel.cc @@ -19,30 +19,30 @@ namespace custom_kernel { template void SubtractRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { MLUOpTensorKernel(dev_ctx, x, y, axis, CNNL_OP_TENSOR_SUB, out); } template void SubtractKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + 
DenseTensor* out) { int axis = -1; custom_kernel::SubtractRawKernel(dev_ctx, x, y, axis, out); } template void SubtractGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { axis = (axis == -1 ? std::abs(x.dims().size() - y.dims().size()) : axis); MLUCnnlTensorDesc dout_desc(dout); diff --git a/backends/mlu/kernels/expand_as_kernel.cc b/backends/mlu/kernels/expand_as_kernel.cc index b815c57d86f..446e579196b 100644 --- a/backends/mlu/kernels/expand_as_kernel.cc +++ b/backends/mlu/kernels/expand_as_kernel.cc @@ -20,10 +20,10 @@ namespace custom_kernel { template void ExpandAsKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& y, + const DenseTensor& x, + const paddle::optional& y, const std::vector& target_shape_64, - phi::DenseTensor* out) { + DenseTensor* out) { std::vector target_shape = std::vector(target_shape_64.begin(), target_shape_64.end()); auto rank = x.dims().size(); @@ -72,7 +72,7 @@ void ExpandAsKernel(const Context& dev_ctx, target_shape[i])); } } - phi::DDim out_dims = phi::make_ddim(target_shape); + DDim out_dims = phi::make_ddim(target_shape); out->Resize(out_dims); dev_ctx.template Alloc(out); diff --git a/backends/mlu/kernels/expand_kernel.cc b/backends/mlu/kernels/expand_kernel.cc index 488d4a2ed85..c26c9d18200 100644 --- a/backends/mlu/kernels/expand_kernel.cc +++ b/backends/mlu/kernels/expand_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void ExpandKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& shape, - phi::DenseTensor* out) { + DenseTensor* out) { auto in_dims = x.dims(); auto expand_shape = shape.GetData(); auto vec_in_dims = phi::vectorize(in_dims); @@ -90,7 +90,7 @@ void ExpandKernel(const Context& dev_ctx, shape_size, rank)); - phi::DDim out_dims = phi::make_ddim(final_expand_shape); + DDim out_dims = phi::make_ddim(final_expand_shape); out->Resize(out_dims); dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x); @@ -101,10 +101,10 @@ void ExpandKernel(const Context& dev_ctx, // template // void ExpandGradKernel(const Context& dev_ctx, -// const phi::DenseTensor& x, -// const phi::DenseTensor& out_grad, +// const DenseTensor& x, +// const DenseTensor& out_grad, // const phi::IntArray& shape, -// phi::DenseTensor* in_grad) { +// DenseTensor* in_grad) { // } } // namespace custom_kernel diff --git a/backends/mlu/kernels/fill_kernel.cc b/backends/mlu/kernels/fill_kernel.cc index 772e0952e37..31192b20d91 100644 --- a/backends/mlu/kernels/fill_kernel.cc +++ b/backends/mlu/kernels/fill_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void FillKernel(const Context& dev_ctx, - const phi::DenseTensor& x UNUSED, - const phi::Scalar& val, - phi::DenseTensor* out) { + const DenseTensor& x UNUSED, + const Scalar& val, + DenseTensor* out) { dev_ctx.template Alloc(out); T value = val.to(); diff --git a/backends/mlu/kernels/flash_attn_kernel.cc b/backends/mlu/kernels/flash_attn_kernel.cc index d7ea8648577..48b6c1c42ad 100644 --- a/backends/mlu/kernels/flash_attn_kernel.cc +++ b/backends/mlu/kernels/flash_attn_kernel.cc @@ -21,13 +21,13 @@ namespace custom_kernel { template void FlashAttnUnpaddedMLUKernel( const Context& dev_ctx, - const phi::DenseTensor& q, - const phi::DenseTensor& k, - const phi::DenseTensor& v, 
- const phi::DenseTensor& cu_seqlens_q, - const phi::DenseTensor& cu_seqlens_k, - const paddle::optional& fixed_seed_offset, - const paddle::optional& attn_mask, + const DenseTensor& q, + const DenseTensor& k, + const DenseTensor& v, + const DenseTensor& cu_seqlens_q, + const DenseTensor& cu_seqlens_k, + const paddle::optional& fixed_seed_offset, + const paddle::optional& attn_mask, int64_t max_seqlen_q, int64_t max_seqlen_k, float scale, @@ -36,10 +36,10 @@ void FlashAttnUnpaddedMLUKernel( bool return_softmax, bool is_test, const std::string& rng_name, - phi::DenseTensor* out, - phi::DenseTensor* softmax, - phi::DenseTensor* softmax_lse, - phi::DenseTensor* seed_offset) { + DenseTensor* out, + DenseTensor* softmax, + DenseTensor* softmax_lse, + DenseTensor* seed_offset) { dev_ctx.template Alloc(out); // q,k,v [total_*, num_heads, head_dim] auto dims = q.dims(); @@ -81,11 +81,11 @@ void FlashAttnUnpaddedMLUKernel( phi::errors::InvalidArgument( "flash_attn_raw receive input with return_softmax should be false")); - phi::DenseTensor dropout_mask; + DenseTensor dropout_mask; void* dropout_mask_ptr = nullptr; if (return_softmax) { - phi::DenseTensorMeta dropout_mask_meta = { - phi::DataType::INT32, {num_heads, total_q, max_seqlen_k}}; + DenseTensorMeta dropout_mask_meta = {phi::DataType::INT32, + {num_heads, total_q, max_seqlen_k}}; dropout_mask.set_meta(dropout_mask_meta); dropout_mask_ptr = dev_ctx.template Alloc(&dropout_mask); } @@ -166,22 +166,21 @@ void FlashAttnUnpaddedMLUKernel( } template -void FlashAttnKernel( - const Context& ctx, - const phi::DenseTensor& q, - const phi::DenseTensor& k, - const phi::DenseTensor& v, - const paddle::optional& fixed_seed_offset, - const paddle::optional& attn_mask, - float dropout, - bool causal, - bool return_softmax, - bool is_test, - const std::string& rng_name, - phi::DenseTensor* out, - phi::DenseTensor* softmax, - phi::DenseTensor* softmax_lse, - phi::DenseTensor* seed_offset) { +void FlashAttnKernel(const Context& ctx, + const DenseTensor& q, + const DenseTensor& k, + const DenseTensor& v, + const paddle::optional& fixed_seed_offset, + const paddle::optional& attn_mask, + float dropout, + bool causal, + bool return_softmax, + bool is_test, + const std::string& rng_name, + DenseTensor* out, + DenseTensor* softmax, + DenseTensor* softmax_lse, + DenseTensor* seed_offset) { // q,k,v [batch_size, seq_len, num_heads, head_dim] const auto& dims = q.dims(); PADDLE_ENFORCE_EQ(dims.size(), @@ -200,7 +199,7 @@ void FlashAttnKernel( const int32_t total_k = batch_size * seqlen_k; const float scale = 1.0f / std::sqrt(head_size); - phi::DenseTensor q_t_s, k_t_s, v_t_s; + DenseTensor q_t_s, k_t_s, v_t_s; q_t_s = q; k_t_s = k; v_t_s = v; @@ -209,8 +208,8 @@ void FlashAttnKernel( k_t_s.Resize({total_k, num_heads, head_size}); v_t_s.Resize({total_k, num_heads, head_size}); - phi::DenseTensor cu_seqlens_q; - phi::DenseTensor cu_seqlens_k; + DenseTensor cu_seqlens_q; + DenseTensor cu_seqlens_k; ArangeRawKernel( ctx, 0, (batch_size + 1) * seqlen_q, seqlen_q, &cu_seqlens_q); ArangeRawKernel( @@ -239,26 +238,25 @@ void FlashAttnKernel( } template -void FlashAttnUnpaddedGradKernel( - const Context& dev_ctx, - const phi::DenseTensor& q, - const phi::DenseTensor& k, - const phi::DenseTensor& v, - const phi::DenseTensor& cu_seqlens_q, - const phi::DenseTensor& cu_seqlens_k, - const phi::DenseTensor& out, - const phi::DenseTensor& softmax_lse, - const phi::DenseTensor& seed_offset, - const paddle::optional& attn_mask, - const phi::DenseTensor& dout, - int64_t max_seqlen_q, 
- int64_t max_seqlen_k, - float scale, - float dropout, - bool causal, - phi::DenseTensor* dq, - phi::DenseTensor* dk, - phi::DenseTensor* dv) { +void FlashAttnUnpaddedGradKernel(const Context& dev_ctx, + const DenseTensor& q, + const DenseTensor& k, + const DenseTensor& v, + const DenseTensor& cu_seqlens_q, + const DenseTensor& cu_seqlens_k, + const DenseTensor& out, + const DenseTensor& softmax_lse, + const DenseTensor& seed_offset, + const paddle::optional& attn_mask, + const DenseTensor& dout, + int64_t max_seqlen_q, + int64_t max_seqlen_k, + float scale, + float dropout, + bool causal, + DenseTensor* dq, + DenseTensor* dk, + DenseTensor* dv) { dev_ctx.template Alloc(dq); dev_ctx.template Alloc(dk); dev_ctx.template Alloc(dv); @@ -377,19 +375,19 @@ void FlashAttnUnpaddedGradKernel( template void FlashAttnGradKernel(const Context& ctx, - const phi::DenseTensor& q, - const phi::DenseTensor& k, - const phi::DenseTensor& v, - const phi::DenseTensor& out, - const phi::DenseTensor& softmax_lse, - const phi::DenseTensor& seed_offset, - const paddle::optional& attn_mask, - const phi::DenseTensor& dout, + const DenseTensor& q, + const DenseTensor& k, + const DenseTensor& v, + const DenseTensor& out, + const DenseTensor& softmax_lse, + const DenseTensor& seed_offset, + const paddle::optional& attn_mask, + const DenseTensor& dout, float dropout, bool causal, - phi::DenseTensor* dq, - phi::DenseTensor* dk, - phi::DenseTensor* dv) { + DenseTensor* dq, + DenseTensor* dk, + DenseTensor* dv) { // q,k,v [batch_size, seq_len, num_heads, head_dim] const auto& dims = q.dims(); const int32_t batch_size = dims[0]; @@ -413,7 +411,7 @@ void FlashAttnGradKernel(const Context& ctx, << "], v[" << v.dims() << "]"; const float scale = 1.0f / std::sqrt(head_size); - phi::DenseTensor q_t_s, k_t_s, v_t_s; + DenseTensor q_t_s, k_t_s, v_t_s; q_t_s = q; k_t_s = k; v_t_s = v; @@ -422,8 +420,8 @@ void FlashAttnGradKernel(const Context& ctx, k_t_s.Resize({total_k, num_heads, head_size}); v_t_s.Resize({total_k, num_heads, head_size}); - phi::DenseTensor cu_seqlens_q; - phi::DenseTensor cu_seqlens_k; + DenseTensor cu_seqlens_q; + DenseTensor cu_seqlens_k; ArangeRawKernel( ctx, 0, (batch_size + 1) * seqlen_q, seqlen_q, &cu_seqlens_q); ArangeRawKernel( diff --git a/backends/mlu/kernels/flip_kernel.cc b/backends/mlu/kernels/flip_kernel.cc index f985e2d7c91..0886e26ec86 100644 --- a/backends/mlu/kernels/flip_kernel.cc +++ b/backends/mlu/kernels/flip_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void FlipKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& axis, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); if (axis.size() == 0) { diff --git a/backends/mlu/kernels/full_kernel.cc b/backends/mlu/kernels/full_kernel.cc index 268c9a9f704..a76fca50a65 100644 --- a/backends/mlu/kernels/full_kernel.cc +++ b/backends/mlu/kernels/full_kernel.cc @@ -20,9 +20,9 @@ namespace custom_kernel { template void FullKernel(const Context& dev_ctx, const phi::IntArray& shape, - const phi::Scalar& val, + const Scalar& val, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { auto shape_vec = shape.GetData(); out->ResizeAndAllocate(phi::make_ddim(shape_vec)); dev_ctx.template Alloc(out); @@ -37,10 +37,10 @@ void FullKernel(const Context& dev_ctx, template void FullLikeKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& val, + const DenseTensor& x, + const Scalar& val, phi::DataType dtype, - phi::DenseTensor* 
out) { + DenseTensor* out) { auto value = val.to(); using CommonType = typename std::common_type< float, @@ -90,13 +90,13 @@ void FullLikeKernel(const Context& dev_ctx, template void FullBatchSizeLikeKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& shape, - const phi::Scalar& val, + const Scalar& val, phi::DataType dtype, int x_batch_size_dim, int out_batch_size_dim, - phi::DenseTensor* out) { + DenseTensor* out) { if (x.lod().size() && x_batch_size_dim == 0) { // set the correct batch size for the DenseTensor. auto odims = out->dims(); @@ -109,13 +109,13 @@ void FullBatchSizeLikeKernel(const Context& dev_ctx, template void FullWithTensorKernel(const Context& dev_ctx, - const phi::DenseTensor& value, + const DenseTensor& value, const phi::IntArray& shape, DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { out->Resize(common::make_ddim(shape.GetData())); custom_kernel::FullKernel( - dev_ctx, shape, phi::Scalar(value), dtype, out); + dev_ctx, shape, Scalar(value), dtype, out); } } // namespace custom_kernel diff --git a/backends/mlu/kernels/funcs/elementwise_utils.h b/backends/mlu/kernels/funcs/elementwise_utils.h index 50a596d03e0..0b80e0edcfe 100644 --- a/backends/mlu/kernels/funcs/elementwise_utils.h +++ b/backends/mlu/kernels/funcs/elementwise_utils.h @@ -120,11 +120,11 @@ inline void GetReduceAxesAndDstDims(const int axis, template void MLUOpTensorKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, const cnnlOpTensorDesc_t op_tensor_type, - phi::DenseTensor* out) { + DenseTensor* out) { PADDLE_ENFORCE_EQ((op_tensor_type == CNNL_OP_TENSOR_ADD) || (op_tensor_type == CNNL_OP_TENSOR_SUB) || (op_tensor_type == CNNL_OP_TENSOR_MUL), @@ -241,10 +241,10 @@ inline void MLUBinary(const Context& dev_ctx, template void MLUBinaryOp(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); Tensor x_t, y_t; x_t = x; @@ -319,8 +319,8 @@ inline void MLUUnary(const Context& dev_ctx, template void MLUUnaryOp(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); @@ -342,12 +342,12 @@ enum MINMAX_GRAD_FUNCTOR { }; template void MLUMinMaxGradHelper(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { const auto& x_dims = x.dims(); const auto& y_dims = y.dims(); axis = diff --git a/backends/mlu/kernels/funcs/logic_op.h b/backends/mlu/kernels/funcs/logic_op.h index 53a7501b528..7221be7f1f3 100644 --- a/backends/mlu/kernels/funcs/logic_op.h +++ b/backends/mlu/kernels/funcs/logic_op.h @@ -21,10 +21,10 @@ namespace custom_kernel { template void MLULogicOp(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, const std::string& logic_name, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc input_x(x, CNNL_LAYOUT_ARRAY, ToCnnlDataType(x.dtype())); diff --git 
a/backends/mlu/kernels/funcs/mlu_baseop.h b/backends/mlu/kernels/funcs/mlu_baseop.h index 3bcd7210d79..63a8c47a047 100644 --- a/backends/mlu/kernels/funcs/mlu_baseop.h +++ b/backends/mlu/kernels/funcs/mlu_baseop.h @@ -25,6 +25,9 @@ namespace custom_kernel { using Tensor = phi::DenseTensor; +using DenseTensor = phi::DenseTensor; +using Scalar = phi::Scalar; +using DDim = phi::DDim; using Context = phi::CustomContext; using DataType = phi::DataType; using DataLayout = phi::DataLayout; diff --git a/backends/mlu/kernels/funcs/mlu_funcs.h b/backends/mlu/kernels/funcs/mlu_funcs.h index d16d1f44152..ade26ae0bd4 100644 --- a/backends/mlu/kernels/funcs/mlu_funcs.h +++ b/backends/mlu/kernels/funcs/mlu_funcs.h @@ -25,9 +25,9 @@ namespace custom_kernel { */ template inline void TensorCopy(const Context& dev_ctx, - const phi::DenseTensor& src, + const DenseTensor& src, bool blocking, - phi::DenseTensor* dst, + DenseTensor* dst, const phi::Place& dst_place = phi::CustomPlace()) { dev_ctx.Wait(); auto* src_ptr = src.data(); @@ -103,7 +103,7 @@ template inline void TensorFromVector(const phi::CustomContext& ctx, const std::vector& src, const phi::CustomContext& dev_ctx, - phi::DenseTensor* dst) { + DenseTensor* dst) { auto dst_place = dev_ctx.GetPlace(); C_Device_st device{dst_place.GetDeviceId()}; auto src_ptr = static_cast(src.data()); @@ -128,7 +128,7 @@ template <> inline void TensorFromVector(const phi::CustomContext& ctx, const std::vector& src, const phi::CustomContext& dev_ctx, - phi::DenseTensor* dst) { + DenseTensor* dst) { // vector has no data() member, use array instead. // See details: // https://stackoverflow.com/questions/46115669/why-does-stdvectorbool-have-no-data/46115714 @@ -166,7 +166,7 @@ template inline void TensorFromVector(const phi::CustomContext& ctx, const std::vector& src, const phi::CPUContext& dev_ctx, - phi::DenseTensor* dst) { + DenseTensor* dst) { auto dst_place = dev_ctx.GetPlace(); C_Device_st device{dst_place.GetDeviceId()}; auto src_ptr = static_cast(src.data()); @@ -191,7 +191,7 @@ template <> inline void TensorFromVector(const phi::CustomContext& ctx, const std::vector& src, const phi::CPUContext& dev_ctx, - phi::DenseTensor* dst) { + DenseTensor* dst) { auto dst_place = dev_ctx.GetPlace(); PADDLE_THROW(phi::errors::Unimplemented( "TensorFromVector on %s is not supported.", dst_place)); @@ -202,7 +202,7 @@ void TensorFromArray(const phi::CustomContext& ctx, const T* src, const size_t& array_size, const phi::CustomContext& dev_ctx, - phi::DenseTensor* dst) { + DenseTensor* dst) { auto dst_place = dev_ctx.GetPlace(); C_Device_st device{dst_place.GetDeviceId()}; auto src_ptr = static_cast(src); @@ -227,7 +227,7 @@ void TensorFromArray(const phi::CustomContext& ctx, */ template inline void TensorToVector(const phi::CustomContext& ctx, - const phi::DenseTensor& src, + const DenseTensor& src, const phi::CustomContext& dev_ctx, std::vector* dst) { auto src_ptr = static_cast(src.data()); @@ -251,7 +251,7 @@ inline void TensorToVector(const phi::CustomContext& ctx, template <> inline void TensorToVector(const phi::CustomContext& ctx, - const phi::DenseTensor& src, + const DenseTensor& src, const phi::CustomContext& dev_ctx, std::vector* dst) { auto src_ptr = static_cast(src.data()); @@ -359,11 +359,10 @@ inline void ExtractNCDWH(const phi::DDim& dims, template inline std::vector get_new_data_from_tensor( - const phi::CustomContext& dev_ctx, - const phi::DenseTensor* new_data_tensor) { + const phi::CustomContext& dev_ctx, const DenseTensor* new_data_tensor) { std::vector 
vec_new_data; auto place = new_data_tensor->place(); - phi::DenseTensor cpu_starts_tensor; + DenseTensor cpu_starts_tensor; if (place.GetType() == phi::AllocationType::CUSTOM) { // if tensor on CUSTOM place, do memcpy to host cpu_starts_tensor.Resize(new_data_tensor->dims()); @@ -381,22 +380,21 @@ inline std::vector get_new_data_from_tensor( } template -inline phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src, - T num_col_dims) { +inline DenseTensor ReshapeToMatrix(const DenseTensor& src, T num_col_dims) { int rank = src.dims().size(); PADDLE_ENFORCE_GE( rank, 2, phi::errors::InvalidArgument( "'ReshapeToMatrix()' is only used for flatten high rank " - "tensors to matrixs. The dimensions of phi::DenseTensor must be " + "tensors to matrices. The dimensions of DenseTensor must be " "greater or equal than 2. " - "But received dimensions of phi::DenseTensor is %d", + "But received dimensions of DenseTensor is %d", rank)); if (rank == 2) { return src; } - phi::DenseTensor res; + DenseTensor res; res = src; res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims)); return res; diff --git a/backends/mlu/kernels/funcs/range_op.h b/backends/mlu/kernels/funcs/range_op.h index 209f34e8900..5ae93932298 100644 --- a/backends/mlu/kernels/funcs/range_op.h +++ b/backends/mlu/kernels/funcs/range_op.h @@ -24,7 +24,7 @@ void ArangeRawKernel(const Context& dev_ctx, const T start_value, const T end_value, const T step_value, - phi::DenseTensor* out) { + DenseTensor* out) { int64_t size = 0; GetSize(start_value, end_value, step_value, &size); diff --git a/backends/mlu/kernels/funcs/reduce_op.h b/backends/mlu/kernels/funcs/reduce_op.h index ef4d58e92dd..b7146f78e98 100644 --- a/backends/mlu/kernels/funcs/reduce_op.h +++ b/backends/mlu/kernels/funcs/reduce_op.h @@ -21,12 +21,12 @@ namespace custom_kernel { template void MLUReduceOp(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& axes, bool keep_dim, bool reduce_all, const std::string& reduce_name, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); if (x.dims().size() == 0) { TensorCopy(dev_ctx, x, true, out); diff --git a/backends/mlu/kernels/gather_kernel.cc b/backends/mlu/kernels/gather_kernel.cc index eee120a7eb2..6bb537e849c 100644 --- a/backends/mlu/kernels/gather_kernel.cc +++ b/backends/mlu/kernels/gather_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void GatherKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, - const phi::Scalar& axis, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& index, + const Scalar& axis, + DenseTensor* out) { dev_ctx.template Alloc(out); PADDLE_ENFORCE_EQ( @@ -76,11 +76,11 @@ void GatherKernel(const Context& dev_ctx, template void GatherGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, - const phi::DenseTensor& out_grad, - const phi::Scalar& axis, - phi::DenseTensor* x_grad) { + const DenseTensor& x, + const DenseTensor& index, + const DenseTensor& out_grad, + const Scalar& axis, + DenseTensor* x_grad) { dev_ctx.template Alloc(x_grad); const auto index_dims = index.dims(); diff --git a/backends/mlu/kernels/gather_nd_kernel.cc b/backends/mlu/kernels/gather_nd_kernel.cc index 8db63da7efc..dadb44ebf53 100644 --- a/backends/mlu/kernels/gather_nd_kernel.cc +++ b/backends/mlu/kernels/gather_nd_kernel.cc @@ -19,9 +19,9 @@ namespace custom_kernel { template void GatherNdKernel(const Context &dev_ctx, - const phi::DenseTensor
&x, - const phi::DenseTensor &index, - phi::DenseTensor *out) { + const DenseTensor &x, + const DenseTensor &index, + DenseTensor *out) { dev_ctx.template Alloc(out); if (x.numel() == 0) return; @@ -35,7 +35,7 @@ void GatherNdKernel(const Context &dev_ctx, new_dims.emplace_back(x.dims()[i]); } - phi::DenseTensor x_tmp(x); + DenseTensor x_tmp(x); x_tmp.Resize(phi::make_ddim(new_dims)); MLUCnnlTensorDesc x_tmp_desc(x_tmp); MLUCnnlTensorDesc out_desc(*out); @@ -75,10 +75,10 @@ void GatherNdKernel(const Context &dev_ctx, template void GatherNdGradKernel(const Context &dev_ctx, - const phi::DenseTensor &x, - const phi::DenseTensor &index, - const phi::DenseTensor &dout, - phi::DenseTensor *dx) { + const DenseTensor &x, + const DenseTensor &index, + const DenseTensor &dout, + DenseTensor *dx) { auto x_dims = dx->dims(); dev_ctx.template Alloc(dx); @@ -98,10 +98,10 @@ void GatherNdGradKernel(const Context &dev_ctx, return; } - const phi::DenseTensor *p_index = &index; - const phi::DenseTensor *p_dout = &dout; - phi::DenseTensor tmp_tensor(index); - phi::DenseTensor tmp_tensor2(dout); + const DenseTensor *p_index = &index; + const DenseTensor *p_dout = &dout; + DenseTensor tmp_tensor(index); + DenseTensor tmp_tensor2(dout); const auto index_dims = index.dims(); if (index_dims.size() == 1) { std::vector new_dim = {1, index_dims[0]}; diff --git a/backends/mlu/kernels/gaussian_kernel.cc b/backends/mlu/kernels/gaussian_kernel.cc index 6be2cf3d798..738e9055ed2 100644 --- a/backends/mlu/kernels/gaussian_kernel.cc +++ b/backends/mlu/kernels/gaussian_kernel.cc @@ -23,7 +23,7 @@ void GaussianKernel(const Context& dev_ctx, float std, int seed, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); int real_seed = diff --git a/backends/mlu/kernels/generate_proposals_kernel.cc b/backends/mlu/kernels/generate_proposals_kernel.cc index 35db3ac3dfa..ea3f2ad5272 100644 --- a/backends/mlu/kernels/generate_proposals_kernel.cc +++ b/backends/mlu/kernels/generate_proposals_kernel.cc @@ -18,20 +18,20 @@ namespace custom_kernel { template void GenerateProposalsKernel(const Context& dev_ctx, - const phi::DenseTensor& scores, - const phi::DenseTensor& bbox_deltas, - const phi::DenseTensor& im_shape, - const phi::DenseTensor& anchors, - const phi::DenseTensor& variances, + const DenseTensor& scores, + const DenseTensor& bbox_deltas, + const DenseTensor& im_shape, + const DenseTensor& anchors, + const DenseTensor& variances, int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size, float eta, bool pixel_offset, - phi::DenseTensor* rpn_rois, - phi::DenseTensor* rpn_roi_probs, - phi::DenseTensor* rpn_rois_num) { + DenseTensor* rpn_rois, + DenseTensor* rpn_roi_probs, + DenseTensor* rpn_rois_num) { PADDLE_ENFORCE_GE(eta, 1., phi::errors::InvalidArgument( diff --git a/backends/mlu/kernels/grid_sample_kernel.cc b/backends/mlu/kernels/grid_sample_kernel.cc index 889e7e2e773..b17b2e49195 100644 --- a/backends/mlu/kernels/grid_sample_kernel.cc +++ b/backends/mlu/kernels/grid_sample_kernel.cc @@ -18,12 +18,12 @@ namespace custom_kernel { template void GridSampleKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& grid, + const DenseTensor& x, + const DenseTensor& grid, const std::string& mode, const std::string& padding_mode, bool align_corners, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); int n = x.dims()[0]; @@ -80,14 +80,14 @@ void GridSampleKernel(const Context& dev_ctx, } template void 
GridSampleGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& grid, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& grid, + const DenseTensor& out_grad, const std::string& mode, const std::string& padding_mode, bool align_corners, - phi::DenseTensor* x_grad, - phi::DenseTensor* grid_grad) { + DenseTensor* x_grad, + DenseTensor* grid_grad) { const int n = grid.dims()[0]; const int out_h = grid.dims()[1]; const int out_w = grid.dims()[2]; diff --git a/backends/mlu/kernels/huber_loss_kernel.cc b/backends/mlu/kernels/huber_loss_kernel.cc index e2aa9e9c5e8..b247a9a4a95 100644 --- a/backends/mlu/kernels/huber_loss_kernel.cc +++ b/backends/mlu/kernels/huber_loss_kernel.cc @@ -19,11 +19,11 @@ namespace custom_kernel { template void HuberLossKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& label, + const DenseTensor& input, + const DenseTensor& label, float delta, - phi::DenseTensor* out, - phi::DenseTensor* residual) { + DenseTensor* out, + DenseTensor* residual) { // compute y-x cnnlDataType_t data_type = ToCnnlDataType(); dev_ctx.template Alloc(residual); @@ -82,11 +82,11 @@ void HuberLossKernel(const Context& dev_ctx, template void HuberLossGradKernel(const Context& dev_ctx, - const phi::DenseTensor& residual, - const phi::DenseTensor& dout, + const DenseTensor& residual, + const DenseTensor& dout, float delta, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { Tensor t_grad_rd; t_grad_rd.Resize(residual.dims()); dev_ctx.template Alloc(&t_grad_rd); diff --git a/backends/mlu/kernels/index_sample_kernel.cc b/backends/mlu/kernels/index_sample_kernel.cc index 229eb7d98df..254ba9ca4f7 100644 --- a/backends/mlu/kernels/index_sample_kernel.cc +++ b/backends/mlu/kernels/index_sample_kernel.cc @@ -18,9 +18,9 @@ namespace custom_kernel { template void IndexSampleGather(const Context& dev_ctx, - const phi::DenseTensor* index, - const phi::DenseTensor* input, - phi::DenseTensor* out) { + const DenseTensor* index, + const DenseTensor* input, + DenseTensor* out) { auto index_dims = index->dims(); auto input_dims = input->dims(); auto batch_size = input_dims[0]; @@ -58,7 +58,7 @@ void IndexSampleGather(const Context& dev_ctx, } } - phi::DenseTensor gather_index; + DenseTensor gather_index; TensorFromVector(dev_ctx, gather_index_vec, dev_ctx, &gather_index); dev_ctx.Wait(); gather_index.Resize({batch_size, index_length, 2}); @@ -77,9 +77,9 @@ void IndexSampleGather(const Context& dev_ctx, template void IndexSampleKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& index, + DenseTensor* out) { dev_ctx.template Alloc(out); const auto& index_type = index.dtype(); @@ -102,9 +102,9 @@ void IndexSampleKernel(const Context& dev_ctx, template void IndexSampleGradScatter(const Context& dev_ctx, - const phi::DenseTensor* index, - const phi::DenseTensor* out_grad, - phi::DenseTensor* x_grad) { + const DenseTensor* index, + const DenseTensor* out_grad, + DenseTensor* x_grad) { auto index_dims = index->dims(); auto input_dims = x_grad->dims(); auto batch_size = input_dims[0]; @@ -120,7 +120,7 @@ void IndexSampleGradScatter(const Context& dev_ctx, scatter_index_vec.push_back(index_vec[i * index_length + j]); } } - phi::DenseTensor scatter_index; + DenseTensor scatter_index; TensorFromVector(dev_ctx, scatter_index_vec, dev_ctx, &scatter_index); dev_ctx.Wait(); 
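// scatter_index_vec stores flat (row, sampled-column) pairs, one per element
// of out_grad; the Resize below regroups them into [batch_size, index_length, 2]
// coordinates so the scatter can route each out_grad value back to its source
// position in x_grad, mirroring the gather_index layout used in the forward pass.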
scatter_index.Resize({batch_size, index_length, 2}); @@ -151,10 +151,10 @@ void IndexSampleGradScatter(const Context& dev_ctx, template void IndexSampleGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, - const phi::DenseTensor& out_grad, - phi::DenseTensor* x_grad) { + const DenseTensor& x, + const DenseTensor& index, + const DenseTensor& out_grad, + DenseTensor* x_grad) { dev_ctx.template Alloc(x_grad); const auto& index_type = index.dtype(); diff --git a/backends/mlu/kernels/index_select_kernel.cc b/backends/mlu/kernels/index_select_kernel.cc index 942d0783f89..01d9e42e952 100644 --- a/backends/mlu/kernels/index_select_kernel.cc +++ b/backends/mlu/kernels/index_select_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void IndexSelectKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, + const DenseTensor& x, + const DenseTensor& index, int dim, - phi::DenseTensor* output) { + DenseTensor* output) { dev_ctx.template Alloc(output); MLUCnnlTensorDesc x_desc(x); MLUCnnlTensorDesc out_desc(*output); @@ -42,11 +42,11 @@ void IndexSelectKernel(const Context& dev_ctx, template void IndexSelectGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& index, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& index, + const DenseTensor& out_grad, int dim, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { auto x_dims = x_grad->dims(); auto out_dims = out_grad.dims(); @@ -54,7 +54,7 @@ void IndexSelectGradKernel(const Context& dev_ctx, dim += out_dims.size(); } - phi::DenseTensor casted_index; + DenseTensor casted_index; MLUCnnlTensorDesc index_desc(index); MLUCnnlTensorDesc out_grad_desc(out_grad); if (index.dtype() != phi::DataType::INT32) { @@ -94,14 +94,14 @@ void IndexSelectGradKernel(const Context& dev_ctx, x_grad_desc.get(), GetBasePtr(x_grad)); } else { - phi::DenseTensor transed_out_grad; + DenseTensor transed_out_grad; std::vector in_trans_perm; in_trans_perm.push_back(dim); for (int i = 0; i < out_dims.size(); ++i) { if (i == dim) continue; in_trans_perm.push_back(i); } - phi::DDim transed_out_dims(out_dims); + DDim transed_out_dims(out_dims); for (size_t i = 0; i < in_trans_perm.size(); ++i) { transed_out_dims[i] = out_dims[in_trans_perm[i]]; } @@ -117,8 +117,8 @@ void IndexSelectGradKernel(const Context& dev_ctx, transed_out_grad_desc.get(), GetBasePtr(&transed_out_grad)); - phi::DenseTensor sum_out; - phi::DDim sum_dims(x_dims); + DenseTensor sum_out; + DDim sum_dims(x_dims); sum_dims[0] = x_dims[dim]; auto idx = 1; for (int i = 0; i < x_dims.size(); ++i) { diff --git a/backends/mlu/kernels/interpolate_kernel.cc b/backends/mlu/kernels/interpolate_kernel.cc index f99cede5e54..12bb4bd139a 100644 --- a/backends/mlu/kernels/interpolate_kernel.cc +++ b/backends/mlu/kernels/interpolate_kernel.cc @@ -49,10 +49,10 @@ inline std::vector get_new_shape_mlu( template void InterpolateKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, const std::string& data_layout_str, int out_d, int out_h, @@ -61,7 +61,7 @@ void InterpolateKernel( const std::string& interp_method, bool align_corners, int align_mode, - phi::DenseTensor* out) { + DenseTensor* out) { auto input_dims = 
x.dims(); PADDLE_ENFORCE_GE(input_dims.size(), 4, @@ -185,7 +185,7 @@ void InterpolateKernel( // do transpose according to cnnl's constraints // cnnlInterp_v2 only accepts NHWC when mode is CNNL_INTERP_BILINEAR and // CNNL_INTERP_NEAREST, - phi::DDim dim_in, dim_in_trans, dim_out, dim_out_trans; + DDim dim_in, dim_in_trans, dim_out, dim_out_trans; Tensor transformed_input, transformed_output; bool need_transpose = input_dims.size() != 2; if (input_dims.size() == 4) { @@ -369,11 +369,11 @@ void InterpolateKernel( template void InterpolateGradKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, + const DenseTensor& out_grad, const std::string& data_layout_str, int out_d, int out_h, @@ -382,7 +382,7 @@ void InterpolateGradKernel( const std::string& interp_method, bool align_corners, int align_mode, - phi::DenseTensor* dx) { + DenseTensor* dx) { auto output_grad_dims = out_grad.dims(); PADDLE_ENFORCE_EQ( output_grad_dims.size(), @@ -449,8 +449,8 @@ void InterpolateGradKernel( align_center = 0; } - phi::DDim dim_grad; - phi::DDim dim_out_grad, dim_out_trans_grad, dim_in_grad, dim_in_trans_grad; + DDim dim_grad; + DDim dim_out_grad, dim_out_trans_grad, dim_in_grad, dim_in_trans_grad; Tensor transformed_output_grad, transformed_input_grad; bool need_transpose = input_dims.size() != 2 && data_layout == DataLayout::kNCHW; @@ -539,10 +539,10 @@ void InterpolateGradKernel( template void BilinearInterpKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, const std::string& data_layout, int out_d, int out_h, @@ -551,7 +551,7 @@ void BilinearInterpKernel( const std::string& interp_method, bool align_corners, int align_mode, - phi::DenseTensor* out) { + DenseTensor* out) { InterpolateKernel(dev_ctx, x, out_size, @@ -571,10 +571,10 @@ void BilinearInterpKernel( template void NearestInterpKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, const std::string& data_layout, int out_d, int out_h, @@ -583,7 +583,7 @@ void NearestInterpKernel( const std::string& interp_method, bool align_corners, int align_mode, - phi::DenseTensor* out) { + DenseTensor* out) { InterpolateKernel(dev_ctx, x, out_size, @@ -603,11 +603,11 @@ void NearestInterpKernel( template void BilinearInterpGradKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, + const DenseTensor& out_grad, const std::string& data_layout, int out_d, int out_h, @@ -616,7 +616,7 @@ void BilinearInterpGradKernel( const std::string& interp_method, bool 
align_corners, int align_mode, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { InterpolateGradKernel(dev_ctx, x, out_size, @@ -637,11 +637,11 @@ void BilinearInterpGradKernel( template void NearestInterpGradKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& out_size, - const paddle::optional>& size_tensor, - const paddle::optional& scale_tensor, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const paddle::optional& out_size, + const paddle::optional>& size_tensor, + const paddle::optional& scale_tensor, + const DenseTensor& out_grad, const std::string& data_layout, int out_d, int out_h, @@ -650,7 +650,7 @@ void NearestInterpGradKernel( const std::string& interp_method, bool align_corners, int align_mode, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { InterpolateGradKernel(dev_ctx, x, out_size, diff --git a/backends/mlu/kernels/kldiv_loss_kernel.cc b/backends/mlu/kernels/kldiv_loss_kernel.cc index 58d42a80ae6..6ea116c54cf 100644 --- a/backends/mlu/kernels/kldiv_loss_kernel.cc +++ b/backends/mlu/kernels/kldiv_loss_kernel.cc @@ -20,17 +20,17 @@ namespace custom_kernel { template void KLDivLossKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& label, + const DenseTensor& x, + const DenseTensor& label, const std::string& reduction, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); - phi::DenseTensor out_tmp; + DenseTensor out_tmp; out_tmp.Resize(x.dims()); dev_ctx.template Alloc(&out_tmp); // formula: label * (log(label) - x) // 0. mark label >=0 - phi::DenseTensor tensor_zeros; + DenseTensor tensor_zeros; tensor_zeros.Resize(label.dims()); dev_ctx.template Alloc(&tensor_zeros); MLUCnnlTensorDesc tensor_zeros_desc(tensor_zeros); @@ -42,14 +42,14 @@ void KLDivLossKernel(const Context& dev_ctx, tensor_zeros_desc.get(), GetBasePtr(&tensor_zeros)); - phi::DenseTensor condiction_out; + DenseTensor condiction_out; condiction_out.Resize(label.dims()); dev_ctx.template Alloc(&condiction_out); MLULogicOp(dev_ctx, label, tensor_zeros, "greater_equal", &condiction_out); MLUCnnlTensorDesc label_desc(label); // 1. log(label) ->log_label - phi::DenseTensor log_label; + DenseTensor log_label; log_label.Resize(label.dims()); dev_ctx.template Alloc(&log_label); @@ -63,7 +63,7 @@ void KLDivLossKernel(const Context& dev_ctx, loglabel_desc.get(), GetBasePtr(&log_label)); // 2. 
optensor --sub( log(label) - x)->sub_out - phi::DenseTensor sub_out; + DenseTensor sub_out; sub_out.Resize(x.dims()); dev_ctx.template Alloc(&sub_out); MLUOpTensorKernel(dev_ctx, log_label, x, -1, CNNL_OP_TENSOR_SUB, &sub_out); @@ -109,15 +109,15 @@ void KLDivLossKernel(const Context& dev_ctx, template void KLDivLossGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& label, - const phi::DenseTensor& d_out, + const DenseTensor& x, + const DenseTensor& label, + const DenseTensor& d_out, const std::string& reduction, - phi::DenseTensor* d_x) { + DenseTensor* d_x) { dev_ctx.template Alloc(d_x); // formula: dx = -1 * label * d_out // Relu(label) make label >=0 - phi::DenseTensor label_clip; + DenseTensor label_clip; label_clip.Resize(label.dims()); dev_ctx.template Alloc(&label_clip); MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_RELU, 1.0); @@ -132,7 +132,7 @@ void KLDivLossGradKernel(const Context& dev_ctx, GetBasePtr(&label_clip)); // label * d_out - phi::DenseTensor out_tmp; + DenseTensor out_tmp; out_tmp.Resize(x.dims()); dev_ctx.template Alloc(&out_tmp); MLUOpTensorKernel( diff --git a/backends/mlu/kernels/label_smooth_kernel.cc b/backends/mlu/kernels/label_smooth_kernel.cc index 75f51383ba8..ae3eb92fef2 100644 --- a/backends/mlu/kernels/label_smooth_kernel.cc +++ b/backends/mlu/kernels/label_smooth_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void LabelSmoothKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& dist, + const DenseTensor& x, + const paddle::optional& dist, float epsilon, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); auto epsilon_gt = 1.0f - epsilon; if (x.numel() == 0) return; diff --git a/backends/mlu/kernels/layer_norm_kernel.cc b/backends/mlu/kernels/layer_norm_kernel.cc index 627b20b33c4..63efa9682af 100644 --- a/backends/mlu/kernels/layer_norm_kernel.cc +++ b/backends/mlu/kernels/layer_norm_kernel.cc @@ -18,14 +18,14 @@ namespace custom_kernel { template void LayerNormKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& scale_opt, - const paddle::optional& bias_opt, + const DenseTensor& x, + const paddle::optional& scale_opt, + const paddle::optional& bias_opt, float epsilon, int begin_norm_axis, - phi::DenseTensor* out, - phi::DenseTensor* mean, - phi::DenseTensor* variance) { + DenseTensor* out, + DenseTensor* mean, + DenseTensor* variance) { auto* scale = scale_opt.get_ptr(); auto* bias = bias_opt.get_ptr(); @@ -142,17 +142,17 @@ void LayerNormKernel(const Context& dev_ctx, template void LayerNormGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const paddle::optional& scale_opt, - const paddle::optional& bias, - const phi::DenseTensor& mean, - const phi::DenseTensor& variance, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const paddle::optional& scale_opt, + const paddle::optional& bias, + const DenseTensor& mean, + const DenseTensor& variance, + const DenseTensor& out_grad, float epsilon, int begin_norm_axis, - phi::DenseTensor* x_grad, - phi::DenseTensor* scale_grad, - phi::DenseTensor* bias_grad) { + DenseTensor* x_grad, + DenseTensor* scale_grad, + DenseTensor* bias_grad) { auto* scale = scale_opt.get_ptr(); dev_ctx.template Alloc(x_grad); diff --git a/backends/mlu/kernels/log_softmax_kernel.cc b/backends/mlu/kernels/log_softmax_kernel.cc index 92c543cc655..8dc1c2185b9 100644 --- a/backends/mlu/kernels/log_softmax_kernel.cc +++ b/backends/mlu/kernels/log_softmax_kernel.cc @@ 
-19,9 +19,9 @@ namespace custom_kernel { template void LogSoftmaxKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); const int rank = x.dims().size(); @@ -65,10 +65,10 @@ void LogSoftmaxKernel(const Context& dev_ctx, template void LogSoftmaxGradKernel(const Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& dout, + const DenseTensor& out, + const DenseTensor& dout, int axis, - phi::DenseTensor* dx) { + DenseTensor* dx) { dev_ctx.template Alloc(dx); const int rank = dout.dims().size(); diff --git a/backends/mlu/kernels/logical_kernel.cc b/backends/mlu/kernels/logical_kernel.cc index 317fe86d1f3..8e45a2c971e 100644 --- a/backends/mlu/kernels/logical_kernel.cc +++ b/backends/mlu/kernels/logical_kernel.cc @@ -18,33 +18,33 @@ namespace custom_kernel { template void LogicalNotMLUKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { // LogicalNot only has one input x, set y = x also for cnnl computation MLULogicOp(dev_ctx, x, x, "not", out); } template void LogicalAndMLUKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "and", out); } template void LogicalOrMLUKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "or", out); } template void LogicalXorMLUKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { MLULogicOp(dev_ctx, x, y, "xor", out); } } // namespace custom_kernel diff --git a/backends/mlu/kernels/lookup_table_v2_op_kernel.cc b/backends/mlu/kernels/lookup_table_v2_op_kernel.cc index 0631699e7e5..cc93435d2a3 100644 --- a/backends/mlu/kernels/lookup_table_v2_op_kernel.cc +++ b/backends/mlu/kernels/lookup_table_v2_op_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void EmbeddingKernel(const Context& dev_ctx, - const phi::DenseTensor& inputx, - const phi::DenseTensor& weight, + const DenseTensor& inputx, + const DenseTensor& weight, int64_t padding_idx, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); int padding_index = static_cast(padding_idx); @@ -42,11 +42,11 @@ void EmbeddingKernel(const Context& dev_ctx, template void EmbeddingGradKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& weight, - const phi::DenseTensor& out_grad, + const DenseTensor& input, + const DenseTensor& weight, + const DenseTensor& out_grad, int64_t padding_idx, - phi::DenseTensor* weight_grad) { + DenseTensor* weight_grad) { dev_ctx.template Alloc(weight_grad); int padding_index = static_cast(padding_idx); diff --git a/backends/mlu/kernels/masked_select_kernel.cc b/backends/mlu/kernels/masked_select_kernel.cc index 5b75b41c426..eee278de3a5 100644 --- a/backends/mlu/kernels/masked_select_kernel.cc +++ b/backends/mlu/kernels/masked_select_kernel.cc @@ -20,9 +20,9 @@ namespace custom_kernel { template void MaskedSelectKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& mask, - phi::DenseTensor* out) { + const DenseTensor& x, + const 
DenseTensor& mask, + DenseTensor* out) { auto input_dim = x.dims(); auto mask_dim = mask.dims(); PADDLE_ENFORCE_EQ(input_dim, @@ -117,10 +117,10 @@ void MaskedSelectKernel(const Context& dev_ctx, template void MaskedSelectGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& mask, - const phi::DenseTensor& out_grad, - phi::DenseTensor* x_grad) { + const DenseTensor& x, + const DenseTensor& mask, + const DenseTensor& out_grad, + DenseTensor* x_grad) { C_Stream stream = static_cast(dev_ctx.stream()); Tensor mask_tensor, mask_valid_num_tensor; std::vector mask_valid_num_vec; diff --git a/backends/mlu/kernels/matmul_kernel.cc b/backends/mlu/kernels/matmul_kernel.cc index 7f33c853e42..997243475c4 100644 --- a/backends/mlu/kernels/matmul_kernel.cc +++ b/backends/mlu/kernels/matmul_kernel.cc @@ -19,9 +19,9 @@ namespace custom_kernel { template static void Mul(const Context& dev_ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* out) { + const DenseTensor& X, + const DenseTensor& Y, + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); @@ -43,9 +43,9 @@ static void Mul(const Context& dev_ctx, template static void MatMul2D(const Context& dev_ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* out, + const DenseTensor& X, + const DenseTensor& Y, + DenseTensor* out, const bool transpose_x, const bool transpose_y) { dev_ctx.template Alloc(out); @@ -66,9 +66,9 @@ static void MatMul2D(const Context& dev_ctx, template static void MatMul2DwithReduceBatch(const Context& dev_ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* out, + const DenseTensor& X, + const DenseTensor& Y, + DenseTensor* out, const bool transpose_x, const bool transpose_y) { dev_ctx.template Alloc(out); @@ -95,9 +95,9 @@ static void MatMul2DwithReduceBatch(const Context& dev_ctx, template static void MatMulND(const Context& dev_ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* out, + const DenseTensor& X, + const DenseTensor& Y, + DenseTensor* out, const bool transpose_x, const bool transpose_y) { dev_ctx.template Alloc(out); @@ -121,8 +121,8 @@ template static void ReduceDims(const Context& dev_ctx, const std::vector& dims, const std::vector& brd_dims, - const phi::DenseTensor& in, - phi::DenseTensor* out) { + const DenseTensor& in, + DenseTensor* out) { std::vector axes; int64_t size = brd_dims.size(); int64_t diff = brd_dims.size() - dims.size(); @@ -163,11 +163,11 @@ static void ReduceDims(const Context& dev_ctx, template void MatmulKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, bool transpose_x, bool transpose_y, - phi::DenseTensor* out) { + DenseTensor* out) { std::vector x_dims = phi::vectorize(x.dims()); std::vector y_dims = phi::vectorize(y.dims()); std::vector out_dims = phi::vectorize(out->dims()); @@ -255,11 +255,11 @@ void MatmulKernel(const Context& dev_ctx, template void MatmulWithFlattenKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int x_num_col_dims, int y_num_col_dims, - phi::DenseTensor* out) { + DenseTensor* out) { const Tensor x_matrix = x.dims().size() > 2 ? 
ReshapeToMatrix(x, x_num_col_dims) : x; const Tensor y_matrix = @@ -279,13 +279,13 @@ void MatmulWithFlattenKernel(const Context& dev_ctx, template void MatmulGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, bool transpose_x, bool transpose_y, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + DenseTensor* dx, + DenseTensor* dy) { std::vector x_dims = phi::vectorize(x.dims()); std::vector y_dims = phi::vectorize(y.dims()); std::vector out_dims = phi::vectorize(dout.dims()); @@ -414,13 +414,13 @@ void MatmulGradKernel(const Context& dev_ctx, template void MatmulWithFlattenGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out_grad, int x_num_col_dims, int y_num_col_dims, - phi::DenseTensor* x_grad, - phi::DenseTensor* y_grad) { + DenseTensor* x_grad, + DenseTensor* y_grad) { auto x_matrix = x.dims().size() > 2 ? ReshapeToMatrix(x, x_num_col_dims) : x; auto y_matrix = y.dims().size() > 2 ? ReshapeToMatrix(y, y_num_col_dims) : y; auto* dout = &out_grad; @@ -433,11 +433,11 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx, auto* dy = y_grad; if (dx != nullptr) { - phi::DenseTensorMeta x_meta = {x.dtype(), x.dims()}; + DenseTensorMeta x_meta = {x.dtype(), x.dims()}; dx->set_meta(x_meta); } if (dy != nullptr) { - phi::DenseTensorMeta y_meta = {y.dtype(), y.dims()}; + DenseTensorMeta y_meta = {y.dtype(), y.dims()}; dy->set_meta(y_meta); } @@ -462,19 +462,19 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx, template void BmmKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { MatMulND(dev_ctx, x, y, out, false, false); } template void BmmGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dout, - phi::DenseTensor* dx, - phi::DenseTensor* dy) { + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy) { if (dx) { MatMulND(dev_ctx, dout, y, dx, false, true); } diff --git a/backends/mlu/kernels/mean_all_kernel.cc b/backends/mlu/kernels/mean_all_kernel.cc index 6f7746a5d38..a8d46e1111a 100644 --- a/backends/mlu/kernels/mean_all_kernel.cc +++ b/backends/mlu/kernels/mean_all_kernel.cc @@ -20,8 +20,8 @@ namespace custom_kernel { template void MeanAllKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { + const DenseTensor& x, + DenseTensor* out) { auto rank = x.dims().size(); if (rank == 0) { // scalar TensorCopy(dev_ctx, x, false, out); @@ -33,9 +33,9 @@ void MeanAllKernel(const Context& dev_ctx, template void MeanAllGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out_grad, - phi::DenseTensor* x_grad) { + const DenseTensor& x, + const DenseTensor& out_grad, + DenseTensor* x_grad) { PADDLE_ENFORCE_EQ(out_grad.numel(), 1, phi::errors::InvalidArgument( diff --git a/backends/mlu/kernels/memcpy_kernel.cc b/backends/mlu/kernels/memcpy_kernel.cc index 2d289ce24e1..d9cbc8ac2cf 100644 --- a/backends/mlu/kernels/memcpy_kernel.cc +++ b/backends/mlu/kernels/memcpy_kernel.cc @@ -19,9 +19,9 @@ namespace custom_kernel { template void MemcpyKernel(const Context& dev_ctx, - const 
phi::DenseTensor& x, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { if (!x.initialized()) { return; } @@ -41,26 +41,26 @@ void MemcpyKernel, template void MemcpyH2DKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { TensorCopy(dev_ctx, x, false, out, dev_ctx.GetPlace()); } // used in new executor, for memory copy from device to host template void MemcpyD2HKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { TensorCopy(dev_ctx, x, true, out, phi::CPUPlace()); } template void MemcpyD2HMultiIOKernel(const Context& dev_ctx, - const std::vector& array, + const std::vector& array, int dst_place_type, - std::vector out_array) { + std::vector out_array) { PADDLE_ENFORCE_EQ( array.size(), out_array.size(), diff --git a/backends/mlu/kernels/meshgrid_kernel.cc b/backends/mlu/kernels/meshgrid_kernel.cc index a6f9ec8f593..68523ff7cfe 100644 --- a/backends/mlu/kernels/meshgrid_kernel.cc +++ b/backends/mlu/kernels/meshgrid_kernel.cc @@ -19,8 +19,8 @@ namespace custom_kernel { template void MeshgridKernel(const Context& dev_ctx, - const std::vector& ins, - std::vector outs) { + const std::vector& ins, + std::vector outs) { PADDLE_ENFORCE_EQ( (ins.size() > 1) && (ins.size() < 7), true, @@ -52,11 +52,11 @@ void MeshgridKernel(const Context& dev_ctx, std::vector view_shape(size, 1); view_shape[i] = shape[i]; - phi::DDim out_dims_reshape = phi::make_ddim(view_shape); - phi::DenseTensor reshape_ins_tensor(*ins[i]); + DDim out_dims_reshape = phi::make_ddim(view_shape); + DenseTensor reshape_ins_tensor(*ins[i]); reshape_ins_tensor.Resize(out_dims_reshape); - phi::DDim out_dims = phi::make_ddim(shape); + DDim out_dims = phi::make_ddim(shape); outs[i]->Resize(out_dims); dev_ctx.template Alloc(outs[i]); diff --git a/backends/mlu/kernels/momentum_kernel.cc b/backends/mlu/kernels/momentum_kernel.cc index f75d437780f..323c4bddf24 100644 --- a/backends/mlu/kernels/momentum_kernel.cc +++ b/backends/mlu/kernels/momentum_kernel.cc @@ -24,31 +24,31 @@ enum class RegularizationType { template void MomentumKernel(const Context &dev_ctx, - const phi::DenseTensor &param, - const phi::DenseTensor &grad, - const phi::DenseTensor &velocity, - const phi::DenseTensor &learning_rate, - const paddle::optional &master_param, + const DenseTensor &param, + const DenseTensor &grad, + const DenseTensor &velocity, + const DenseTensor &learning_rate, + const paddle::optional &master_param, float mu_f, bool use_nesterov, const std::string &regularization_method, float regularization_coeff, bool multi_precision, float rescale_grad, - phi::DenseTensor *param_out, - phi::DenseTensor *velocity_out, - phi::DenseTensor *master_param_out) { + DenseTensor *param_out, + DenseTensor *velocity_out, + DenseTensor *master_param_out) { auto mu = static_cast(mu_f); dev_ctx.template Alloc(param_out); dev_ctx.template Alloc(velocity_out); - phi::DenseTensor mu_tensor; + DenseTensor mu_tensor; mu_tensor.Resize({1}); dev_ctx.template Alloc(&mu_tensor); FillMLUTensorWithHostValue(dev_ctx, mu, &mu_tensor); - phi::DenseTensor regularized_grad; + DenseTensor regularized_grad; MLUCnnlTensorDesc param_desc(param); if (regularization_method == "l2_decay") { regularized_grad.Resize(grad.dims()); @@ -83,20 +83,20 @@ void MomentumKernel, template void MergedMomentumKernel( const Context
&dev_ctx, - const std::vector &param, - const std::vector &grad, - const std::vector &velocity, - const std::vector &learning_rate, - const paddle::optional> &master_param, + const std::vector &param, + const std::vector &grad, + const std::vector &velocity, + const std::vector &learning_rate, + const paddle::optional> &master_param, float mu, bool use_nesterov, const std::vector &regularization_method, const std::vector &regularization_coeff, bool multi_precision, float rescale_grad, - std::vector param_out, - std::vector velocity_out, - std::vector master_param_out) { + std::vector param_out, + std::vector velocity_out, + std::vector master_param_out) { size_t n = param.size(); PADDLE_ENFORCE_EQ(n, param_out.size(), diff --git a/backends/mlu/kernels/multinomial_kernel.cc b/backends/mlu/kernels/multinomial_kernel.cc index 905e2717a90..2cd85bf5edf 100644 --- a/backends/mlu/kernels/multinomial_kernel.cc +++ b/backends/mlu/kernels/multinomial_kernel.cc @@ -18,10 +18,10 @@ namespace custom_kernel { template void MultinomialKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& num, + const DenseTensor& x, + const Scalar& num, bool replacement, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc desc_x(x); MLUCnnlTensorDesc desc_out(*out); diff --git a/backends/mlu/kernels/nonzero_kernel.cc b/backends/mlu/kernels/nonzero_kernel.cc index 1f56469aee6..110c575a9b5 100644 --- a/backends/mlu/kernels/nonzero_kernel.cc +++ b/backends/mlu/kernels/nonzero_kernel.cc @@ -21,8 +21,8 @@ namespace custom_kernel { template void NonZeroKernel(const Context& dev_ctx, - const phi::DenseTensor& condition, - phi::DenseTensor* out) { + const DenseTensor& condition, + DenseTensor* out) { auto dims = condition.dims(); const int rank = dims.size(); diff --git a/backends/mlu/kernels/numel_kernel.cc b/backends/mlu/kernels/numel_kernel.cc index b92e626293c..41a8a4c9699 100644 --- a/backends/mlu/kernels/numel_kernel.cc +++ b/backends/mlu/kernels/numel_kernel.cc @@ -19,8 +19,8 @@ namespace custom_kernel { template void NumelKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - phi::DenseTensor* out) { + const DenseTensor& input, + DenseTensor* out) { dev_ctx.template Alloc(out); int64_t size = input.numel(); FillMLUTensorWithHostValue(dev_ctx, size, out); diff --git a/backends/mlu/kernels/one_hot_kernel.cc b/backends/mlu/kernels/one_hot_kernel.cc index 236b87a4c95..8d2628e0425 100644 --- a/backends/mlu/kernels/one_hot_kernel.cc +++ b/backends/mlu/kernels/one_hot_kernel.cc @@ -19,11 +19,11 @@ namespace custom_kernel { template void OneHotRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& depth_scalar, + const DenseTensor& x, + const Scalar& depth_scalar, phi::DataType dtype, bool allow_out_of_range, - phi::DenseTensor* out) { + DenseTensor* out) { int depth = depth_scalar.to(); auto out_dims = out->dims(); out_dims[out_dims.size() - 1] = depth; @@ -33,7 +33,7 @@ void OneHotRawKernel(const Context& dev_ctx, float on_value = 1.0f, off_value = 0.0f; std::vector in_off_dim_vec(1, 1); - phi::DDim in_out_dims = phi::make_ddim(in_off_dim_vec); + DDim in_out_dims = phi::make_ddim(in_off_dim_vec); Tensor on_value_tensor, off_value_tensor; on_value_tensor.Resize(in_out_dims); @@ -81,9 +81,9 @@ void OneHotRawKernel(const Context& dev_ctx, template void OneHotKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& num_classes_s, - phi::DenseTensor* out) { + const DenseTensor& x, + const Scalar& num_classes_s, +
DenseTensor* out) { dev_ctx.template Alloc(out); custom_kernel::OneHotRawKernel( diff --git a/backends/mlu/kernels/p_norm_kernel.cc b/backends/mlu/kernels/p_norm_kernel.cc index 0fb6668f4ad..d9f973c199a 100644 --- a/backends/mlu/kernels/p_norm_kernel.cc +++ b/backends/mlu/kernels/p_norm_kernel.cc @@ -21,13 +21,13 @@ namespace custom_kernel { template void PnormKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, float porder, int axis, float epsilon, bool keepdim, bool asvector, - phi::DenseTensor* out) { + DenseTensor* out) { VLOG(5) << "[PnormKernel] x dims: " << x.dims() << " out dims: " << out->dims() << " porder: " << porder << " epsilon: " << epsilon << " keepdim: " << keepdim @@ -90,22 +90,22 @@ void PnormKernel(const Context& dev_ctx, template void PnormGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - const phi::DenseTensor& dy, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dy, float porder, int axis, float epsilon, bool keepdim, bool asvector, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); auto xdim = x.dims(); axis = axis < 0 ? xdim.size() + axis : axis; - phi::DenseTensor y_share(y); - phi::DenseTensor dy_share(dy); + DenseTensor y_share(y); + DenseTensor dy_share(dy); auto ydim = xdim; if (!keepdim) { ydim[axis] = 1; @@ -118,7 +118,7 @@ void PnormGradKernel(const Context& dev_ctx, FillMLUTensorWithHostValue(dev_ctx, static_cast(0), out); out->Resize(xdim); } else if (porder == INFINITY || porder == -INFINITY) { - phi::DenseTensor x_abs; + DenseTensor x_abs; x_abs.Resize(xdim); dev_ctx.template Alloc(&x_abs); MLUCnnlTensorDesc abs_in_desc(x); @@ -129,16 +129,16 @@ void PnormGradKernel(const Context& dev_ctx, abs_out_desc.get(), GetBasePtr(&x_abs)); - phi::DenseTensor t_cond; + DenseTensor t_cond; t_cond.Resize(xdim); MLULogicOp(dev_ctx, x_abs, y_share, "equal", out); - phi::DenseTensor t_zero; + DenseTensor t_zero; t_zero.Resize({1}); dev_ctx.template Alloc(&t_zero); FillMLUTensorWithHostValue(dev_ctx, static_cast(0), &t_zero); - phi::DenseTensor x_sign; + DenseTensor x_sign; x_sign.Resize(xdim); dev_ctx.template Alloc(&x_sign); MLUCnnlTensorDesc sign_out_desc(x_sign); @@ -164,7 +164,7 @@ void PnormGradKernel(const Context& dev_ctx, out_desc.get(), GetBasePtr(out)); } else { - phi::DenseTensor x_abs; + DenseTensor x_abs; x_abs.Resize(xdim); dev_ctx.template Alloc(&x_abs); MLUCnnlTensorDesc out_desc(*out); @@ -176,7 +176,7 @@ void PnormGradKernel(const Context& dev_ctx, abs_out_desc.get(), GetBasePtr(&x_abs)); - phi::DenseTensor x_sign; + DenseTensor x_sign; x_sign.Resize(xdim); dev_ctx.template Alloc(&x_sign); MLUCnnlTensorDesc sign_out_desc(x_sign); @@ -186,11 +186,11 @@ void PnormGradKernel(const Context& dev_ctx, sign_out_desc.get(), GetBasePtr(&x_sign)); - phi::DenseTensor y_pow; + DenseTensor y_pow; y_pow.Resize(ydim); dev_ctx.template Alloc(&y_pow); if (porder >= 1) { - phi::DenseTensor t_exp; + DenseTensor t_exp; t_exp.Resize({1}); dev_ctx.template Alloc(&t_exp); FillMLUTensorWithHostValue(dev_ctx, static_cast(porder - 1), &t_exp); @@ -207,7 +207,7 @@ void PnormGradKernel(const Context& dev_ctx, out_desc.get(), GetBasePtr(out)); } else { - phi::DenseTensor t_exp; + DenseTensor t_exp; t_exp.Resize({1}); dev_ctx.template Alloc(&t_exp); FillMLUTensorWithHostValue(dev_ctx, static_cast(1 - porder), &t_exp); diff --git a/backends/mlu/kernels/pool2d_kernel.cc b/backends/mlu/kernels/pool2d_kernel.cc index 9bbd44fea3d..42530e44ecf 100644 --- 
a/backends/mlu/kernels/pool2d_kernel.cc +++ b/backends/mlu/kernels/pool2d_kernel.cc @@ -44,7 +44,7 @@ inline void UpdatePadding(std::vector* paddings, const bool global_pooling, const bool adaptive, const std::string padding_algorithm, - const phi::DDim data_dims, + const DDim data_dims, const std::vector& strides, const std::vector& kernel_size) { // set padding size == data_dims.size() * 2 @@ -92,7 +92,7 @@ inline void UpdatePadding(std::vector* paddings, template inline void UpdateKernelSize(std::vector* kernel_size, - const phi::DDim data_dims) { + const DDim data_dims) { kernel_size->resize(static_cast(data_dims.size())); for (size_t i = 0; i < kernel_size->size(); ++i) { *(kernel_size->begin() + i) = static_cast(data_dims[i]); @@ -101,7 +101,7 @@ inline void UpdateKernelSize(std::vector* kernel_size, template void Pool2dKernel(const Context& dev_ctx, - const phi::DenseTensor& in_x, + const DenseTensor& in_x, const phi::IntArray& kernel_size, const std::vector& strides_t_64, const std::vector& paddings_t_64, @@ -112,7 +112,7 @@ void Pool2dKernel(const Context& dev_ctx, bool global_pooling, bool adaptive, const std::string& padding_algorithm, - phi::DenseTensor* out) { + DenseTensor* out) { std::vector strides_t = std::vector(strides_t_64.begin(), strides_t_64.end()); std::vector paddings_t = @@ -131,7 +131,7 @@ void Pool2dKernel(const Context& dev_ctx, int64_t out_h = out_dims[2]; int64_t out_w = out_dims[3]; auto in_x_dims = in_x.dims(); - phi::DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); + DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); if (channel_last) { cnnl_layout = CNNL_LAYOUT_NHWC; @@ -170,7 +170,7 @@ void Pool2dKernel(const Context& dev_ctx, std::vector perm{0, 2, 3, 1}; TransposeFromMLUTensor( dev_ctx, perm, &in_x, &trans_in_x, true /*need_reshape_or_alloc*/); - phi::DDim trans_out_dims = + DDim trans_out_dims = phi::make_ddim({out_dims[0], out_dims[2], out_dims[3], out_dims[1]}); trans_out.Resize(trans_out_dims); dev_ctx.template Alloc(&trans_out); @@ -264,9 +264,9 @@ void Pool2dKernel(const Context& dev_ctx, template void Pool2dGradKernel(const Context& dev_ctx, - const phi::DenseTensor& in_x, - const phi::DenseTensor& out, - const phi::DenseTensor& out_grad, + const DenseTensor& in_x, + const DenseTensor& out, + const DenseTensor& out_grad, const phi::IntArray& kernel_size, const std::vector& strides_t_64, const std::vector& paddings_t_64, @@ -277,7 +277,7 @@ void Pool2dGradKernel(const Context& dev_ctx, bool global_pooling, bool adaptive, const std::string& padding_algorithm, - phi::DenseTensor* in_x_grad) { + DenseTensor* in_x_grad) { std::vector strides_t = std::vector(strides_t_64.begin(), strides_t_64.end()); std::vector paddings_t = @@ -292,7 +292,7 @@ void Pool2dGradKernel(const Context& dev_ctx, const bool channel_last = data_format == "NHWC"; auto in_x_dims = in_x.dims(); - phi::DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); + DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); if (channel_last) { data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); } @@ -328,10 +328,10 @@ void Pool2dGradKernel(const Context& dev_ctx, &trans_out_grad, true /*need_reshape_or_alloc*/); auto in_x_grad_dims = in_x_grad->dims(); - phi::DDim trans_in_grad_dims = phi::make_ddim({in_x_grad_dims[0], - in_x_grad_dims[2], - in_x_grad_dims[3], - in_x_grad_dims[1]}); + DDim trans_in_grad_dims = phi::make_ddim({in_x_grad_dims[0], + in_x_grad_dims[2], + in_x_grad_dims[3], + in_x_grad_dims[1]}); 
trans_in_x_grad.Resize(trans_in_grad_dims); dev_ctx.template Alloc(&trans_in_x_grad); } diff --git a/backends/mlu/kernels/prior_box_kernel.cc b/backends/mlu/kernels/prior_box_kernel.cc index 6e121f9beac..6f4485edd4e 100644 --- a/backends/mlu/kernels/prior_box_kernel.cc +++ b/backends/mlu/kernels/prior_box_kernel.cc @@ -19,8 +19,8 @@ namespace custom_kernel { template void PriorBoxKernel(const Context& dev_ctx, - const phi::DenseTensor& input, - const phi::DenseTensor& image, + const DenseTensor& input, + const DenseTensor& image, const std::vector& min_sizes, const std::vector& max_sizes, const std::vector& aspect_ratios, @@ -31,8 +31,8 @@ void PriorBoxKernel(const Context& dev_ctx, float step_h, float offset, bool min_max_aspect_ratios_order, - phi::DenseTensor* out, - phi::DenseTensor* var) { + DenseTensor* out, + DenseTensor* var) { int im_width = image.dims()[3]; int im_height = image.dims()[2]; @@ -41,22 +41,22 @@ void PriorBoxKernel(const Context& dev_ctx, std::vector new_aspect_ratios; phi::ExpandAspectRatios(aspect_ratios, flip, &new_aspect_ratios); - phi::DenseTensor ratios; + DenseTensor ratios; TensorFromVector(dev_ctx, new_aspect_ratios, dev_ctx, &ratios); dev_ctx.Wait(); MLUOpTensorDesc new_aspect_ratios_desc(ratios); - phi::DenseTensor min; + DenseTensor min; TensorFromVector(dev_ctx, min_sizes, dev_ctx, &min); dev_ctx.Wait(); MLUOpTensorDesc min_sizes_desc(min); - phi::DenseTensor max; + DenseTensor max; TensorFromVector(dev_ctx, max_sizes, dev_ctx, &max); dev_ctx.Wait(); MLUOpTensorDesc max_sizes_desc(max); - phi::DenseTensor var_tensor; + DenseTensor var_tensor; TensorFromVector(dev_ctx, variances, dev_ctx, &var_tensor); dev_ctx.Wait(); MLUOpTensorDesc variances_attr_desc(var_tensor); diff --git a/backends/mlu/kernels/prod_kernel.cc b/backends/mlu/kernels/prod_kernel.cc index 3839a81ea49..4d68a53c403 100644 --- a/backends/mlu/kernels/prod_kernel.cc +++ b/backends/mlu/kernels/prod_kernel.cc @@ -18,21 +18,21 @@ namespace custom_kernel { template void ProdKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& axes, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { MLUReduceOp( dev_ctx, x, axes.GetData(), keep_dim, reduce_all, "reduce_prod", out); } template void ProdInferKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; custom_kernel::ProdKernel(dev_ctx, x, dims, keep_dim, reduce_all, out); } diff --git a/backends/mlu/kernels/randperm_kernel.cc b/backends/mlu/kernels/randperm_kernel.cc index afd89c11090..d97af4f91f6 100644 --- a/backends/mlu/kernels/randperm_kernel.cc +++ b/backends/mlu/kernels/randperm_kernel.cc @@ -22,7 +22,7 @@ void RandpermRawKernel(const Context& dev_ctx, int n, phi::DataType dtype, unsigned int seed, - phi::DenseTensor* out) { + DenseTensor* out) { std::shared_ptr engine; if (seed) { @@ -40,7 +40,7 @@ void RandpermRawKernel(const Context& dev_ctx, std::shuffle(out_data, out_data + n, *engine); } else { dev_ctx.template Alloc(out); - phi::DenseTensor tmp_tensor; + DenseTensor tmp_tensor; tmp_tensor.Resize(phi::make_ddim({n})); T* tmp_data = dev_ctx.template HostAlloc(&tmp_tensor); for (int i = 0; i < n; ++i) { @@ -55,7 +55,7 @@ template void RandpermKernel(const Context& dev_ctx, int n, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { custom_kernel::RandpermRawKernel(dev_ctx, n, dtype, 0, out); } diff --git 
a/backends/mlu/kernels/reduce_all_kernel.cc b/backends/mlu/kernels/reduce_all_kernel.cc index e9d78276e6a..943bcd70052 100644 --- a/backends/mlu/kernels/reduce_all_kernel.cc +++ b/backends/mlu/kernels/reduce_all_kernel.cc @@ -18,10 +18,10 @@ namespace custom_kernel { template void AllKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); bool reduce_all = dims.size() == 0 || static_cast(dims.size()) == x.dims().size(); diff --git a/backends/mlu/kernels/reduce_any_kernel.cc b/backends/mlu/kernels/reduce_any_kernel.cc index f7d020fa98e..acb1383d346 100644 --- a/backends/mlu/kernels/reduce_any_kernel.cc +++ b/backends/mlu/kernels/reduce_any_kernel.cc @@ -18,10 +18,10 @@ namespace custom_kernel { template void AnyKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); bool reduce_all = false; if (dims.size() == 0) { diff --git a/backends/mlu/kernels/reduce_max_kernel.cc b/backends/mlu/kernels/reduce_max_kernel.cc index 40d69b118c8..a7d0118fe19 100644 --- a/backends/mlu/kernels/reduce_max_kernel.cc +++ b/backends/mlu/kernels/reduce_max_kernel.cc @@ -19,11 +19,11 @@ namespace custom_kernel { template void MaxRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& axes, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { Tensor in_t, out_t; auto need_cast_flag = x.dtype() == phi::DataType::INT64 || x.dtype() == phi::DataType::BOOL @@ -71,10 +71,10 @@ void MaxRawKernel(const Context& dev_ctx, template void MaxKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; if (dims.size() == 0) { reduce_all = true; @@ -84,13 +84,13 @@ void MaxKernel(const Context& dev_ctx, template void MaxGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& out, + const DenseTensor& out_grad, const phi::IntArray& reduce_dims_in, bool keep_dim, bool reduce_all, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { auto reduce_dims = reduce_dims_in.GetData(); auto need_cast_for_int64 = x.dtype() == phi::DataType::INT64 ? 
true : false; dev_ctx.template Alloc(x_grad); @@ -160,14 +160,13 @@ void MaxGradKernel(const Context& dev_ctx, tmp_out.Resize(phi::make_ddim(tmp_out_dims_vec)); tmp_out_grad.Resize(phi::make_ddim(tmp_out_dims_vec)); - phi::DenseTensor transformed_out; + DenseTensor transformed_out; if (need_cast_for_int64) { - phi::DenseTensorMeta meta = {phi::DataType::INT32, - phi::make_ddim(x_dims_vec)}; + DenseTensorMeta meta = {phi::DataType::INT32, phi::make_ddim(x_dims_vec)}; transformed_out.set_meta(meta); dev_ctx.template Alloc(&transformed_out); } else { - phi::DenseTensorMeta meta = {x.dtype(), phi::make_ddim(x_dims_vec)}; + DenseTensorMeta meta = {x.dtype(), phi::make_ddim(x_dims_vec)}; transformed_out.set_meta(meta); dev_ctx.template Alloc(&transformed_out); } @@ -180,14 +179,13 @@ void MaxGradKernel(const Context& dev_ctx, transformed_out_desc.get(), GetBasePtr(&transformed_out)); - phi::DenseTensor transformed_out_grad; + DenseTensor transformed_out_grad; if (need_cast_for_int64) { - phi::DenseTensorMeta meta = {phi::DataType::INT32, - phi::make_ddim(x_dims_vec)}; + DenseTensorMeta meta = {phi::DataType::INT32, phi::make_ddim(x_dims_vec)}; transformed_out_grad.set_meta(meta); dev_ctx.template Alloc(&transformed_out_grad); } else { - phi::DenseTensorMeta meta = {x.dtype(), phi::make_ddim(x_dims_vec)}; + DenseTensorMeta meta = {x.dtype(), phi::make_ddim(x_dims_vec)}; transformed_out_grad.set_meta(meta); dev_ctx.template Alloc(&transformed_out_grad); } @@ -200,7 +198,7 @@ void MaxGradKernel(const Context& dev_ctx, transformed_out_grad_desc.get(), GetBasePtr(&transformed_out_grad)); // compare - phi::DenseTensor equal_cond; + DenseTensor equal_cond; equal_cond.Resize(x_grad->dims()); dev_ctx.template Alloc(&equal_cond); MLUCnnlTensorDesc x_desc(tmp_x); @@ -215,7 +213,7 @@ void MaxGradKernel(const Context& dev_ctx, GetBasePtr(&equal_cond)); // select - phi::DenseTensor t_zero; + DenseTensor t_zero; t_zero.Resize(x_grad->dims()); if (need_cast_for_int64) { dev_ctx.template Alloc(&t_zero); diff --git a/backends/mlu/kernels/reduce_mean_kernel.cc b/backends/mlu/kernels/reduce_mean_kernel.cc index fbda1f018b9..e2e74e3a5ec 100644 --- a/backends/mlu/kernels/reduce_mean_kernel.cc +++ b/backends/mlu/kernels/reduce_mean_kernel.cc @@ -18,11 +18,11 @@ namespace custom_kernel { template void MeanRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& axes, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { if (x.dims().size() == 0) { TensorCopy(dev_ctx, x, false, out); return; @@ -33,22 +33,22 @@ void MeanRawKernel(const Context& dev_ctx, template void MeanKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; custom_kernel::MeanRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out); } template void MeanGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& out_grad, const phi::IntArray& axes, bool keep_dim, bool reduce_all, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { dev_ctx.template Alloc(x_grad); if (x.dims().size() == 0) { diff --git a/backends/mlu/kernels/reduce_min_kernel.cc b/backends/mlu/kernels/reduce_min_kernel.cc index f4f3b50a58c..24373c467af 100644 --- a/backends/mlu/kernels/reduce_min_kernel.cc +++ b/backends/mlu/kernels/reduce_min_kernel.cc @@ -18,21 +18,21 @@ namespace 
custom_kernel { template void MinRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& axes, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { MLUReduceOp( dev_ctx, x, axes.GetData(), keep_dim, reduce_all, "reduce_min", out); } template void MinKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; if (dims.size() == 0) { reduce_all = true; diff --git a/backends/mlu/kernels/reduce_sum_kernel.cc b/backends/mlu/kernels/reduce_sum_kernel.cc index 4783468a417..c4bbad0b264 100644 --- a/backends/mlu/kernels/reduce_sum_kernel.cc +++ b/backends/mlu/kernels/reduce_sum_kernel.cc @@ -20,12 +20,12 @@ namespace custom_kernel { template void SumRawKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& axes, bool keep_dim, bool reduce_all, phi::DataType out_dtype, - phi::DenseTensor* out) { + DenseTensor* out) { Tensor in_t, out_t; auto need_cast_for_int64 = x.dtype() == phi::DataType::INT64 || x.dtype() == phi::DataType::BOOL @@ -74,11 +74,11 @@ void SumRawKernel(const Context& dev_ctx, template void SumKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& dims, phi::DataType out_dtype, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; if (dims.size() == 0) { reduce_all = true; @@ -89,12 +89,12 @@ void SumKernel(const Context& dev_ctx, template void SumGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& out_grad, const phi::IntArray& dims_array, bool keep_dim, bool reduce_all, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { auto reduce_dims = dims_array.GetData(); dev_ctx.template Alloc(x_grad); @@ -127,7 +127,7 @@ void SumGradKernel(const Context& dev_ctx, if (x_grad->dtype() == out_grad.dtype()) { tmp_out = out_grad; } else { - phi::DenseTensorMeta meta = {x_grad->dtype(), out_grad.dims()}; + DenseTensorMeta meta = {x_grad->dtype(), out_grad.dims()}; tmp_out.set_meta(meta); dev_ctx.template Alloc(&tmp_out); diff --git a/backends/mlu/kernels/rnn_kernel.cc b/backends/mlu/kernels/rnn_kernel.cc index c2eaab83a5c..fec20891860 100644 --- a/backends/mlu/kernels/rnn_kernel.cc +++ b/backends/mlu/kernels/rnn_kernel.cc @@ -50,10 +50,10 @@ void reset_parameter_vector( template void RnnKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const std::vector& pre_state, - const std::vector& weight_list, - const paddle::optional& sequence_length, + const DenseTensor& x, + const std::vector& pre_state, + const std::vector& weight_list, + const paddle::optional& sequence_length, float dropout_prob, bool is_bidirec, int input_size, @@ -62,10 +62,10 @@ void RnnKernel(const Context& dev_ctx, const std::string& mode, int seed, bool is_test, - phi::DenseTensor* out, - phi::DenseTensor* dropout_state, - std::vector state, - phi::DenseTensor* reserve) { + DenseTensor* out, + DenseTensor* dropout_state, + std::vector state, + DenseTensor* reserve) { // Input auto init_h = pre_state[0]; // -> hx auto init_c = pre_state[1]; // -> cx @@ -331,15 +331,15 @@ void RnnKernel(const Context& dev_ctx, template void RnnGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const std::vector& pre_state, - const std::vector& weight_list, - const 
paddle::optional& sequence_length, - const phi::DenseTensor& out, - const phi::DenseTensor& dropout_state, - const phi::DenseTensor& reserve, - const phi::DenseTensor& out_grad, - const std::vector& state_grad, + const DenseTensor& x, + const std::vector& pre_state, + const std::vector& weight_list, + const paddle::optional& sequence_length, + const DenseTensor& out, + const DenseTensor& dropout_state, + const DenseTensor& reserve, + const DenseTensor& out_grad, + const std::vector& state_grad, float dropout_prob, bool is_bidirec, int input_size, @@ -348,9 +348,9 @@ void RnnGradKernel(const Context& dev_ctx, const std::string& mode, int seed, bool is_test, - phi::DenseTensor* x_grad, - std::vector pre_state_grad, - std::vector weight_grad_list) { + DenseTensor* x_grad, + std::vector pre_state_grad, + std::vector weight_grad_list) { C_Stream stream = static_cast(dev_ctx.stream()); PADDLE_ENFORCE_EQ( diff --git a/backends/mlu/kernels/roi_align_kernel.cc b/backends/mlu/kernels/roi_align_kernel.cc index 6f237d3ef95..5593e87f011 100644 --- a/backends/mlu/kernels/roi_align_kernel.cc +++ b/backends/mlu/kernels/roi_align_kernel.cc @@ -18,18 +18,18 @@ namespace custom_kernel { template void RoiAlignKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& boxes, - const paddle::optional& boxes_num, + const DenseTensor& x, + const DenseTensor& boxes, + const paddle::optional& boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); - phi::DenseTensor out_tensor(*out); - phi::DenseTensorMeta out_meta = { + DenseTensor out_tensor(*out); + DenseTensorMeta out_meta = { out->dtype(), out->dims(), phi::DataLayout::kNHWC}; out_tensor.set_meta(out_meta); const auto& in_dims = x.dims(); @@ -169,16 +169,16 @@ void RoiAlignKernel(const Context& dev_ctx, template void RoiAlignGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& boxes, - const paddle::optional& boxes_num, - const phi::DenseTensor& out_grad, + const DenseTensor& x, + const DenseTensor& boxes, + const paddle::optional& boxes_num, + const DenseTensor& out_grad, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned, - phi::DenseTensor* dx) { + DenseTensor* dx) { int rois_num = boxes.dims()[0]; if (!dx) { return; diff --git a/backends/mlu/kernels/roll_kernel.cc b/backends/mlu/kernels/roll_kernel.cc index 1a1caae0777..78cda7012db 100644 --- a/backends/mlu/kernels/roll_kernel.cc +++ b/backends/mlu/kernels/roll_kernel.cc @@ -19,10 +19,10 @@ namespace custom_kernel { template void RollKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& shifts, const std::vector& axis, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); std::vector shifts_data(shifts.GetData().begin(), shifts.GetData().end()); @@ -43,11 +43,11 @@ void RollKernel(const Context& dev_ctx, template void RollGradKernel(const Context& dev_ctx, - const phi::DenseTensor& x UNUSED, - const phi::DenseTensor& out_grad, + const DenseTensor& x UNUSED, + const DenseTensor& out_grad, const phi::IntArray& shifts, const std::vector& axis, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { std::vector shifts_data(shifts.GetData().begin(), shifts.GetData().end()); std::vector axis_int32(axis.begin(), axis.end()); diff --git a/backends/mlu/kernels/scale_kernel.cc b/backends/mlu/kernels/scale_kernel.cc 
index c8ec354933a..cb5e822d977 100644 --- a/backends/mlu/kernels/scale_kernel.cc +++ b/backends/mlu/kernels/scale_kernel.cc @@ -19,14 +19,14 @@ namespace custom_kernel { template void ScaleKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::Scalar& in_scale, - const phi::Scalar& bias, + const DenseTensor& x, + const Scalar& in_scale, + const Scalar& bias, bool bias_after_scale, - phi::DenseTensor* out) { + DenseTensor* out) { // cnnl require input, scale, bias with same type. And all in device side. auto scale = in_scale.to(); - phi::DenseTensor scale_tensor; + DenseTensor scale_tensor; scale_tensor.Resize({1}); dev_ctx.template Alloc(&scale_tensor); @@ -37,7 +37,7 @@ void ScaleKernel(const Context& dev_ctx, scale_desc.get(), GetBasePtr(&scale_tensor)); - phi::DenseTensor bias_tensor; + DenseTensor bias_tensor; bias_tensor.Resize({1}); dev_ctx.template Alloc(&bias_tensor); @@ -67,7 +67,7 @@ void ScaleKernel(const Context& dev_ctx, output_desc.get(), GetBasePtr(out)); } else { - phi::DenseTensor new_bias_tensor; + DenseTensor new_bias_tensor; new_bias_tensor.Resize({1}); dev_ctx.template Alloc(&new_bias_tensor); diff --git a/backends/mlu/kernels/scatter_kernel.cc b/backends/mlu/kernels/scatter_kernel.cc index e97fd471887..c43f8b94bb2 100644 --- a/backends/mlu/kernels/scatter_kernel.cc +++ b/backends/mlu/kernels/scatter_kernel.cc @@ -19,11 +19,11 @@ namespace custom_kernel { template void ScatterKernel(const Context &dev_ctx, - const phi::DenseTensor &x, - const phi::DenseTensor &index, - const phi::DenseTensor &updates, + const DenseTensor &x, + const DenseTensor &index, + const DenseTensor &updates, bool overwrite, - phi::DenseTensor *out) { + DenseTensor *out) { dev_ctx.template Alloc(out); MLUCnnlTensorDesc x_desc(x); MLUCnnlTensorDesc index_desc(index); @@ -78,10 +78,10 @@ void ScatterKernel(const Context &dev_ctx, template void ScatterNdAddKernel(const Context &dev_ctx, - const phi::DenseTensor &x, - const phi::DenseTensor &index, - const phi::DenseTensor &updates, - phi::DenseTensor *out) { + const DenseTensor &x, + const DenseTensor &index, + const DenseTensor &updates, + DenseTensor *out) { dev_ctx.template Alloc(out); cnnlScatterNdMode_t mode = CNNL_SCATTERND_ADD; const auto &index_type = index.dtype(); diff --git a/backends/mlu/kernels/set_value_kernel.cc b/backends/mlu/kernels/set_value_kernel.cc index 82add43d75d..ad7806c94d2 100644 --- a/backends/mlu/kernels/set_value_kernel.cc +++ b/backends/mlu/kernels/set_value_kernel.cc @@ -20,7 +20,7 @@ namespace custom_kernel { template void SetValueKernel(const Context& dev_ctx, - const phi::DenseTensor& x, + const DenseTensor& x, const phi::IntArray& starts, const phi::IntArray& ends, const phi::IntArray& steps, @@ -28,8 +28,8 @@ void SetValueKernel(const Context& dev_ctx, const std::vector& decrease_axes, const std::vector& none_axes, const std::vector& shape, - const std::vector& values, - phi::DenseTensor* out) { + const std::vector& values, + DenseTensor* out) { dev_ctx.template Alloc(out); std::vector starts_local = starts.GetData(); @@ -90,13 +90,13 @@ void SetValueKernel(const Context& dev_ctx, for (const auto& val : values) { assgin_values.push_back(val.to()); } - phi::DenseTensor value_t; + DenseTensor value_t; value_t.Resize(phi::make_ddim(shape)); custom_kernel::TensorFromVector(dev_ctx, assgin_values, dev_ctx, &value_t); dev_ctx.Wait(); value_t.Resize(phi::make_ddim(shape)); - phi::DenseTensor value_temp; + DenseTensor value_temp; if (slice_dims_for_assign == value_t.dims()) { value_temp = value_t; } 
else { @@ -113,11 +113,11 @@ void SetValueKernel(const Context& dev_ctx, int64_t input_numel = phi::product(in_dims); int64_t value_numel = phi::product(value_temp.dims()); - phi::DenseTensor in_temp, out_temp, val_temp, index_out; + DenseTensor in_temp, out_temp, val_temp, index_out; int64_t stride_step = phi::product(in_dims); std::vector index_indices(stride_step); std::iota(index_indices.begin(), index_indices.end(), 0); - phi::DenseTensor index_temp; + DenseTensor index_temp; in_temp = x; val_temp = value_temp; custom_kernel::TensorFromVector(dev_ctx, index_indices, dev_ctx, &index_temp); @@ -185,15 +185,15 @@ void SetValueKernel(const Context& dev_ctx, template void SetTensorValueKernel(const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& value, + const DenseTensor& x, + const DenseTensor& value, const phi::IntArray& starts, const phi::IntArray& ends, const phi::IntArray& steps, const std::vector& axes, const std::vector& decrease_axes, const std::vector& none_axes, - phi::DenseTensor* out) { + DenseTensor* out) { dev_ctx.template Alloc(out); std::vector starts_local = starts.GetData(); @@ -249,7 +249,7 @@ void SetTensorValueKernel(const Context& dev_ctx, strides_indices[axis_index] = static_cast(steps_local[i]); } - phi::DenseTensor value_temp; + DenseTensor value_temp; if (slice_dims_for_assign == value.dims()) { value_temp = value; } else { @@ -266,11 +266,11 @@ void SetTensorValueKernel(const Context& dev_ctx, int64_t input_numel = phi::product(in_dims); int64_t value_numel = phi::product(value_temp.dims()); - phi::DenseTensor in_temp, out_temp, val_temp, index_out; + DenseTensor in_temp, out_temp, val_temp, index_out; int64_t stride_step = phi::product(in_dims); std::vector index_indices(stride_step); std::iota(index_indices.begin(), index_indices.end(), 0); - phi::DenseTensor index_temp; + DenseTensor index_temp; in_temp = x; val_temp = value_temp; custom_kernel::TensorFromVector(dev_ctx, index_indices, dev_ctx, &index_temp); @@ -391,7 +391,7 @@ void SetTensorValueKernel(const Context& dev_ctx, if (GetBasePtr(&x) != GetBasePtr(out)) { // a workaround method to avoid output incorrection since the op creates a // tensor while not using it in static graph. 
- auto x_rm_const = const_cast<phi::DenseTensor&>(x); + auto x_rm_const = const_cast<DenseTensor&>(x); TensorCopy(dev_ctx, *out, false, &x_rm_const); } } diff --git a/backends/mlu/kernels/sgd_kernel.cc b/backends/mlu/kernels/sgd_kernel.cc index 99f16fd0a5d..2279f399468 100644 --- a/backends/mlu/kernels/sgd_kernel.cc +++ b/backends/mlu/kernels/sgd_kernel.cc @@ -19,13 +19,13 @@ namespace custom_kernel { template void SGDKernel(const Context& dev_ctx, - const phi::DenseTensor& param_var, - const phi::DenseTensor& learning_rate, - const phi::DenseTensor& grad_var, - const paddle::optional& master_param, + const DenseTensor& param_var, + const DenseTensor& learning_rate, + const DenseTensor& grad_var, + const paddle::optional& master_param, bool multi_precision, - phi::DenseTensor* param_out, - phi::DenseTensor* master_param_out) { + DenseTensor* param_out, + DenseTensor* master_param_out) { dev_ctx.template Alloc(param_out); MLUCnnlTensorDesc grad_desc(grad_var); diff --git a/backends/mlu/kernels/sigmoid_cross_with_logits_kernel.cc b/backends/mlu/kernels/sigmoid_cross_with_logits_kernel.cc index 010f308e004..5082e2cc538 100644 --- a/backends/mlu/kernels/sigmoid_cross_with_logits_kernel.cc +++ b/backends/mlu/kernels/sigmoid_cross_with_logits_kernel.cc @@ -38,12 +38,12 @@ void CheckAttrs(bool normalize, int ignore_index) { template void SigmoidCrossEntropyWithLogitsKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& label, - const paddle::optional& pos_weight, + const DenseTensor& x, + const DenseTensor& label, + const paddle::optional& pos_weight, bool normalize, int ignore_index, - phi::DenseTensor* out) { + DenseTensor* out) { CheckAttrs(normalize, ignore_index); const auto* t_pos_weight = pos_weight.get_ptr(); @@ -88,13 +88,13 @@ void SigmoidCrossEntropyWithLogitsKernel( template void SigmoidCrossEntropyWithLogitsGradKernel( const Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& label, - const paddle::optional& pos_weight, - const phi::DenseTensor& dout, + const DenseTensor& x, + const DenseTensor& label, + const paddle::optional& pos_weight, + const DenseTensor& dout, bool normalize, int ignore_index, - phi::DenseTensor* dx) { + DenseTensor* dx) { CheckAttrs(normalize, ignore_index); const auto* t_pos_weight = pos_weight.get_ptr(); diff --git a/backends/mlu/kernels/slice_kernel.cc b/backends/mlu/kernels/slice_kernel.cc index 3396316c2fb..15cee733295 100644 --- a/backends/mlu/kernels/slice_kernel.cc +++ b/backends/mlu/kernels/slice_kernel.cc @@ -175,7 +175,7 @@ void normalize_interval( } } -void UpdateAttr(const phi::DDim& in_dims, +void UpdateAttr(const DDim& in_dims, const std::vector axes, const std::vector starts, const std::vector ends, @@ -208,7 +208,7 @@ } template -inline void CheckAndUpdateSliceAttrs(const phi::DDim in_dims, +inline void CheckAndUpdateSliceAttrs(const DDim in_dims, const std::vector& axes, std::vector* starts, std::vector* ends, @@ -257,13 +257,13 @@ inline void CheckAndUpdateSliceAttrs(const phi::DDim in_dims, } template -inline phi::DDim GetSliceDims(const phi::DDim in_dims, - const std::vector& axes, - const std::vector& starts, - const std::vector& ends, - std::vector* steps = nullptr, - std::vector* infer_flags = nullptr) { - phi::DDim slice_dims(in_dims); +inline DDim GetSliceDims(const DDim in_dims, + const std::vector& axes, + const std::vector& starts, + const std::vector& ends, + std::vector* steps = nullptr, + std::vector* infer_flags = nullptr) { + DDim slice_dims(in_dims); for (size_t i
@@ -286,10 +286,10 @@ inline phi::DDim GetSliceDims(const phi::DDim in_dims,
 }

 template <typename T = int64_t>
-inline phi::DDim GetDecreasedDims(const phi::DDim slice_dims,
-                                  const std::vector<T>& decrease_axes,
-                                  std::vector<T>* infer_flags = nullptr) {
-  phi::DDim decreased_dims(slice_dims);
+inline DDim GetDecreasedDims(const DDim slice_dims,
+                             const std::vector<T>& decrease_axes,
+                             std::vector<T>* infer_flags = nullptr) {
+  DDim decreased_dims(slice_dims);
   std::vector<uint8_t> decrease_flag(slice_dims.size(), 0);
   if (decrease_axes.size() > 0) {
     for (size_t i = 0; i < decrease_axes.size(); ++i) {
@@ -318,13 +318,13 @@ inline phi::DDim GetDecreasedDims(const phi::DDim slice_dims,

 template <typename T, typename Context>
 void SliceRawKernel(const Context& dev_ctx,
-                    const phi::DenseTensor& x,
+                    const DenseTensor& x,
                     const std::vector<int64_t>& axes_t,
                     const phi::IntArray& starts_array,
                     const phi::IntArray& ends_array,
                     const std::vector<int64_t>& infer_flags,
                     const std::vector<int64_t>& decrease_axis,
-                    phi::DenseTensor* out) {
+                    DenseTensor* out) {
   std::vector<int> axes(axes_t.begin(), axes_t.end());
   auto starts_int = starts_array.GetData();
   auto ends_int = ends_array.GetData();
@@ -402,14 +402,14 @@ void SliceRawKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void SliceGradRawKernel(const Context& dev_ctx,
-                        const phi::DenseTensor& x,
-                        const phi::DenseTensor& out_grad,
+                        const DenseTensor& x,
+                        const DenseTensor& out_grad,
                         const std::vector<int64_t>& axes_t,
                         const phi::IntArray& starts_array,
                         const phi::IntArray& ends_array,
                         const std::vector<int64_t>& infer_flags,
                         const std::vector<int64_t>& decrease_axis,
-                        phi::DenseTensor* x_grad) {
+                        DenseTensor* x_grad) {
   std::vector<int> axes(axes_t.begin(), axes_t.end());
   auto starts_int = starts_array.GetData();
   auto ends_int = ends_array.GetData();
diff --git a/backends/mlu/kernels/softmax_kernel.cc b/backends/mlu/kernels/softmax_kernel.cc
index d237e2e0adc..c7439342634 100644
--- a/backends/mlu/kernels/softmax_kernel.cc
+++ b/backends/mlu/kernels/softmax_kernel.cc
@@ -19,9 +19,9 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void SoftmaxKernel(const Context& dev_ctx,
-                   const phi::DenseTensor& x,
+                   const DenseTensor& x,
                    int axis,
-                   phi::DenseTensor* out) {
+                   DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);

   const int rank = x.dims().size();
@@ -65,10 +65,10 @@ void SoftmaxKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void SoftmaxGradKernel(const Context& dev_ctx,
-                       const phi::DenseTensor& out,
-                       const phi::DenseTensor& out_grad,
+                       const DenseTensor& out,
+                       const DenseTensor& out_grad,
                        int axis,
-                       phi::DenseTensor* x_grad) {
+                       DenseTensor* x_grad) {
   dev_ctx.template Alloc<T>(x_grad);

   const int rank = out.dims().size();
diff --git a/backends/mlu/kernels/split_kernel.cc b/backends/mlu/kernels/split_kernel.cc
index 58d193286e9..0a0ee373642 100644
--- a/backends/mlu/kernels/split_kernel.cc
+++ b/backends/mlu/kernels/split_kernel.cc
@@ -18,10 +18,10 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void SplitKernel(const Context& dev_ctx,
-                 const phi::DenseTensor& x,
+                 const DenseTensor& x,
                  const phi::IntArray& num_or_sections,
-                 const phi::Scalar& axis_scalar,
-                 std::vector<phi::DenseTensor*> outs) {
+                 const Scalar& axis_scalar,
+                 std::vector<DenseTensor*> outs) {
   // init parameter
   if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) {
     std::vector<phi::MetaTensor> out_metas;
@@ -71,10 +71,10 @@ void SplitKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void SplitWithNumKernel(const Context& dev_ctx,
-                        const phi::DenseTensor& x,
+                        const DenseTensor& x,
                         int num,
-                        const phi::Scalar& axis_scalar,
-                        std::vector<phi::DenseTensor*> outs) {
+                        const Scalar& axis_scalar,
+                        std::vector<DenseTensor*> outs) {
   int axis_value = axis_scalar.to<int>();
   auto input_axis_dim = x.dims().at(axis_value);
   std::vector<int64_t> sections_vec;
diff --git a/backends/mlu/kernels/squared_l2_norm_kernel.cc b/backends/mlu/kernels/squared_l2_norm_kernel.cc
index f6492a5d769..695bb39cf3a 100644
--- a/backends/mlu/kernels/squared_l2_norm_kernel.cc
+++ b/backends/mlu/kernels/squared_l2_norm_kernel.cc
@@ -18,8 +18,8 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void SquaredL2NormKernel(const Context& dev_ctx,
-                         const phi::DenseTensor& x,
-                         phi::DenseTensor* out) {
+                         const DenseTensor& x,
+                         DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);

   MLUCnnlTensorDesc input_desc(x);
@@ -29,11 +29,11 @@ void SquaredL2NormKernel(const Context& dev_ctx,
   MLUCnnl::L2Loss(dev_ctx, input_desc.get(), GetBasePtr(&x), GetBasePtr(out));

   // do mul
-  phi::DenseTensor scale_tensor;
+  DenseTensor scale_tensor;
   scale_tensor.Resize({1});
   dev_ctx.template Alloc<T>(&scale_tensor);

-  phi::DenseTensor bias_tensor;
+  DenseTensor bias_tensor;
   bias_tensor.Resize({1});
   dev_ctx.template Alloc<T>(&bias_tensor);

@@ -56,9 +56,9 @@ void SquaredL2NormKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void SquaredL2NormGradKernel(const Context& dev_ctx,
-                             const phi::DenseTensor& x,
-                             const phi::DenseTensor& out_grad,
-                             phi::DenseTensor* x_grad) {
+                             const DenseTensor& x,
+                             const DenseTensor& out_grad,
+                             DenseTensor* x_grad) {
   PADDLE_ENFORCE_EQ(
       out_grad.numel(),
       1,
@@ -97,11 +97,11 @@ void SquaredL2NormGradKernel(const Context& dev_ctx,
                     ToCnnlDataType(x.dtype()));

   // mul
-  phi::DenseTensor scale_tensor;
+  DenseTensor scale_tensor;
   scale_tensor.Resize({1});
   dev_ctx.template Alloc<T>(&scale_tensor);

-  phi::DenseTensor bias_tensor;
+  DenseTensor bias_tensor;
   bias_tensor.Resize({1});
   dev_ctx.template Alloc<T>(&bias_tensor);
diff --git a/backends/mlu/kernels/squeeze_kernel.cc b/backends/mlu/kernels/squeeze_kernel.cc
index 5b05a06a9dd..d33d2bd706a 100644
--- a/backends/mlu/kernels/squeeze_kernel.cc
+++ b/backends/mlu/kernels/squeeze_kernel.cc
@@ -17,9 +17,9 @@

 namespace custom_kernel {

-phi::DDim GetOutputShape(const std::vector<int> squeeze_dims,
-                         const phi::DDim& in_dims,
-                         bool is_runtime) {
+DDim GetOutputShape(const std::vector<int> squeeze_dims,
+                    const DDim& in_dims,
+                    bool is_runtime) {
   size_t num_squeeze_dims = squeeze_dims.size();
   std::vector<bool> should_squeeze(in_dims.size(), false);
@@ -83,9 +83,9 @@ phi::DDim GetOutputShape(const std::vector<int> squeeze_dims,

 template <typename T, typename Context>
 void SqueezeKernel(const Context& dev_ctx,
-                   const phi::DenseTensor& x,
+                   const DenseTensor& x,
                    const phi::IntArray& axes_int_array,
-                   phi::DenseTensor* out) {
+                   DenseTensor* out) {
   auto stream = dev_ctx.stream();
   std::vector<int> axes(axes_int_array.GetData().begin(),
                         axes_int_array.GetData().end());
@@ -100,19 +100,19 @@ void SqueezeKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void SqueezeWithXShapeKernel(const Context& dev_ctx,
-                             const phi::DenseTensor& x,
+                             const DenseTensor& x,
                              const phi::IntArray& axes_int_array,
-                             phi::DenseTensor* out,
-                             phi::DenseTensor* xshape) {
+                             DenseTensor* out,
+                             DenseTensor* xshape) {
   custom_kernel::SqueezeKernel<T, Context>(dev_ctx, x, axes_int_array, out);
 }

 template <typename T, typename Context>
 void SqueezeGradKernel(const Context& dev_ctx,
-                       const phi::DenseTensor& x,
-                       const phi::DenseTensor& dout,
+                       const DenseTensor& x,
+                       const DenseTensor& dout,
                        const phi::IntArray& axes_int_array,
-                       phi::DenseTensor* dx) {
+                       DenseTensor* dx) {
   auto stream = dev_ctx.stream();

   auto x_dims = dx->dims();
diff --git a/backends/mlu/kernels/stack_kernel.cc b/backends/mlu/kernels/stack_kernel.cc
index 7f4a8733495..c289637126b 100644
--- a/backends/mlu/kernels/stack_kernel.cc
+++ b/backends/mlu/kernels/stack_kernel.cc
@@ -19,9 +19,9 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void StackKernel(const Context& dev_ctx,
-                 const std::vector<const phi::DenseTensor*>& x,
+                 const std::vector<const DenseTensor*>& x,
                  int axis,
-                 phi::DenseTensor* y) {
+                 DenseTensor* y) {
   if (axis < 0) axis += (x[0]->dims().size() + 1);
   int num = static_cast<int>(x.size());
diff --git a/backends/mlu/kernels/strided_copy_kernel.cc b/backends/mlu/kernels/strided_copy_kernel.cc
index f4525d2cf86..e6bd38347a8 100644
--- a/backends/mlu/kernels/strided_copy_kernel.cc
+++ b/backends/mlu/kernels/strided_copy_kernel.cc
@@ -19,12 +19,12 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void StridedCopyKernel(const Context& dev_ctx,
-                       const phi::DenseTensor& input,
+                       const DenseTensor& input,
                        const std::vector<int64_t>& dims,
                        const std::vector<int64_t>& out_stride,
                        int64_t offset,
-                       phi::DenseTensor* out) {
-  phi::DenseTensorMeta meta = input.meta();
+                       DenseTensor* out) {
+  DenseTensorMeta meta = input.meta();
   meta.strides = common::make_ddim(out_stride);
   meta.dims = common::make_ddim(dims);
   meta.offset = offset;
diff --git a/backends/mlu/kernels/strided_slice_kernel.cc b/backends/mlu/kernels/strided_slice_kernel.cc
index 6396437f72f..7e7c9e6404f 100644
--- a/backends/mlu/kernels/strided_slice_kernel.cc
+++ b/backends/mlu/kernels/strided_slice_kernel.cc
@@ -22,7 +22,7 @@ static void StridedSliceOutDims(const std::vector<int64_t>& starts,
                                 const std::vector<int64_t>& strides,
                                 const std::vector<int>& axes,
                                 const std::vector<int>& infer_flags,
-                                const phi::DDim in_dims,
+                                const DDim in_dims,
                                 const std::vector<int>& decrease_axis,
                                 int64_t* out_dims_vector,
                                 const size_t size,
@@ -94,7 +94,7 @@ static void StridedSliceFunctor(int64_t* starts,
                                 int64_t* strides,
                                 const int* axes,
                                 int* reverse_axis,
-                                const phi::DDim dims,
+                                const DDim dims,
                                 const std::vector<int>& infer_flags,
                                 const std::vector<int>& decrease_axis,
                                 const size_t size) {
@@ -162,14 +162,14 @@ static void StridedSliceFunctor(int64_t* starts,

 template <typename T, typename Context>
 void StridedSliceCompute(const Context& dev_ctx,
-                         const phi::DenseTensor& x,
+                         const DenseTensor& x,
                          const std::vector<int>& axes,
                          const phi::IntArray& starts_array,
                          const phi::IntArray& ends_array,
                          const phi::IntArray& strides_array,
                          const std::vector<int>& infer_flags,
                          const std::vector<int>& decrease_axis,
-                         phi::DenseTensor* out) {
+                         DenseTensor* out) {
   auto in_dims = x.dims();
   // list
   auto starts = starts_array.GetData();
@@ -188,7 +188,7 @@ void StridedSliceCompute(const Context& dev_ctx,
                       out_dims_vector.data(),
                       axes.size(),
                       false);
-  phi::DDim out_dims(phi::make_ddim(out_dims_vector));
+  DDim out_dims(phi::make_ddim(out_dims_vector));

   // check whether need to reverse (false: stride > 0; true: stride < 0)
   std::vector<int> reverse_vector(starts.size(), 0);
@@ -264,12 +264,12 @@ void StridedSliceCompute(const Context& dev_ctx,
                  GetBasePtr(out));

   if (need_reverse) {
-    phi::DenseTensor out_tmp;
+    DenseTensor out_tmp;
     out_tmp.Resize(out_dims);
     dev_ctx.template Alloc<T>(&out_tmp);
     TensorCopy(dev_ctx, *out, false, &out_tmp);

-    phi::DenseTensor reverse_axis;
+    DenseTensor reverse_axis;
     std::vector<int> reverse_axis_vector;
     for (size_t axis = 0; axis < axes.size(); axis++) {
       if (reverse_vector[axis] == 1) {
@@ -298,14 +298,14 @@ void StridedSliceCompute(const Context& dev_ctx,

 template <typename T, typename Context>
 void StridedSliceRawKernel(const Context& dev_ctx,
-                           const phi::DenseTensor& x,
+                           const DenseTensor& x,
                            const std::vector<int>& axes,
                            const phi::IntArray& starts,
                            const phi::IntArray& ends,
                            const phi::IntArray& strides,
                            const std::vector<int>& infer_flags,
                            const std::vector<int>& decrease_axis,
-                           phi::DenseTensor* out) {
+                           DenseTensor* out) {
   int rank = x.dims().size();
   switch (rank) {
     case 1:
@@ -405,15 +405,15 @@ void StridedSliceRawKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void StridedSliceGradCompute(const Context& dev_ctx,
-                             const phi::DenseTensor& x,
-                             const phi::DenseTensor& out_grad,
+                             const DenseTensor& x,
+                             const DenseTensor& out_grad,
                              const std::vector<int>& axes,
                              const phi::IntArray& starts_array,
                              const phi::IntArray& ends_array,
                              const phi::IntArray& strides_array,
                              const std::vector<int>& infer_flags,
                              const std::vector<int>& decrease_axis,
-                             phi::DenseTensor* x_grad) {
+                             DenseTensor* x_grad) {
   auto input_dims = x.dims();
   x_grad->Resize(input_dims);
   dev_ctx.template Alloc<T>(x_grad);
@@ -471,7 +471,7 @@ void StridedSliceGradCompute(const Context& dev_ctx,
   }

   if (need_reverse) {
-    phi::DenseTensor reverse_axis;
+    DenseTensor reverse_axis;
     std::vector<int> reverse_axis_vector;
     for (size_t axis = 0; axis < axes.size(); axis++) {
       if (reverse_vector[axis] == 1) {
@@ -480,7 +480,7 @@ void StridedSliceGradCompute(const Context& dev_ctx,
     }
     TensorFromVector(dev_ctx, reverse_axis_vector, dev_ctx, &reverse_axis);

-    phi::DenseTensor out_grad_tmp;
+    DenseTensor out_grad_tmp;
     out_grad_tmp.Resize(out_grad.dims());
     dev_ctx.template Alloc<T>(&out_grad_tmp);
     MLUCnnlTensorDesc input_desc(out_grad);
@@ -520,15 +520,15 @@ void StridedSliceGradCompute(const Context& dev_ctx,

 template <typename T, typename Context>
 void StridedSliceRawGradKernel(const Context& dev_ctx,
-                               const phi::DenseTensor& x,
-                               const phi::DenseTensor& out_grad,
+                               const DenseTensor& x,
+                               const DenseTensor& out_grad,
                                const std::vector<int>& axes,
                                const phi::IntArray& starts,
                                const phi::IntArray& ends,
                                const phi::IntArray& strides,
                                const std::vector<int>& infer_flags,
                                const std::vector<int>& decrease_axis,
-                               phi::DenseTensor* x_grad) {
+                               DenseTensor* x_grad) {
   int rank = x.dims().size();

   switch (rank) {
@@ -637,12 +637,12 @@ void StridedSliceRawGradKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void StridedSliceKernel(const Context& dev_ctx,
-                        const phi::DenseTensor& x,
+                        const DenseTensor& x,
                         const std::vector<int>& axes,
                         const phi::IntArray& starts,
                         const phi::IntArray& ends,
                         const phi::IntArray& strides,
-                        phi::DenseTensor* out) {
+                        DenseTensor* out) {
   std::vector<int> infer_flags(axes.size(), 1);
   std::vector<int> decrease_axis;
   custom_kernel::StridedSliceRawKernel<T, Context>(
@@ -651,13 +651,13 @@ void StridedSliceKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void StridedSliceGradKernel(const Context& dev_ctx,
-                            const phi::DenseTensor& x,
-                            const phi::DenseTensor& out_grad,
+                            const DenseTensor& x,
+                            const DenseTensor& out_grad,
                             const std::vector<int>& axes,
                             const phi::IntArray& starts,
                             const phi::IntArray& ends,
                             const phi::IntArray& strides,
-                            phi::DenseTensor* x_grad) {
+                            DenseTensor* x_grad) {
   std::vector<int> infer_flags(axes.size(), 1);
   std::vector<int> decrease_axis;
   custom_kernel::StridedSliceRawGradKernel<T, Context>(dev_ctx,
diff --git a/backends/mlu/kernels/sync_batch_norm_kernel.cc b/backends/mlu/kernels/sync_batch_norm_kernel.cc
index 9189d19848e..2c796eaacfd 100644
--- a/backends/mlu/kernels/sync_batch_norm_kernel.cc
+++ b/backends/mlu/kernels/sync_batch_norm_kernel.cc
@@ -23,7 +23,7 @@ namespace custom_kernel {
 static std::vector<cnnlTensorLayout_t> supported_input_layout = {
     CNNL_LAYOUT_NC, CNNL_LAYOUT_NLC, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NDHWC};

-inline void ExtractNCWHD(const phi::DDim& dims,
+inline void ExtractNCWHD(const DDim& dims,
                          const DataLayout& data_layout,
                          int* N,
                          int* C,
@@ -50,23 +50,23 @@ inline void ExtractNCWHD(const phi::DDim& dims,

 template <typename T, typename Context>
 void SyncBatchNormKernel(const Context& dev_ctx,
-                         const phi::DenseTensor& x,
-                         const phi::DenseTensor& mean,
-                         const phi::DenseTensor& variance,
-                         const phi::DenseTensor& scale,
-                         const phi::DenseTensor& bias,
+                         const DenseTensor& x,
+                         const DenseTensor& mean,
+                         const DenseTensor& variance,
+                         const DenseTensor& scale,
+                         const DenseTensor& bias,
                          bool is_test,
                          float momentum,
                          float epsilon_f,
                          const std::string& data_layout_str,
                          bool use_global_stats,
                          bool trainable_statistics,
-                         phi::DenseTensor* y,
-                         phi::DenseTensor* mean_out,
-                         phi::DenseTensor* variance_out,
-                         phi::DenseTensor* saved_mean,
-                         phi::DenseTensor* saved_variance,
-                         phi::DenseTensor* reserve_space) {
+                         DenseTensor* y,
+                         DenseTensor* mean_out,
+                         DenseTensor* variance_out,
+                         DenseTensor* saved_mean,
+                         DenseTensor* saved_variance,
+                         DenseTensor* reserve_space) {
   const DataLayout layout = StringToDataLayout(data_layout_str);
   PADDLE_ENFORCE_EQ(use_global_stats,
                     false,
@@ -291,24 +291,23 @@ void SyncBatchNormKernel(const Context& dev_ctx,
 }

 template <typename T, typename Context>
-void SyncBatchNormGradKernel(
-    const Context& dev_ctx,
-    const phi::DenseTensor& x,
-    const phi::DenseTensor& scale,
-    const phi::DenseTensor& bias,
-    const phi::DenseTensor& saved_mean,
-    const phi::DenseTensor& saved_variance,
-    const paddle::optional<phi::DenseTensor>& reserve_space,
-    const phi::DenseTensor& y_grad,
-    float momentum,
-    float epsilon_f,
-    const std::string& data_layout_str,
-    bool is_test,
-    bool use_global_stats,
-    bool trainable_statistics,
-    phi::DenseTensor* x_grad,
-    phi::DenseTensor* scale_grad,
-    phi::DenseTensor* bias_grad) {
+void SyncBatchNormGradKernel(const Context& dev_ctx,
+                             const DenseTensor& x,
+                             const DenseTensor& scale,
+                             const DenseTensor& bias,
+                             const DenseTensor& saved_mean,
+                             const DenseTensor& saved_variance,
+                             const paddle::optional<DenseTensor>& reserve_space,
+                             const DenseTensor& y_grad,
+                             float momentum,
+                             float epsilon_f,
+                             const std::string& data_layout_str,
+                             bool is_test,
+                             bool use_global_stats,
+                             bool trainable_statistics,
+                             DenseTensor* x_grad,
+                             DenseTensor* scale_grad,
+                             DenseTensor* bias_grad) {
   const DataLayout layout = StringToDataLayout(data_layout_str);
   const auto& x_dims = x.dims();
   PADDLE_ENFORCE_GE(x_dims.size(),
diff --git a/backends/mlu/kernels/tile_kernel.cc b/backends/mlu/kernels/tile_kernel.cc
index 036b6658f42..fe394f1fd66 100644
--- a/backends/mlu/kernels/tile_kernel.cc
+++ b/backends/mlu/kernels/tile_kernel.cc
@@ -19,10 +19,10 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void TileKernelImpl(const Context& dev_ctx,
-                    const phi::DenseTensor& x,
+                    const DenseTensor& x,
                     std::vector<int64_t> repeat_times,
                     int rank,
-                    phi::DenseTensor* out) {
+                    DenseTensor* out) {
   auto in_dims = x.dims();
   for (size_t i = 0; i < repeat_times.size(); ++i) {
     PADDLE_ENFORCE_GT(
@@ -66,8 +66,8 @@ void TileKernelImpl(const Context& dev_ctx,
       out->Resize({1});
     }
   } else {
-    phi::DDim new_in_dims = phi::make_ddim(vec_in_dims);
-    phi::DDim out_dims(new_in_dims);
+    DDim new_in_dims = phi::make_ddim(vec_in_dims);
+    DDim out_dims(new_in_dims);
     for (size_t i = 0; i < repeat_times.size(); ++i) {
       out_dims[i] *= repeat_times[i];
     }
@@ -82,9 +82,9 @@ void TileKernelImpl(const Context& dev_ctx,

 template <typename T, typename Context>
 void TileKernel(const Context& dev_ctx,
-                const phi::DenseTensor& x,
+                const DenseTensor& x,
                 const phi::IntArray& repeat_times,
-                phi::DenseTensor* out) {
+                DenseTensor* out) {
   int rank = static_cast<int>(x.dims().size());
   PADDLE_ENFORCE_GE(rank,
                     0,
@@ -123,10 +123,10 @@ void TileKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void TileGradKernel(const Context& dev_ctx,
-                    const phi::DenseTensor& x,
-                    const phi::DenseTensor& out_grad,
+                    const DenseTensor& x,
+                    const DenseTensor& out_grad,
                     const phi::IntArray& repeat_times,
-                    phi::DenseTensor* x_grad) {
+                    DenseTensor* x_grad) {
   auto x_dims = x.dims();
   auto vec_x_dims = phi::vectorize(x_dims);
   std::vector<int64_t> origin_x_dims = vec_x_dims;
@@ -211,7 +211,7 @@ void TileGradKernel(const Context& dev_ctx,
                         MAX_RANK_SUPPORTED,
                         dims));
   dev_ctx.template Alloc<T>(x_grad);
-  phi::DenseTensor dout(out_grad);
+  DenseTensor dout(out_grad);
   dout.Resize(phi::make_ddim(reshape_dims_vec));

   std::string reduce_name = "reduce_sum";
diff --git a/backends/mlu/kernels/top_k_kernel.cc b/backends/mlu/kernels/top_k_kernel.cc
old mode 100755
new mode 100644
index c41cc7f19c9..5f24a293501
--- a/backends/mlu/kernels/top_k_kernel.cc
+++ b/backends/mlu/kernels/top_k_kernel.cc
@@ -19,18 +19,18 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void TopkKernel(const Context& dev_ctx,
-                const phi::DenseTensor& x,
-                const phi::Scalar& k_scalar,
+                const DenseTensor& x,
+                const Scalar& k_scalar,
                 int axis,
                 bool largest,
                 bool sorted,
-                phi::DenseTensor* out,
-                phi::DenseTensor* indices) {
+                DenseTensor* out,
+                DenseTensor* indices) {
   if (axis < 0) {
     axis += x.dims().size();
   }

   int k = k_scalar.to<int>();
-  phi::DDim output_dims = x.dims();
+  DDim output_dims = x.dims();
   output_dims[axis] = k;

   out->Resize(output_dims);
@@ -47,9 +47,8 @@ void TopkKernel(const Context& dev_ctx,
     return;
   }

-  phi::DenseTensor indices_int32;
-  phi::DenseTensorMeta indices_int32_meta = {phi::DataType::INT32,
-                                             indices->dims()};
+  DenseTensor indices_int32;
+  DenseTensorMeta indices_int32_meta = {phi::DataType::INT32, indices->dims()};
   indices_int32.set_meta(indices_int32_meta);
   dev_ctx.template Alloc<int32_t>(&indices_int32);
   MLUCnnlTensorDesc input_desc(x);
diff --git a/backends/mlu/kernels/transpose_kernel.cc b/backends/mlu/kernels/transpose_kernel.cc
index 477741ba395..3520bd89cd5 100644
--- a/backends/mlu/kernels/transpose_kernel.cc
+++ b/backends/mlu/kernels/transpose_kernel.cc
@@ -19,9 +19,9 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void TransposeKernel(const Context& dev_ctx,
-                     const phi::DenseTensor& x,
+                     const DenseTensor& x,
                      const std::vector<int>& axis,
-                     phi::DenseTensor* out) {
+                     DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);

   TransposeFromMLUTensor<T>(
@@ -30,9 +30,9 @@ void TransposeKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void TransposeGradKernel(const Context& dev_ctx,
-                         const phi::DenseTensor& dout,
+                         const DenseTensor& dout,
                          const std::vector<int>& axis,
-                         phi::DenseTensor* dx) {
+                         DenseTensor* dx) {
   dev_ctx.template Alloc<T>(dx);

   std::vector<int> reversed_axis(axis);
diff --git a/backends/mlu/kernels/tril_triu_op_mlu.cc b/backends/mlu/kernels/tril_triu_op_mlu.cc
index 6c16300566a..2e68cca3c3e 100644
--- a/backends/mlu/kernels/tril_triu_op_mlu.cc
+++ b/backends/mlu/kernels/tril_triu_op_mlu.cc
@@ -18,10 +18,10 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void TrilTriuKernel(const Context& dev_ctx,
-                    const phi::DenseTensor& x,
+                    const DenseTensor& x,
                     int diagonal,
                     bool lower,
-                    phi::DenseTensor* out) {
+                    DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
   bool upper;
   if (lower) {
@@ -79,44 +79,44 @@ void TrilTriuKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void TrilKernel(const Context& dev_ctx,
-                const phi::DenseTensor& x,
+                const DenseTensor& x,
                 int diagonal,
-                phi::DenseTensor* out) {
+                DenseTensor* out) {
   custom_kernel::TrilTriuKernel<T, Context>(dev_ctx, x, diagonal, true, out);
 }

 template <typename T, typename Context>
 void TriuKernel(const Context& dev_ctx,
-                const phi::DenseTensor& x,
+                const DenseTensor& x,
                 int diagonal,
-                phi::DenseTensor* out) {
+                DenseTensor* out) {
   custom_kernel::TrilTriuKernel<T, Context>(dev_ctx, x, diagonal, false, out);
 }

 template <typename T, typename Context>
 void TrilTriuGradKernel(const Context& dev_ctx,
-                        const phi::DenseTensor& out_grad,
+                        const DenseTensor& out_grad,
                         int diagonal,
                         bool lower,
-                        phi::DenseTensor* x_grad) {
+                        DenseTensor* x_grad) {
   custom_kernel::TrilTriuKernel<T, Context>(
       dev_ctx, out_grad, diagonal, lower, x_grad);
 }

 template <typename T, typename Context>
 void TrilGradKernel(const Context& dev_ctx,
-                    const phi::DenseTensor& out_grad,
+                    const DenseTensor& out_grad,
                     int diagonal,
-                    phi::DenseTensor* x_grad) {
+                    DenseTensor* x_grad) {
   custom_kernel::TrilTriuGradKernel<T, Context>(
       dev_ctx, out_grad, diagonal, true, x_grad);
 }

 template <typename T, typename Context>
 void TriuGradKernel(const Context& dev_ctx,
-                    const phi::DenseTensor& out_grad,
+                    const DenseTensor& out_grad,
                     int diagonal,
-                    phi::DenseTensor* x_grad) {
+                    DenseTensor* x_grad) {
   custom_kernel::TrilTriuGradKernel<T, Context>(
       dev_ctx, out_grad, diagonal, false, x_grad);
 }
diff --git a/backends/mlu/kernels/truncated_gaussian_random_kernel.cc b/backends/mlu/kernels/truncated_gaussian_random_kernel.cc
index bf9cf5f6644..8b65db4c048 100644
--- a/backends/mlu/kernels/truncated_gaussian_random_kernel.cc
+++ b/backends/mlu/kernels/truncated_gaussian_random_kernel.cc
@@ -165,11 +165,11 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
                                    float a,
                                    float b,
                                    phi::DataType dtype,
-                                   phi::DenseTensor* out) {
+                                   DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);

-  phi::DenseTensor cpu_out;
-  phi::DenseTensorMeta cpu_meta = {out->dtype(), out->dims()};
+  DenseTensor cpu_out;
+  DenseTensorMeta cpu_meta = {out->dtype(), out->dims()};
   cpu_out.set_meta(cpu_meta);
   T* cpu_data = dev_ctx.template HostAlloc<T>(&cpu_out);
diff --git a/backends/mlu/kernels/uniform_kernel.cc b/backends/mlu/kernels/uniform_kernel.cc
index 48ac88c8f05..5d8f48146a3 100644
--- a/backends/mlu/kernels/uniform_kernel.cc
+++ b/backends/mlu/kernels/uniform_kernel.cc
@@ -34,21 +34,21 @@ template <typename T, typename Context>
 void UniformRawKernel(const Context& dev_ctx,
                       const phi::IntArray& shape,
                       phi::DataType dtype,
-                      const phi::Scalar& min,
-                      const phi::Scalar& max,
+                      const Scalar& min,
+                      const Scalar& max,
                       int seed,
                       int diag_num,
                       int diag_step,
                       float diag_val,
-                      phi::DenseTensor* out) {
+                      DenseTensor* out) {
   out->Resize(phi::make_ddim(shape.GetData()));
   VLOG(4) << out->dims();
   T* data = dev_ctx.template Alloc<T>(out);
   auto size = out->numel();

   // 1. CPU implement
-  phi::DenseTensor cpu_out;
-  phi::DenseTensorMeta cpu_out_meta = {out->dtype(), out->dims()};
+  DenseTensor cpu_out;
+  DenseTensorMeta cpu_out_meta = {out->dtype(), out->dims()};
   cpu_out.set_meta(cpu_out_meta);
   T* cpu_data = dev_ctx.template HostAlloc<T>(&cpu_out);
@@ -87,10 +87,10 @@ template <typename T, typename Context>
 void UniformKernel(const Context& dev_ctx,
                    const phi::IntArray& shape,
                    phi::DataType dtype,
-                   const phi::Scalar& min,
-                   const phi::Scalar& max,
+                   const Scalar& min,
+                   const Scalar& max,
                    int seed,
-                   phi::DenseTensor* out) {
+                   DenseTensor* out) {
   custom_kernel::UniformRawKernel<T>(
       dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out);
 }
diff --git a/backends/mlu/kernels/unsqueeze_kernel.cc b/backends/mlu/kernels/unsqueeze_kernel.cc
index f56eb04c494..1d3a1a15786 100644
--- a/backends/mlu/kernels/unsqueeze_kernel.cc
+++ b/backends/mlu/kernels/unsqueeze_kernel.cc
@@ -17,8 +17,8 @@

 namespace custom_kernel {

-inline phi::DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,
-                                   const phi::DDim& in_dims) {
+inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,
+                              const DDim& in_dims) {
   int output_size = in_dims.size() + static_cast<int>(unsqz_dims.size());
   int cur_output_size = in_dims.size();
   std::vector<int64_t> output_shape(output_size, 0);
@@ -68,9 +68,9 @@ inline phi::DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,

 template <typename T, typename Context>
 void UnsqueezeKernel(const Context& dev_ctx,
-                     const phi::DenseTensor& x,
+                     const DenseTensor& x,
                      const phi::IntArray& axes,
-                     phi::DenseTensor* out) {
+                     DenseTensor* out) {
   auto x_dims = x.dims();
   auto out_dims = out->dims();
@@ -89,18 +89,18 @@ void UnsqueezeKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void UnsqueezeWithXShapeKernel(const Context& dev_ctx,
-                               const phi::DenseTensor& x,
+                               const DenseTensor& x,
                                const phi::IntArray& axes,
-                               phi::DenseTensor* out,
-                               phi::DenseTensor* xshape) {
+                               DenseTensor* out,
+                               DenseTensor* xshape) {
   custom_kernel::UnsqueezeKernel<T, Context>(dev_ctx, x, axes, out);
 }

 template <typename T, typename Context>
 void UnsqueezeGradMLUKernel(const Context& dev_ctx,
-                            const phi::DenseTensor& x,
-                            const phi::DenseTensor& dout,
-                            phi::DenseTensor* dx) {
+                            const DenseTensor& x,
+                            const DenseTensor& dout,
+                            DenseTensor* dx) {
   auto x_dims = dx->dims();
   dev_ctx.template Alloc<T>(dx);
diff --git a/backends/mlu/kernels/unstack_kernel.cc b/backends/mlu/kernels/unstack_kernel.cc
index d80b4999bd4..cd930dd7471 100644
--- a/backends/mlu/kernels/unstack_kernel.cc
+++ b/backends/mlu/kernels/unstack_kernel.cc
@@ -18,10 +18,10 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void UnStackKernel(const Context& dev_ctx,
-                   const phi::DenseTensor& x,
+                   const DenseTensor& x,
                    int axis,
                    int num,
-                   std::vector<phi::DenseTensor*> outs) {
+                   std::vector<DenseTensor*> outs) {
   if (axis < 0) axis += x.dims().size();
   num = x.dims()[axis];
@@ -50,9 +50,9 @@ void UnStackKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void UnStackGradKernel(const Context& dev_ctx,
-                       const std::vector<const phi::DenseTensor*>& x,
+                       const std::vector<const DenseTensor*>& x,
                        int axis,
-                       phi::DenseTensor* outs) {
+                       DenseTensor* outs) {
   dev_ctx.template Alloc<T>(outs);

   if (axis < 0) axis += (x[0]->dims().size() + 1);
diff --git a/backends/mlu/kernels/where_kernel.cc b/backends/mlu/kernels/where_kernel.cc
index 4daff35c003..7c9a19361f8 100755
--- a/backends/mlu/kernels/where_kernel.cc
+++ b/backends/mlu/kernels/where_kernel.cc
@@ -19,10 +19,10 @@ namespace custom_kernel {

 template <typename T, typename Context>
 void WhereKernel(const Context& dev_ctx,
-                 const phi::DenseTensor& condition,
-                 const phi::DenseTensor& x,
-                 const phi::DenseTensor& y,
-                 phi::DenseTensor* out) {
+                 const DenseTensor& condition,
+                 const DenseTensor& x,
+                 const DenseTensor& y,
+                 DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);

   MLUCnnlTensorDesc x_desc(x);
@@ -42,12 +42,12 @@ void WhereKernel(const Context& dev_ctx,

 template <typename T, typename Context>
 void WhereGradKernel(const Context& dev_ctx,
-                     const phi::DenseTensor& condition,
-                     const phi::DenseTensor& x,
-                     const phi::DenseTensor& y,
-                     const phi::DenseTensor& out_grad,
-                     phi::DenseTensor* x_grad,
-                     phi::DenseTensor* y_grad) {
+                     const DenseTensor& condition,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     const DenseTensor& out_grad,
+                     DenseTensor* x_grad,
+                     DenseTensor* y_grad) {
   if (x_grad != nullptr) {
     dev_ctx.template Alloc<T>(x_grad);
   }
diff --git a/backends/mlu/kernels/yolo_box_kernel.cc b/backends/mlu/kernels/yolo_box_kernel.cc
index 3b4268d7269..3d1f77add2f 100644
--- a/backends/mlu/kernels/yolo_box_kernel.cc
+++ b/backends/mlu/kernels/yolo_box_kernel.cc
@@ -15,8 +15,8 @@ limitations under the License. */
 namespace custom_kernel {
 template <typename T, typename Context>
 void YoloBoxKernel(const Context& dev_ctx,
-                   const phi::DenseTensor& x,
-                   const phi::DenseTensor& img_size,
+                   const DenseTensor& x,
+                   const DenseTensor& img_size,
                    const std::vector<int>& anchors,
                    int class_num,
                    float conf_thresh,
@@ -25,8 +25,8 @@ void YoloBoxKernel(const Context& dev_ctx,
                    float scale_x_y,
                    bool iou_aware,
                    float iou_aware_factor,
-                   phi::DenseTensor* boxes,
-                   phi::DenseTensor* scores) {
+                   DenseTensor* boxes,
+                   DenseTensor* scores) {
   int anchor_num = anchors.size() / 2;
   int64_t size = anchors.size();
   auto dim_x = x.dims();
@@ -47,11 +47,11 @@ void YoloBoxKernel(const Context& dev_ctx,
   std::vector<int64_t> boxes_out_dim({n, s, h * w, 4});
   std::vector<int64_t> scores_out_dim({n, s, h * w, class_num});

-  phi::DenseTensor boxes_tensor_mluops;
+  DenseTensor boxes_tensor_mluops;
   boxes_tensor_mluops.Resize({n, s, 4, h * w});
   dev_ctx.template Alloc<T>(&boxes_tensor_mluops);

-  phi::DenseTensor scores_tensor_mluops;
+  DenseTensor scores_tensor_mluops;
   scores_tensor_mluops.Resize({n, s, class_num, h * w});
   dev_ctx.template Alloc<T>(&scores_tensor_mluops);

From d7dac933b2ce699c06a69bb235091b5991f60e66 Mon Sep 17 00:00:00 2001
From: co63oc <4617245+co63oc@users.noreply.github.com>
Date: Tue, 21 Apr 2026 09:32:46 +0800
Subject: [PATCH 2/4] ci

From 1222c8ff0b887fbf902fb62cedabcc730b8243dc Mon Sep 17 00:00:00 2001
From: co63oc <4617245+co63oc@users.noreply.github.com>
Date: Tue, 21 Apr 2026 09:57:29 +0800
Subject: [PATCH 3/4] fix
---
 backends/mlu/kernels/funcs/mlu_baseop.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backends/mlu/kernels/funcs/mlu_baseop.h b/backends/mlu/kernels/funcs/mlu_baseop.h
index 63a8c47a047..5d723696154 100644
--- a/backends/mlu/kernels/funcs/mlu_baseop.h
+++ b/backends/mlu/kernels/funcs/mlu_baseop.h
@@ -26,6 +26,7 @@ namespace custom_kernel {

 using Tensor = phi::DenseTensor;
 using DenseTensor = phi::DenseTensor;
+using DenseTensorMeta = phi::DenseTensorMeta;
 using Scalar = phi::Scalar;
 using DDim = phi::DDim;
 using Context = phi::CustomContext;

From fc26c8e821a0551b807f7236099596bc84f20c30 Mon Sep 17 00:00:00 2001
From: co63oc <4617245+co63oc@users.noreply.github.com>
Date: Tue, 21 Apr 2026 15:29:13 +0800
Subject: [PATCH 4/4] ci
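
Reviewer note: the bulk rename in PATCH 1/4 compiles only because every kernel is defined inside `namespace custom_kernel`, where `mlu_baseop.h` already declares `using DenseTensor = phi::DenseTensor;`, `using Scalar = phi::Scalar;`, and `using DDim = phi::DDim;`; PATCH 3/4 adds the one alias (`DenseTensorMeta`) the rename also relies on. Below is a minimal standalone sketch of the alias pattern, with toy types and a hypothetical `Inspect` function — none of this is code from the repository:

```cpp
#include <iostream>

// Toy stand-in for the phi library type (illustration only).
namespace phi {
struct DenseTensor {
  int numel;
};
}  // namespace phi

namespace custom_kernel {

// A shared header (mlu_baseop.h in this series) introduces the alias once;
// every kernel in the namespace can then drop the phi:: qualifier.
using DenseTensor = phi::DenseTensor;

// Same signature as `void Inspect(const phi::DenseTensor&)` would be: a
// using-alias names the existing type rather than declaring a new one.
void Inspect(const DenseTensor& t) { std::cout << t.numel << "\n"; }

}  // namespace custom_kernel

int main() {
  phi::DenseTensor t{42};
  custom_kernel::Inspect(t);  // no conversion: one type under two names
  return 0;
}
```

Because the alias is transparent to the type system, the series should be a pure spelling change — kernel signatures, registrations, and ABI are unaffected.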