diff --git a/backends/intel_gpu/kernels/argsort_kernel.cc b/backends/intel_gpu/kernels/argsort_kernel.cc index 82a775c0def..9c8194609a6 100644 --- a/backends/intel_gpu/kernels/argsort_kernel.cc +++ b/backends/intel_gpu/kernels/argsort_kernel.cc @@ -17,13 +17,13 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { namespace gpu { template -void Transpose(const phi::Context& ctx, - const phi::DenseTensor& x, +void Transpose(const Context& ctx, + const DenseTensor& x, const std::vector& axis, T* out_data, const std::vector& out_dims, @@ -113,12 +113,12 @@ void FullSort(int input_height, } template -void ArgsortKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& input, +void ArgsortKernel(const Context& dev_ctx, + const DenseTensor& input, int axis, bool descending, - phi::DenseTensor* output, - phi::DenseTensor* indices) { + DenseTensor* output, + DenseTensor* indices) { auto in_dims = input.dims(); auto out_dims = output->dims(); auto out_size = output->numel(); @@ -142,7 +142,7 @@ void ArgsortKernel(const phi::Context& dev_ctx, n = in_dims[0]; m = in_dims[1]; } - phi::DenseTensor cpu_input; + DenseTensor cpu_input; cpu_input.Resize(std::vector(in_dims)); cpu_input.set_dtype(input.dtype()); auto cpu_input_data = dev_ctx.template HostAlloc(&cpu_input); @@ -151,14 +151,14 @@ void ArgsortKernel(const phi::Context& dev_ctx, q->memcpy(cpu_input_data, input_data, input.memory_size()); q->wait(); // cpu implement - phi::DenseTensor cpu_output; + DenseTensor cpu_output; cpu_output.Resize(std::vector(out_dims)); cpu_output.set_dtype(output->dtype()); auto cpu_output_dims = cpu_output.dims(); auto cpu_output_numel = cpu_output.numel(); auto cpu_output_data = dev_ctx.template HostAlloc(&cpu_output); - phi::DenseTensor cpu_ids; + DenseTensor cpu_ids; cpu_ids.Resize(std::vector(indices->dims())); cpu_ids.set_dtype(indices->dtype()); auto cpu_ids_dims = cpu_ids.dims(); @@ -191,7 +191,7 @@ void ArgsortKernel(const phi::Context& dev_ctx, trans_dims[i] = in_dims[trans[i]]; } - phi::DenseTensor trans_inp; + DenseTensor trans_inp; trans_inp.Resize(trans_dims); auto trans_input_dims = trans_inp.dims(); auto trans_input_numel = trans_inp.numel(); @@ -207,12 +207,12 @@ void ArgsortKernel(const phi::Context& dev_ctx, const int64_t input_height = trans_dims[0]; const int64_t input_width = trans_dims[trans_dims.size() - 1]; - phi::DenseTensor cpu_tmp_output; + DenseTensor cpu_tmp_output; cpu_tmp_output.Resize(trans_dims); cpu_tmp_output.set_dtype(output->dtype()); auto cpu_tmp_output_data = dev_ctx.template HostAlloc(&cpu_tmp_output); - phi::DenseTensor cpu_tmp_ids; + DenseTensor cpu_tmp_ids; cpu_tmp_ids.Resize(trans_dims); cpu_tmp_ids.set_dtype(indices->dtype()); auto cpu_tmp_ids_data = dev_ctx.template HostAlloc(&cpu_tmp_ids); @@ -243,12 +243,12 @@ void ArgsortKernel(const phi::Context& dev_ctx, } // namespace gpu -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(argsort, intel_gpu, ALL_LAYOUT, - custom_kernel::gpu::ArgsortKernel, + phi::gpu::ArgsortKernel, float, double, int, diff --git a/backends/intel_gpu/kernels/assign_value_kernel.cc b/backends/intel_gpu/kernels/assign_value_kernel.cc index ffab9f8dd6f..af233921fd0 100644 --- a/backends/intel_gpu/kernels/assign_value_kernel.cc +++ b/backends/intel_gpu/kernels/assign_value_kernel.cc @@ -16,14 +16,14 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void AssignValueKernel(const phi::Context& dev_ctx, +void AssignValueKernel(const Context& dev_ctx, const std::vector& shape, phi::DataType dtype, const std::vector& values, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("AssignValue-SYCL, type=" << dnn_support::type2String::name()); auto template_dtype = phi::capi::CppTypeToPDType::Type(); @@ -49,18 +49,18 @@ void AssignValueKernel(const phi::Context& dev_ctx, } template -void AssignKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void AssignKernel(const Context& dev_ctx, + const DenseTensor& x, + DenseTensor* out) { auto out_data = dev_ctx.template Alloc(out); auto x_data = x.data(); std::memcpy(out_data, x_data, sizeof(T) * x.numel()); } template -void AssignRawKernel(const phi::Context& dev_ctx, - const paddle::optional& x, - phi::DenseTensor* out) { +void AssignRawKernel(const Context& dev_ctx, + const paddle::optional& x, + DenseTensor* out) { show_kernel("AssignRaw-SYCL, type=" << dnn_support::type2String::name()); if (x) { @@ -76,12 +76,12 @@ void AssignRawKernel(const phi::Context& dev_ctx, } } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(assign_value, intel_gpu, ALL_LAYOUT, - custom_kernel::AssignValueKernel, + phi::AssignValueKernel, int, int64_t, float, @@ -90,7 +90,7 @@ PD_BUILD_PHI_KERNEL(assign_value, PD_BUILD_PHI_KERNEL(assign_raw, intel_gpu, ALL_LAYOUT, - custom_kernel::AssignRawKernel, + phi::AssignRawKernel, int, int64_t, float, diff --git a/backends/intel_gpu/kernels/cast_kernel.cc b/backends/intel_gpu/kernels/cast_kernel.cc index ed1713475d3..75f3e8669eb 100644 --- a/backends/intel_gpu/kernels/cast_kernel.cc +++ b/backends/intel_gpu/kernels/cast_kernel.cc @@ -16,13 +16,13 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void CastKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void CastKernel(const Context& dev_ctx, + const DenseTensor& x, phi::DataType out_dtype, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("Cast-SYCL"); auto x_data = x.data(); @@ -109,12 +109,12 @@ void CastKernel(const phi::Context& dev_ctx, q->wait(); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(cast, intel_gpu, ALL_LAYOUT, - custom_kernel::CastKernel, + phi::CastKernel, float, double, int, diff --git a/backends/intel_gpu/kernels/compare_kernel.cc b/backends/intel_gpu/kernels/compare_kernel.cc index bc586e3d02b..704eee8e4b5 100644 --- a/backends/intel_gpu/kernels/compare_kernel.cc +++ b/backends/intel_gpu/kernels/compare_kernel.cc @@ -16,15 +16,15 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void RawCompareKernelSycl(const phi::Context& dev_ctx, +void RawCompareKernelSycl(const Context& dev_ctx, std::string kernel_name, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out, + DenseTensor* out, const F& func, const FF& float_func) { show_kernel(kernel_name << "-SYCL type=" @@ -51,13 +51,13 @@ void RawCompareKernelSycl(const phi::Context& dev_ctx, } template -void RawCompareKernelDNN(const phi::Context& dev_ctx, +void RawCompareKernelDNN(const Context& dev_ctx, std::string kernel_name, dnnl::algorithm binary_type, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel(kernel_name << "-DNN type=" << dnn_support::type2String::name()); @@ -106,13 +106,13 @@ void RawCompareKernelDNN(const phi::Context& dev_ctx, } template -void EqualityKernel(const phi::Context& dev_ctx, +void EqualityKernel(const Context& dev_ctx, std::string kernel_name, dnnl::algorithm binary_type, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out, + DenseTensor* out, const F& func, const FF& float_func) { if constexpr (std::is_same::value) { @@ -124,13 +124,13 @@ void EqualityKernel(const phi::Context& dev_ctx, } template -void CompareKernel(const phi::Context& dev_ctx, +void CompareKernel(const Context& dev_ctx, std::string kernel_name, dnnl::algorithm binary_type, - const phi::DenseTensor& x, - const phi::DenseTensor& y, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out, + DenseTensor* out, const F& func) { if constexpr (std::is_same::value) { RawCompareKernelDNN(dev_ctx, kernel_name, binary_type, x, y, axis, out); @@ -140,11 +140,11 @@ void CompareKernel(const phi::Context& dev_ctx, } template -void NotEqualKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void NotEqualKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { EqualityKernel( dev_ctx, "NotEqual", @@ -163,11 +163,11 @@ void NotEqualKernel(const phi::Context& dev_ctx, } template -void EqualKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void EqualKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { EqualityKernel( dev_ctx, "Equal", @@ -186,11 +186,11 @@ void EqualKernel(const phi::Context& dev_ctx, } template -void LessThanKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void LessThanKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { CompareKernel(dev_ctx, "LessThanKernel", dnnl::algorithm::binary_lt, @@ -204,11 +204,11 @@ void LessThanKernel(const phi::Context& dev_ctx, } template -void LessEqualKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void LessEqualKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { CompareKernel(dev_ctx, "LessEqual", dnnl::algorithm::binary_le, @@ -222,11 +222,11 @@ void LessEqualKernel(const phi::Context& dev_ctx, } template -void GreaterThanKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void GreaterThanKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { CompareKernel(dev_ctx, "GreaterThan", dnnl::algorithm::binary_gt, @@ -240,11 +240,11 @@ void GreaterThanKernel(const phi::Context& dev_ctx, } template -void GreaterEqualKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void GreaterEqualKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { CompareKernel(dev_ctx, "GreaterEqual", dnnl::algorithm::binary_ge, @@ -257,13 +257,13 @@ void GreaterEqualKernel(const phi::Context& dev_ctx, }); } -} // namespace custom_kernel +} // namespace phi #define PD_REGISTER_COMPARE_KERNEL(name, func) \ PD_BUILD_PHI_KERNEL(name, \ intel_gpu, \ ALL_LAYOUT, \ - custom_kernel::func##Kernel, \ + phi::func##Kernel, \ float, \ double, \ uint8_t, \ diff --git a/backends/intel_gpu/kernels/elementwise_kernel.cc b/backends/intel_gpu/kernels/elementwise_kernel.cc index abac8f93ad2..c5bf987c5b6 100644 --- a/backends/intel_gpu/kernels/elementwise_kernel.cc +++ b/backends/intel_gpu/kernels/elementwise_kernel.cc @@ -16,14 +16,14 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void MultiplyRawKernelGPU(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void MultiplyRawKernelGPU(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel( "ElementWise-SYCL-MUL type=" << dnn_support::type2String::name()); void* stream = const_cast(dev_ctx.stream()); @@ -46,20 +46,20 @@ void MultiplyRawKernelGPU(const phi::Context& dev_ctx, } template -void MultiplyKernelGPU(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { +void MultiplyKernelGPU(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; MultiplyRawKernelGPU(dev_ctx, x, y, axis, out); } template -void MultiplyOneDNNRawKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void MultiplyOneDNNRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel( "ElementWise-ONEDNN type=" << dnn_support::type2String::name()); auto* q = static_cast(const_cast(dev_ctx.stream())); @@ -107,20 +107,20 @@ void MultiplyOneDNNRawKernel(const phi::Context& dev_ctx, } template -void MultiplyOneDNNKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { +void MultiplyOneDNNKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; MultiplyOneDNNRawKernel(dev_ctx, x, y, axis, out); } template -void MultiplyMainRaw(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, +void MultiplyMainRaw(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { if constexpr (std::is_same::value || std::is_same::value //|| std::is_same::value ) { @@ -130,20 +130,20 @@ void MultiplyMainRaw(const phi::Context& dev_ctx, } } template -void MultiplyMain(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& y, - phi::DenseTensor* out) { +void MultiplyMain(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { int axis = -1; MultiplyMainRaw(dev_ctx, x, y, axis, out); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(multiply_raw, intel_gpu, ALL_LAYOUT, - custom_kernel::MultiplyMainRaw, + phi::MultiplyMainRaw, int32_t, int64_t, float, @@ -152,7 +152,7 @@ PD_BUILD_PHI_KERNEL(multiply_raw, PD_BUILD_PHI_KERNEL(multiply, intel_gpu, ALL_LAYOUT, - custom_kernel::MultiplyMain, + phi::MultiplyMain, int32_t, int64_t, float, diff --git a/backends/intel_gpu/kernels/full_kernel.cc b/backends/intel_gpu/kernels/full_kernel.cc index f4f4c3d8a67..34c12431f27 100644 --- a/backends/intel_gpu/kernels/full_kernel.cc +++ b/backends/intel_gpu/kernels/full_kernel.cc @@ -15,12 +15,10 @@ #include "kernels/dnn_support.hpp" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void FullValue(const phi::Context& dev_ctx, - phi::DenseTensor* tensor, - VType val) { +void FullValue(const Context& dev_ctx, DenseTensor* tensor, VType val) { show_kernel("FullValue type=" << dnn_support::type2String::name()); auto t = dev_ctx.template Alloc(tensor); auto* q = static_cast(dev_ctx.stream()); @@ -31,21 +29,21 @@ void FullValue(const phi::Context& dev_ctx, } template -void FullKernel(const phi::Context& dev_ctx, +void FullKernel(const Context& dev_ctx, const phi::IntArray& shape, const phi::Scalar& val, phi::DataType dtype, - phi::DenseTensor* out) { + DenseTensor* out) { auto int_shape = shape.GetData(); out->Resize(std::vector(int_shape.cbegin(), int_shape.cend())); FullValue(dev_ctx, out, val.to()); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(full, intel_gpu, ALL_LAYOUT, - custom_kernel::FullKernel, + phi::FullKernel, float, double, uint8_t, diff --git a/backends/intel_gpu/kernels/kernels.h b/backends/intel_gpu/kernels/kernels.h index 10f964aa083..8fc455abd5b 100644 --- a/backends/intel_gpu/kernels/kernels.h +++ b/backends/intel_gpu/kernels/kernels.h @@ -15,16 +15,16 @@ #pragma once #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void TransposeKernelGPU(const phi::Context& ctx, - const phi::DenseTensor& x, +void TransposeKernelGPU(const Context& ctx, + const DenseTensor& x, const std::vector& axis, - phi::DenseTensor* out); + DenseTensor* out); template -void SoftmaxKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void SoftmaxKernel(const Context& dev_ctx, + const DenseTensor& x, int axis, - phi::DenseTensor* out); -} // namespace custom_kernel + DenseTensor* out); +} // namespace phi diff --git a/backends/intel_gpu/kernels/mean_kernel.cc b/backends/intel_gpu/kernels/mean_kernel.cc index 7a7685a119e..cf26bed4597 100644 --- a/backends/intel_gpu/kernels/mean_kernel.cc +++ b/backends/intel_gpu/kernels/mean_kernel.cc @@ -15,12 +15,12 @@ #include "kernels/dnn_support.hpp" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void MeanAllKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - phi::DenseTensor* out) { +void MeanAllKernel(const Context& dev_ctx, + const DenseTensor& x, + DenseTensor* out) { auto out_data = dev_ctx.template Alloc(out); auto x_data = x.data(); auto numel = x.numel(); @@ -42,10 +42,10 @@ void MeanAllKernel(const phi::Context& dev_ctx, } template -void MeanAllGradKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, - const phi::DenseTensor& out_grad, - phi::DenseTensor* x_grad) { +void MeanAllGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + DenseTensor* x_grad) { PD_CHECK(out_grad.numel() == 1UL, "Mean Gradient should be scalar. But received " "Out@Grad's elements num is %d.", @@ -63,18 +63,14 @@ void MeanAllGradKernel(const phi::Context& dev_ctx, q->wait(); } -} // namespace custom_kernel +} // namespace phi -PD_BUILD_PHI_KERNEL(mean_all, - intel_gpu, - ALL_LAYOUT, - custom_kernel::MeanAllKernel, - float, - double) {} +PD_BUILD_PHI_KERNEL( + mean_all, intel_gpu, ALL_LAYOUT, phi::MeanAllKernel, float, double) {} PD_BUILD_PHI_KERNEL(mean_all_grad, intel_gpu, ALL_LAYOUT, - custom_kernel::MeanAllGradKernel, + phi::MeanAllGradKernel, float, double) {} diff --git a/backends/intel_gpu/kernels/memcpy_kernel.cc b/backends/intel_gpu/kernels/memcpy_kernel.cc index 340a0339216..ec5046bf36d 100644 --- a/backends/intel_gpu/kernels/memcpy_kernel.cc +++ b/backends/intel_gpu/kernels/memcpy_kernel.cc @@ -16,13 +16,13 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void MemcpyD2HKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MemcpyD2HKernel(const Context& dev_ctx, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("memcpy_d2h"); auto out_data = dev_ctx.HostAlloc(out); auto x_data = x.data(); @@ -35,10 +35,10 @@ void MemcpyD2HKernel(const phi::Context& dev_ctx, } template -void MemcpyH2DKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MemcpyH2DKernel(const Context& dev_ctx, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("memcpy_h2d"); auto out_data = dev_ctx.Alloc(out); auto x_data = x.data(); @@ -52,10 +52,10 @@ void MemcpyH2DKernel(const phi::Context& dev_ctx, } template -void MemcpyKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MemcpyKernel(const Context& dev_ctx, + const DenseTensor& x, int dst_place_type, - phi::DenseTensor* out) { + DenseTensor* out) { if (!x.initialized()) { return; } @@ -70,12 +70,12 @@ void MemcpyKernel(const phi::Context& dev_ctx, } } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(memcpy_d2h, intel_gpu, ALL_LAYOUT, - custom_kernel::MemcpyD2HKernel, + phi::MemcpyD2HKernel, float, double, int32_t, @@ -85,7 +85,7 @@ PD_BUILD_PHI_KERNEL(memcpy_d2h, PD_BUILD_PHI_KERNEL(memcpy_h2d, intel_gpu, ALL_LAYOUT, - custom_kernel::MemcpyH2DKernel, + phi::MemcpyH2DKernel, float, double, int32_t, @@ -95,7 +95,7 @@ PD_BUILD_PHI_KERNEL(memcpy_h2d, PD_BUILD_PHI_KERNEL(memcpy, intel_gpu, ALL_LAYOUT, - custom_kernel::MemcpyKernel, + phi::MemcpyKernel, phi::dtype::float16, float, double, diff --git a/backends/intel_gpu/kernels/phi_funcs.h b/backends/intel_gpu/kernels/phi_funcs.h index c585ba20fb7..e2e60f96400 100644 --- a/backends/intel_gpu/kernels/phi_funcs.h +++ b/backends/intel_gpu/kernels/phi_funcs.h @@ -267,11 +267,11 @@ inline std::vector GetDecreasedDims( } // namespace funcs template -inline void BroadcastTo(const phi::Context& dev_ctx, - const phi::DenseTensor& in, +inline void BroadcastTo(const Context& dev_ctx, + const DenseTensor& in, std::vector out_dims, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { auto in_dims = in.dims(); if (in_dims.size() == out_dims.size()) { diff --git a/backends/intel_gpu/kernels/reduce_kernel.cc b/backends/intel_gpu/kernels/reduce_kernel.cc index 31ce0174c68..9a787da2d3f 100644 --- a/backends/intel_gpu/kernels/reduce_kernel.cc +++ b/backends/intel_gpu/kernels/reduce_kernel.cc @@ -16,17 +16,17 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void ReduceKernel(const phi::Context& dev_ctx, +void ReduceKernel(const Context& dev_ctx, std::string kernel_name, - const phi::DenseTensor& x, + const DenseTensor& x, const std::vector& dims, dnnl::algorithm reduction_type, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { auto x_dims = x.dims(); auto reduce_dims = dims; @@ -99,12 +99,12 @@ void ReduceKernel(const phi::Context& dev_ctx, } template -void MeanRawKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MeanRawKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { ReduceKernel(dev_ctx, "MeanRaw", x, @@ -116,23 +116,23 @@ void MeanRawKernel(const phi::Context& dev_ctx, } template -void MeanKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MeanKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; MeanRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out); } template -void SumRawKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void SumRawKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, bool reduce_all, phi::DataType out_dtype, - phi::DenseTensor* out) { + DenseTensor* out) { ReduceKernel(dev_ctx, "SumRaw", x, @@ -144,23 +144,23 @@ void SumRawKernel(const phi::Context& dev_ctx, } template -void SumKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void SumKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, phi::DataType out_dtype, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; SumRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out); } template -void MaxRawKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MaxRawKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { ReduceKernel(dev_ctx, "MaxRaw", x, @@ -172,22 +172,22 @@ void MaxRawKernel(const phi::Context& dev_ctx, } template -void MaxKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MaxKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; MaxRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out); } template -void MinRawKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MinRawKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, bool reduce_all, - phi::DenseTensor* out) { + DenseTensor* out) { ReduceKernel(dev_ctx, "MinRaw", x, @@ -199,33 +199,26 @@ void MinRawKernel(const phi::Context& dev_ctx, } template -void MinKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void MinKernel(const Context& dev_ctx, + const DenseTensor& x, const std::vector& dims, bool keep_dim, - phi::DenseTensor* out) { + DenseTensor* out) { bool reduce_all = false; MinRawKernel(dev_ctx, x, dims, keep_dim, reduce_all, out); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL( - mean_raw, intel_gpu, ALL_LAYOUT, custom_kernel::MeanRawKernel, float) {} -PD_BUILD_PHI_KERNEL( - mean, intel_gpu, ALL_LAYOUT, custom_kernel::MeanKernel, float) {} + mean_raw, intel_gpu, ALL_LAYOUT, phi::MeanRawKernel, float) {} +PD_BUILD_PHI_KERNEL(mean, intel_gpu, ALL_LAYOUT, phi::MeanKernel, float) {} -PD_BUILD_PHI_KERNEL( - sum_raw, intel_gpu, ALL_LAYOUT, custom_kernel::SumRawKernel, float) {} -PD_BUILD_PHI_KERNEL( - sum, intel_gpu, ALL_LAYOUT, custom_kernel::SumKernel, float) {} +PD_BUILD_PHI_KERNEL(sum_raw, intel_gpu, ALL_LAYOUT, phi::SumRawKernel, float) {} +PD_BUILD_PHI_KERNEL(sum, intel_gpu, ALL_LAYOUT, phi::SumKernel, float) {} -PD_BUILD_PHI_KERNEL( - min_raw, intel_gpu, ALL_LAYOUT, custom_kernel::MinRawKernel, float) {} -PD_BUILD_PHI_KERNEL( - min, intel_gpu, ALL_LAYOUT, custom_kernel::MinKernel, float) {} +PD_BUILD_PHI_KERNEL(min_raw, intel_gpu, ALL_LAYOUT, phi::MinRawKernel, float) {} +PD_BUILD_PHI_KERNEL(min, intel_gpu, ALL_LAYOUT, phi::MinKernel, float) {} -PD_BUILD_PHI_KERNEL( - max_raw, intel_gpu, ALL_LAYOUT, custom_kernel::MaxRawKernel, float) {} -PD_BUILD_PHI_KERNEL( - max, intel_gpu, ALL_LAYOUT, custom_kernel::MaxKernel, float) {} +PD_BUILD_PHI_KERNEL(max_raw, intel_gpu, ALL_LAYOUT, phi::MaxRawKernel, float) {} +PD_BUILD_PHI_KERNEL(max, intel_gpu, ALL_LAYOUT, phi::MaxKernel, float) {} diff --git a/backends/intel_gpu/kernels/reshape_kernel.cc b/backends/intel_gpu/kernels/reshape_kernel.cc index 12ad3d7573d..2a19787d01c 100644 --- a/backends/intel_gpu/kernels/reshape_kernel.cc +++ b/backends/intel_gpu/kernels/reshape_kernel.cc @@ -15,7 +15,7 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { static std::vector ValidateShape(const std::vector shape, const std::vector& in_dims) { @@ -120,10 +120,10 @@ static std::vector ValidateShape(const std::vector shape, } template -void ReshapeKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void ReshapeKernel(const Context& dev_ctx, + const DenseTensor& x, const phi::IntArray& shape, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("Reshape type=" << dnn_support::type2String::name()); auto x_dims = x.dims(); auto out_dims = ValidateShape(shape.GetData(), x_dims); @@ -152,20 +152,20 @@ void ReshapeKernel(const phi::Context& dev_ctx, } template -void ReshapeWithXShape(const phi::Context& dev_ctx, - const phi::DenseTensor& x, +void ReshapeWithXShape(const Context& dev_ctx, + const DenseTensor& x, const phi::IntArray& shape, - phi::DenseTensor* out, - phi::DenseTensor* xshape) { + DenseTensor* out, + DenseTensor* xshape) { ReshapeKernel(dev_ctx, x, shape, out); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(reshape, intel_gpu, ALL_LAYOUT, - custom_kernel::ReshapeKernel, + phi::ReshapeKernel, float, double, int8_t, @@ -178,7 +178,7 @@ PD_BUILD_PHI_KERNEL(reshape, PD_BUILD_PHI_KERNEL(reshape_with_xshape, intel_gpu, ALL_LAYOUT, - custom_kernel::ReshapeWithXShape, + phi::ReshapeWithXShape, float, double, int8_t, diff --git a/backends/intel_gpu/kernels/slice_kernel.cc b/backends/intel_gpu/kernels/slice_kernel.cc index 8a6b9d6f496..9f45fb7e9ac 100644 --- a/backends/intel_gpu/kernels/slice_kernel.cc +++ b/backends/intel_gpu/kernels/slice_kernel.cc @@ -15,17 +15,17 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void SliceRawKernel(const phi::Context& ctx, - const phi::DenseTensor& input, +void SliceRawKernel(const Context& ctx, + const DenseTensor& input, const std::vector& axes, const phi::IntArray& starts_arr, const phi::IntArray& ends_arr, const std::vector& infer_flags, const std::vector& decrease_axis, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("SliceRawKernel, type=" << dnn_support::type2String::name()); // Step 1: Get the accurate attribute value of starts and ends @@ -142,12 +142,8 @@ void SliceRawKernel(const phi::Context& ctx, out->Resize(out_dims); } -} // namespace custom_kernel +} // namespace phi -PD_BUILD_PHI_KERNEL(slice, - intel_gpu, - ALL_LAYOUT, - custom_kernel::SliceRawKernel, - int64_t, - float, - double) {} +PD_BUILD_PHI_KERNEL( + slice, intel_gpu, ALL_LAYOUT, phi::SliceRawKernel, int64_t, float, double) { +} diff --git a/backends/intel_gpu/kernels/softmax_kernel.cc b/backends/intel_gpu/kernels/softmax_kernel.cc index 2dd722d84f4..862c5db42b5 100644 --- a/backends/intel_gpu/kernels/softmax_kernel.cc +++ b/backends/intel_gpu/kernels/softmax_kernel.cc @@ -15,7 +15,7 @@ #include "kernels/dnn_support.hpp" #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template T ValueClip(const T& x) { @@ -81,11 +81,11 @@ void SoftmaxGrad( std::shared_ptr softmax_pd = nullptr; template -void SoftmaxGradKernel(const phi::Context& dev_ctx, - const phi::DenseTensor& out, - const phi::DenseTensor& out_grad, +void SoftmaxGradKernel(const Context& dev_ctx, + const DenseTensor& out, + const DenseTensor& out_grad, int axis, - phi::DenseTensor* x_grad) { + DenseTensor* x_grad) { show_kernel("SoftmaxGradKernel()"); const int rank = x_grad->dims().size(); const int calc_axis = phi::funcs::CanonicalAxis(axis, rank); @@ -139,10 +139,10 @@ void SoftmaxGradKernel(const phi::Context& dev_ctx, } template -void SoftmaxKernel(const phi::Context& ctx, - const phi::DenseTensor& x, +void SoftmaxKernel(const Context& ctx, + const DenseTensor& x, int axis, - phi::DenseTensor* out) { + DenseTensor* out) { if constexpr (std::is_same::value) { const int rank = x.dims().size(); const int calc_axis = phi::funcs::CanonicalAxis(axis, rank); @@ -212,17 +212,10 @@ void SoftmaxKernel(const phi::Context& ctx, } } -} // namespace custom_kernel +} // namespace phi -PD_BUILD_PHI_KERNEL(softmax, - intel_gpu, - ALL_LAYOUT, - custom_kernel::SoftmaxKernel, - float, - double) {} +PD_BUILD_PHI_KERNEL( + softmax, intel_gpu, ALL_LAYOUT, phi::SoftmaxKernel, float, double) {} -PD_BUILD_PHI_KERNEL(softmax_grad, - intel_gpu, - ALL_LAYOUT, - custom_kernel::SoftmaxGradKernel, - float) {} +PD_BUILD_PHI_KERNEL( + softmax_grad, intel_gpu, ALL_LAYOUT, phi::SoftmaxGradKernel, float) {} diff --git a/backends/intel_gpu/kernels/transpose_kernel.cc b/backends/intel_gpu/kernels/transpose_kernel.cc index 359cbe0d049..981a27ffdac 100644 --- a/backends/intel_gpu/kernels/transpose_kernel.cc +++ b/backends/intel_gpu/kernels/transpose_kernel.cc @@ -16,13 +16,13 @@ #include "kernels/phi_funcs.h" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template -void TransposeKernelGPU(const phi::Context& ctx, - const phi::DenseTensor& x, +void TransposeKernelGPU(const Context& ctx, + const DenseTensor& x, const std::vector& axis, - phi::DenseTensor* out) { + DenseTensor* out) { show_kernel("TransposeKernelGPU "); using tag = dnnl::memory::format_tag; using dt = dnnl::memory::data_type; @@ -88,10 +88,7 @@ void TransposeKernelGPU(const phi::Context& ctx, reorder_prim.execute(engine_stream, reorder_args); engine_stream.wait(); } -} // namespace custom_kernel +} // namespace phi -PD_BUILD_PHI_KERNEL(transpose, - intel_gpu, - ALL_LAYOUT, - custom_kernel::TransposeKernelGPU, - float) {} +PD_BUILD_PHI_KERNEL( + transpose, intel_gpu, ALL_LAYOUT, phi::TransposeKernelGPU, float) {} diff --git a/backends/intel_gpu/kernels/uniform_random_kernel.cc b/backends/intel_gpu/kernels/uniform_random_kernel.cc index 54bedfac9ea..5f12c4c87a2 100644 --- a/backends/intel_gpu/kernels/uniform_random_kernel.cc +++ b/backends/intel_gpu/kernels/uniform_random_kernel.cc @@ -16,7 +16,7 @@ #include "kernels/dnn_support.hpp" #include "paddle/phi/capi/all.h" -namespace custom_kernel { +namespace phi { template inline void UniformRealDistribution(T *data, @@ -32,7 +32,7 @@ inline void UniformRealDistribution(T *data, } template -void UniformRandomRawKernel(const phi::Context &dev_ctx, +void UniformRandomRawKernel(const Context &dev_ctx, const phi::IntArray &shape, phi::DataType dtype, const phi::Scalar &min, @@ -41,7 +41,7 @@ void UniformRandomRawKernel(const phi::Context &dev_ctx, int diag_num, int diag_step, float diag_val, - phi::DenseTensor *out) { + DenseTensor *out) { show_kernel( "UniformRandom-SYCL type=" << dnn_support::type2String::name()); @@ -51,7 +51,7 @@ void UniformRandomRawKernel(const phi::Context &dev_ctx, auto numel = out->numel(); // // 1. CPU implement - phi::DenseTensor cpu_out; + DenseTensor cpu_out; cpu_out.Resize(std::vector(shape_data.begin(), shape_data.end())); cpu_out.set_dtype(out->dtype()); auto cpu_data = dev_ctx.template HostAlloc(&cpu_out); @@ -84,7 +84,7 @@ void UniformRandomRawKernel(const phi::Context &dev_ctx, } template -void UniformRandomKernel(const phi::Context &dev_ctx, +void UniformRandomKernel(const Context &dev_ctx, const phi::IntArray &shape, phi::DataType dtype, // float min, @@ -92,22 +92,19 @@ void UniformRandomKernel(const phi::Context &dev_ctx, const phi::Scalar &min, const phi::Scalar &max, int seed, - phi::DenseTensor *out) { + DenseTensor *out) { show_kernel( "UniformRandom-SYCL type=" << dnn_support::type2String::name()); - custom_kernel::UniformRandomRawKernel( + phi::UniformRandomRawKernel( dev_ctx, shape, dtype, min, max, seed, 0, 0, 0.0f, out); } -} // namespace custom_kernel +} // namespace phi PD_BUILD_PHI_KERNEL(uniform_random_raw, intel_gpu, ALL_LAYOUT, - custom_kernel::UniformRandomRawKernel, + phi::UniformRandomRawKernel, float) {} -PD_BUILD_PHI_KERNEL(uniform_random, - intel_gpu, - ALL_LAYOUT, - custom_kernel::UniformRandomKernel, - float) {} +PD_BUILD_PHI_KERNEL( + uniform_random, intel_gpu, ALL_LAYOUT, phi::UniformRandomKernel, float) {}