Skip to content

Commit 6dcd4ba

Browse files
author
root
committed
Updated paddle patch file for version 3.0.0
1 parent 3ed5035 commit 6dcd4ba

1 file changed

Lines changed: 120 additions & 0 deletions

File tree

p/paddle/paddle_v3.0.0.patch

Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,120 @@
1+
diff --git a/paddle/phi/common/cpstring_impl.h b/paddle/phi/common/cpstring_impl.h
2+
index 33780d1538..4b6cfd7889 100644
3+
--- a/paddle/phi/common/cpstring_impl.h
4+
+++ b/paddle/phi/common/cpstring_impl.h
5+
@@ -203,7 +203,6 @@ HOSTDEVICE static inline void *PD_Memcpy(void *dst,
6+
}
7+
return dst;
8+
}
9+
-
10+
HOSTDEVICE static inline void *PD_Malloc(size_t size) { return malloc(size); }
11+
12+
HOSTDEVICE static inline void *PD_Realloc(void *ptr,
13+
diff --git a/paddle/phi/core/platform/denormal.cc b/paddle/phi/core/platform/denormal.cc
14+
index 93c55e84be..f9c34c869a 100644
15+
--- a/paddle/phi/core/platform/denormal.cc
16+
+++ b/paddle/phi/core/platform/denormal.cc
17+
@@ -30,7 +30,8 @@
18+
19+
#if !defined(GCC_WITHOUT_INTRINSICS) && !defined(PADDLE_WITH_ARM) && \
20+
!defined(PADDLE_WITH_SW) && !defined(PADDLE_WITH_MIPS) && \
21+
- !defined(_WIN32) && !defined(PADDLE_WITH_LOONGARCH)
22+
+ !defined(_WIN32) && !defined(PADDLE_WITH_LOONGARCH) && \
23+
+ !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__)
24+
#define DENORM_USE_INTRINSICS
25+
#endif
26+
27+
diff --git a/paddle/phi/kernels/funcs/search_compute.h b/paddle/phi/kernels/funcs/search_compute.h
28+
index b51f10cce9..ae25b45cfb 100644
29+
--- a/paddle/phi/kernels/funcs/search_compute.h
30+
+++ b/paddle/phi/kernels/funcs/search_compute.h
31+
@@ -15,7 +15,8 @@
32+
#pragma once
33+
34+
#if !defined(PADDLE_WITH_ARM) && !defined(PADDLE_WITH_SW) && \
35+
- !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH)
36+
+ !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH) && \
37+
+ (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86))
38+
#include <immintrin.h>
39+
#endif
40+
#include <cfloat>
41+
@@ -101,7 +102,8 @@ void call_gemm_batched(const Context& ctx,
42+
}
43+
44+
#if !defined(PADDLE_WITH_ARM) && !defined(PADDLE_WITH_SW) && \
45+
- !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH)
46+
+ !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH) && \
47+
+ (defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86))
48+
49+
#define __m256x __m256
50+
51+
@@ -144,7 +146,7 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) {
52+
#elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \
53+
defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH)
54+
PADDLE_THROW(common::errors::Unimplemented("axpy is not supported"));
55+
-#else
56+
+#elif defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
57+
lll = len & ~SSE_CUT_LEN_MASK;
58+
__m128x mm_alpha = _mm_load1_px(&alpha);
59+
for (jjj = 0; jjj < lll; jjj += SSE_STEP_SIZE) {
60+
@@ -174,7 +176,7 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) {
61+
#elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \
62+
defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH)
63+
PADDLE_THROW(common::errors::Unimplemented("axpy_noadd is not supported"));
64+
-#else
65+
+#elif defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
66+
lll = len & ~SSE_CUT_LEN_MASK;
67+
__m128x mm_alpha = _mm_load1_px(&alpha);
68+
for (jjj = 0; jjj < lll; jjj += SSE_STEP_SIZE) {
69+
diff --git a/paddle/phi/kernels/funcs/softmax_impl.h b/paddle/phi/kernels/funcs/softmax_impl.h
70+
index 8f6b0fdd32..9b094fd237 100644
71+
--- a/paddle/phi/kernels/funcs/softmax_impl.h
72+
+++ b/paddle/phi/kernels/funcs/softmax_impl.h
73+
@@ -237,14 +237,16 @@ class SoftmaxFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
74+
const int axis_dim,
75+
const phi::DenseTensor* X,
76+
phi::DenseTensor* Y) {
77+
+#if !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__)
78+
const auto& in_dims = X->dims();
79+
constexpr int kBatchDim = 0;
80+
constexpr int kClassDim = 1;
81+
-
82+
const int num_classes = in_dims[kClassDim];
83+
const int batch_size = in_dims[kBatchDim];
84+
const int num_remain = num_classes / axis_dim;
85+
86+
+ const int batch_size = in_dims[kBatchDim];
87+
+ const int num_remain = num_classes / axis_dim;
88+
if (num_remain == 1 &&
89+
phi::backends::cpu::MayIUse(phi::backends::cpu::avx)) {
90+
const T* in_data = X->data<T>();
91+
@@ -267,7 +269,9 @@ class SoftmaxFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
92+
in_data += num_classes;
93+
out_data += num_classes;
94+
}
95+
- } else {
96+
+ } else
97+
+#endif
98+
+ {
99+
SoftmaxEigen<DeviceContext, T>()(context, axis_dim, X, Y);
100+
}
101+
}
102+
@@ -394,6 +398,7 @@ class SoftmaxGradFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
103+
const phi::DenseTensor* y,
104+
const phi::DenseTensor* y_grad,
105+
phi::DenseTensor* x_grad) {
106+
+#if !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__)
107+
const auto& out_dims = y->dims();
108+
constexpr int kBatchDim = 0;
109+
constexpr int kClassDim = 1;
110+
@@ -419,7 +424,9 @@ class SoftmaxGradFunctor<DeviceContext, T, enable_if_CPU<DeviceContext>> {
111+
out_grad += num_classes;
112+
in_grad += num_classes;
113+
}
114+
- } else {
115+
+ } else
116+
+#endif
117+
+ {
118+
SoftmaxGradEigen<DeviceContext, T>()(
119+
context, axis_dim, y, y_grad, x_grad);
120+
}

0 commit comments

Comments
 (0)