88#include < cstdlib> // For aligned_alloc
99#include " OMPStream.h"
1010
11+ #if defined(PAGEFAULT)
12+ #pragma omp requires unified_shared_memory
13+ #endif
14+
1115#ifndef ALIGNMENT
1216#define ALIGNMENT (2 *1024 *1024 ) // 2MB
1317#endif
@@ -24,20 +28,22 @@ OMPStream<T>::OMPStream(const intptr_t ARRAY_SIZE, int device)
2428
2529#ifdef OMP_TARGET_GPU
2630 omp_set_default_device (device);
27- T *a = this ->a ;
28- T *b = this ->b ;
29- T *c = this ->c ;
30- // Set up data region on device
31- #pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
32- {}
31+ #if !defined(PAGEFAULT)
32+ T *a = this ->a ;
33+ T *b = this ->b ;
34+ T *c = this ->c ;
35+ // Set up data region on device
36+ #pragma omp target enter data map(alloc: a[0:array_size], b[0:array_size], c[0:array_size])
37+ {}
38+ #endif
3339#endif
3440
3541}
3642
3743template <class T >
3844OMPStream<T>::~OMPStream ()
3945{
40- #ifdef OMP_TARGET_GPU
46+ #if defined( OMP_TARGET_GPU) && !defined(PAGEFAULT)
4147 // End data region on device
4248 intptr_t array_size = this ->array_size ;
4349 T *a = this ->a ;
@@ -55,7 +61,7 @@ template <class T>
5561void OMPStream<T>::init_arrays(T initA, T initB, T initC)
5662{
5763 intptr_t array_size = this ->array_size ;
58- #ifdef OMP_TARGET_GPU
64+ #if defined( OMP_TARGET_GPU) && !defined(PAGEFAULT)
5965 T *a = this ->a ;
6066 T *b = this ->b ;
6167 T *c = this ->c ;
@@ -69,7 +75,7 @@ void OMPStream<T>::init_arrays(T initA, T initB, T initC)
6975 b[i] = initB;
7076 c[i] = initC;
7177 }
72- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
78+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
7379 // If using the Cray compiler, the kernels do not block, so this update forces
7480 // a small copy to ensure blocking so that timing is correct
7581 #pragma omp target update from(a[0:0])
@@ -80,7 +86,7 @@ template <class T>
8086void OMPStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::vector<T>& h_c)
8187{
8288
83- #ifdef OMP_TARGET_GPU
89+ #if defined( OMP_TARGET_GPU) && !defined(PAGEFAULT)
8490 T *a = this ->a ;
8591 T *b = this ->b ;
8692 T *c = this ->c ;
@@ -101,7 +107,7 @@ void OMPStream<T>::read_arrays(std::vector<T>& h_a, std::vector<T>& h_b, std::ve
101107template <class T >
102108void OMPStream<T>::copy()
103109{
104- #ifdef OMP_TARGET_GPU
110+ #if defined( OMP_TARGET_GPU) && !defined(PAGEFAULT)
105111 intptr_t array_size = this ->array_size ;
106112 T *a = this ->a ;
107113 T *c = this ->c ;
@@ -113,7 +119,7 @@ void OMPStream<T>::copy()
113119 {
114120 c[i] = a[i];
115121 }
116- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
122+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
117123 // If using the Cray compiler, the kernels do not block, so this update forces
118124 // a small copy to ensure blocking so that timing is correct
119125 #pragma omp target update from(a[0:0])
@@ -126,9 +132,11 @@ void OMPStream<T>::mul()
126132 const T scalar = startScalar;
127133
128134#ifdef OMP_TARGET_GPU
129- intptr_t array_size = this ->array_size ;
130- T *b = this ->b ;
131- T *c = this ->c ;
135+ #if !defined(PAGEFAULT)
136+ intptr_t array_size = this ->array_size ;
137+ T *b = this ->b ;
138+ T *c = this ->c ;
139+ #endif
132140 #pragma omp target teams distribute parallel for simd
133141#else
134142 #pragma omp parallel for
@@ -137,7 +145,7 @@ void OMPStream<T>::mul()
137145 {
138146 b[i] = scalar * c[i];
139147 }
140- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
148+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
141149 // If using the Cray compiler, the kernels do not block, so this update forces
142150 // a small copy to ensure blocking so that timing is correct
143151 #pragma omp target update from(c[0:0])
@@ -148,10 +156,12 @@ template <class T>
148156void OMPStream<T>::add()
149157{
150158#ifdef OMP_TARGET_GPU
151- intptr_t array_size = this ->array_size ;
152- T *a = this ->a ;
153- T *b = this ->b ;
154- T *c = this ->c ;
159+ #if !defined(PAGEFAULT)
160+ intptr_t array_size = this ->array_size ;
161+ T *a = this ->a ;
162+ T *b = this ->b ;
163+ T *c = this ->c ;
164+ #endif
155165 #pragma omp target teams distribute parallel for simd
156166#else
157167 #pragma omp parallel for
@@ -160,7 +170,7 @@ void OMPStream<T>::add()
160170 {
161171 c[i] = a[i] + b[i];
162172 }
163- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
173+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
164174 // If using the Cray compiler, the kernels do not block, so this update forces
165175 // a small copy to ensure blocking so that timing is correct
166176 #pragma omp target update from(a[0:0])
@@ -173,10 +183,12 @@ void OMPStream<T>::triad()
173183 const T scalar = startScalar;
174184
175185#ifdef OMP_TARGET_GPU
176- intptr_t array_size = this ->array_size ;
177- T *a = this ->a ;
178- T *b = this ->b ;
179- T *c = this ->c ;
186+ #if !defined(PAGEFAULT)
187+ intptr_t array_size = this ->array_size ;
188+ T *a = this ->a ;
189+ T *b = this ->b ;
190+ T *c = this ->c ;
191+ #endif
180192 #pragma omp target teams distribute parallel for simd
181193#else
182194 #pragma omp parallel for
@@ -185,7 +197,7 @@ void OMPStream<T>::triad()
185197 {
186198 a[i] = b[i] + scalar * c[i];
187199 }
188- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
200+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
189201 // If using the Cray compiler, the kernels do not block, so this update forces
190202 // a small copy to ensure blocking so that timing is correct
191203 #pragma omp target update from(a[0:0])
@@ -198,10 +210,12 @@ void OMPStream<T>::nstream()
198210 const T scalar = startScalar;
199211
200212#ifdef OMP_TARGET_GPU
201- intptr_t array_size = this ->array_size ;
202- T *a = this ->a ;
203- T *b = this ->b ;
204- T *c = this ->c ;
213+ #if !defined(PAGEFAULT)
214+ intptr_t array_size = this ->array_size ;
215+ T *a = this ->a ;
216+ T *b = this ->b ;
217+ T *c = this ->c ;
218+ #endif
205219 #pragma omp target teams distribute parallel for simd
206220#else
207221 #pragma omp parallel for
@@ -210,7 +224,7 @@ void OMPStream<T>::nstream()
210224 {
211225 a[i] += b[i] + scalar * c[i];
212226 }
213- #if defined(OMP_TARGET_GPU) && defined(_CRAYC)
227+ #if defined(OMP_TARGET_GPU) && defined(_CRAYC) && !defined(PAGEFAULT)
214228 // If using the Cray compiler, the kernels do not block, so this update forces
215229 // a small copy to ensure blocking so that timing is correct
216230 #pragma omp target update from(a[0:0])
@@ -223,9 +237,11 @@ T OMPStream<T>::dot()
223237 T sum{};
224238
225239#ifdef OMP_TARGET_GPU
226- intptr_t array_size = this ->array_size ;
227- T *a = this ->a ;
228- T *b = this ->b ;
240+ #if !defined(PAGEFAULT)
241+ intptr_t array_size = this ->array_size ;
242+ T *a = this ->a ;
243+ T *b = this ->b ;
244+ #endif
229245 #pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
230246#else
231247 #pragma omp parallel for reduction(+:sum)
0 commit comments