Skip to content

Commit bd35c5d

Browse files
committed
Provide a way to do GPU masking.
Signed-off-by: George Bosilca <gbosilca@nvidia.com>
1 parent d643750 commit bd35c5d

2 files changed

Lines changed: 39 additions & 18 deletions

File tree

tests/runtime/cuda/stress_main.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "parsec/data_distribution.h"
33
#include "parsec/data_dist/matrix/matrix.h"
44
#include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h"
5+
#include "parsec/utils/mca_param.h"
56

67
#include "stress.h"
78
#include "stress_wrapper.h"
@@ -15,7 +16,7 @@ int main(int argc, char *argv[])
1516
parsec_context_t *parsec = NULL;
1617
parsec_taskpool_t *tp;
1718
int size = 1;
18-
int rank = 0;
19+
int rank = 0, nb_gpus = 1;
1920

2021
#if defined(DISTRIBUTED)
2122
{
@@ -24,6 +25,27 @@ int main(int argc, char *argv[])
2425
}
2526
MPI_Comm_size(MPI_COMM_WORLD, &size);
2627
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
28+
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
29+
{
30+
MPI_Comm local_comm;
31+
int local_rank, local_size;
32+
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
33+
MPI_INFO_NULL, &local_comm);
34+
MPI_Comm_rank(local_comm, &local_rank);
35+
MPI_Comm_size(local_comm, &local_size);
36+
MPI_Comm_free(&local_comm);
37+
int gpu_mask = 0;
38+
for (int i = 0; i < nb_gpus; i++)
39+
{
40+
gpu_mask |= ((1 << local_rank) << i);
41+
}
42+
char *value;
43+
asprintf(&value, "%d", gpu_mask);
44+
parsec_setenv_mca_param("device_cuda_mask", value, &environ);
45+
free(value);
46+
value = NULL;
47+
}
48+
#endif /* defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)*/
2749
#endif /* DISTRIBUTED */
2850

2951
parsec = parsec_init(-1, &argc, &argv);

tests/runtime/cuda/testing_get_best_device.c

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,10 @@ int main(int argc, char *argv[])
4545
char **pargv;
4646

4747
/* Default */
48-
int m = 0;
49-
int N = 8;
50-
int NB = 4;
51-
int P = 1;
52-
int KP = 1;
53-
int KQ = 1;
54-
int cores = -1;
55-
int nb_gpus = 0;
56-
int info = 0;
57-
58-
while ((ch = getopt(argc, argv, "m:N:t:s:S:P:c:g:h")) != -1) {
48+
int m = 0, N = 8, NB = 4, P = 1, KP = 1, KQ = 1;
49+
int cores = -1, nb_gpus = 0, nb_avail_gpu = 0, info = 0, gpu_mask = 0xFF;
50+
51+
while ((ch = getopt(argc, argv, "m:N:t:s:S:P:c:g:G:h")) != -1) {
5952
switch (ch) {
6053
case 'm': m = atoi(optarg); break;
6154
case 'N': N = atoi(optarg); break;
@@ -65,6 +58,7 @@ int main(int argc, char *argv[])
6558
case 'P': P = atoi(optarg); break;
6659
case 'c': cores = atoi(optarg); break;
6760
case 'g': nb_gpus = atoi(optarg); break;
61+
case 'G': gpu_mask = (int)strtol(optarg, NULL, 0); break; /* base 0: accept hex (0x..) masks as advertised by the help text, as well as decimal */
6862
case '?': case 'h': default:
6963
fprintf(stderr,
7064
"-m : initialize MPI_THREAD_MULTIPLE (default: 0/no)\n"
@@ -75,6 +69,7 @@ int main(int argc, char *argv[])
7569
"-P : rows (P) in the PxQ process grid (default: 1)\n"
7670
"-c : number of cores used (default: -1)\n"
7771
"-g : number of GPUs used (default: 0)\n"
72+
"-G : mask of the GPUs to be used (default: 0xff)\n"
7873
"-h : print this help message\n"
7974
"\n");
8075
exit(1);
@@ -102,16 +97,20 @@ int main(int argc, char *argv[])
10297
break;
10398
}
10499
}
105-
106100
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
107101
extern char **environ;
108102
char *value;
109103
if( nb_gpus < 1 && 0 == rank ) {
110-
fprintf(stderr, "Warning: if run on GPUs, please set --gpus=value bigger than 0\n");
104+
fprintf(stderr, "Warning: if run on GPUs, please set -g value bigger than 0\n");
111105
}
112106
asprintf(&value, "%d", nb_gpus);
113107
parsec_setenv_mca_param( "device_cuda_enabled", value, &environ );
114-
free(value);
108+
free(value); value = NULL;
109+
if( 0xFF != gpu_mask ) {
110+
asprintf(&value, "%d", gpu_mask);
111+
parsec_setenv_mca_param("device_cuda_mask", value, &environ);
112+
free(value); value = NULL;
113+
}
115114
#endif
116115

117116
/* Initialize PaRSEC */
@@ -134,7 +133,7 @@ int main(int argc, char *argv[])
134133
}
135134
cores = nb_total_comp_threads;
136135
}
137-
136+
nb_avail_gpu = parsec_context_query(parsec, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA);
138137
/* initializing matrix structure */
139138
parsec_matrix_block_cyclic_t dcA;
140139
parsec_matrix_block_cyclic_init(&dcA, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE,
@@ -153,9 +152,9 @@ int main(int argc, char *argv[])
153152
/* Main routines */
154153
SYNC_TIME_START();
155154
info = parsec_get_best_device_check(parsec, (parsec_tiled_matrix_t *)&dcA);
156-
SYNC_TIME_PRINT(rank, ("Get_best_device" "\tN= %d NB= %d "
155+
SYNC_TIME_PRINT(rank, ("Get_best_device\tN= %d NB= %d "
157156
"PxQ= %d %d KPxKQ= %d %d cores= %d nb_gpus= %d\n",
158-
N, NB, P, nodes/P, KP, KQ, cores, parsec_nb_devices-2));
157+
N, NB, P, nodes / P, KP, KQ, cores, nb_avail_gpu));
159158

160159
/* Check result */
161160
if( 0 == rank && info != 0 ) {

0 commit comments

Comments
 (0)