|
1 | | - |
2 | 1 | /** |
3 | | - * Copyright (c) 2019-2021 The University of Tennessee and The University |
| 2 | + * Copyright (c) 2019-2024 The University of Tennessee and The University |
4 | 3 | * of Tennessee Research Foundation. All rights |
5 | 4 | * reserved. |
6 | 5 | * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. |
@@ -103,20 +102,8 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb |
103 | 102 | parsec_matrix_block_cyclic_t *userM; |
104 | 103 |
|
105 | 104 | /** Find all CUDA devices */ |
106 | | - nb = 0; |
107 | | - for(dev = 0; dev < (int)parsec_nb_devices; dev++) { |
108 | | - parsec_device_module_t *device = parsec_mca_device_get(dev); |
109 | | - if( PARSEC_DEV_CUDA == device->type ) { |
110 | | - nb++; |
111 | | - } |
112 | | - } |
113 | | - if(nb == 0) { |
114 | | - char hostname[256]; |
115 | | - gethostname(hostname, 256); |
116 | | - fprintf(stderr, "This test requires at least one CUDA device per node -- no CUDA device found on rank %d on %s\n", |
117 | | - ctx->my_rank, hostname); |
118 | | - return NULL; |
119 | | - } |
| 105 | + nb = parsec_context_query(ctx, PARSEC_CONTEXT_QUERY_DEVICES, PARSEC_DEV_CUDA); |
| 106 | + assert(nb >= 0); |
120 | 107 | dev_index = (int*)malloc(nb * sizeof(int)); |
121 | 108 | nb = 0; |
122 | 109 | for(dev = 0; dev < (int)parsec_nb_devices; dev++) { |
@@ -156,7 +143,7 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb |
156 | 143 |
|
157 | 144 | /* GEMM1 tasks will create one data copy per GPU, and work on those. |
158 | 145 | * see nvlink.jdf:MAKE_C tasks */ |
159 | | - |
| 146 | + |
160 | 147 | /* userM is a user-managed matrix: the user creates the data copies |
161 | 148 | * only on the GPU they want the GEMM2 to run. To simplify the code, |
162 | 149 | * we use parsec_matrix_block_cyclic that requires to also have a CPU data |
@@ -208,14 +195,14 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb |
208 | 195 | g++; |
209 | 196 | } |
210 | 197 | } |
211 | | - |
| 198 | + |
212 | 199 | testing_handle = parsec_nvlink_new(dcA, userM, ctx->nb_nodes, CuHI, nb, dev_index); |
213 | 200 |
|
214 | 201 | parsec_add2arena( &testing_handle->arenas_datatypes[PARSEC_nvlink_DEFAULT_ADT_IDX], |
215 | 202 | parsec_datatype_double_complex_t, |
216 | 203 | PARSEC_MATRIX_FULL, 1, mb, mb, mb, |
217 | 204 | PARSEC_ARENA_ALIGNMENT_SSE, -1 ); |
218 | | - |
| 205 | + |
219 | 206 | return &testing_handle->super; |
220 | 207 | } |
221 | 208 |
|
0 commit comments