@@ -111,7 +111,6 @@ struct cudaFunctionTable {
111111typedef struct cudaFunctionTable cudaFunctionTable_t ;
112112static cudaFunctionTable_t cuFunc ;
113113
114- #define NB_IPC_STREAM 4
115114
116115static int stage_one_init_ref_count = 0 ;
117116static bool stage_three_init_complete = false;
@@ -123,8 +122,7 @@ bool mca_common_cuda_enabled = false;
123122static bool mca_common_cuda_register_memory = true;
124123static bool mca_common_cuda_warning = false;
125124static opal_list_t common_cuda_memory_registrations ;
126- static CUstream ipcStream [NB_IPC_STREAM ];
127- static int current_ipc_stream_id = 0 ;
125+ static CUstream ipcStream = NULL ;
128126static CUstream dtohStream = NULL ;
129127static CUstream htodStream = NULL ;
130128static CUstream memcpyStream = NULL ;
@@ -821,14 +819,12 @@ static int mca_common_cuda_stage_three_init(void)
821819 }
822820
823821 /* Create stream for use in ipc asynchronous copies */
824- for (i = 0 ; i < NB_IPC_STREAM ; i ++ ) {
825- res = cuFunc .cuStreamCreate (& ipcStream [i ], 0 );
826- if (OPAL_UNLIKELY (res != CUDA_SUCCESS )) {
827- opal_show_help ("help-mpi-common-cuda.txt" , "cuStreamCreate failed" ,
828- true, OPAL_PROC_MY_HOSTNAME , res );
829- rc = OPAL_ERROR ;
830- goto cleanup_and_error ;
831- }
822+ res = cuFunc .cuStreamCreate (& ipcStream , 0 );
823+ if (OPAL_UNLIKELY (res != CUDA_SUCCESS )) {
824+ opal_show_help ("help-mpi-common-cuda.txt" , "cuStreamCreate failed" ,
825+ true, OPAL_PROC_MY_HOSTNAME , res );
826+ rc = OPAL_ERROR ;
827+ goto cleanup_and_error ;
832828 }
833829
834830 /* Create stream for use in dtoh asynchronous copies */
@@ -1010,10 +1006,8 @@ void mca_common_cuda_fini(void)
10101006 if (NULL != cuda_event_unpack_callback_frag_array ) {
10111007 free (cuda_event_unpack_callback_frag_array );
10121008 }
1013- for (i = 0 ; i < NB_IPC_STREAM ; i ++ ) {
1014- if ((NULL != ipcStream [i ]) && ctx_ok ) {
1015- cuFunc .cuStreamDestroy (ipcStream [i ]);
1016- }
1009+ if ((NULL != ipcStream ) && ctx_ok ) {
1010+ cuFunc .cuStreamDestroy (ipcStream );
10171011 }
10181012 if ((NULL != dtohStream ) && ctx_ok ) {
10191013 cuFunc .cuStreamDestroy (dtohStream );
@@ -1427,7 +1421,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
14271421 * to measure the advantages of asynchronous copies. */
14281422 if (OPAL_LIKELY (mca_common_cuda_async )) {
14291423 // printf("I use async memcpy\n");
1430- result = cuFunc .cuMemcpyAsync ((CUdeviceptr )dst , (CUdeviceptr )src , amount , ipcStream [ current_ipc_stream_id ] );
1424+ result = cuFunc .cuMemcpyAsync ((CUdeviceptr )dst , (CUdeviceptr )src , amount , ipcStream );
14311425 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
14321426 opal_show_help ("help-mpi-common-cuda.txt" , "cuMemcpyAsync failed" ,
14331427 true, dst , src , amount , result );
@@ -1438,11 +1432,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
14381432 "CUDA: cuMemcpyAsync passed: dst=%p, src=%p, size=%d" ,
14391433 dst , src , (int )amount );
14401434 }
1441- result = cuFunc .cuEventRecord (cuda_event_ipc_array [cuda_event_ipc_first_avail ], ipcStream [current_ipc_stream_id ]);
1442- current_ipc_stream_id ++ ;
1443- if (current_ipc_stream_id >= NB_IPC_STREAM ) {
1444- current_ipc_stream_id = 0 ;
1445- }
1435+ result = cuFunc .cuEventRecord (cuda_event_ipc_array [cuda_event_ipc_first_avail ], ipcStream );
14461436 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
14471437 opal_show_help ("help-mpi-common-cuda.txt" , "cuEventRecord failed" ,
14481438 true, OPAL_PROC_MY_HOSTNAME , result );
@@ -1461,7 +1451,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
14611451 * done = 0 ;
14621452 } else {
14631453 /* Mimic the async function so they use the same memcpy call. */
1464- result = cuFunc .cuMemcpyAsync ((CUdeviceptr )dst , (CUdeviceptr )src , amount , ipcStream [ 0 ] );
1454+ result = cuFunc .cuMemcpyAsync ((CUdeviceptr )dst , (CUdeviceptr )src , amount , ipcStream );
14651455 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
14661456 opal_show_help ("help-mpi-common-cuda.txt" , "cuMemcpyAsync failed" ,
14671457 true, dst , src , amount , result );
@@ -1474,7 +1464,7 @@ int mca_common_cuda_memcpy(void *dst, void *src, size_t amount, char *msg,
14741464 }
14751465
14761466 /* Record an event, then wait for it to complete with calls to cuEventQuery */
1477- result = cuFunc .cuEventRecord (cuda_event_ipc_array [cuda_event_ipc_first_avail ], ipcStream [ 0 ] );
1467+ result = cuFunc .cuEventRecord (cuda_event_ipc_array [cuda_event_ipc_first_avail ], ipcStream );
14781468 if (OPAL_UNLIKELY (CUDA_SUCCESS != result )) {
14791469 opal_show_help ("help-mpi-common-cuda.txt" , "cuEventRecord failed" ,
14801470 true, OPAL_PROC_MY_HOSTNAME , result );
0 commit comments