@@ -953,9 +953,9 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
953953
954954 /* Skip CTL flows only */
955955 if (PARSEC_FLOW_ACCESS_NONE == (PARSEC_FLOW_ACCESS_MASK & flow -> flow_flags )) {
956- gpu_task -> flow_nb_elts [i ] = 0 ; /* assume there is nothing to transfer to the GPU */
956+ gpu_task -> flow_nb_elts [i ] = 0 ; /* assume there is nothing to transfer to the GPU */
957957 continue ;
958- }
958+ }
959959
960960 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
961961 "GPU[%d:%s]:%s: Investigating flow %s:%d" ,
@@ -970,7 +970,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
970970 gpu_device -> super .device_index , gpu_device -> super .name , task_name ,
971971 flow -> name , i , gpu_elem ,
972972 this_task -> data [i ].data_in -> data_transfer_status == PARSEC_DATA_STATUS_UNDER_TRANSFER ? " [in transfer]" : "" );
973- this_task -> data [i ].data_out = this_task -> data [i ].data_in ;
973+ this_task -> data [i ].data_out = this_task -> data [i ].data_in ;
974974 continue ;
975975 }
976976 master = this_task -> data [i ].data_in -> original ;
@@ -2476,7 +2476,10 @@ parsec_device_kernel_epilog( parsec_device_gpu_module_t *gpu_device,
24762476 gpu_copy -> coherency_state = PARSEC_DATA_COHERENCY_SHARED ;
24772477 assert (PARSEC_DATA_STATUS_UNDER_TRANSFER == cpu_copy -> data_transfer_status );
24782478 cpu_copy -> data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER ;
2479-
2479+ if ( 0 == (parsec_mpi_allow_gpu_memory_communications & PARSEC_RUNTIME_SEND_FROM_GPU_MEMORY ) ) {
2480+ /* Report the CPU copy as the output of the task. */
2481+ this_task -> data [i ].data_out = cpu_copy ;
2482+ }
24802483 PARSEC_DEBUG_VERBOSE (20 , parsec_gpu_output_stream ,
24812484 "GPU copy %p [ref_count %d] moved to the read LRU in %s" ,
24822485 gpu_copy , gpu_copy -> super .super .obj_reference_count , __func__ );
0 commit comments