@@ -52,7 +52,7 @@ namespace bm_execution {
5252 std::vector<cl::Buffer> validationBuffers;
5353
5454 // Create all kernels and buffers. One kernel pair is generated per kernel replication to utilize all channels
55- for (int r = 0 ; r < 2 ; r++) {
55+ for (int r = 0 ; r < config. programSettings -> kernelReplications ; r++) {
5656
5757 validationBuffers.push_back (cl::Buffer (*config.context , CL_MEM_WRITE_ONLY, sizeof (HOST_DATA_TYPE) * validationData.size (),0 ,&err));
5858 ASSERT_CL (err)
@@ -90,23 +90,42 @@ namespace bm_execution {
9090 for (uint r =0 ; r < config.programSettings ->numRepetitions ; r++) {
9191 MPI_Barrier (MPI_COMM_WORLD);
9292 auto startCalculation = std::chrono::high_resolution_clock::now ();
93- for (int i = 0 ; i < 2 ; i++) {
93+ for (int i = 0 ; i < config. programSettings -> kernelReplications ; i++) {
9494 sendQueues[i].enqueueNDRangeKernel (sendKernels[i], cl::NullRange, cl::NDRange (1 ));
9595 recvQueues[i].enqueueNDRangeKernel (recvKernels[i], cl::NullRange, cl::NDRange (1 ));
96+ #ifndef NDEBUG
97+ int current_rank;
98+ MPI_Comm_rank (MPI_COMM_WORLD, & current_rank);
99+ std::cout << " Rank " << current_rank << " : Enqueued " << r << " ," << i << std::endl;
100+ #endif
96101 }
97- for (int i = 0 ; i < 2 ; i++) {
102+ for (int i = 0 ; i < config. programSettings -> kernelReplications ; i++) {
98103 sendQueues[i].finish ();
104+ #ifndef NDEBUG
105+ int current_rank;
106+ MPI_Comm_rank (MPI_COMM_WORLD, & current_rank);
107+ std::cout << " Rank " << current_rank << " : Send done " << r << " ," << i << std::endl;
108+ #endif
99109 recvQueues[i].finish ();
110+ #ifndef NDEBUG
111+ MPI_Comm_rank (MPI_COMM_WORLD, & current_rank);
112+ std::cout << " Rank " << current_rank << " : Recv done " << r << " ," << i << std::endl;
113+ #endif
100114 }
101115 auto endCalculation = std::chrono::high_resolution_clock::now ();
102116 std::chrono::duration<double > calculationTime =
103117 std::chrono::duration_cast<std::chrono::duration<double >>
104118 (endCalculation - startCalculation);
105119 calculationTimings.push_back (calculationTime.count ());
120+ #ifndef NDEBUG
121+ int current_rank;
122+ MPI_Comm_rank (MPI_COMM_WORLD, & current_rank);
123+ std::cout << " Rank " << current_rank << " : Done " << r << std::endl;
124+ #endif
106125 }
107126 // The validation data read from the FPGA is placed sequentially in the buffer for all replications
108127 // The data order should not matter, because every byte should have the same value!
109- for (int r = 0 ; r < 2 ; r++) {
128+ for (int r = 0 ; r < config. programSettings -> kernelReplications ; r++) {
110129 err = recvQueues[r].enqueueReadBuffer (validationBuffers[r], CL_TRUE, 0 , sizeof (HOST_DATA_TYPE) * validationData.size () / 2 , &validationData.data ()[r * validationData.size () / 2 ]);
111130 ASSERT_CL (err);
112131 }
0 commit comments