@@ -185,7 +185,7 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
185185 err =kernels.back ().back ().setArg (3 , config.programSettings ->matrixSize / config.programSettings ->blockSize );
186186 ASSERT_CL (err)
187187 all_events.back ().emplace_back ();
188- err = lu_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &all_events.back ().back ());
188+ err = lu_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &all_events.back ().back ());
189189 ASSERT_CL (err)
190190
191191
@@ -216,11 +216,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
216216
217217 if (tops + 1 == (config.programSettings ->matrixSize / config.programSettings ->blockSize )) {
218218 all_events.back ().emplace_back ();
219- err = top_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
219+ err = top_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
220220 ASSERT_CL (err)
221221 }
222222 else {
223- err = top_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
223+ err = top_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
224224 ASSERT_CL (err)
225225 }
226226
@@ -253,11 +253,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
253253
254254 if (tops + 1 == (config.programSettings ->matrixSize / config.programSettings ->blockSize )) {
255255 all_events.back ().emplace_back ();
256- err = left_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
256+ err = left_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
257257 ASSERT_CL (err)
258258 }
259259 else {
260- err = left_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
260+ err = left_queues.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
261261 ASSERT_CL (err)
262262 }
263263 network_layer_op_flags[0 ] |= LEFT_BLOCK ;
@@ -325,7 +325,7 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
325325 err = kernels.back ().back ().setArg (1 , network_forward_flags);
326326 ASSERT_CL (err)
327327
328- err = network_queues_bottomright.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
328+ err = network_queues_bottomright.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
329329 ASSERT_CL (err)
330330 }
331331 // Create the network kernel for down -> top direction
@@ -344,11 +344,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
344344
345345 if (std::distance (it,network_layer_op_flags.end ()) == 1 ) {
346346 all_events.back ().emplace_back ();
347- err = network_queues_top.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
347+ err = network_queues_top.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
348348 ASSERT_CL (err)
349349 }
350350 else {
351- err = network_queues_top.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
351+ err = network_queues_top.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
352352 ASSERT_CL (err)
353353 }
354354
@@ -368,11 +368,11 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
368368
369369 if (std::distance (it,network_layer_op_flags.end ()) == 1 ) {
370370 all_events.back ().emplace_back ();
371- err = network_queues_left.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
371+ err = network_queues_left.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
372372 ASSERT_CL (err)
373373 }
374374 else {
375- err = network_queues_left.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
375+ err = network_queues_left.back ().enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
376376 ASSERT_CL (err)
377377 }
378378
@@ -418,16 +418,16 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
418418 // this is the last task that will be enqueued in this queue, so create an event
419419 all_events.back ().emplace_back ();
420420 // Distribute the workload over all available matrix multiplication kernels
421- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
422- // err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange , &communication_events, &(all_events.back().back()));
421+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
422+ // err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1) , &communication_events, &(all_events.back().back()));
423423 }
424424 else {
425425#ifndef NDEBUG
426426 std::cout << " Torus " << config.programSettings ->torus_row << " ," << config.programSettings ->torus_col << " Inner L " << block_row << " ," << block_col << std::endl;
427427#endif
428428 // Distribute the workload over all available matrix multiplication kernels
429- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
430- // err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange , &communication_events);
429+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
430+ // err = inner_queues.back()[(current_replication)].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1) , &communication_events);
431431 }
432432 current_update++;
433433 current_replication = (current_replication + 1 ) % config.programSettings ->kernelReplications ;
@@ -463,14 +463,14 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
463463 // this is the last task that will be enqueued in this queue, so create an event
464464 all_events.back ().emplace_back ();
465465 // Distribute the workload over all available matrix multiplication kernels
466- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
466+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))), &(all_events.back ().back ()));
467467 }
468468 else {
469469#ifndef NDEBUG
470470 std::cout << " Torus " << config.programSettings ->torus_row << " ," << config.programSettings ->torus_col << " Inner T " << block_row << " ," << block_col << std::endl;
471471#endif
472472 // Distribute the workload over all available matrix multiplication kernels
473- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (all_events.end ()))));
473+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (all_events.end ()))));
474474 }
475475 ASSERT_CL (err)
476476 current_update++;
@@ -518,14 +518,14 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
518518 // this is the last task that will be enqueued in this queue, so create an event
519519 all_events.back ().emplace_back ();
520520 // Distribute the workload over all available matrix multiplication kernels
521- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (std::prev (all_events.end ())))), &(all_events.back ().back ()));
521+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (std::prev (all_events.end ())))), &(all_events.back ().back ()));
522522 }
523523 else {
524524#ifndef NDEBUG
525525 std::cout << " Torus " << config.programSettings ->torus_row << " ," << config.programSettings ->torus_col << " Inner " << block_row << " ," << block_col << std::endl;
526526#endif
527527 // Distribute the workload over all available matrix multiplication kernels
528- err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NullRange , &(*std::prev (std::prev (std::prev (all_events.end ())))));
528+ err = inner_queues.back ()[(current_replication)].enqueueNDRangeKernel (kernels.back ().back (), cl::NullRange, cl::NDRange (1 ), cl::NDRange ( 1 ) , &(*std::prev (std::prev (std::prev (all_events.end ())))));
529529 }
530530
531531 ASSERT_CL (err)
@@ -563,13 +563,13 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
563563// // this is the last task that will be enqueued in this queue, so create an event
564564// all_events.back().emplace_back();
565565// // Distribute the workload over all available matrix multiplication kernels
566- // err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange , &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
566+ // err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1) , &(*std::prev(std::prev(all_events.end()))), &(all_events.back().back()));
567567// current_update = 0;
568568// current_replication++;
569569// }
570570// else {
571571// // Distribute the workload over all available matrix multiplication kernels
572- // err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NullRange , &(*std::prev(std::prev(all_events.end()))));
572+ // err = inner_queues.back()[(current_replication) + 1].enqueueNDRangeKernel(kernels.back().back(), cl::NullRange, cl::NDRange(1), cl::NDRange(1) , &(*std::prev(std::prev(all_events.end()))));
573573// current_update++;
574574// }
575575 }
0 commit comments