3535
3636#include < atomic>
3737#include < ctime>
38- #include < iomanip>
3938#include < iostream>
39+ #include < format>
40+ #include < string>
41+
4042
4143#ifndef _WIN32
4244#include < unistd.h>
@@ -215,31 +217,57 @@ int32_t GPUReconstructionCPU::ExitDevice()
215217}
216218
217219namespace {
220+
218221 void write_header (std::ostream& stream) {
219- stream << " type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
222+ if (stream.rdbuf () == std::cout.rdbuf ()) {
223+ stream << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
224+ stream << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
225+ } else {
226+ stream << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
227+ }
220228 }
221229
222230 struct Row {
223- std::string type = " " ;
224- std::string name = " " ;
225- double kernel_time = -1.0 ;
231+ char type = ' ' ;
232+ std::string name;
233+ uint32_t count = 0 ;
234+ double gpu_time = -1.0 ;
226235 double cpu_time = -1.0 ;
227236 double total_time = -1.0 ;
228- size_t memSize = 0 ;
229- uint32_t count = 0 ;
237+ uint32_t memSize = 0 ;
238+ uint32_t statNEvents;
239+
240+ void test (std::ostream& stream) {
241+ // static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
242+ double scale = 1000000.0 / statNEvents;
243+ stream << " | " << type << " | " ;
244+ if (count != 0 ) stream << std::format (" {:6} |" , count);
245+ else stream << " |" ;
246+ stream << std::format (" {:41}" , name) << " |" << std::format (" {:10.0f}" , gpu_time * scale) << " |" ;
247+ if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
248+ else stream << " |" ;
249+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
250+ else stream << " |" ;
251+ if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
252+ else stream << " |" ;
253+ if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |" , memSize / gpu_time * 1e-9 ) << std::format (" {:14} |" , memSize / statNEvents) << std::format (" {:14} |" , memSize / statNEvents / count);
254+ else stream << " | | |" ;
255+ stream << std::endl;
256+ }
230257
231- void write (std::ostream& stream, uint32_t statNEvents ) {
258+ void write (std::ostream& stream) {
232259 double scale = 1000000.0 / statNEvents;
260+ // stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, gpu_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / gpu_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
233261 stream << type << " ," ;
234262 if (count != 0 ) stream << count;
235- stream << " ," << name << " ," << uint32_t (kernel_time * scale) << " ," ;
236- if (cpu_time != -1.0 ) stream << uint32_t ( cpu_time * scale);
263+ stream << " ," << name << " ," << std::format ( " {:.0f} " , gpu_time * scale) << " ," ;
264+ if (cpu_time != -1.0 ) stream << std::format ( " {:.0f} " , cpu_time * scale);
237265 stream << " ," ;
238- if (cpu_time != -1.0 && total_time != -1.0 ) stream << uint32_t ( cpu_time / total_time * 100 ) / 100.0 ;
266+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format ( " {:.2f} " , cpu_time / total_time) ;
239267 stream << " ," ;
240- if (total_time != -1.0 ) stream << uint32_t ( total_time * scale);
268+ if (total_time != -1.0 ) stream << std::format ( " {:.0f} " , total_time * scale);
241269 stream << " ," ;
242- if (memSize != 0 && count != 0 ) stream << uint32_t ( memSize / kernel_time * 1e-6 ) * 1e-3 << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
270+ if (memSize != 0 && count != 0 ) stream << std::format ( " {:.3f} " , memSize / gpu_time * 1e-9 ) << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
243271 else stream << " ,," ;
244272 stream << std::endl;
245273 }
@@ -309,6 +337,7 @@ int32_t GPUReconstructionCPU::RunChains()
309337 }
310338
311339 if (GetProcessingSettings ().debugLevel >= 1 ) {
340+ write_header (std::cout);
312341 for (uint32_t i = 0 ; i < mTimers .size (); i++) {
313342 double time = 0 ;
314343 if (mTimers [i] == nullptr ) {
@@ -332,14 +361,16 @@ int32_t GPUReconstructionCPU::RunChains()
332361 Row task_row;
333362 task_row.type = ' K' ;
334363 task_row.name = mTimers [i]->name .c_str ();
335- task_row.kernel_time = time;
364+ task_row.gpu_time = time;
336365 task_row.count = mTimers [i]->count ;
366+ task_row.statNEvents = mStatNEvents ;
337367 if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
338368 task_row.memSize = mTimers [i]->memSize ;
339369 snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
340370 }
341- if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV, mStatNEvents );
342- printf (" Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n " , type == 0 ? ' K' : ' C' , mTimers [i]->count , mTimers [i]->name .c_str (), time * 1000000 / mStatNEvents , bandwidth);
371+ if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV);
372+ task_row.test (std::cout);
373+ // printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
343374 if (GetProcessingSettings ().resetTimers ) {
344375 mTimers [i]->count = 0 ;
345376 mTimers [i]->memSize = 0 ;
@@ -351,34 +382,40 @@ int32_t GPUReconstructionCPU::RunChains()
351382 if (kernelStepTimes[i] != 0 . || mTimersRecoSteps [i].timerTotal .GetElapsedTime () != 0 .) {
352383 Row reco_step_row;
353384 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES [i]) + " (Tasks)" ;
354- reco_step_row.kernel_time = kernelStepTimes[i];
385+ reco_step_row.gpu_time = kernelStepTimes[i];
355386 reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
356387 reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
357- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
358- printf (" Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n " , " Tasks" ,
359- gpudatatypes::RECO_STEP_NAMES [i], kernelStepTimes[i] * 1000000 / mStatNEvents , " " , mTimersRecoSteps [i].timerTotal .GetElapsedTime () * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU * 1000000 / mStatNEvents , mTimersRecoSteps [i].timerCPU / mTimersRecoSteps [i].timerTotal .GetElapsedTime ());
388+ reco_step_row.statNEvents = mStatNEvents ;
389+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
390+ reco_step_row.test (std::cout);
391+ // printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
392+ // gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
360393 }
361394 if (mTimersRecoSteps [i].bytesToGPU ) {
362395 Row reco_step_row;
363396 reco_step_row.type = ' D' ;
364397 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES [i]) + " (DMA to GPU)" ;
365- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
398+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToGPU .GetElapsedTime ();
366399 reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
367400 reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
368- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
369- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToGPU , " DMA to GPU" , gpudatatypes::RECO_STEP_NAMES [i], mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1000000 / mStatNEvents ,
370- mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].timerToGPU .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToGPU / mStatNEvents , mTimersRecoSteps [i].bytesToGPU / mTimersRecoSteps [i].countToGPU );
401+ reco_step_row.statNEvents = mStatNEvents ;
402+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
403+ reco_step_row.test (std::cout);
404+ // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
405+ // mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
371406 }
372407 if (mTimersRecoSteps [i].bytesToHost ) {
373408 Row reco_step_row;
374409 reco_step_row.type = ' D' ;
375410 reco_step_row.name = std::string (gpudatatypes::RECO_STEP_NAMES [i]) + " (DMA to Host)" ;
376- reco_step_row.kernel_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
411+ reco_step_row.gpu_time = mTimersRecoSteps [i].timerToHost .GetElapsedTime ();
377412 reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
378413 reco_step_row.count = mTimersRecoSteps [i].countToHost ;
379- if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV, mStatNEvents );
380- printf (" Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n " , mTimersRecoSteps [i].countToHost , " DMA to Host" , gpudatatypes::RECO_STEP_NAMES [i], mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1000000 / mStatNEvents ,
381- mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].timerToHost .GetElapsedTime () * 1e-9 , mTimersRecoSteps [i].bytesToHost / mStatNEvents , mTimersRecoSteps [i].bytesToHost / mTimersRecoSteps [i].countToHost );
414+ reco_step_row.statNEvents = mStatNEvents ;
415+ if (benchmarkCSV.is_open ()) reco_step_row.write (benchmarkCSV);
416+ reco_step_row.test (std::cout);
417+ // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
418+ // mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
382419 }
383420 if (GetProcessingSettings ().resetTimers ) {
384421 mTimersRecoSteps [i].bytesToGPU = mTimersRecoSteps [i].bytesToHost = 0 ;
@@ -394,22 +431,26 @@ int32_t GPUReconstructionCPU::RunChains()
394431 if (mTimersGeneralSteps [i].GetElapsedTime () != 0 .) {
395432 Row general_step_row;
396433 general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES [i];
397- general_step_row.kernel_time = mTimersGeneralSteps [i].GetElapsedTime ();
398- if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV, mStatNEvents );
399- printf (" Execution Time: General Step : %50s Time: %'10.0f us\n " , gpudatatypes::GENERAL_STEP_NAMES [i], mTimersGeneralSteps [i].GetElapsedTime () * 1000000 / mStatNEvents );
434+ general_step_row.gpu_time = mTimersGeneralSteps [i].GetElapsedTime ();
435+ general_step_row.statNEvents = mStatNEvents ;
436+ if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV);
437+ general_step_row.test (std::cout);
438+ // printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
400439 }
401440 }
402441 Row wall_row;
403442 wall_row.name = " Wall" ;
404443 if (GetProcessingSettings ().debugLevel >= 1 ) {
405- wall_row.kernel_time = kernelTotal;
444+ wall_row.gpu_time = kernelTotal;
406445 mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
407- printf (" Execution Time: Total : %50s Time: %'10.0f us%s\n " , " Total Kernel" , mStatKernelTime , nEventReport.c_str ());
446+ // printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
408447 }
409448 wall_row.cpu_time = mStatCPUTime ;
410449 wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
411- if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV, mStatNEvents );
412- printf (" Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n " , " Total Wall" , mStatWallTime , mStatCPUTime * 1000000 / mStatNEvents , mStatCPUTime / mTimerTotal .GetElapsedTime (), nEventReport.c_str ());
450+ wall_row.statNEvents = mStatNEvents ;
451+ if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV);
452+ wall_row.test (std::cout);
453+ // printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
413454 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
414455 GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
415456 }
0 commit comments