@@ -217,61 +217,62 @@ int32_t GPUReconstructionCPU::ExitDevice()
217217}
218218
219219namespace {
220-
221- void write_header (std::ostream& stream) {
222- if (stream.rdbuf () == std::cout.rdbuf ()) {
223- stream << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
224- stream << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
225- } else {
226- stream << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
227- }
// Writes the column header of the benchmark report to `stream`.
// If `stream` shares its stream buffer with std::cout, a pipe-delimited table
// header row followed by a separator row is written; for any other stream a
// single comma-separated header line is written instead.
220+ void write_header (std::ostream& stream) {
    // Buffer identity is used to tell "console" apart from every other sink.
221+ if (stream.rdbuf () == std::cout.rdbuf ()) {
222+ stream << " | | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n " ;
223+ stream << " |---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n " ;
224+ } else {
    // Same ten columns as the table above, as one comma-separated line.
225+ stream << " type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n " ;
228226 }
227+ }
229228
230- struct Row {
231- char type = ' ' ;
232- std::string name;
233- uint32_t count = 0 ;
234- double gpu_time = -1.0 ;
235- double cpu_time = -1.0 ;
236- double total_time = -1.0 ;
237- uint32_t memSize = 0 ;
238- uint32_t statNEvents;
239-
240- void test (std::ostream& stream) {
241- // static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
242- double scale = 1000000.0 / statNEvents;
243- stream << " | " << type << " | " ;
244- if (count != 0 ) stream << std::format (" {:6} |" , count);
245- else stream << " |" ;
246- stream << std::format (" {:41}" , name) << " |" << std::format (" {:10.0f}" , gpu_time * scale) << " |" ;
247- if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
248- else stream << " |" ;
249- if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
250- else stream << " |" ;
251- if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
252- else stream << " |" ;
253- if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |" , memSize / gpu_time * 1e-9 ) << std::format (" {:14} |" , memSize / statNEvents) << std::format (" {:14} |" , memSize / statNEvents / count);
254- else stream << " | | |" ;
255- stream << std::endl;
256- }
// One line of the benchmark report.
// Timing fields default to the sentinel -1.0 and count/memSize default to 0;
// both writers below leave the corresponding cell/field empty for those values.
229+ struct Row {
    // Single-character tag written in the first column.
230+ char type = ' ' ;
    // Text written in the "name" column.
231+ std::string name;
    // Written in the "count" column; 0 means "leave the column empty".
232+ uint32_t count = 0 ;
    // Times are multiplied by 1e6 / statNEvents before printing.
    // Presumably stored in seconds (header labels say "us") — TODO confirm.
233+ double gpu_time = -1.0 ;
234+ double cpu_time = -1.0 ;
235+ double total_time = -1.0 ;
    // Used for the GB/s, bytes and bytes/call columns; 0 leaves them empty.
    // NOTE(review): 32-bit here — confirm the values callers assign fit.
236+ uint32_t memSize = 0 ;
    // Divisor for all per-event values. No default initializer: callers must
    // assign it before calling either writer.
237+ uint32_t statNEvents;
238+
    // Writes this row to `stream` as one pipe-delimited, fixed-width table line
    // (the layout paired with the std::cout branch of write_header).
239+ void writeToCout (std::ostream& stream) {
    // Factor applied to every time value before it is printed.
240+ double scale = 1000000.0 / statNEvents;
241+ stream << " | " << type << " | " ;
    // Each optional column prints its value or a blank cell of the same shape.
242+ if (count != 0 ) stream << std::format (" {:6} |" , count);
243+ else stream << " |" ;
244+ stream << std::format (" {:42}|" , name);
245+ if (gpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , gpu_time * scale);
246+ else stream << " |" ;
247+ if (cpu_time != -1.0 ) stream << std::format (" {:10.0f} |" , cpu_time * scale);
248+ else stream << " |" ;
    // The cpu/total ratio needs both times to be set.
249+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:8.2f} |" , cpu_time / total_time);
250+ else stream << " |" ;
251+ if (total_time != -1.0 ) stream << std::format (" {:10.0f} |" , total_time * scale);
252+ else stream << " |" ;
    // Last three columns: memSize / gpu_time * 1e-9, memSize per event, and
    // memSize per event per call.
    // NOTE(review): gpu_time is not checked against the -1.0 sentinel (or 0)
    // here, and memSize / statNEvents is unsigned integer division with no
    // guard for statNEvents == 0 in this method — confirm callers ensure both.
253+ if (memSize != 0 && count != 0 ) stream << std::format (" {:10.3f} |{:14} |{:14} |" , memSize / gpu_time * 1e-9 , memSize / statNEvents, memSize / statNEvents / count);
254+ else stream << " | | |" ;
    // std::endl ends the line and also flushes the stream after every row.
255+ stream << std::endl;
256+ }
257257
258- void write (std::ostream& stream) {
259- double scale = 1000000.0 / statNEvents;
260- // stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, gpu_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / gpu_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
261- stream << type << " ," ;
262- if (count != 0 ) stream << count;
263- stream << " ," << name << " ," << std::format (" {:.0f}" , gpu_time * scale) << " ," ;
264- if (cpu_time != -1.0 ) stream << std::format (" {:.0f}" , cpu_time * scale);
265- stream << " ," ;
266- if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:.2f}" , cpu_time / total_time);
267- stream << " ," ;
268- if (total_time != -1.0 ) stream << std::format (" {:.0f}" , total_time * scale);
269- stream << " ," ;
270- if (memSize != 0 && count != 0 ) stream << std::format (" {:.3f}" , memSize / gpu_time * 1e-9 ) << " ," << memSize / statNEvents << " ," << memSize / statNEvents / count;
271- else stream << " ,," ;
272- stream << std::endl;
273- }
274- };
    // Writes this row to `stream` as one comma-separated line (the layout
    // paired with the non-std::cout branch of write_header). Unset values are
    // written as empty fields so the number of separators stays constant.
258+ void writeToFile (std::ostream& stream) {
    // Factor applied to every time value before it is printed.
259+ double scale = 1000000.0 / statNEvents;
260+ stream << type << " ," ;
261+ if (count != 0 ) stream << count;
    // NOTE(review): name is written verbatim; a comma inside it would shift
    // the columns — confirm names never contain one.
262+ stream << " ," << name << " ," ;
263+ if (gpu_time != -1.0 ) stream << std::format (" {:.0f}" , gpu_time * scale);
264+ stream << " ," ;
265+ if (cpu_time != -1.0 ) stream << std::format (" {:.0f}" , cpu_time * scale);
266+ stream << " ," ;
    // The cpu/total ratio needs both times to be set.
267+ if (cpu_time != -1.0 && total_time != -1.0 ) stream << std::format (" {:.2f}" , cpu_time / total_time);
268+ stream << " ," ;
269+ if (total_time != -1.0 ) stream << std::format (" {:.0f}" , total_time * scale);
270+ stream << " ," ;
    // Either three values or two bare separators, i.e. three empty fields.
    // NOTE(review): same unguarded divisions as in writeToCout — gpu_time is
    // not checked against -1.0 / 0 and statNEvents == 0 is not handled here.
271+ if (memSize != 0 && count != 0 ) stream << std::format (" {:.3f},{},{}" , memSize / gpu_time * 1e-9 , memSize / statNEvents, memSize / statNEvents / count);
272+ else stream << " ,," ;
    // std::endl ends the line and also flushes the stream after every row.
273+ stream << std::endl;
274+ }
275+ };
275276}
276277
277278int32_t GPUReconstructionCPU::RunChains ()
@@ -318,7 +319,7 @@ int32_t GPUReconstructionCPU::RunChains()
318319 PrintMemoryOverview ();
319320 }
320321
321- mStatWallTime = ( mTimerTotal .GetElapsedTime () * 1000000 . / mStatNEvents );
322+ mStatWallTime = mTimerTotal .GetElapsedTime ();
322323 std::string nEventReport;
323324 if (GetProcessingSettings ().debugLevel >= 0 && mStatNEvents > 1 ) {
324325 nEventReport += " (avergage of " + std::to_string (mStatNEvents ) + " runs)" ;
@@ -357,7 +358,6 @@ int32_t GPUReconstructionCPU::RunChains()
357358 int32_t stepNum = getRecoStepNum (mTimers [i]->step );
358359 kernelStepTimes[stepNum] += time;
359360 }
360- char bandwidth[256 ] = " " ;
361361 Row task_row;
362362 task_row.type = ' K' ;
363363 task_row.name = mTimers [i]->name .c_str ();
@@ -366,11 +366,11 @@ int32_t GPUReconstructionCPU::RunChains()
366366 task_row.statNEvents = mStatNEvents ;
367367 if (mTimers [i]->memSize && mStatNEvents && time != 0 .) {
368368 task_row.memSize = mTimers [i]->memSize ;
369- snprintf (bandwidth, 256 , " (%8.3f GB/s - %'14zu bytes - %'14zu per call)" , mTimers [i]->memSize / time * 1e-9 , mTimers [i]->memSize / mStatNEvents , mTimers [i]->memSize / mStatNEvents / mTimers [i]->count );
370369 }
371- if (benchmarkCSV.is_open ()) task_row.write (benchmarkCSV);
372- task_row.test (std::cout);
373- // printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
370+ if (benchmarkCSV.is_open ()) {
371+ task_row.writeToFile (benchmarkCSV);
372+ }
373+ task_row.writeToCout (std::cout);
374374 if (GetProcessingSettings ().resetTimers ) {
375375 mTimers [i]->count = 0 ;
376376 mTimers [i]->memSize = 0 ;
@@ -386,10 +386,10 @@ int32_t GPUReconstructionCPU::RunChains()
386386 reco_step_row.cpu_time = mTimersRecoSteps [i].timerCPU ;
387387 reco_step_row.total_time = mTimersRecoSteps [i].timerTotal .GetElapsedTime ();
388388 reco_step_row.statNEvents = mStatNEvents ;
389- if (benchmarkCSV.is_open ()) reco_step_row. write (benchmarkCSV);
390- reco_step_row.test (std::cout );
391- // printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
392- // gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime() );
389+ if (benchmarkCSV.is_open ()) {
390+ reco_step_row.writeToFile (benchmarkCSV );
391+ }
392+ reco_step_row. writeToCout (std::cout );
393393 }
394394 if (mTimersRecoSteps [i].bytesToGPU ) {
395395 Row reco_step_row;
@@ -399,10 +399,10 @@ int32_t GPUReconstructionCPU::RunChains()
399399 reco_step_row.memSize = mTimersRecoSteps [i].bytesToGPU ;
400400 reco_step_row.count = mTimersRecoSteps [i].countToGPU ;
401401 reco_step_row.statNEvents = mStatNEvents ;
402- if (benchmarkCSV.is_open ()) reco_step_row. write (benchmarkCSV);
403- reco_step_row.test (std::cout );
404- // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
405- // mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU );
402+ if (benchmarkCSV.is_open ()) {
403+ reco_step_row.writeToFile (benchmarkCSV );
404+ }
405+ reco_step_row. writeToCout (std::cout );
406406 }
407407 if (mTimersRecoSteps [i].bytesToHost ) {
408408 Row reco_step_row;
@@ -412,10 +412,10 @@ int32_t GPUReconstructionCPU::RunChains()
412412 reco_step_row.memSize = mTimersRecoSteps [i].bytesToHost ;
413413 reco_step_row.count = mTimersRecoSteps [i].countToHost ;
414414 reco_step_row.statNEvents = mStatNEvents ;
415- if (benchmarkCSV.is_open ()) reco_step_row. write (benchmarkCSV);
416- reco_step_row.test (std::cout );
417- // printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
418- // mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost );
415+ if (benchmarkCSV.is_open ()) {
416+ reco_step_row.writeToFile (benchmarkCSV );
417+ }
418+ reco_step_row. writeToCout (std::cout );
419419 }
420420 if (GetProcessingSettings ().resetTimers ) {
421421 mTimersRecoSteps [i].bytesToGPU = mTimersRecoSteps [i].bytesToHost = 0 ;
@@ -433,26 +433,26 @@ int32_t GPUReconstructionCPU::RunChains()
433433 general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES [i];
434434 general_step_row.gpu_time = mTimersGeneralSteps [i].GetElapsedTime ();
435435 general_step_row.statNEvents = mStatNEvents ;
436- if (benchmarkCSV.is_open ()) general_step_row.write (benchmarkCSV);
437- general_step_row.test (std::cout);
438- // printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
436+ if (benchmarkCSV.is_open ()) {
437+ general_step_row.writeToFile (benchmarkCSV);
438+ }
439+ general_step_row.writeToCout (std::cout);
439440 }
440441 }
441442 Row wall_row;
442443 wall_row.name = " Wall" ;
443444 if (GetProcessingSettings ().debugLevel >= 1 ) {
444445 wall_row.gpu_time = kernelTotal;
445- mStatKernelTime = kernelTotal * 1000000 / mStatNEvents ;
446- // printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
447446 }
448447 wall_row.cpu_time = mStatCPUTime ;
449- wall_row.total_time = mStatWallTime * mStatNEvents / 1000000 ;
448+ wall_row.total_time = mStatWallTime ;
450449 wall_row.statNEvents = mStatNEvents ;
451- if (benchmarkCSV.is_open ()) wall_row.write (benchmarkCSV);
452- wall_row.test (std::cout);
453- // printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
450+ if (benchmarkCSV.is_open ()) {
451+ wall_row.writeToFile (benchmarkCSV);
452+ }
453+ wall_row.writeToCout (std::cout);
454454 } else if (GetProcessingSettings ().debugLevel >= 0 ) {
455- GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime , nEventReport.c_str ());
455+ GPUInfo (" Total Wall Time: %10.0f us%s" , mStatWallTime * 1000000 / mStatNEvents , nEventReport.c_str ());
456456 }
457457 if (GetProcessingSettings ().resetTimers ) {
458458 mStatNEvents = 0 ;
0 commit comments