Skip to content

Commit c8caaf7

Browse files
Markdown table
1 parent d2f7940 commit c8caaf7

1 file changed

Lines changed: 76 additions & 35 deletions

File tree

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 76 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,10 @@
3535

3636
#include <atomic>
3737
#include <ctime>
38-
#include <iomanip>
3938
#include <iostream>
39+
#include <format>
40+
#include <string>
41+
4042

4143
#ifndef _WIN32
4244
#include <unistd.h>
@@ -215,31 +217,57 @@ int32_t GPUReconstructionCPU::ExitDevice()
215217
}
216218

217219
namespace {
220+
218221
void write_header(std::ostream& stream) {
219-
stream << "type,count,name,kernel (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n";
222+
if (stream.rdbuf() == std::cout.rdbuf()) {
223+
stream << "| | count | name | gpu (us) | cpu (us) | cpu/tot | tot (us) | GB/s | bytes | bytes/call |\n";
224+
stream << "|---|--------|-------------------------------------------|-----------|-----------|---------|-----------|-----------|---------------|---------------|\n";
225+
} else {
226+
stream << "type,count,name,gpu (us),cpu (us),cpu/total,total (us),GB/s,bytes,bytes/call\n";
227+
}
220228
}
221229

222230
struct Row {
223-
std::string type = "";
224-
std::string name = "";
225-
double kernel_time = -1.0;
231+
char type = ' ';
232+
std::string name;
233+
uint32_t count = 0;
234+
double gpu_time = -1.0;
226235
double cpu_time = -1.0;
227236
double total_time = -1.0;
228-
size_t memSize = 0;
229-
uint32_t count = 0;
237+
uint32_t memSize = 0;
238+
uint32_t statNEvents;
239+
240+
void test(std::ostream& stream) {
241+
//static constexpr const char* fmt = "| {0:6} | {1:7} | {2:43} | {3:15.0f} | {4:15.0f} | {5:15.2f} | {6:15.0f} | {7:10.3f} | {8:10.0f} | {9:15.0f} |\n";
242+
double scale = 1000000.0 / statNEvents;
243+
stream << "| " << type << " | ";
244+
if (count != 0) stream << std::format("{:6} |", count);
245+
else stream << " |";
246+
stream << std::format(" {:41}", name) << " |" << std::format("{:10.0f}", gpu_time * scale) << " |";
247+
if (cpu_time != -1.0) stream << std::format("{:10.0f} |", cpu_time * scale);
248+
else stream << " |";
249+
if (cpu_time != -1.0 && total_time != -1.0) stream << std::format("{:8.2f} |", cpu_time / total_time);
250+
else stream << " |";
251+
if (total_time != -1.0) stream << std::format("{:10.0f} |", total_time * scale);
252+
else stream << " |";
253+
if (memSize != 0 && count != 0) stream << std::format("{:10.3f} |", memSize / gpu_time * 1e-9) << std::format("{:14} |", memSize / statNEvents) << std::format("{:14} |", memSize / statNEvents / count);
254+
else stream << " | | |";
255+
stream << std::endl;
256+
}
230257

231-
void write(std::ostream& stream, uint32_t statNEvents) {
258+
void write(std::ostream& stream) {
232259
double scale = 1000000.0 / statNEvents;
260+
//stream << std::format("{0},{1},{2},{3:.0f},{4:.0f},{5:.2f},{6:.0f},{7:.3f},{8},{9}\n", type, count, name, gpu_time * scale, cpu_time * scale, cpu_time / total_time, total_time * scale, memSize / gpu_time * 1e-9, memSize / statNEvents, memSize / statNEvents / count);
233261
stream << type << ",";
234262
if (count != 0) stream << count;
235-
stream << "," << name << "," << uint32_t(kernel_time * scale) << ",";
236-
if (cpu_time != -1.0) stream << uint32_t(cpu_time * scale);
263+
stream << "," << name << "," << std::format("{:.0f}", gpu_time * scale) << ",";
264+
if (cpu_time != -1.0) stream << std::format("{:.0f}", cpu_time * scale);
237265
stream << ",";
238-
if (cpu_time != -1.0 && total_time != -1.0) stream << uint32_t(cpu_time / total_time *100) / 100.0;
266+
if (cpu_time != -1.0 && total_time != -1.0) stream << std::format("{:.2f}", cpu_time / total_time);
239267
stream << ",";
240-
if (total_time != -1.0) stream << uint32_t(total_time * scale);
268+
if (total_time != -1.0) stream << std::format("{:.0f}", total_time * scale);
241269
stream << ",";
242-
if (memSize != 0 && count != 0) stream << uint32_t(memSize / kernel_time * 1e-6) * 1e-3 << "," << memSize / statNEvents << "," << memSize / statNEvents / count;
270+
if (memSize != 0 && count != 0) stream << std::format("{:.3f}", memSize / gpu_time * 1e-9) << "," << memSize / statNEvents << "," << memSize / statNEvents / count;
243271
else stream << ",,";
244272
stream << std::endl;
245273
}
@@ -309,6 +337,7 @@ int32_t GPUReconstructionCPU::RunChains()
309337
}
310338

311339
if (GetProcessingSettings().debugLevel >= 1) {
340+
write_header(std::cout);
312341
for (uint32_t i = 0; i < mTimers.size(); i++) {
313342
double time = 0;
314343
if (mTimers[i] == nullptr) {
@@ -332,14 +361,16 @@ int32_t GPUReconstructionCPU::RunChains()
332361
Row task_row;
333362
task_row.type = 'K';
334363
task_row.name = mTimers[i]->name.c_str();
335-
task_row.kernel_time = time;
364+
task_row.gpu_time = time;
336365
task_row.count = mTimers[i]->count;
366+
task_row.statNEvents = mStatNEvents;
337367
if (mTimers[i]->memSize && mStatNEvents && time != 0.) {
338368
task_row.memSize = mTimers[i]->memSize;
339369
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
340370
}
341-
if (benchmarkCSV.is_open()) task_row.write(benchmarkCSV, mStatNEvents);
342-
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
371+
if (benchmarkCSV.is_open()) task_row.write(benchmarkCSV);
372+
task_row.test(std::cout);
373+
//printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
343374
if (GetProcessingSettings().resetTimers) {
344375
mTimers[i]->count = 0;
345376
mTimers[i]->memSize = 0;
@@ -351,34 +382,40 @@ int32_t GPUReconstructionCPU::RunChains()
351382
if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) {
352383
Row reco_step_row;
353384
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (Tasks)";
354-
reco_step_row.kernel_time = kernelStepTimes[i];
385+
reco_step_row.gpu_time = kernelStepTimes[i];
355386
reco_step_row.cpu_time = mTimersRecoSteps[i].timerCPU;
356387
reco_step_row.total_time = mTimersRecoSteps[i].timerTotal.GetElapsedTime();
357-
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
358-
printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
359-
gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
388+
reco_step_row.statNEvents = mStatNEvents;
389+
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV);
390+
reco_step_row.test(std::cout);
391+
//printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks",
392+
//gpudatatypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime());
360393
}
361394
if (mTimersRecoSteps[i].bytesToGPU) {
362395
Row reco_step_row;
363396
reco_step_row.type = 'D';
364397
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to GPU)";
365-
reco_step_row.kernel_time = mTimersRecoSteps[i].timerToGPU.GetElapsedTime();
398+
reco_step_row.gpu_time = mTimersRecoSteps[i].timerToGPU.GetElapsedTime();
366399
reco_step_row.memSize = mTimersRecoSteps[i].bytesToGPU;
367400
reco_step_row.count = mTimersRecoSteps[i].countToGPU;
368-
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
369-
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
370-
mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
401+
reco_step_row.statNEvents = mStatNEvents;
402+
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV);
403+
reco_step_row.test(std::cout);
404+
//printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
405+
//mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
371406
}
372407
if (mTimersRecoSteps[i].bytesToHost) {
373408
Row reco_step_row;
374409
reco_step_row.type = 'D';
375410
reco_step_row.name = std::string(gpudatatypes::RECO_STEP_NAMES[i]) + " (DMA to Host)";
376-
reco_step_row.kernel_time = mTimersRecoSteps[i].timerToHost.GetElapsedTime();
411+
reco_step_row.gpu_time = mTimersRecoSteps[i].timerToHost.GetElapsedTime();
377412
reco_step_row.memSize = mTimersRecoSteps[i].bytesToHost;
378413
reco_step_row.count = mTimersRecoSteps[i].countToHost;
379-
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV, mStatNEvents);
380-
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
381-
mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
414+
reco_step_row.statNEvents = mStatNEvents;
415+
if (benchmarkCSV.is_open()) reco_step_row.write(benchmarkCSV);
416+
reco_step_row.test(std::cout);
417+
//printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", gpudatatypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
418+
//mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
382419
}
383420
if (GetProcessingSettings().resetTimers) {
384421
mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0;
@@ -394,22 +431,26 @@ int32_t GPUReconstructionCPU::RunChains()
394431
if (mTimersGeneralSteps[i].GetElapsedTime() != 0.) {
395432
Row general_step_row;
396433
general_step_row.name = gpudatatypes::GENERAL_STEP_NAMES[i];
397-
general_step_row.kernel_time = mTimersGeneralSteps[i].GetElapsedTime();
398-
if (benchmarkCSV.is_open()) general_step_row.write(benchmarkCSV, mStatNEvents);
399-
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
434+
general_step_row.gpu_time = mTimersGeneralSteps[i].GetElapsedTime();
435+
general_step_row.statNEvents = mStatNEvents;
436+
if (benchmarkCSV.is_open()) general_step_row.write(benchmarkCSV);
437+
general_step_row.test(std::cout);
438+
//printf("Execution Time: General Step : %50s Time: %'10.0f us\n", gpudatatypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
400439
}
401440
}
402441
Row wall_row;
403442
wall_row.name = "Wall";
404443
if (GetProcessingSettings().debugLevel >= 1) {
405-
wall_row.kernel_time = kernelTotal;
444+
wall_row.gpu_time = kernelTotal;
406445
mStatKernelTime = kernelTotal * 1000000 / mStatNEvents;
407-
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
446+
//printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
408447
}
409448
wall_row.cpu_time = mStatCPUTime;
410449
wall_row.total_time = mStatWallTime * mStatNEvents / 1000000;
411-
if (benchmarkCSV.is_open()) wall_row.write(benchmarkCSV, mStatNEvents);
412-
printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
450+
wall_row.statNEvents = mStatNEvents;
451+
if (benchmarkCSV.is_open()) wall_row.write(benchmarkCSV);
452+
wall_row.test(std::cout);
453+
//printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str());
413454
} else if (GetProcessingSettings().debugLevel >= 0) {
414455
GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str());
415456
}

0 commit comments

Comments
 (0)