Skip to content

Commit 70d25cf

Browse files
Write benchmark results to csv
1 parent 3ebc7ac commit 70d25cf

3 files changed

Lines changed: 34 additions & 3 deletions

File tree

.github/workflows/standalone-benchmark.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
3434
MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
3535
STANDALONE_DIR: /root/standalone
36-
ARTIFACT_FILE: /root/artifact.txt
36+
ARTIFACT_FILE: /root/benchmark.csv
3737
LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
3838

3939
name: ${{ matrix.name }}
@@ -84,11 +84,11 @@ jobs:
8484
source /etc/profile.d/modules.sh
8585
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
8686
cd ${STANDALONE_DIR}
87-
${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --runs 1 --debug 1 > ${ARTIFACT_FILE}
87+
${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --runs 1 --debug 1 --PROCtimingCSV ${ARTIFACT_FILE}
8888
rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
8989
9090
- name: Upload Artifact
9191
uses: actions/upload-artifact@v4
9292
with:
9393
name: ${{ matrix.name }}-artifact
94-
path: /root/artifact.txt
94+
path: /root/benchmark.csv

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
#include <atomic>
3737
#include <ctime>
38+
#include <fstream>
3839

3940
#ifndef _WIN32
4041
#include <unistd.h>
@@ -263,6 +264,31 @@ int32_t GPUReconstructionCPU::RunChains()
263264
}
264265
double kernelTotal = 0;
265266
std::vector<double> kernelStepTimes(gpudatatypes::N_RECO_STEPS, 0.);
267+
std::ofstream timingCSVFile;
268+
if (!GetProcessingSettings().timingCSV.empty()) {
269+
bool needHeader = true;
270+
{
271+
std::ifstream timingCSVIn(GetProcessingSettings().timingCSV);
272+
needHeader = !timingCSVIn.good() || timingCSVIn.peek() == std::ifstream::traits_type::eof();
273+
}
274+
timingCSVFile.open(GetProcessingSettings().timingCSV, std::ios::out | std::ios::app);
275+
if (!timingCSVFile.is_open()) {
276+
GPUError("Could not open timing CSV file '%s' for writing", GetProcessingSettings().timingCSV.c_str());
277+
} else if (needHeader) {
278+
timingCSVFile << "name,time,count,events\n";
279+
}
280+
}
281+
// Writes `s` to `out` as one double-quoted CSV field.
// Every '"' contained in `s` is emitted twice; all other characters are copied verbatim.
auto writeCSVString = [](std::ostream& out, const std::string& s) {
  out << '"'; // opening delimiter of the field
  std::string::size_type runStart = 0;
  for (std::string::size_type quotePos = s.find('"'); quotePos != std::string::npos; quotePos = s.find('"', runStart)) {
    // Copy the run up to and including the quote, then repeat the quote once more.
    out.write(s.data() + runStart, static_cast<std::streamsize>(quotePos - runStart + 1));
    out << '"';
    runStart = quotePos + 1;
  }
  // Remainder after the last embedded quote (the whole string if there was none).
  out.write(s.data() + runStart, static_cast<std::streamsize>(s.size() - runStart));
  out << '"'; // closing delimiter of the field
};
266292

267293
if (GetProcessingSettings().debugLevel >= 1) {
268294
for (uint32_t i = 0; i < mTimers.size(); i++) {
@@ -289,6 +315,10 @@ int32_t GPUReconstructionCPU::RunChains()
289315
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
290316
}
291317
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
318+
if (timingCSVFile.is_open()) {
319+
writeCSVString(timingCSVFile, mTimers[i]->name);
320+
timingCSVFile << "," << (time * 1000000 / mStatNEvents) << "," << mTimers[i]->count << "," << mStatNEvents << "\n";
321+
}
292322
if (GetProcessingSettings().resetTimers) {
293323
mTimers[i]->count = 0;
294324
mTimers[i]->memSize = 0;

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent,
307307
AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)")
308308
AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file")
309309
AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6")
310+
AddOption(timingCSV, std::string, "", "", 0, "Append per-task timing rows to this CSV file")
310311
AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures")
311312
AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks")
312313
AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6 or deterministic compile flag set", def(1))

0 commit comments

Comments
 (0)