Skip to content

Commit 08bfc28

Browse files
Olialmoneta
authored and committed
Time Profiler for Sofie
1 parent 6282757 commit 08bfc28

9 files changed

Lines changed: 255 additions & 23 deletions

File tree

tmva/sofie/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
2222
TMVA/OperatorList.hxx
2323
TMVA/RModel_Base.hxx
2424
TMVA/RModel.hxx
25+
TMVA/RModelProfiler.hxx
2526
TMVA/ROperator.hxx
2627
TMVA/ROperator_BasicUnary.hxx
2728
TMVA/ROperator_BasicBinary.hxx
@@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
7778
SOURCES
7879
src/RModel_Base.cxx
7980
src/RModel.cxx
81+
src/RModelProfiler.cxx
8082
src/RModel_GNN.cxx
8183
src/RModel_GraphIndependent.cxx
8284
src/RFunction.cxx

tmva/sofie/inc/TMVA/RModel.hxx

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,23 @@ namespace SOFIE {
1111

1212
class RModel final : public RModel_Base {
1313

14+
friend class RModelProfiler;
15+
1416
private:
1517
bool fIsInitialized = false;
1618
bool fIsSubGraph = false;
19+
bool fProfile = false;
20+
1721
int fVerbose = 0;
1822
int fBatchSize = -1;
1923
long fReadPos = 0; // reading file position
24+
2025
size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors
2126
size_t fWeightsTensorSize = 0; // size (in Bytes) of the allocated weight tensors
2227
size_t fOtherTensorSize = 0; // size (in Bytes) of intermediate tensors which are not managed by the memory pool
2328

29+
std::string fProfilerGC = "";
30+
2431
OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;
2532

2633
std::unordered_map<std::string, InputTensorInfo> fInputTensorInfos; // input tensors where shape may not fully defined or other graph inputs?
@@ -157,7 +164,7 @@ public:
157164
void Initialize(int batchSize = -1, bool verbose = false);
158165
void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);
159166

160-
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
167+
void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
161168
void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
162169
{
163170
Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#ifndef TMVA_SOFIE_RMODELPROFILER
2+
#define TMVA_SOFIE_RMODELPROFILER
3+
4+
#include "TMVA/RModel.hxx"
5+
6+
namespace TMVA {
7+
namespace Experimental {
8+
namespace SOFIE {
9+
10+
/// \class RModelProfiler
11+
/// \brief A helper class to generate profiled inference code for an RModel.
12+
///
13+
/// This class instruments the generated C++ code to measure the execution
14+
/// time of each operator. It is invoked when the RModel::Generate is called
15+
/// with the Options::kProfile flag.
16+
class RModelProfiler {
17+
private:
18+
RModel &fModel;
19+
20+
void GenerateUtilityFunctions();
21+
22+
public:
23+
// The profiler must be constructed with a model to work on.
24+
RModelProfiler() = delete;
25+
RModelProfiler(RModel &model);
26+
~RModelProfiler() = default;
27+
28+
// There is no point in copying or moving an RModelProfiler
29+
RModelProfiler(const RModelProfiler &other) = delete;
30+
RModelProfiler(RModelProfiler &&other) = delete;
31+
RModelProfiler &operator=(const RModelProfiler &other) = delete;
32+
RModelProfiler &operator=(RModelProfiler &&other) = delete;
33+
34+
// Main function to generate the profiled code.
35+
void Generate();
36+
};
37+
38+
} // namespace SOFIE
39+
} // namespace Experimental
40+
} // namespace TMVA
41+
42+
#endif // TMVA_SOFIE_RMODELPROFILER

tmva/sofie/inc/TMVA/RModel_Base.hxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ enum class Options {
2626
kRootBinaryWeightFile = 0x4,
2727
kGNN = 0x8,
2828
kGNNComponent = 0x10,
29+
kProfile = 0x20,
2930
};
3031

3132
// Optimization levels inspired by ONNXRuntime.

tmva/sofie/inc/TMVA/ROperator.hxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ public:
3737
//virtual void Forward_blas() = 0;
3838
virtual ~ROperator(){}
3939

40+
std::string name = "UnnamedOperator";
41+
const std::string &GetOperatorName() { return name; };
42+
4043
protected:
4144

4245
const std::string SP = " "; ///< space used to correctly indent the generated C++ code

tmva/sofie/src/RModel.cxx

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#endif
1010

1111
#include "TMVA/RModel.hxx"
12+
#include "TMVA/RModelProfiler.hxx"
1213
#include "TMVA/SOFIE_common.hxx"
1314

1415
namespace TMVA {
@@ -1061,7 +1062,7 @@ void RModel::GenerateSessionCode()
10611062
CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
10621063
}
10631064

1064-
// to check remaining unused fragments after memory allocation (lesser the better)
1065+
// to check remaining unused fragments after memory allocation (lesser the better)
10651066
// for (const auto &it: fIntermediateMemoryInfo.available_stack){
10661067
// std::cout<<"chunk_idx: "<<it.first<<", chunk_size: "<<it.second<<"\n";
10671068
// }
@@ -1089,13 +1090,13 @@ void RModel::GenerateSessionCode()
10891090
// Generate code for Session constructor
10901091
if (fUseSession) {
10911092
std::string sessionName = "Session";
1092-
if (fIsSubGraph)
1093+
if (fIsSubGraph)
10931094
sessionName += "_" + fName;
10941095
// add here specific operator code that needs to define session data members
10951096
fGC += "\n";
10961097
for (size_t id = 0; id < fOperators.size(); id++) {
10971098
std::string opName = std::to_string(id);
1098-
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
1099+
fGC += fOperators[id]->GenerateSessionMembersCode(opName);
10991100
}
11001101
fGC += "\n";
11011102
// here add initialization and reading of weight tensors
@@ -1143,23 +1144,28 @@ void RModel::GenerateSessionCode()
11431144
fGC += "}\n\n";
11441145
}
11451146

1146-
fGC += doInferSignature + "{\n";
1147-
fGC += "\n";
1147+
if (fProfile) {
1148+
RModelProfiler profiler(*this);
1149+
profiler.Generate();
1150+
fGC += fProfilerGC;
1151+
} else {
1152+
fGC += doInferSignature + "{\n";
1153+
fGC += "\n";
11481154

1149-
// generate the inference code
1150-
if (fVerbose)
1151-
std::cout << "Generating main inference code for " << fName << std::endl;
1155+
// generate the inference code
1156+
if (fVerbose)
1157+
std::cout << "Generating main inference code for " << fName << std::endl;
11521158

1153-
if (fOutputTensorNames.size() == 0)
1154-
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
1159+
if (fOutputTensorNames.size() == 0)
1160+
throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
11551161

1156-
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
1157-
if (fVerbose)
1162+
for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
1163+
if (fVerbose)
11581164
std::cout << "Generating code for operator .... " << op_idx << std::endl;
1159-
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
1160-
}
1165+
fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
1166+
}
11611167

1162-
fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
1168+
fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
11631169

11641170
for (std::string const &name : fOutputTensorNames) {
11651171
// need to check is size is the same (don't want to return a vector with
@@ -1170,7 +1176,8 @@ void RModel::GenerateSessionCode()
11701176
fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
11711177
}
11721178

1173-
fGC += "}\n\n";
1179+
fGC += "}\n\n";
1180+
}
11741181

11751182
// generate the inference overload that returns an output struct
11761183
GenerateOutput();
@@ -1183,9 +1190,11 @@ void RModel::GenerateSessionCode()
11831190

11841191
void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
11851192
{
1193+
bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
11861194
fVerbose = verbose;
11871195
fBatchSize = batchSize;
11881196
fReadPos = pos;
1197+
fProfile = profile;
11891198

11901199
// session flag is used in operator initialize
11911200
if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
@@ -1205,9 +1214,9 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
12051214
"TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
12061215
}
12071216

1208-
if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
1217+
if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
12091218
fIsGNN = true;
1210-
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
1219+
if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
12111220
fIsGNNComponent = true;
12121221

12131222
// initialize the model including all operators and sub-graphs
@@ -1228,13 +1237,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
12281237

12291238
// generate first code for the subgraphs
12301239
for (auto &graph : fSubGraphs) {
1231-
if (fVerbose)
1240+
if (fVerbose)
12321241
std::cout << "generate session code for subgraph " << graph->fName << std::endl;
12331242
graph->GenerateSessionCode();
12341243
fGC += graph->fGC;
12351244
}
12361245

1237-
if (fVerbose)
1246+
if (fVerbose)
12381247
std::cout << "generate Main session code - model " << fName << std::endl;
12391248

12401249
// generate main session code

tmva/sofie/src/RModelProfiler.cxx

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#include "TMVA/RModelProfiler.hxx"
#include "TMVA/SOFIE_common.hxx"

#include <stdexcept>
#include <string>
3+
4+
namespace TMVA {
5+
namespace Experimental {
6+
namespace SOFIE {
7+
8+
// The constructor now just registers the necessary C++ libraries.
9+
RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
10+
{
11+
fModel.AddNeededStdLib("chrono"); // for timing operators
12+
fModel.AddNeededStdLib("vector"); // for storing profiling results
13+
fModel.AddNeededStdLib("string"); // for operator names
14+
fModel.AddNeededStdLib("map"); // for the results map
15+
fModel.AddNeededStdLib("iostream"); // for printing results
16+
fModel.AddNeededStdLib("iomanip"); // for printing results
17+
}
18+
19+
// This function generates the helper functions inside the Session struct.
20+
void RModelProfiler::GenerateUtilityFunctions()
21+
{
22+
auto &gc = fModel.fProfilerGC;
23+
24+
// Generate PrintProfilingResults function
25+
gc += " void PrintProfilingResults() const {\n";
26+
gc += " if (fProfilingResults.empty()) {\n";
27+
gc += " std::cout << \"No profiling results to display.\" << std::endl;\n";
28+
gc += " return;\n";
29+
gc += " }\n";
30+
gc += "\n";
31+
gc += " std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n";
32+
gc += " std::cout << \" AVERAGE PROFILING RESULTS\" << std::endl;\n";
33+
gc += " std::cout << std::string(50, '=') << std::endl;\n";
34+
gc += " for (const auto& op : fProfilingResults) {\n";
35+
gc += " double sum = 0.0;\n";
36+
gc += " for (double time : op.second) {\n";
37+
gc += " sum += time;\n";
38+
gc += " }\n";
39+
gc += " double average = sum / op.second.size();\n";
40+
gc += " std::cout << \" \" << std::left << std::setw(20) << op.first\n";
41+
gc += " << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n";
42+
gc += " << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n";
43+
gc += " }\n";
44+
gc += " std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n";
45+
gc += " }\n";
46+
gc += "\n";
47+
48+
// Generate ResetProfilingResults function
49+
gc += " void ResetProfilingResults() {\n";
50+
gc += " fProfilingResults.clear();\n";
51+
gc += " }\n";
52+
gc += "\n";
53+
54+
// Generate GetOpAvgTime function
55+
gc += " std::map<std::string, double> GetOpAvgTime() const {\n";
56+
gc += " if (fProfilingResults.empty()) {\n";
57+
gc += " return {};\n";
58+
gc += " }\n";
59+
gc += "\n";
60+
gc += " std::map<std::string, double> avg;\n";
61+
gc += " for (const auto& op : fProfilingResults) {\n";
62+
gc += " double mean = 0.0;\n";
63+
gc += " for (double time : op.second) {\n";
64+
gc += " mean += time;\n";
65+
gc += " }\n";
66+
gc += " mean /= op.second.size();\n";
67+
gc += " avg[op.first] = mean;\n";
68+
gc += " }\n";
69+
gc += "\n";
70+
gc += " return avg;\n";
71+
gc += " }\n";
72+
gc += "\n";
73+
74+
// Generate GetOpVariance function
75+
gc += " std::map<std::string, double> GetOpVariance() const {\n";
76+
gc += " if (fProfilingResults.empty()) {\n";
77+
gc += " return {};\n";
78+
gc += " }\n";
79+
gc += "\n";
80+
gc += " std::map<std::string, double> variance;\n";
81+
gc += " for (const auto& op : fProfilingResults) {\n";
82+
gc += " // Var[X] = E[X^2] - E[X]^2\n";
83+
gc += " double mean = 0.0, mean2 = 0.0;\n";
84+
gc += " for (double time : op.second) {\n";
85+
gc += " mean += time;\n";
86+
gc += " mean2 += time * time;\n";
87+
gc += " }\n";
88+
gc += " mean /= op.second.size();\n";
89+
gc += " mean2 /= op.second.size();\n";
90+
gc += " variance[op.first] = mean2 - mean * mean;\n";
91+
gc += " }\n";
92+
gc += "\n";
93+
gc += " return variance;\n";
94+
gc += " }\n";
95+
}
96+
97+
// Main generation function for the profiler.
98+
void RModelProfiler::Generate()
99+
{
100+
// Clear the profiler's code string to start fresh.
101+
fModel.fProfilerGC.clear();
102+
auto &gc = fModel.fProfilerGC;
103+
104+
// 1. Add the data member to the Session struct to store results.
105+
gc += "public:\n";
106+
gc += " // Maps an operator name to a vector of its execution times (in microseconds).\n";
107+
gc += " std::map<std::string, std::vector<double>> fProfilingResults;\n\n";
108+
109+
// 2. Generate and add the utility functions like PrintProfilingResults.
110+
GenerateUtilityFunctions();
111+
112+
// 3. Generate the signature for the profiled doInfer method.
113+
std::string doInferSignature = fModel.GenerateInferSignature();
114+
if (!doInferSignature.empty()) doInferSignature += ", ";
115+
for (auto const &name : fModel.GetOutputTensorNames()) {
116+
doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
117+
}
118+
if (!fModel.GetOutputTensorNames().empty()) {
119+
doInferSignature.back() = ' ';
120+
}
121+
gc += "void doInfer(" + doInferSignature + ") {\n";
122+
123+
// 4. Generate the body of the doInfer method with timing instrumentation.
124+
gc += " // Timer variable for profiling\n";
125+
gc += " std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n";
126+
gc += " tp_overall_start = std::chrono::steady_clock::now();\n\n";
127+
128+
for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
129+
const auto& op = fModel.fOperators[op_idx];
130+
gc += " // -- Profiling for operator " + op->name + " --\n";
131+
gc += " tp_start = std::chrono::steady_clock::now();\n\n";
132+
133+
// Add the actual operator inference code
134+
gc += op->Generate(std::to_string(op_idx));
135+
136+
// Add the code to stop the timer and store the result
137+
gc += "\n fProfilingResults[\"" + op->name + "\"].push_back(\n";
138+
gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
139+
gc += " std::chrono::steady_clock::now() - tp_start).count());\n\n";
140+
}
141+
142+
// 5. Generate the code to fill the output tensors.
143+
gc += " using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
144+
for (std::string const &name : fModel.GetOutputTensorNames()) {
145+
bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
146+
std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
147+
: ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
148+
gc += " FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
149+
}
150+
151+
gc += "\n // -- Record overall inference time --\n";
152+
gc += " fProfilingResults[\"Overall_Time\"].push_back(\n";
153+
gc += " std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
154+
gc += " std::chrono::steady_clock::now() - tp_overall_start).count());\n";
155+
156+
gc += "}\n\n"; // End of doInfer function
157+
}
158+
159+
} // namespace SOFIE
160+
} // namespace Experimental
161+
} // namespace TMVA

0 commit comments

Comments
 (0)