11#pragma once
22#include < algorithm>
33#include < chrono>
4+ #include < iomanip>
45#include < memory>
56#include < queue>
67#include < stdexcept>
1415#include " runtime_options.hpp"
1516
1617namespace it_lab_ai {
18+ static std::unordered_map<LayerType, std::string> label_map = {
19+ {kInput , " Input" },
20+ {kPooling , " Pooling" },
21+ {kElementWise , " Element-wise" },
22+ {kConvolution , " Convolution" },
23+ {kFullyConnected , " Dense" },
24+ {kFlatten , " Flatten" },
25+ {kConcat , " Concat" },
26+ {kDropout , " Dropout" },
27+ {kSplit , " Split" },
28+ {kBinaryOp , " BinaryOp" },
29+ {kTranspose , " Transpose" },
30+ {kMatmul , " MatMul" },
31+ {kReshape , " Reshape" },
32+ {kSoftmax , " Softmax" },
33+ {kReduce , " Reduce" },
34+ {kBatchNormalization , " Normalization" }};
35+
/// Accumulated timing figures for one group of layer executions.
///
/// A default-constructed object represents "no samples yet": the counters are
/// zero and `min_time` holds the largest finite double so that any real
/// measurement compares smaller than it.
struct LayerTimeStats {
  // Label of the layer group. NOTE(review): nothing visible in this header
  // assigns it; presumably it mirrors the key the stats are stored under.
  std::string layer_name;
  // Sum of all recorded durations.
  double total_time = 0.0;
  // Number of durations added to total_time.
  int call_count = 0;
  // Smallest recorded duration; starts at the "nothing recorded" sentinel.
  double min_time = std::numeric_limits<double>::max();
  // Largest recorded duration.
  double max_time = 0.0;
};
1743
1844struct BranchState {
1945 int ind_layer;
@@ -27,6 +53,7 @@ std::shared_ptr<Layer> layer_based_shared_copy(
2753 const std::shared_ptr<Layer>& layer, const RuntimeOptions& options);
2854
2955class Graph {
56+ std::map<std::string, LayerTimeStats> layer_stats_;
3057 int BiggestSize_;
3158 int V_ ; // amount of ids
3259 std::vector<std::shared_ptr<Layer>> layers_;
@@ -383,8 +410,27 @@ class Graph {
383410 auto end = std::chrono::high_resolution_clock::now ();
384411 auto elapsed =
385412 std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
386- time_.push_back (static_cast <int >(elapsed.count ()));
387- time_layer_.push_back (layers_[current_layer]->getName ());
413+ int elapsed_ms = static_cast <int >(elapsed.count ());
414+ time_.push_back (elapsed_ms);
415+
416+ LayerType layer_type = layers_[current_layer]->getName ();
417+ time_layer_.push_back (layer_type);
418+
419+ auto it = label_map.find (layer_type);
420+ std::string layer_name_str =
421+ (it != label_map.end ()) ? it->second : " Unknown" ;
422+
423+ auto & stats = layer_stats_[layer_name_str];
424+ stats.total_time += elapsed_ms;
425+ stats.call_count ++;
426+
427+ if (stats.call_count == 1 ) {
428+ stats.min_time = elapsed_ms;
429+ stats.max_time = elapsed_ms;
430+ } else {
431+ if (elapsed_ms < stats.min_time ) stats.min_time = elapsed_ms;
432+ if (elapsed_ms > stats.max_time ) stats.max_time = elapsed_ms;
433+ }
388434#endif
389435 }
390436 }
@@ -408,25 +454,6 @@ class Graph {
408454#ifdef ENABLE_STATISTIC_TIME
409455 std::vector<std::string> getTimeInfo () {
410456 std::vector<std::string> res;
411-
412- std::unordered_map<LayerType, std::string> label_map = {
413- {kInput , " Input" },
414- {kPooling , " Pooling" },
415- {kElementWise , " Element-wise" },
416- {kConvolution , " Convolution" },
417- {kFullyConnected , " Dense" },
418- {kFlatten , " Flatten" },
419- {kConcat , " Concat" },
420- {kDropout , " Dropout" },
421- {kSplit , " Split" },
422- {kBinaryOp , " BinaryOp" },
423- {kTranspose , " Transpose" },
424- {kMatmul , " MatMul" },
425- {kReshape , " Reshape" },
426- {kSoftmax , " Softmax" },
427- {kReduce , " Reduce" },
428- {kBatchNormalization , " Normalization" }};
429-
430457 for (size_t i = 0 ; i < time_.size (); i++) {
431458 auto it = label_map.find (time_layer_[i]);
432459 std::string layer_name = (it != label_map.end ()) ? it->second : " Unknown" ;
@@ -461,6 +488,23 @@ class Graph {
461488 return result;
462489 }
463490
491+ void printLayerStats () {
492+ std::cout << " \n ========== LAYER PERFORMANCE STATISTICS ==========\n " ;
493+ std::cout << std::left << std::setw (20 ) << " Layer Type" << std::right
494+ << std::setw (15 ) << " Total (ms)" << std::setw (12 ) << " Calls"
495+ << std::setw (15 ) << " Avg (ms)" << std::setw (15 ) << " Min (ms)"
496+ << std::setw (15 ) << " Max (ms)" << ' \n ' ;
497+
498+ for (const auto & [name, stats] : layer_stats_) {
499+ double avg = stats.total_time / stats.call_count ;
500+ std::cout << std::left << std::setw (20 ) << name << std::right
501+ << std::fixed << std::setprecision (3 ) << std::setw (15 )
502+ << stats.total_time << std::setw (12 ) << stats.call_count
503+ << std::setw (15 ) << avg << std::setw (15 ) << stats.min_time
504+ << std::setw (15 ) << stats.max_time << ' \n ' ;
505+ }
506+ }
507+
464508 [[nodiscard]] std::vector<int > getTraversalOrder () const {
465509 auto in_out_degrees = getInOutDegrees ();
466510 std::vector<int > in_degree (V_ );
0 commit comments