Skip to content

Commit 52b6394

Browse files
Profiler Team and copybara-github
authored and committed
Normalize FLOPs with DVFS in op_profile analysis of XProf
PiperOrigin-RevId: 911448273
1 parent e0a3b3d commit 52b6394

4 files changed

Lines changed: 71 additions & 5 deletions

File tree

xprof/convert/BUILD

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,16 @@ cc_library(
707707
],
708708
)
709709

710+
cc_test(
711+
name = "op_metrics_to_record_test",
712+
srcs = ["op_metrics_to_record_test.cc"],
713+
deps = [
714+
":op_metrics_to_record",
715+
"@com_google_googletest//:gtest_main",
716+
"@org_xprof//plugin/xprof/protobuf:op_metrics_proto_cc",
717+
],
718+
)
719+
710720
cc_library(
711721
name = "op_stack",
712722
hdrs = ["op_stack.h"],

xprof/convert/op_metrics_to_record.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ inline double GigaFlopsPerSecondPerCoreNormalizedOnDvfs(
5151
return GigaFlopsPerSecondPerCore(metrics);
5252
}
5353
return GigaFlopsPerSecondPerCore(metrics) *
54-
(tsl::profiler::SafeDivide(metrics.normalized_time_ps(),
55-
metrics.time_ps()));
54+
(tsl::profiler::SafeDivide(metrics.time_ps(),
55+
metrics.normalized_time_ps()));
5656
}
5757

5858
inline double GigaModelFlopsPerSecondPerCore(const OpMetrics& metrics) {
xprof/convert/op_metrics_to_record_test.cc

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/* Copyright 2026 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#include "xprof/convert/op_metrics_to_record.h"
17+
18+
#include <gtest/gtest.h>
19+
#include "plugin/xprof/protobuf/op_metrics.pb.h"
20+
21+
namespace tensorflow {
22+
namespace profiler {
23+
namespace {
24+
25+
constexpr double kMaxError = 1E-10;
26+
27+
TEST(OpMetricsToRecordTest, GigaFlopsPerSecondPerCoreNormalizedOnDvfs) {
28+
OpMetrics metrics;
29+
metrics.set_time_ps(100);
30+
metrics.set_normalized_time_ps(200);
31+
metrics.set_flops_v2(1000);
32+
metrics.set_occurrences(1);
33+
metrics.set_num_cores(1);
34+
35+
// Un-normalized rate: flops_v2 / (time_ps / 1000.0) = 1000 / 0.1 = 10000.
36+
// DVFS multiplier: time_ps / normalized_time_ps = 100 / 200 = 0.5.
37+
// Expected normalized GFLOPS: 10000 * 0.5 = 5000.
38+
EXPECT_NEAR(5000.0, GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics),
39+
kMaxError);
40+
}
41+
42+
TEST(OpMetricsToRecordTest, GigaFlopsPerSecondPerCoreNormalizedOnDvfsFallback) {
43+
OpMetrics metrics;
44+
metrics.set_time_ps(100);
45+
metrics.set_normalized_time_ps(0);
46+
metrics.set_flops_v2(1000);
47+
metrics.set_occurrences(1);
48+
metrics.set_num_cores(1);
49+
// normalized_time_ps == 0: expect the un-normalized 1000 / 0.1 = 10000.
50+
EXPECT_NEAR(10000.0, GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics),
51+
kMaxError);
52+
}
53+
54+
} // namespace
55+
} // namespace profiler
56+
} // namespace tensorflow

xprof/convert/op_profile_builder.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,9 @@ void PopulateOpMetricsNode(
203203
metrics->set_avg_time_ps(tsl::profiler::SafeDivide(op_metrics.time_ps(),
204204
op_metrics.occurrences()));
205205

206-
double uncapped_flops_utilization =
207-
tsl::profiler::SafeDivide(GigaFlopsPerSecondPerCore(op_metrics),
208-
peak_gigaflops_per_second_per_core);
206+
double uncapped_flops_utilization = tsl::profiler::SafeDivide(
207+
GigaFlopsPerSecondPerCoreNormalizedOnDvfs(op_metrics),
208+
peak_gigaflops_per_second_per_core);
209209

210210
double flops_utilization = CapUtilization(uncapped_flops_utilization);
211211
// The UI expects flops_utilization = flop_util / time_fraction. See:

0 commit comments

Comments
 (0)