diff --git a/xprof/convert/BUILD b/xprof/convert/BUILD index 17069e2c..daf254b9 100644 --- a/xprof/convert/BUILD +++ b/xprof/convert/BUILD @@ -707,6 +707,16 @@ cc_library( ], ) +cc_test( + name = "op_metrics_to_record_test", + srcs = ["op_metrics_to_record_test.cc"], + deps = [ + ":op_metrics_to_record", + "@com_google_googletest//:gtest_main", + "@org_xprof//plugin/xprof/protobuf:op_metrics_proto_cc", + ], +) + cc_library( name = "op_stack", hdrs = ["op_stack.h"], diff --git a/xprof/convert/op_metrics_to_record.h b/xprof/convert/op_metrics_to_record.h index 6e99060c..6f84a26a 100644 --- a/xprof/convert/op_metrics_to_record.h +++ b/xprof/convert/op_metrics_to_record.h @@ -51,8 +51,8 @@ inline double GigaFlopsPerSecondPerCoreNormalizedOnDvfs( return GigaFlopsPerSecondPerCore(metrics); } return GigaFlopsPerSecondPerCore(metrics) * - (tsl::profiler::SafeDivide(metrics.normalized_time_ps(), - metrics.time_ps())); + (tsl::profiler::SafeDivide(metrics.time_ps(), + metrics.normalized_time_ps())); } inline double GigaModelFlopsPerSecondPerCore(const OpMetrics& metrics) { diff --git a/xprof/convert/op_metrics_to_record_test.cc b/xprof/convert/op_metrics_to_record_test.cc new file mode 100644 index 00000000..50011031 --- /dev/null +++ b/xprof/convert/op_metrics_to_record_test.cc @@ -0,0 +1,56 @@ +/* Copyright 2026 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "xprof/convert/op_metrics_to_record.h" + +#include "gtest/gtest.h" +#include "plugin/xprof/protobuf/op_metrics.pb.h" + +namespace tensorflow { +namespace profiler { +namespace { + +constexpr double kMaxError = 1E-10; + +TEST(OpMetricsToRecordTest, GigaFlopsPerSecondPerCoreNormalizedOnDvfs) { + OpMetrics metrics; + metrics.set_time_ps(100); + metrics.set_normalized_time_ps(200); + metrics.set_flops_v2(1000); + metrics.set_occurrences(1); + metrics.set_num_cores(1); + + // GigaFlopsPerSecondPerCore = (flops_v2 / (time_ps / 1000.0)) = 1000 / 0.1 = + // 10000. Multiplier = time_ps / normalized_time_ps = 100 / 200 = 0.5. + // Expected normalized GFLOPS = 10000 * 0.5 = 5000. + EXPECT_NEAR(5000.0, GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics), + kMaxError); +} + +TEST(OpMetricsToRecordTest, GigaFlopsPerSecondPerCoreNormalizedOnDvfsFallback) { + OpMetrics metrics; + metrics.set_time_ps(100); + metrics.set_normalized_time_ps(0); + metrics.set_flops_v2(1000); + metrics.set_occurrences(1); + metrics.set_num_cores(1); + + EXPECT_NEAR(10000.0, GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics), + kMaxError); +} + +} // namespace +} // namespace profiler +} // namespace tensorflow diff --git a/xprof/convert/op_profile_builder.cc b/xprof/convert/op_profile_builder.cc index 00d040ba..d4b0c0c8 100644 --- a/xprof/convert/op_profile_builder.cc +++ b/xprof/convert/op_profile_builder.cc @@ -203,9 +203,9 @@ void PopulateOpMetricsNode( metrics->set_avg_time_ps(tsl::profiler::SafeDivide(op_metrics.time_ps(), op_metrics.occurrences())); - double uncapped_flops_utilization = - tsl::profiler::SafeDivide(GigaFlopsPerSecondPerCore(op_metrics), - peak_gigaflops_per_second_per_core); + double uncapped_flops_utilization = tsl::profiler::SafeDivide( + GigaFlopsPerSecondPerCoreNormalizedOnDvfs(op_metrics), + peak_gigaflops_per_second_per_core); double flops_utilization = CapUtilization(uncapped_flops_utilization); 
// The UI expects flops_utilization = flop_util / time_fraction. See: