Fix EmbeddingQuantizeFloatToFloatOrHalfBenchmark (#5622)

cyyever · meta-codesync[bot] · commit 0aacd9ffa41e · 2026-04-14T11:25:17.000-07:00
Summary: X-link: https://github.com/facebookresearch/FBGEMM/pull/2573 This PR fixes 3 issues in ``EmbeddingQuantizeFloatToFloatOrHalfBenchmark``: 1. A heap buffer overflow in the float16 output path. 2. An incorrect elements_per_usec, which now counts the actual output elements. 3. An incorrect bytes_read, which now uses 1 byte per input element (uint8_t) instead of sizeof(float). Pull Request resolved: #5622 Reviewed By: cthi Differential Revision: D100537012 Pulled By: q10 fbshipit-source-id: 77fcc097bd877af721040f2de410188cc0f5adb8
diff --git a/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc b/bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc
@@ -42,13 +42,8 @@ static void performance_test() {
       aligned_vector<uint8_t> inpVec(rowSize * colSize);
       randFill<uint8_t>(inpVec, 0, 20);
 
-      int out_emb_cols = colSize;
-
-      if constexpr (is_same_v<T, float16>) {
-        out_emb_cols /= 2;
-      }
-      int outVecSize = rowSize * (out_emb_cols + 2 * sizeof(T));
-      aligned_vector<T> outVec(outVecSize);
+      int output_columns = colSize - 2 * sizeof(float);
+      aligned_vector<T> outVec(rowSize * output_columns);
 
       double duration = 0.0f;
 
@@ -69,10 +64,10 @@ static void performance_test() {
           });
 
       float elements_per_usec =
-          rowSize * colSize * kNumRepeats / (duration * 1e6);
+          rowSize * output_columns * kNumRepeats / (duration * 1e6);
 
       duration *= 1e9; // convert to ns
-      long bytes_read = rowSize * colSize * sizeof(float) * kNumRepeats;
+      size_t bytes_read = static_cast<size_t>(rowSize) * colSize * kNumRepeats;
       float gigabyes_per_sec = bytes_read / duration;
 
       cout << setw(6) << rowSize << ", " << setw(6) << colSize << ",";