Skip to content

Commit 0aacd9f

Browse files
cyyever and meta-codesync[bot]
authored and committed
Fix EmbeddingQuantizeFloatToFloatOrHalfBenchmark (#5622)
Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/2573

This PR fixes 3 issues in ``EmbeddingQuantizeFloatToFloatOrHalfBenchmark``:
1. A heap buffer overflow in the float16 output path.
2. An incorrect ``elements_per_usec`` computation, which now counts the actual output elements.
3. An incorrect ``bytes_read`` computation, which now uses 1 byte per input element (uint8_t) instead of sizeof(float).

Pull Request resolved: #5622
Reviewed By: cthi
Differential Revision: D100537012
Pulled By: q10
fbshipit-source-id: 77fcc097bd877af721040f2de410188cc0f5adb8
1 parent 2e3da8a commit 0aacd9f

1 file changed

Lines changed: 4 additions & 9 deletions

File tree

bench/EmbeddingQuantizeFloatToFloatOrHalfBenchmark.cc

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -42,13 +42,8 @@ static void performance_test() {
4242
aligned_vector<uint8_t> inpVec(rowSize * colSize);
4343
randFill<uint8_t>(inpVec, 0, 20);
4444

45-
int out_emb_cols = colSize;
46-
47-
if constexpr (is_same_v<T, float16>) {
48-
out_emb_cols /= 2;
49-
}
50-
int outVecSize = rowSize * (out_emb_cols + 2 * sizeof(T));
51-
aligned_vector<T> outVec(outVecSize);
45+
int output_columns = colSize - 2 * sizeof(float);
46+
aligned_vector<T> outVec(rowSize * output_columns);
5247

5348
double duration = 0.0f;
5449

@@ -69,10 +64,10 @@ static void performance_test() {
6964
});
7065

7166
float elements_per_usec =
72-
rowSize * colSize * kNumRepeats / (duration * 1e6);
67+
rowSize * output_columns * kNumRepeats / (duration * 1e6);
7368

7469
duration *= 1e9; // convert to ns
75-
long bytes_read = rowSize * colSize * sizeof(float) * kNumRepeats;
70+
size_t bytes_read = static_cast<size_t>(rowSize) * colSize * kNumRepeats;
7671
float gigabyes_per_sec = bytes_read / duration;
7772

7873
cout << setw(6) << rowSize << ", " << setw(6) << colSize << ",";

0 commit comments

Comments
 (0)