Skip to content

Commit 36e904d

Browse files
authored
Fix SHA hash OOB on strings that are exact multiples of message chunk size (rapidsai#21004)
Closes rapidsai#21003.

This is a small change to SHA hash processing to handle the case where the input data length is an exact multiple of the message chunk size. Taking SHA256 on a string of length 128, for instance: after copying/hashing the first 64 bytes, the existing loop checks whether 128 > 64 + 64:

```cpp
while (len > Hasher::message_chunk_size + copylen)
```

That condition is false, so the loop body does not run and `process()` returns with an unhashed 64 bytes left in the buffer (i.e., the buffer is completely full). Subsequently, `finalize()` tries to write the end-of-message flag at buffer[64] (during `thrust::fill_n`), which is one past the end of the buffer and triggers the error reported in the issue.

Authors:
- Rishi Chandra (https://github.com/rishic3)

Approvers:
- Lawrence Mitchell (https://github.com/wence-)
- David Wendt (https://github.com/davidwendt)

URL: rapidsai#21004
1 parent 080c04a commit 36e904d

6 files changed

Lines changed: 50 additions & 18 deletions

File tree

cpp/src/hash/sha_hash.cuh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,16 @@ struct HashBase : public crtp<Hasher> {
119119
this->underlying().hash_step(state);
120120

121121
// Take buffer-sized chunks of the data and do a hash step on each chunk.
122-
while (len > Hasher::message_chunk_size + copylen) {
122+
// Check with equality here because the last chunk may be exactly the size of the buffer.
123+
while (len >= Hasher::message_chunk_size + copylen) {
123124
memcpy(state.buffer, data + copylen, Hasher::message_chunk_size);
124125
this->underlying().hash_step(state);
125126
copylen += Hasher::message_chunk_size;
126127
}
127128

128-
// The remaining data chunk does not fill the buffer. We copy the data into
129+
// The remaining data chunk (if any) does not fill the buffer. We copy the data into
129130
// the buffer but do not trigger a hash step yet.
130-
memcpy(state.buffer, data + copylen, len - copylen);
131+
if (len > copylen) { memcpy(state.buffer, data + copylen, len - copylen); }
131132
state.buffer_length = len - copylen;
132133
}
133134
}

cpp/tests/hashing/sha1_test.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -35,6 +35,8 @@ TEST_F(SHA1HashTest, MultiValue)
3535
"A 56 character string to test message padding algorithm.",
3636
"A 63 character string to test message padding algorithm, again.",
3737
"A 64 character string to test message padding algorithm, again!!",
38+
"A 128 character string to test message padding algorithm for exact multiples of 64 bytes. "
39+
"Let's be very sure that this works!!!!",
3840
"A very long (greater than 128 bytes/char string) to execute a multi hash-step data point in "
3941
"the hash function being tested. This string needed to be longer.",
4042
"All work and no play makes Jack a dull boy",
@@ -58,6 +60,7 @@ TEST_F(SHA1HashTest, MultiValue)
5860
"cb73203438ab46ea54491c53e288a2703c440c4a",
5961
"c595ebd13a785c1c2659e010a42e2ff9987ef51f",
6062
"4ffaf61804c55b8c2171be548bef2e1d0baca17a",
63+
"a5a365142798fac9acb8d8d1c57a281d2f7d7f93",
6164
"595965dd18f38087186162c788485fe249242131",
6265
"a62ca720fbab830c8890044eacbeac216f1ca2e4",
6366
"11e16c52273b5669a41d17ec7c187475193f88b3",
@@ -69,16 +72,17 @@ TEST_F(SHA1HashTest, MultiValue)
6972
"e3977ee0ea7f238134ec93c79988fa84b7c5d79e",
7073
"f6f75b6fa3c3d8d86b44fcb2c98c9ad4b37dcdd0",
7174
"c7abd431a775c604edf41a62f7f215e7258dc16a",
75+
"408e7160bf55fae2a8cfb3ff2f6f830513a30a8a",
7276
"153fdf20d2bd8ae76241197314d6e0be7fe10f50",
7377
"8c3656f7cb37898f9296c1965000d6da13fed64e",
7478
"b4a848399375ec842c2cb445d98b5f80a4dce94f",
7579
"106a56e997aa6a149cc5091750574a25c324fa65"});
7680

7781
using limits = std::numeric_limits<int32_t>;
7882
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
79-
{0, -1, 100, -100, limits::min(), limits::max(), 1, 2, 3});
83+
{0, -1, 100, -100, limits::min(), limits::max(), 42, 1, 2, 3});
8084

81-
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 0});
85+
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 1, 0});
8286

8387
// Test string inputs against known outputs
8488
auto const string_input1 = cudf::table_view({strings_col});

cpp/tests/hashing/sha224_test.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -35,6 +35,8 @@ TEST_F(SHA224HashTest, MultiValue)
3535
"A 56 character string to test message padding algorithm.",
3636
"A 63 character string to test message padding algorithm, again.",
3737
"A 64 character string to test message padding algorithm, again!!",
38+
"A 128 character string to test message padding algorithm for exact multiples of 64 bytes. "
39+
"Let's be very sure that this works!!!!",
3840
"A very long (greater than 128 bytes/char string) to execute a multi hash-step data point in "
3941
"the hash function being tested. This string needed to be longer.",
4042
"All work and no play makes Jack a dull boy",
@@ -58,6 +60,7 @@ TEST_F(SHA224HashTest, MultiValue)
5860
"5d1ed8373987e403482cefe1662a63fa3076c0a5331d141f41654bbe",
5961
"0662c91000b99de7a20c89097dd62f59120398d52499497489ccff95",
6062
"f9ea303770699483f3e53263b32a3b3c876d1b8808ce84df4b8ca1c4",
63+
"5a2db111de8110fda407037b587af6bb05eec55761696b90bca05729",
6164
"2da6cd4bdaa0a99fd7236cd5507c52e12328e71192e83b32d2f110f9",
6265
"e7d0adb165079efc6c6343112f8b154aa3644ca6326f658aaa0f8e4a",
6366
"309cc09eaa051beea7d0b0159daca9b4e8a533cb554e8f382c82709e",
@@ -69,16 +72,17 @@ TEST_F(SHA224HashTest, MultiValue)
6972
"8e1955a473a149368dc0a931f99379b44b0bb752f206dbdf68629232",
7073
"8581001e08295b7884428c022378cfdd643c977aefe4512f0252dc30",
7174
"d5854dfe3c32996345b103a6a16c7bdfa924723d620b150737e77370",
75+
"040e548e5e6e7affc27b39521c303a8030c119b9ba13d9739cf2c649",
7276
"dd56deac5f2caa579a440ee814fc04a3afaf805d567087ac3317beb3",
7377
"14fb559f6309604bedd89183f585f3b433932b5b0e675848feebf8ec",
7478
"d219eefea538491efcb69bc5bbef4177ad991d1b6e1367b5981b8c31",
7579
"5d5c2eace7ee553fe5cd25c8a8916e1eda81a5a5ca36a6338118a661"});
7680

7781
using limits = std::numeric_limits<int32_t>;
7882
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
79-
{0, -1, 100, -100, limits::min(), limits::max(), 1, 2, 3});
83+
{0, -1, 100, -100, limits::min(), limits::max(), 42, 1, 2, 3});
8084

81-
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 0});
85+
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 1, 0});
8286

8387
// Test string inputs against known outputs
8488
auto const string_input1 = cudf::table_view({strings_col});

cpp/tests/hashing/sha256_test.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -35,6 +35,11 @@ TEST_F(SHA256HashTest, MultiValue)
3535
"A 56 character string to test message padding algorithm.",
3636
"A 63 character string to test message padding algorithm, again.",
3737
"A 64 character string to test message padding algorithm, again!!",
38+
"A 128 character string to test message padding algorithm for exact multiples of 64 bytes. "
39+
"Let's be very sure that this works!!!!",
40+
"A 256 character string to test message padding algorithm for exact multiples of 64 bytes, "
41+
"again. I ran this through python for a quick length check to ensure this string is exactly "
42+
"256 chars, but feel free to count it yourself manually to be extra certain.",
3843
"A very long (greater than 128 bytes/char string) to execute a multi hash-step data point in "
3944
"the hash function being tested. This string needed to be longer.",
4045
"All work and no play makes Jack a dull boy",
@@ -58,6 +63,8 @@ TEST_F(SHA256HashTest, MultiValue)
5863
"d16883c666112142c1d72c9080b41161be7563250539e3f6ab6e2fdf2210074b",
5964
"11174fa180460f5d683c2e63fcdd897dcbf10c28a9225d3ced9a8bbc3774415d",
6065
"10a7d211e692c6f71bb9f7524ba1437588c2797356f05fc585340f002fe7015e",
66+
"55865b5ddd47bb71a7023e8e58e20425d075942cf59c5be94e8c55760f50caf6",
67+
"ab7fb20c040600f53e1b1cbd75e8c1ce4738b7f5618b46784802873d09778b62",
6168
"339d610dcb030bb4222bcf18c8ab82d911bfe7fb95b2cd9f6785fd4562b02401",
6269
"2ce9936a4a2234bf8a76c37d92e01d549d03949792242e7f8a1ad68575e4e4a8",
6370
"255fdd4d80a72f67921eb36f3e1157ea3e995068cee80e430c034e0d3692f614",
@@ -69,16 +76,18 @@ TEST_F(SHA256HashTest, MultiValue)
6976
"96c204fa5d44b2487abfec105a05f8ae634551604f6596202ca99e3724e3953a",
7077
"2e7be264f3ecbb2930e7c54bf6c5fc1f310a8c63c50916bb713f34699ed11719",
7178
"224e4dce71d5dbd5e79ba65aaced7ad9c4f45dda146278087b2b61d164f056f0",
79+
"f1d8f49c0f893b9cad0ced1d05c405be7af67fbfbcf939411c2e35a529d965ae",
80+
"223d50099346475af5738c2efa8db10cafaed4fd27b13b3e45f7b0a2281dc509",
7281
"91f3108d4e9c696fdb37ae49fdc6a2237f1d1f977b7216406cc8a6365355f43b",
7382
"490be480afe271685e9c1fdf46daac0b9bf7f25602e153ca92a0ddb0e4b662ef",
7483
"4ddc45855d7ce3ab09efacff1fbafb33502f7dd468dc5a62826689c1c658dbce",
7584
"bed32be19e1f432f5caec2b8bf914a968dfa5a5cba3868ea640ba9cbb0f9c9c8"});
7685

7786
using limits = std::numeric_limits<int32_t>;
7887
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
79-
{0, -1, 100, -100, limits::min(), limits::max(), 1, 2, 3});
88+
{0, -1, 100, -100, limits::min(), limits::max(), 42, 1, 2, 3, 4});
8089

81-
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 0});
90+
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0});
8291

8392
// Test string inputs against known outputs
8493
auto const string_input1 = cudf::table_view({strings_col});

cpp/tests/hashing/sha384_test.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -35,6 +35,9 @@ TEST_F(SHA384HashTest, MultiValue)
3535
"A 56 character string to test message padding algorithm.",
3636
"A 63 character string to test message padding algorithm, again.",
3737
"A 64 character string to test message padding algorithm, again!!",
38+
"A 256 character string to test message padding algorithm for exact multiples of 64 bytes, "
39+
"again. I ran this through python for a quick length check to ensure this string is exactly "
40+
"256 chars, but feel free to count it yourself manually to be extra certain.",
3841
"A very long (greater than 128 bytes/char string) to execute a multi hash-step data point in "
3942
"the hash function being tested. This string needed to be longer.",
4043
"All work and no play makes Jack a dull boy",
@@ -63,6 +66,8 @@ TEST_F(SHA384HashTest, MultiValue)
6366
"78c",
6467
"5d7a853a18138fa90feac07c896dfca65a0f1eb2ed40f1fd7be6238dd7ef429bb1aeb0236735500eb954c9b4ba923"
6568
"254",
69+
"26e1c7410c4fc28ce814e80abf8bf73868c3907319c59887cfaf6f621c3202b94552852e7ec40b55912cab635e917"
70+
"4c8",
6671
"c72bcaf3a4b01986711cd5d2614aa8f9d7fad61455613eac4561b1468f9a25dd26566c8ad1190dec7567be4f6fc1d"
6772
"b29",
6873
"281826f23bebb3f835d2f15edcb0cdb3078ae2d7dc516f3a366af172dff4db6dd5833bc1e5ee411d52c598773e939"
@@ -83,6 +88,8 @@ TEST_F(SHA384HashTest, MultiValue)
8388
"77d",
8489
"112a6f9c74741d490747db90f5e901a88b7a32f637c030d6d96e5f89a70a5f1ee209e018648842c0e1d32002f95fd"
8590
"d07",
91+
"a52942e39227bb9b4a3a0a22e170f3e86828a6c45b6b4e49ce41d06ebe879a6c12687b198a3addd90277f3ec1fa1c"
92+
"ba8",
8693
"dc6f24bb0eb2c96fb53c52c402f073de089f3aeae9594be0c4f4cb31b13bd48769b80aa97d83a25ece1edf0c83373"
8794
"f56",
8895
"781a33adfdcdcbb514318728c074fbb59d44002995825642e0c9bfef8a2ccf3fb637b39ff3dd265df8cd93c86e945"
@@ -94,9 +101,9 @@ TEST_F(SHA384HashTest, MultiValue)
94101

95102
using limits = std::numeric_limits<int32_t>;
96103
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
97-
{0, -1, 100, -100, limits::min(), limits::max(), 1, 2, 3});
104+
{0, -1, 100, -100, limits::min(), limits::max(), 42, 1, 2, 3});
98105

99-
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 0});
106+
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 1, 0});
100107

101108
// Test string inputs against known outputs
102109
auto const string_input1 = cudf::table_view({strings_col});

cpp/tests/hashing/sha512_test.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -35,6 +35,9 @@ TEST_F(SHA512HashTest, MultiValue)
3535
"A 56 character string to test message padding algorithm.",
3636
"A 63 character string to test message padding algorithm, again.",
3737
"A 64 character string to test message padding algorithm, again!!",
38+
"A 256 character string to test message padding algorithm for exact multiples of 64 bytes, "
39+
"again. I ran this through python for a quick length check to ensure this string is exactly "
40+
"256 chars, but feel free to count it yourself manually to be extra certain.",
3841
"A very long (greater than 128 bytes/char string) to execute a multi hash-step data point in "
3942
"the hash function being tested. This string needed to be longer.",
4043
"All work and no play makes Jack a dull boy",
@@ -63,6 +66,8 @@ TEST_F(SHA512HashTest, MultiValue)
6366
"cc3d2c789d2cf5960df648c78a765e6c27c",
6467
"9c436e24be60e17425a1a829642d97e7180b57485cf95db007cf5b32bbae1f2325b6874b3377e37806b15b739bffa"
6568
"412ea6d095b726487d70e7b50e92d56c750",
69+
"c0b72e552c4026f6f3797281a92a124e4c83d1a901f58c7139dad5a5160e319324e002e7c8c5e64eabdb713b6006d"
70+
"d06e7755244aff3861fd52f4ccb3c79b321",
6671
"6a25ca1f20f6e79faea2a0770075e4262beb66b40f59c22d3e8abdb6188ef8d8914faf5dbf6df76165bb61b81dfda"
6772
"46643f0d6366a39f7bd3d270312f9d3cf87",
6873
"bae9eb4b5c05a4c5f85750b70b2f0ce78e387f992f0927a017eb40bd180a13004f6252a6bbf9816f195fb7d86668c"
@@ -83,6 +88,8 @@ TEST_F(SHA512HashTest, MultiValue)
8388
"67911b49d951714decbdbfca1957be8ba10",
8489
"da7706221f8861ef522ab9555f57306382fb18c337536545d839e431dede4ff9f9affafb82ab5588734a8fc6631e6"
8590
"a0cd864634b62e24a42755c863c5d5c5848",
91+
"7d7498241ad7aee2845a1f79d44115511853f7d1490da871f0aaeba05a4299dc256f267c0203db1d175bee97e3969"
92+
"ba6f250a0d08ac037210aeced85f83a8412",
8693
"04dadc8fdf205fe535c8eb38f20882fc2a0e308081052d7588e74f6620aa207749039468c126db7407050def80415"
8794
"1d037cb188d5d4d459015032972a9e9f001",
8895
"aae2e742074847889a029a8d3170f9e17177d48ec0b9dabe572aa68dd3001af0c512f164ba84aa75b13950948170a"
@@ -94,9 +101,9 @@ TEST_F(SHA512HashTest, MultiValue)
94101

95102
using limits = std::numeric_limits<int32_t>;
96103
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
97-
{0, -1, 100, -100, limits::min(), limits::max(), 1, 2, 3});
104+
{0, -1, 100, -100, limits::min(), limits::max(), 42, 1, 2, 3});
98105

99-
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 0});
106+
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0, 1, 1, 1, 1, 0});
100107

101108
// Test string inputs against known outputs
102109
auto const string_input1 = cudf::table_view({strings_col});

0 commit comments

Comments
 (0)