Skip to content

Commit dfeb507

Browse files
committed
fixup! crypto: add TurboSHAKE and KangarooTwelve Web Cryptography algorithms
1 parent f80cc1b commit dfeb507

File tree

1 file changed

+46
-14
lines changed

1 file changed

+46
-14
lines changed

src/crypto/crypto_turboshake.cc

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,30 @@ inline uint64_t ROL64(uint64_t val, int offset) {
3030
return (val << offset) | (val >> (64 - offset));
3131
}
3232

33+
// Load/store 64-bit lanes in little-endian byte order.
// The Keccak state uses LE lane encoding (FIPS 202 Section 1, B.1).
// These helpers ensure correctness on both LE and BE platforms.

// Reads the 8 bytes src[0..7] and assembles them into a 64-bit value with
// src[0] as the least significant byte and src[7] as the most significant.
// Each byte is widened to uint64_t before shifting so the shift never
// operates on a promoted (32-bit) int. The result does not depend on host
// byte order because only arithmetic shifts/ORs are used, no type punning.
// The caller must provide at least 8 readable bytes at src.
inline uint64_t LoadLE64(const uint8_t* src) {
  return static_cast<uint64_t>(src[0]) |
         (static_cast<uint64_t>(src[1]) << 8) |
         (static_cast<uint64_t>(src[2]) << 16) |
         (static_cast<uint64_t>(src[3]) << 24) |
         (static_cast<uint64_t>(src[4]) << 32) |
         (static_cast<uint64_t>(src[5]) << 40) |
         (static_cast<uint64_t>(src[6]) << 48) |
         (static_cast<uint64_t>(src[7]) << 56);
}
45+
46+
// Writes the 64-bit value val into dst[0..7] in little-endian order:
// dst[0] receives the least significant byte, dst[7] the most significant.
// Each byte is extracted with a shift and a narrowing cast, so the output
// does not depend on host byte order. The caller must provide at least
// 8 writable bytes at dst.
inline void StoreLE64(uint8_t* dst, uint64_t val) {
  dst[0] = static_cast<uint8_t>(val);
  dst[1] = static_cast<uint8_t>(val >> 8);
  dst[2] = static_cast<uint8_t>(val >> 16);
  dst[3] = static_cast<uint8_t>(val >> 24);
  dst[4] = static_cast<uint8_t>(val >> 32);
  dst[5] = static_cast<uint8_t>(val >> 40);
  dst[6] = static_cast<uint8_t>(val >> 48);
  dst[7] = static_cast<uint8_t>(val >> 56);
}
56+
3357
static const unsigned char rhotates[5][5] = {
3458
{0, 1, 62, 28, 27},
3559
{36, 44, 6, 55, 20},
@@ -116,41 +140,49 @@ void TurboSHAKE(const uint8_t* input,
116140
uint8_t* output,
117141
size_t output_len) {
118142
uint64_t A[5][5] = {};
119-
uint8_t* state = reinterpret_cast<uint8_t*>(A);
143+
// Both rates (168, 136) are multiples of 8
144+
size_t lane_count = rate / 8;
120145

121146
size_t offset = 0;
122147

123148
// Absorb complete blocks from input
124149
while (offset + rate <= input_len) {
125-
for (size_t i = 0; i < rate; i++) {
126-
state[i] ^= input[offset + i];
150+
for (size_t i = 0; i < lane_count; i++) {
151+
A[i / 5][i % 5] ^= LoadLE64(input + offset + i * 8);
127152
}
128153
KeccakP1600_12(A);
129154
offset += rate;
130155
}
131156

132157
// Absorb last (partial) block: remaining input bytes + domain_sep + padding
133158
size_t remaining = input_len - offset;
134-
135-
// XOR remaining input bytes
136-
for (size_t i = 0; i < remaining; i++) {
137-
state[i] ^= input[offset + i];
159+
uint8_t pad[168] = {}; // sized for max rate (TurboSHAKE128)
160+
if (remaining > 0) {
161+
memcpy(pad, input + offset, remaining);
138162
}
163+
pad[remaining] ^= domain_sep;
164+
pad[rate - 1] ^= 0x80;
139165

140-
// XOR domain separation byte
141-
state[remaining] ^= domain_sep;
142-
143-
// XOR pad10*1 final bit at end of rate block
144-
state[rate - 1] ^= 0x80;
145-
166+
for (size_t i = 0; i < lane_count; i++) {
167+
A[i / 5][i % 5] ^= LoadLE64(pad + i * 8);
168+
}
146169
KeccakP1600_12(A);
147170

148171
// Squeeze output
149172
size_t out_offset = 0;
150173
while (out_offset < output_len) {
151174
size_t block = output_len - out_offset;
152175
if (block > rate) block = rate;
153-
memcpy(output + out_offset, state, block);
176+
size_t full_lanes = block / 8;
177+
for (size_t i = 0; i < full_lanes; i++) {
178+
StoreLE64(output + out_offset + i * 8, A[i / 5][i % 5]);
179+
}
180+
size_t rem = block % 8;
181+
if (rem > 0) {
182+
uint8_t tmp[8];
183+
StoreLE64(tmp, A[full_lanes / 5][full_lanes % 5]);
184+
memcpy(output + out_offset + full_lanes * 8, tmp, rem);
185+
}
154186
out_offset += block;
155187
if (out_offset < output_len) {
156188
KeccakP1600_12(A);

0 commit comments

Comments
 (0)