Skip to content

Commit 5522b65

Browse files
committed
mldsa: correct ML-DSA on CHAR_BIT!=8 + add WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
Enable ML-DSA-87 keygen/sign/verify on a CPU whose byte and int are both 16 bits wide (TI C28x). All changes are gated and are a no-op on normal targets: - Encode/decode integer-promotion fixes: where int is 16-bit, a byte/word16 field promotes to *unsigned* int, so '2 - field' was computed as unsigned and a negative coefficient was zero-extended into sword32 (e.g. -1 -> 0x0000FFFF); cast the unpacked field to sword32 before the subtract (eta-2/eta-4/t0 decode). The bit-packers relied on a (byte) cast truncating to 8 bits; mask with WC_OCTET() instead, and cast the operand of the <<MLDSA_D shift to sword32 (eta-2/t0/t1/gamma1 encode). - dilithium.h: use a shift-based Montgomery reduction on WC_16BIT_CPU (cl2000 miscompiles the multiply form). - New WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM: stream the signature's z vector one polynomial at a time instead of keeping the whole l-vector in memory, cutting the ML-DSA-87 verify key by ~6 KB (with WOLFSSL_MLDSA_ASSIGN_KEY, ~10.7 KB total verify RAM on the C28x).
1 parent 5e898d7 commit 5522b65

3 files changed

Lines changed: 169 additions & 87 deletions

File tree

wolfcrypt/src/wc_mldsa.c

Lines changed: 131 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,6 +1021,7 @@ static int mldsa_squeeze256(wc_Shake* shake256, const byte* in,
10211021
* @param [in] eta Range specifier of each value.
10221022
* @param [out] p Buffer to encode into.
10231023
*/
1024+
10241025
static void mldsa_vec_encode_eta_bits_c(const sword32* s, byte d, byte eta,
10251026
byte* p)
10261027
{
@@ -1047,9 +1048,9 @@ static void mldsa_vec_encode_eta_bits_c(const sword32* s, byte d, byte eta,
10471048
byte s7 = (byte)(2 - s[j + 7]);
10481049

10491050
/* Pack 8 3-bit values into 3 bytes. */
1050-
p[0] = (byte)((s0 >> 0) | (s1 << 3) | (s2 << 6));
1051-
p[1] = (byte)((s2 >> 2) | (s3 << 1) | (s4 << 4) | (s5 << 7));
1052-
p[2] = (byte)((s5 >> 1) | (s6 << 2) | (s7 << 5));
1051+
p[0] = WC_OCTET((s0 >> 0) | (s1 << 3) | (s2 << 6));
1052+
p[1] = WC_OCTET((s2 >> 2) | (s3 << 1) | (s4 << 4) | (s5 << 7));
1053+
p[2] = WC_OCTET((s5 >> 1) | (s6 << 2) | (s7 << 5));
10531054
/* Move to next place to encode into. */
10541055
p += MLDSA_ETA_2_BITS;
10551056
}
@@ -1159,15 +1160,18 @@ static void mldsa_decode_eta_2_bits_c(const byte* p, sword32* s)
11591160
* 3 bits to encode each number.
11601161
* 8 numbers from 3 bytes. (8 * 3 bits = 3 * 8 bits) */
11611162
for (j = 0; j < MLDSA_N; j += 8) {
1162-
/* Get 3 bits and put in range of -2..2. */
1163-
s[j + 0] = 2 - ((p[0] >> 0) & 0x7 );
1164-
s[j + 1] = 2 - ((p[0] >> 3) & 0x7 );
1165-
s[j + 2] = 2 - ((p[0] >> 6) | ((p[1] << 2) & 0x7));
1166-
s[j + 3] = 2 - ((p[1] >> 1) & 0x7 );
1167-
s[j + 4] = 2 - ((p[1] >> 4) & 0x7 );
1168-
s[j + 5] = 2 - ((p[1] >> 7) | ((p[2] << 1) & 0x7));
1169-
s[j + 6] = 2 - ((p[2] >> 2) & 0x7 );
1170-
s[j + 7] = 2 - ((p[2] >> 5) & 0x7 );
1163+
/* Get 3 bits and put in range of -2..2.
1164+
* Cast to signed 32-bit before the subtract: where int is 16-bit a
1165+
* byte/word16 field promotes to unsigned, so a negative result would
1166+
* zero-extend instead of sign-extend into sword32. */
1167+
s[j + 0] = 2 - (sword32)((p[0] >> 0) & 0x7 );
1168+
s[j + 1] = 2 - (sword32)((p[0] >> 3) & 0x7 );
1169+
s[j + 2] = 2 - (sword32)((p[0] >> 6) | ((p[1] << 2) & 0x7));
1170+
s[j + 3] = 2 - (sword32)((p[1] >> 1) & 0x7 );
1171+
s[j + 4] = 2 - (sword32)((p[1] >> 4) & 0x7 );
1172+
s[j + 5] = 2 - (sword32)((p[1] >> 7) | ((p[2] << 1) & 0x7));
1173+
s[j + 6] = 2 - (sword32)((p[2] >> 2) & 0x7 );
1174+
s[j + 7] = 2 - (sword32)((p[2] >> 5) & 0x7 );
11711175
/* Move to next place to decode from. */
11721176
p += MLDSA_ETA_2_BITS;
11731177
}
@@ -1221,24 +1225,24 @@ static void mldsa_decode_eta_4_bits_c(const byte* p, sword32* s)
12211225
* 4 bits to encode each number.
12221226
* 2 numbers from 1 byte. (2 * 4 bits = 1 * 8 bits) */
12231227
for (j = 0; j < MLDSA_N / 2; j++) {
1224-
/* Get 4 bits and put in range of -4..4. */
1225-
s[j * 2 + 0] = 4 - (p[j] & 0xf);
1226-
s[j * 2 + 1] = 4 - (p[j] >> 4);
1228+
/* Get 4 bits and put in range of -4..4. (sword32 cast: see eta-2.) */
1229+
s[j * 2 + 0] = 4 - (sword32)(p[j] & 0xf);
1230+
s[j * 2 + 1] = 4 - (sword32)(p[j] >> 4);
12271231
}
12281232
#else
12291233
/* Step 6 or 9.
12301234
* 4 bits to encode each number.
12311235
* 8 numbers from 4 bytes. (8 * 4 bits = 4 * 8 bits) */
12321236
for (j = 0; j < MLDSA_N / 2; j += 4) {
1233-
/* Get 4 bits and put in range of -4..4. */
1234-
s[j * 2 + 0] = 4 - (p[j + 0] & 0xf);
1235-
s[j * 2 + 1] = 4 - (p[j + 0] >> 4);
1236-
s[j * 2 + 2] = 4 - (p[j + 1] & 0xf);
1237-
s[j * 2 + 3] = 4 - (p[j + 1] >> 4);
1238-
s[j * 2 + 4] = 4 - (p[j + 2] & 0xf);
1239-
s[j * 2 + 5] = 4 - (p[j + 2] >> 4);
1240-
s[j * 2 + 6] = 4 - (p[j + 3] & 0xf);
1241-
s[j * 2 + 7] = 4 - (p[j + 3] >> 4);
1237+
/* Get 4 bits and put in range of -4..4. (sword32 cast: see eta-2.) */
1238+
s[j * 2 + 0] = 4 - (sword32)(p[j + 0] & 0xf);
1239+
s[j * 2 + 1] = 4 - (sword32)(p[j + 0] >> 4);
1240+
s[j * 2 + 2] = 4 - (sword32)(p[j + 1] & 0xf);
1241+
s[j * 2 + 3] = 4 - (sword32)(p[j + 1] >> 4);
1242+
s[j * 2 + 4] = 4 - (sword32)(p[j + 2] & 0xf);
1243+
s[j * 2 + 5] = 4 - (sword32)(p[j + 2] >> 4);
1244+
s[j * 2 + 6] = 4 - (sword32)(p[j + 3] & 0xf);
1245+
s[j * 2 + 7] = 4 - (sword32)(p[j + 3] >> 4);
12421246
}
12431247
#endif /* WOLFSSL_MLDSA_SMALL */
12441248
}
@@ -1378,21 +1382,21 @@ static void mldsa_vec_encode_t0_t1_c(const sword32* t, byte d, byte* t0,
13781382
MLDSA_D);
13791383
/* Take 8 values of t and take bottom bits and make positive. */
13801384
word16 n0_0 = (word16)(MLDSA_D_MAX_HALF -
1381-
(t[j + 0] - (n1_0 << MLDSA_D)));
1385+
(t[j + 0] - ((sword32)n1_0 << MLDSA_D)));
13821386
word16 n0_1 = (word16)(MLDSA_D_MAX_HALF -
1383-
(t[j + 1] - (n1_1 << MLDSA_D)));
1387+
(t[j + 1] - ((sword32)n1_1 << MLDSA_D)));
13841388
word16 n0_2 = (word16)(MLDSA_D_MAX_HALF -
1385-
(t[j + 2] - (n1_2 << MLDSA_D)));
1389+
(t[j + 2] - ((sword32)n1_2 << MLDSA_D)));
13861390
word16 n0_3 = (word16)(MLDSA_D_MAX_HALF -
1387-
(t[j + 3] - (n1_3 << MLDSA_D)));
1391+
(t[j + 3] - ((sword32)n1_3 << MLDSA_D)));
13881392
word16 n0_4 = (word16)(MLDSA_D_MAX_HALF -
1389-
(t[j + 4] - (n1_4 << MLDSA_D)));
1393+
(t[j + 4] - ((sword32)n1_4 << MLDSA_D)));
13901394
word16 n0_5 = (word16)(MLDSA_D_MAX_HALF -
1391-
(t[j + 5] - (n1_5 << MLDSA_D)));
1395+
(t[j + 5] - ((sword32)n1_5 << MLDSA_D)));
13921396
word16 n0_6 = (word16)(MLDSA_D_MAX_HALF -
1393-
(t[j + 6] - (n1_6 << MLDSA_D)));
1397+
(t[j + 6] - ((sword32)n1_6 << MLDSA_D)));
13941398
word16 n0_7 = (word16)(MLDSA_D_MAX_HALF -
1395-
(t[j + 7] - (n1_7 << MLDSA_D)));
1399+
(t[j + 7] - ((sword32)n1_7 << MLDSA_D)));
13961400

13971401
/* 13 bits per number.
13981402
* 8 numbers become 13 bytes. (8 * 13 bits = 13 * 8 bits) */
@@ -1406,20 +1410,20 @@ static void mldsa_vec_encode_t0_t1_c(const sword32* t, byte d, byte* t0,
14061410
tp[2] = (n0_4 >> 12) | ((word32)n0_5 << 1) |
14071411
((word32)n0_6 << 14) | ((word32)n0_7 << 27);
14081412
#else
1409-
t0[ 0] = (byte)( (n0_0 << 0));
1410-
t0[ 1] = (byte)((n0_0 >> 8) | (n0_1 << 5));
1411-
t0[ 2] = (byte)((n0_1 >> 3) );
1412-
t0[ 3] = (byte)((n0_1 >> 11) | (n0_2 << 2));
1413-
t0[ 4] = (byte)((n0_2 >> 6) | (n0_3 << 7));
1414-
t0[ 5] = (byte)((n0_3 >> 1) );
1415-
t0[ 6] = (byte)((n0_3 >> 9) | (n0_4 << 4));
1416-
t0[ 7] = (byte)((n0_4 >> 4) );
1417-
t0[ 8] = (byte)((n0_4 >> 12) | (n0_5 << 1));
1418-
t0[ 9] = (byte)((n0_5 >> 7) | (n0_6 << 6));
1419-
t0[10] = (byte)((n0_6 >> 2) );
1420-
t0[11] = (byte)((n0_6 >> 10) | (n0_7 << 3));
1413+
t0[ 0] = WC_OCTET( (n0_0 << 0));
1414+
t0[ 1] = WC_OCTET((n0_0 >> 8) | (n0_1 << 5));
1415+
t0[ 2] = WC_OCTET((n0_1 >> 3) );
1416+
t0[ 3] = WC_OCTET((n0_1 >> 11) | (n0_2 << 2));
1417+
t0[ 4] = WC_OCTET((n0_2 >> 6) | (n0_3 << 7));
1418+
t0[ 5] = WC_OCTET((n0_3 >> 1) );
1419+
t0[ 6] = WC_OCTET((n0_3 >> 9) | (n0_4 << 4));
1420+
t0[ 7] = WC_OCTET((n0_4 >> 4) );
1421+
t0[ 8] = WC_OCTET((n0_4 >> 12) | (n0_5 << 1));
1422+
t0[ 9] = WC_OCTET((n0_5 >> 7) | (n0_6 << 6));
1423+
t0[10] = WC_OCTET((n0_6 >> 2) );
1424+
t0[11] = WC_OCTET((n0_6 >> 10) | (n0_7 << 3));
14211425
#endif
1422-
t0[12] = (byte)((n0_7 >> 5) );
1426+
t0[12] = WC_OCTET((n0_7 >> 5) );
14231427

14241428
/* 10 bits per number.
14251429
* 8 numbers become 10 bytes. (8 * 10 bits = 10 * 8 bits) */
@@ -1430,17 +1434,17 @@ static void mldsa_vec_encode_t0_t1_c(const sword32* t, byte d, byte* t0,
14301434
tp[1] = (n1_3 >> 2) | ((word32)n1_4 << 8) |
14311435
((word32)n1_5 << 18) | ((word32)n1_6 << 28);
14321436
#else
1433-
t1[0] = (byte)( (n1_0 << 0));
1434-
t1[1] = (byte)((n1_0 >> 8) | (n1_1 << 2));
1435-
t1[2] = (byte)((n1_1 >> 6) | (n1_2 << 4));
1436-
t1[3] = (byte)((n1_2 >> 4) | (n1_3 << 6));
1437-
t1[4] = (byte)((n1_3 >> 2) );
1438-
t1[5] = (byte)( (n1_4 << 0));
1439-
t1[6] = (byte)((n1_4 >> 8) | (n1_5 << 2));
1440-
t1[7] = (byte)((n1_5 >> 6) | (n1_6 << 4));
1437+
t1[0] = WC_OCTET( (n1_0 << 0));
1438+
t1[1] = WC_OCTET((n1_0 >> 8) | (n1_1 << 2));
1439+
t1[2] = WC_OCTET((n1_1 >> 6) | (n1_2 << 4));
1440+
t1[3] = WC_OCTET((n1_2 >> 4) | (n1_3 << 6));
1441+
t1[4] = WC_OCTET((n1_3 >> 2) );
1442+
t1[5] = WC_OCTET( (n1_4 << 0));
1443+
t1[6] = WC_OCTET((n1_4 >> 8) | (n1_5 << 2));
1444+
t1[7] = WC_OCTET((n1_5 >> 6) | (n1_6 << 4));
14411445
#endif
1442-
t1[8] = (byte)((n1_6 >> 4) | (n1_7 << 6));
1443-
t1[9] = (byte)((n1_7 >> 2) );
1446+
t1[8] = WC_OCTET((n1_6 >> 4) | (n1_7 << 6));
1447+
t1[9] = WC_OCTET((n1_7 >> 2) );
14441448

14451449
/* Move to next place to encode bottom bits to. */
14461450
t0 += MLDSA_D;
@@ -1526,25 +1530,27 @@ static void mldsa_decode_t0_c(const byte* t0, sword32* t)
15261530
t[j + 7] = MLDSA_D_MAX_HALF - (sword32)
15271531
(( t32_2 >> 27 ) | ((word32)t0[12] ) << 5 );
15281532
#else
1529-
t[j + 0] = MLDSA_D_MAX_HALF -
1533+
/* sword32 cast on the unpacked field: see eta-2 decode - the subtract
1534+
* must be signed/32-bit so a negative t0 sign-extends correctly. */
1535+
t[j + 0] = MLDSA_D_MAX_HALF - (sword32)
15301536
((t0[ 0] ) | (((word16)(t0[ 1] & 0x1f)) << 8));
1531-
t[j + 1] = MLDSA_D_MAX_HALF -
1537+
t[j + 1] = MLDSA_D_MAX_HALF - (sword32)
15321538
((t0[ 1] >> 5) | (((word16)(t0[ 2] )) << 3) |
15331539
(((word16)(t0[ 3] & 0x03)) << 11));
1534-
t[j + 2] = MLDSA_D_MAX_HALF -
1540+
t[j + 2] = MLDSA_D_MAX_HALF - (sword32)
15351541
((t0[ 3] >> 2) | (((word16)(t0[ 4] & 0x7f)) << 6));
1536-
t[j + 3] = MLDSA_D_MAX_HALF -
1542+
t[j + 3] = MLDSA_D_MAX_HALF - (sword32)
15371543
((t0[ 4] >> 7) | (((word16)(t0[ 5] )) << 1) |
15381544
(((word16)(t0[ 6] & 0x0f)) << 9));
1539-
t[j + 4] = MLDSA_D_MAX_HALF -
1545+
t[j + 4] = MLDSA_D_MAX_HALF - (sword32)
15401546
((t0[ 6] >> 4) | (((word16)(t0[ 7] )) << 4) |
15411547
(((word16)(t0[ 8] & 0x01)) << 12));
1542-
t[j + 5] = MLDSA_D_MAX_HALF -
1548+
t[j + 5] = MLDSA_D_MAX_HALF - (sword32)
15431549
((t0[ 8] >> 1) | (((word16)(t0[ 9] & 0x3f)) << 7));
1544-
t[j + 6] = MLDSA_D_MAX_HALF -
1550+
t[j + 6] = MLDSA_D_MAX_HALF - (sword32)
15451551
((t0[ 9] >> 6) | (((word16)(t0[10] )) << 2) |
15461552
(((word16)(t0[11] & 0x07)) << 10));
1547-
t[j + 7] = MLDSA_D_MAX_HALF -
1553+
t[j + 7] = MLDSA_D_MAX_HALF - (sword32)
15481554
((t0[11] >> 3) | (((word16)(t0[12] )) << 5));
15491555
#endif
15501556
/* Move to next place to decode from. */
@@ -1771,16 +1777,16 @@ static void mldsa_encode_gamma1_17_bits_c(const sword32* z, byte* s)
17711777
s32p[1] = (z1 >> 14) | (z2 << 4) | (z3 << 22);
17721778
#endif
17731779
#else
1774-
s[0] = (byte)( z0 );
1775-
s[1] = (byte)( z0 >> 8 );
1776-
s[2] = (byte)((z0 >> 16) | (z1 << 2));
1777-
s[3] = (byte)( z1 >> 6 );
1778-
s[4] = (byte)((z1 >> 14) | (z2 << 4));
1779-
s[5] = (byte)( z2 >> 4 );
1780-
s[6] = (byte)((z2 >> 12) | (z3 << 6));
1781-
s[7] = (byte)( z3 >> 2 );
1782-
#endif
1783-
s[8] = (byte)( z3 >> 10 );
1780+
s[0] = WC_OCTET( z0 );
1781+
s[1] = WC_OCTET( z0 >> 8 );
1782+
s[2] = WC_OCTET((z0 >> 16) | (z1 << 2));
1783+
s[3] = WC_OCTET( z1 >> 6 );
1784+
s[4] = WC_OCTET((z1 >> 14) | (z2 << 4));
1785+
s[5] = WC_OCTET( z2 >> 4 );
1786+
s[6] = WC_OCTET((z2 >> 12) | (z3 << 6));
1787+
s[7] = WC_OCTET( z3 >> 2 );
1788+
#endif
1789+
s[8] = WC_OCTET( z3 >> 10 );
17841790
/* Move to next place to encode to. */
17851791
s += MLDSA_GAMMA1_17_ENC_BITS / 2;
17861792
}
@@ -1842,16 +1848,16 @@ static void mldsa_encode_gamma1_19_bits_c(const sword32* z, byte* s)
18421848
#endif
18431849
s16p[4] = (word16)((z3 >> 4) );
18441850
#else
1845-
s[0] = (byte) z0 ;
1846-
s[1] = (byte) (z0 >> 8) ;
1847-
s[2] = (byte)((z0 >> 16) | (z1 << 4));
1848-
s[3] = (byte) (z1 >> 4) ;
1849-
s[4] = (byte) (z1 >> 12) ;
1850-
s[5] = (byte) z2 ;
1851-
s[6] = (byte) (z2 >> 8) ;
1852-
s[7] = (byte)((z2 >> 16) | (z3 << 4));
1853-
s[8] = (byte) (z3 >> 4) ;
1854-
s[9] = (byte) (z3 >> 12) ;
1851+
s[0] = WC_OCTET( z0 );
1852+
s[1] = WC_OCTET((z0 >> 8) );
1853+
s[2] = WC_OCTET((z0 >> 16) | (z1 << 4));
1854+
s[3] = WC_OCTET((z1 >> 4) );
1855+
s[4] = WC_OCTET((z1 >> 12) );
1856+
s[5] = WC_OCTET( z2 );
1857+
s[6] = WC_OCTET((z2 >> 8) );
1858+
s[7] = WC_OCTET((z2 >> 16) | (z3 << 4));
1859+
s[8] = WC_OCTET((z3 >> 4) );
1860+
s[9] = WC_OCTET((z3 >> 12) );
18551861
#endif
18561862
/* Move to next place to encode to. */
18571863
s += MLDSA_GAMMA1_19_ENC_BITS / 2;
@@ -2244,6 +2250,9 @@ static void mldsa_decode_gamma1(const byte* s, int bits, sword32* z)
22442250
* @param [in] bits Number of bits used in encoding - GAMMA1 bits.
22452251
* @param [out] z Vector of polynomials.
22462252
*/
2253+
#ifndef WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
2254+
/* The smallest-mem verify streams z one polynomial at a time with
2255+
* mldsa_decode_gamma1() directly, so the whole-vector wrapper is unused. */
22472256
static void mldsa_vec_decode_gamma1(const byte* x, byte l, int bits,
22482257
sword32* z)
22492258
{
@@ -2258,6 +2267,7 @@ static void mldsa_vec_decode_gamma1(const byte* x, byte l, int bits,
22582267
z += MLDSA_N;
22592268
}
22602269
}
2270+
#endif /* !WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM */
22612271
#endif
22622272

22632273
#if !defined(WOLFSSL_MLDSA_NO_SIGN) || !defined(WOLFSSL_MLDSA_NO_VERIFY)
@@ -4624,8 +4634,12 @@ static int mldsa_sample_in_ball_ex(int level, wc_Shake* shake256,
46244634

46254635
/* Step 8: Move value from random index to current index. */
46264636
c[i] = c[j];
4627-
/* Step 9: Set value at random index to +/- 1. */
4628-
c[j] = 1 - ((((signs[s >> 3]) >> (s & 0x7)) & 0x1) << 1);
4637+
/* Step 9: Set value at random index to +/- 1.
4638+
* Cast to sword32 before the subtract: where a byte is as wide as int,
4639+
* signs[] promotes to unsigned and -1 would widen as 0x0000ffff.
4640+
* Matches the USE_INTEL_SPEEDUP path. */
4641+
c[j] = (sword32)1 -
4642+
(sword32)((((signs[s >> 3]) >> (s & 0x7)) & 0x1) << 1);
46294643
/* Next sign bit index. */
46304644
s++;
46314645
}
@@ -9771,6 +9785,10 @@ static int mldsa_verify_with_mu(wc_MlDsaKey* key, const byte* mu,
97719785
byte o;
97729786
byte* encW1;
97739787
byte* seed = commit_calc;
9788+
#ifdef WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
9789+
/* Bytes of encoded z per polynomial - z is streamed one poly at a time. */
9790+
word32 zStride = (word32)(MLDSA_N / 8) * (word32)(params->gamma1_bits + 1);
9791+
#endif
97749792

97759793
/* Ensure the signature is the right size for the parameters. */
97769794
if (sigLen != params->sigSz) {
@@ -9825,15 +9843,33 @@ static int mldsa_verify_with_mu(wc_MlDsaKey* key, const byte* mu,
98259843
#endif
98269844

98279845
if (ret == 0) {
9828-
/* Step 2: Decode z from signature. */
9829-
mldsa_vec_decode_gamma1(ze, params->l, params->gamma1_bits, z);
98309846
/* Step 13: Check z is valid - values are low enough. */
98319847
hi = ((sword32)1 << params->gamma1_bits) - params->beta;
9848+
#ifdef WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
9849+
{
9850+
/* Step 2/13: Stream z one polynomial at a time for the range check;
9851+
* the per-poly NTT happens inside the matrix loop below. */
9852+
const byte* zp = ze;
9853+
unsigned int zi;
9854+
9855+
valid = 1;
9856+
for (zi = 0; valid && (zi < params->l); zi++) {
9857+
mldsa_decode_gamma1(zp, params->gamma1_bits, z);
9858+
valid = mldsa_check_low(z, hi);
9859+
zp += zStride;
9860+
}
9861+
}
9862+
#else
9863+
/* Step 2: Decode z from signature. */
9864+
mldsa_vec_decode_gamma1(ze, params->l, params->gamma1_bits, z);
98329865
valid = mldsa_vec_check_low(z, params->l, hi);
9866+
#endif
98339867
}
98349868
if ((ret == 0) && valid) {
9869+
#ifndef WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
98359870
/* Step 10: NTT(z) */
98369871
mldsa_vec_ntt_full(z, params->l);
9872+
#endif
98379873

98389874
/* Step 9: Compute c from first 256 bits of commit. */
98399875
#ifdef WOLFSSL_MLDSA_VERIFY_NO_MALLOC
@@ -9907,6 +9943,14 @@ static int mldsa_verify_with_mu(wc_MlDsaKey* key, const byte* mu,
99079943
for (s = 0; (ret == 0) && (s < params->l); s++) {
99089944
/* Put s into buffer to be hashed. */
99099945
seed[MLDSA_PUB_SEED_SZ + 0] = (byte)s;
9946+
#ifdef WOLFSSL_MLDSA_VERIFY_SMALLEST_MEM
9947+
/* Step 2/10: Decode and NTT this z polynomial on demand (z is
9948+
* not kept as a whole vector in this mode). */
9949+
mldsa_decode_gamma1(ze + (word32)s * zStride,
9950+
params->gamma1_bits, z);
9951+
mldsa_ntt_full(z);
9952+
zt = z;
9953+
#endif
99109954
/* Step 3: Create polynomial from hashing seed. */
99119955
#ifdef WOLFSSL_MLDSA_VERIFY_NO_MALLOC
99129956
ret = mldsa_rej_ntt_poly_ex(&key->shake, seed, a, key->h);

0 commit comments

Comments
 (0)