Skip to content

Commit dfda657

Browse files
committed
[WIP] Speed up C-reference NTT/invNTT with twisted zetas + 2-layer merging
Replace the single-layer C-reference forward and inverse NTT in `mldsa/src/poly.c` with one that merges two layers each. Also, store each twiddle alongside its precomputed twist, letting `mld_fqmul(a, b, b_twisted)` drop the QINV multiply that was previously hidden inside `mld_montgomery_reduce`. Mirrors the design of mlkem-native #463 (twisted twiddles) and `R = 2^32` Montgomery form. Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
1 parent a71b5d2 commit dfda657

19 files changed

Lines changed: 650 additions & 287 deletions

File tree

mldsa/src/poly.c

Lines changed: 209 additions & 146 deletions
Large diffs are not rendered by default.

mldsa/src/zetas.inc

Lines changed: 258 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,44 +12,263 @@
1212

1313
/*
1414
* Table of zeta values used in the reference NTT and inverse NTT.
15-
* See autogen for details.
15+
* Each row is (zeta_mont, zeta_twisted); see autogen for details.
1616
*/
17-
static const int32_t mld_zetas[MLDSA_N] = {
18-
0, 25847, -2608894, -518909, 237124, -777960, -876248,
19-
466468, 1826347, 2353451, -359251, -2091905, 3119733, -2884855,
20-
3111497, 2680103, 2725464, 1024112, -1079900, 3585928, -549488,
21-
-1119584, 2619752, -2108549, -2118186, -3859737, -1399561, -3277672,
22-
1757237, -19422, 4010497, 280005, 2706023, 95776, 3077325,
23-
3530437, -1661693, -3592148, -2537516, 3915439, -3861115, -3043716,
24-
3574422, -2867647, 3539968, -300467, 2348700, -539299, -1699267,
25-
-1643818, 3505694, -3821735, 3507263, -2140649, -1600420, 3699596,
26-
811944, 531354, 954230, 3881043, 3900724, -2556880, 2071892,
27-
-2797779, -3930395, -1528703, -3677745, -3041255, -1452451, 3475950,
28-
2176455, -1585221, -1257611, 1939314, -4083598, -1000202, -3190144,
29-
-3157330, -3632928, 126922, 3412210, -983419, 2147896, 2715295,
30-
-2967645, -3693493, -411027, -2477047, -671102, -1228525, -22981,
31-
-1308169, -381987, 1349076, 1852771, -1430430, -3343383, 264944,
32-
508951, 3097992, 44288, -1100098, 904516, 3958618, -3724342,
33-
-8578, 1653064, -3249728, 2389356, -210977, 759969, -1316856,
34-
189548, -3553272, 3159746, -1851402, -2409325, -177440, 1315589,
35-
1341330, 1285669, -1584928, -812732, -1439742, -3019102, -3881060,
36-
-3628969, 3839961, 2091667, 3407706, 2316500, 3817976, -3342478,
37-
2244091, -2446433, -3562462, 266997, 2434439, -1235728, 3513181,
38-
-3520352, -3759364, -1197226, -3193378, 900702, 1859098, 909542,
39-
819034, 495491, -1613174, -43260, -522500, -655327, -3122442,
40-
2031748, 3207046, -3556995, -525098, -768622, -3595838, 342297,
41-
286988, -2437823, 4108315, 3437287, -3342277, 1735879, 203044,
42-
2842341, 2691481, -2590150, 1265009, 4055324, 1247620, 2486353,
43-
1595974, -3767016, 1250494, 2635921, -3548272, -2994039, 1869119,
44-
1903435, -1050970, -1333058, 1237275, -3318210, -1430225, -451100,
45-
1312455, 3306115, -1962642, -1279661, 1917081, -2546312, -1374803,
46-
1500165, 777191, 2235880, 3406031, -542412, -2831860, -1671176,
47-
-1846953, -2584293, -3724270, 594136, -3776993, -2013608, 2432395,
48-
2454455, -164721, 1957272, 3369112, 185531, -1207385, -3183426,
49-
162844, 1616392, 3014001, 810149, 1652634, -3694233, -1799107,
50-
-3038916, 3523897, 3866901, 269760, 2213111, -975884, 1717735,
51-
472078, -426683, 1723600, -1803090, 1910376, -1667432, -1104333,
52-
-260646, -3833893, -2939036, -2235985, -420899, -2286327, 183443,
53-
-976891, 1612842, -3545687, -554416, 3919660, -48306, -1362209,
54-
3937738, 1400424, -846154, 1976782,
17+
static const int32_t mld_zetas[MLDSA_N][2] = {
18+
{0, 0},
19+
{25847, 1830765815},
20+
{-2608894, -1929875198},
21+
{-518909, -1927777021},
22+
{237124, 1640767044},
23+
{-777960, 1477910808},
24+
{-876248, 1612161320},
25+
{466468, 1640734244},
26+
{1826347, 308362795},
27+
{2353451, -1815525077},
28+
{-359251, -1374673747},
29+
{-2091905, -1091570561},
30+
{3119733, -1929495947},
31+
{-2884855, 515185417},
32+
{3111497, -285697463},
33+
{2680103, 625853735},
34+
{2725464, 1727305304},
35+
{1024112, 2082316400},
36+
{-1079900, -1364982364},
37+
{3585928, 858240904},
38+
{-549488, 1806278032},
39+
{-1119584, 222489248},
40+
{2619752, -346752664},
41+
{-2108549, 684667771},
42+
{-2118186, 1654287830},
43+
{-3859737, -878576921},
44+
{-1399561, -1257667337},
45+
{-3277672, -748618600},
46+
{1757237, 329347125},
47+
{-19422, 1837364258},
48+
{4010497, -1443016191},
49+
{280005, -1170414139},
50+
{2706023, -1846138265},
51+
{95776, -1631226336},
52+
{3077325, -1404529459},
53+
{3530437, 1838055109},
54+
{-1661693, 1594295555},
55+
{-3592148, -1076973524},
56+
{-2537516, -1898723372},
57+
{3915439, -594436433},
58+
{-3861115, -202001019},
59+
{-3043716, -475984260},
60+
{3574422, -561427818},
61+
{-2867647, 1797021249},
62+
{3539968, -1061813248},
63+
{-300467, 2059733581},
64+
{2348700, -1661512036},
65+
{-539299, -1104976547},
66+
{-1699267, -1750224323},
67+
{-1643818, -901666090},
68+
{3505694, 418987550},
69+
{-3821735, 1831915353},
70+
{3507263, -1925356481},
71+
{-2140649, 992097815},
72+
{-1600420, 879957084},
73+
{3699596, 2024403852},
74+
{811944, 1484874664},
75+
{531354, -1636082790},
76+
{954230, -285388938},
77+
{3881043, -1983539117},
78+
{3900724, -1495136972},
79+
{-2556880, -950076368},
80+
{2071892, -1714807468},
81+
{-2797779, -952438995},
82+
{-3930395, -1574918427},
83+
{-1528703, -654783359},
84+
{-3677745, 1350681039},
85+
{-3041255, -1974159335},
86+
{-1452451, -2143979939},
87+
{3475950, 1651689966},
88+
{2176455, 1599739335},
89+
{-1585221, 140455867},
90+
{-1257611, -1285853323},
91+
{1939314, -1039411342},
92+
{-4083598, -993005454},
93+
{-1000202, 1955560694},
94+
{-3190144, -1440787840},
95+
{-3157330, 1529189038},
96+
{-3632928, 568627424},
97+
{126922, -2131021878},
98+
{3412210, -783134478},
99+
{-983419, -247357819},
100+
{2147896, -588790216},
101+
{2715295, 1518161567},
102+
{-2967645, 289871779},
103+
{-3693493, -86965173},
104+
{-411027, -1262003603},
105+
{-2477047, 1708872713},
106+
{-671102, 2135294594},
107+
{-1228525, 1787797779},
108+
{-22981, -1018755525},
109+
{-1308169, 1638590967},
110+
{-381987, -889861155},
111+
{1349076, -120646188},
112+
{1852771, 1665705315},
113+
{-1430430, -1669960606},
114+
{-3343383, 1321868265},
115+
{264944, -916321552},
116+
{508951, 1225434135},
117+
{3097992, 1155548552},
118+
{44288, -1784632064},
119+
{-1100098, 2143745726},
120+
{904516, 666258756},
121+
{3958618, 1210558298},
122+
{-3724342, 675310538},
123+
{-8578, -1261461890},
124+
{1653064, -1555941048},
125+
{-3249728, -318346816},
126+
{2389356, -1999506068},
127+
{-210977, 628664287},
128+
{759969, -1499481951},
129+
{-1316856, -1729304568},
130+
{189548, -695180180},
131+
{-3553272, 1422575624},
132+
{3159746, -1375177022},
133+
{-1851402, 1424130038},
134+
{-2409325, 1777179795},
135+
{-177440, -1185330464},
136+
{1315589, 334803717},
137+
{1341330, 235321234},
138+
{1285669, -178766299},
139+
{-1584928, 168022240},
140+
{-812732, -518252220},
141+
{-1439742, 1206536194},
142+
{-3019102, 1957047970},
143+
{-3881060, 985155484},
144+
{-3628969, 1146323031},
145+
{3839961, -894060583},
146+
{2091667, -898413},
147+
{3407706, 991903578},
148+
{2316500, 1363007700},
149+
{3817976, 746144248},
150+
{-3342478, -1363460238},
151+
{2244091, 912367099},
152+
{-2446433, 30313375},
153+
{-3562462, -1420958686},
154+
{266997, -605900043},
155+
{2434439, -44694137},
156+
{-1235728, -326425360},
157+
{3513181, 2032221021},
158+
{-3520352, 2027833504},
159+
{-3759364, 1176904444},
160+
{-1197226, 1683520342},
161+
{-3193378, 1904936414},
162+
{900702, 14253662},
163+
{1859098, -421552614},
164+
{909542, -517299994},
165+
{819034, 1257750362},
166+
{495491, 1014493059},
167+
{-1613174, -818371958},
168+
{-43260, 2027935492},
169+
{-522500, 1926727420},
170+
{-655327, 863641633},
171+
{-3122442, 1747917558},
172+
{2031748, -1372618620},
173+
{3207046, 1931587462},
174+
{-3556995, 1819892093},
175+
{-525098, -325927722},
176+
{-768622, 128353682},
177+
{-3595838, 1258381762},
178+
{342297, 2124962073},
179+
{286988, 908452108},
180+
{-2437823, -1123881663},
181+
{4108315, 885133339},
182+
{3437287, -1223601433},
183+
{-3342277, 1851023419},
184+
{1735879, 137583815},
185+
{203044, 1629985060},
186+
{2842341, -1920467227},
187+
{2691481, -1176751719},
188+
{-2590150, -635454918},
189+
{1265009, 1967222129},
190+
{4055324, -1637785316},
191+
{1247620, -1354528380},
192+
{2486353, -642772911},
193+
{1595974, 6363718},
194+
{-3767016, -1536588520},
195+
{1250494, -72690498},
196+
{2635921, 45766801},
197+
{-3548272, -1287922800},
198+
{-2994039, 694382729},
199+
{1869119, -314284737},
200+
{1903435, 671509323},
201+
{-1050970, 1136965286},
202+
{-1333058, 235104446},
203+
{1237275, 985022747},
204+
{-3318210, -2070602178},
205+
{-1430225, 1779436847},
206+
{-451100, -1045062172},
207+
{1312455, 963438279},
208+
{3306115, 419615363},
209+
{-1962642, 1116720494},
210+
{-1279661, 831969619},
211+
{1917081, -1078959975},
212+
{-2546312, 1216882040},
213+
{-1374803, 1042326957},
214+
{1500165, -300448763},
215+
{777191, 604552167},
216+
{2235880, -270590488},
217+
{3406031, 1405999311},
218+
{-542412, 756955444},
219+
{-2831860, -1021949428},
220+
{-1671176, -1276805128},
221+
{-1846953, 713994583},
222+
{-2584293, -260312805},
223+
{-3724270, 608791570},
224+
{594136, 371462360},
225+
{-3776993, 940195359},
226+
{-2013608, 1554794072},
227+
{2432395, 173440395},
228+
{2454455, -1357098057},
229+
{-164721, -1542497137},
230+
{1957272, 1339088280},
231+
{3369112, -2126092136},
232+
{185531, -384158533},
233+
{-1207385, 2061661095},
234+
{-3183426, -2040058690},
235+
{162844, -1316619236},
236+
{1616392, 827959816},
237+
{3014001, -883155599},
238+
{810149, -853476187},
239+
{1652634, -1039370342},
240+
{-3694233, -596344473},
241+
{-1799107, 1726753853},
242+
{-3038916, -2047270596},
243+
{3523897, 6087993},
244+
{3866901, 702390549},
245+
{269760, -1547952704},
246+
{2213111, -1723816713},
247+
{-975884, -110126092},
248+
{1717735, -279505433},
249+
{472078, 394851342},
250+
{-426683, -1591599803},
251+
{1723600, 565464272},
252+
{-1803090, -260424530},
253+
{1910376, 283780712},
254+
{-1667432, -440824168},
255+
{-1104333, -1758099917},
256+
{-260646, -71875110},
257+
{-3833893, 776003547},
258+
{-2939036, 1119856484},
259+
{-2235985, -1600929361},
260+
{-420899, -1208667171},
261+
{-2286327, 1123958025},
262+
{183443, 1544891539},
263+
{-976891, 879867909},
264+
{1612842, -1499603926},
265+
{-3545687, 201262505},
266+
{-554416, 155290192},
267+
{3919660, -1809756372},
268+
{-48306, 2036925262},
269+
{-1362209, 1934038751},
270+
{3937738, -973777462},
271+
{1400424, 400711272},
272+
{-846154, -540420426},
273+
{1976782, 374860238},
55274
};

proofs/cbmc/fqmul/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
2020
PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly.c
2121

2222
CHECK_FUNCTION_CONTRACTS=mld_fqmul
23-
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce
23+
USE_FUNCTION_CONTRACTS=
2424
APPLY_LOOP_CONTRACTS=on
2525
USE_DYNAMIC_FRAMES=1
2626

proofs/cbmc/fqmul/fqmul_harness.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33

44
#include "poly.h"
55

6-
int32_t mld_fqmul(int32_t a, int32_t b);
6+
int32_t mld_fqmul(int32_t a, int32_t b, int32_t b_twisted);
77
void harness(void)
88
{
9-
int32_t a, b, r;
10-
r = mld_fqmul(a, b);
9+
int32_t a, b, b_twisted, r;
10+
r = mld_fqmul(a, b, b_twisted);
1111
}

proofs/cbmc/fqscale/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
2020
PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly.c
2121

2222
CHECK_FUNCTION_CONTRACTS=mld_fqscale
23-
USE_FUNCTION_CONTRACTS=mld_montgomery_reduce
23+
USE_FUNCTION_CONTRACTS=mld_fqmul
2424
APPLY_LOOP_CONTRACTS=on
2525
USE_DYNAMIC_FRAMES=1
2626

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
include ../Makefile_params.common
55

66
HARNESS_ENTRY = harness
7-
HARNESS_FILE = ntt_butterfly_block_harness
7+
HARNESS_FILE = invntt_2_layers_harness
88

99
# This should be a unique identifier for this proof, and will appear on the
1010
# Litani dashboard. It can be human-readable and contain spaces if you wish.
11-
PROOF_UID = mld_ntt_butterfly_block
11+
PROOF_UID = mld_invntt_2_layers
1212

1313
DEFINES +=
1414
INCLUDES +=
@@ -19,16 +19,16 @@ UNWINDSET +=
1919
PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
2020
PROJECT_SOURCES += $(SRCDIR)/mldsa/src/poly.c
2121

22-
CHECK_FUNCTION_CONTRACTS=mld_ntt_butterfly_block
23-
USE_FUNCTION_CONTRACTS=mld_fqmul
22+
CHECK_FUNCTION_CONTRACTS=mld_invntt_2_layers
23+
USE_FUNCTION_CONTRACTS=mld_invntt_2_layers_block
2424
APPLY_LOOP_CONTRACTS=on
2525
USE_DYNAMIC_FRAMES=1
2626

2727
# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
2828
EXTERNAL_SAT_SOLVER=
2929
CBMCFLAGS=--bitwuzla
3030

31-
FUNCTION_NAME = mld_ntt_butterfly_block
31+
FUNCTION_NAME = mld_invntt_2_layers
3232

3333
# If this proof is found to consume huge amounts of RAM, you can set the
3434
# EXPENSIVE variable. With new enough versions of the proof tools, this will

proofs/cbmc/invntt_layer/invntt_layer_harness.c renamed to proofs/cbmc/invntt_2_layers/invntt_2_layers_harness.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
#include <stdint.h>
55
#include "params.h"
66

7-
void mld_invntt_layer(int32_t r[MLDSA_N], unsigned layer);
7+
void mld_invntt_2_layers(int32_t r[MLDSA_N], unsigned layer);
88

99
void harness(void)
1010
{
1111
int32_t *r;
1212
unsigned layer;
13-
mld_invntt_layer(r, layer);
13+
mld_invntt_2_layers(r, layer);
1414
}

0 commit comments

Comments
 (0)