66.globl ed25519_verify_var
77.globl ed25519_sign_stage1
88.globl ed25519_sign_stage2
9+
10+ / * Expose for testing only. * /
911.globl affine_encode
1012.globl affine_decode_var
1113.globl ext_scmul
14+ .globl ext_scmul_sca
1215.globl ext_double
1316.globl ext_add
17+ .globl ext_to_affine
18+
1419
1520/ **
1621 * This library contains an implementation of the Ed25519 signature scheme
100105 * clobbered flag groups: FG0
101106 * /
102107ed25519_gen_public_key:
108+ / * Set up scalar arithmetic for the scalar reductions.
109+ MOD <= L.
110+ [ w15:w14 ] <= mu. * /
111+ jal x1 , sc_init
103112
104113 / * Load the arithmetic shares (s0 , s1) of the precomputed and clamped secret
105114 s and reduce them modulo L. * /
@@ -112,7 +121,7 @@ ed25519_gen_public_key:
112121
113122 / * w2 <= [ w17:w16 ] mod L = s0 mod L. * /
114123 jal x1 , sc_reduce
115- bn. mov w28 , w18
124+ bn. mov w2 , w18
116125
117126 / * Clear w16 and w17 with randomness before loading the second share s1. * /
118127 bn.wsrr w16 , URND
@@ -126,9 +135,7 @@ ed25519_gen_public_key:
126135
127136 / * w18 <= [ w17:w16 ] mod L = s1 mod L. * /
128137 jal x1 , sc_reduce
129-
130- / * TODO: Remove once everything is in place. * /
131- bn.subm w28 , w28 , w18
138+ bn. mov w4 , w18
132139
133140 / * Set up for field arithmetic in preparation for scalar multiplication.
134141 MOD <= p
@@ -161,9 +168,9 @@ ed25519_gen_public_key:
161168 [ w9:w6 ] <= extended(B) = (B.X , B.Y , B.Z , B.T) * /
162169 jal x1 , affine_to_ext
163170
164- / * Compute the public key point A = [ s ] B.
165- [ w13:w10 ] <= w28 * [ w9:w6 ] = [ s ] B * /
166- jal x1 , ext_scmul
171+ / * Compute the public key point A = [ s0 - s1 ] B.
172+ [ w13:w10 ] <= (w2 - w4) * [ w9:w6 ] = [ s0 - s1 ] B * /
173+ jal x1 , ext_scmul_sca
167174
168175 / * Convert A to affine coordinates.
169176 w10 <= A.x , w11 <= A.y * /
@@ -477,6 +484,20 @@ ed25519_verify_var:
477484 * clobbered flag groups: FG0
478485 * /
479486ed25519_sign_stage1:
487+ / * Calculate and write encoded public key A (A_) to DMEM.
488+ dmem [ ed25519_public_key ] <= A_
489+
490+ Furthermore , set up for field arithmetic in preparation for scalar multiplication.
491+ MOD <= p
492+ w19 <= 19
493+
494+ And initialize curve parameter d.
495+ w30 <= dmem [ d ] = ( - 121665 / 121666 ) mod p
496+
497+ Lastly , load B in extended coordinates.
498+ [ w9:w6 ] <= extended(B) = (B.X , B.Y , B.Z , B.T) * /
499+ jal x1 , ed25519_gen_public_key
500+
480501 / * Set up for scalar arithmetic.
481502 [ w15:w14 ] <= mu
482503 MOD <= L * /
@@ -492,9 +513,9 @@ ed25519_sign_stage1:
492513 bn.lid x2 ++, 32 (x3)
493514 bn.lid x2 ++, 64 (x3)
494515
495- / * w5 <= [ w22:w20 ] mod L = r0 mod L. * /
516+ / * w2 <= [ w22:w20 ] mod L = r0 mod L. * /
496517 jal x1 , sc_reduce_768
497- bn. mov w5 , w18
518+ bn. mov w2 , w18
498519
499520 / * Overwrite w20 - w22 with randomness before loading the second share r1. * /
500521 bn.wsrr w20 , URND
@@ -508,30 +529,19 @@ ed25519_sign_stage1:
508529 bn.lid x2 ++, 32 (x3)
509530 bn.lid x2 ++, 64 (x3)
510531
511- / * w18 <= [ w22:w20 ] mod L = r1 mod L. * /
532+ / * w4 <= [ w22:w20 ] mod L = r1 mod L. * /
512533 jal x1 , sc_reduce_768
534+ bn. mov w4 , w18
513535
514- / * TODO remove this once everything is in place. * /
515- bn.subm w5 , w5 , w18
516-
517- / * Calculate and write encoded public key A (A_) to DMEM.
518- dmem [ ed25519_public_key ] <= A_
519-
520- Furthermore , set up for field arithmetic in preparation for scalar multiplication.
536+ / * Set up for field arithmetic in preparation for scalar multiplication.
521537 MOD <= p
522- w19 <= 19
523-
524- And initialize curve parameter d.
525- w30 <= dmem [ d ] = ( - 121665 / 121666 ) mod p
526-
527- Lastly , load B in extended coordinates.
528- [ w9:w6 ] <= extended(B) = (B.X , B.Y , B.Z , B.T) * /
529- jal x1 , ed25519_gen_public_key
538+ w19 <= 19 * /
539+ jal x1 , fe_init
530540
531- / * Compute the signature point R = [ r ] B.
532- [ w13:w10 ] <= w5 * [ w9:w6 ] = [ r ] B * /
533- bn. mov w28 , w5
534- jal x1 , ext_scmul
541+ / * Compute the signature point R = [ r0 - r1 ] B.
542+ [ w13:w10 ] <= (w2 - w4) * [ w9:w6 ] = [ r0 - r1 ] B * /
543+ / * bn.mov w28 , w5 * /
544+ jal x1 , ext_scmul_sca
535545
536546 / * Convert R to affine coordinates.
537547 w10 <= R.x , w11 <= R.y * /
@@ -1246,6 +1256,199 @@ ext_scmul:
12461256 [ w13:w10 ] = P = a * (X1 , Y1 , Z1 , T1) * /
12471257 ret
12481258
/**
 * Scalar-point multiplication with an arithmetically masked scalar.
 *
 * Returns (X2, Y2, Z2, T2) = (s0 - s1) * (X1, Y1, Z1, T1) = (s0 - s1) * P.
 *
 * This routine calculates both terms s0 * (X1, Y1, Z1, T1) and
 * -s1 * (X1, Y1, Z1, T1) in parallel with the following double-and-add-always
 * algorithm:
 *
 *   Q = (0, 1, 1, 0)
 *
 *   for i = bitlen(s0) - 1 to 0:
 *     Q = 2 * Q
 *     A = s0[i] ? P : -P
 *     B = Q + A
 *     Q = (s0[i] ^ s1[i]) ? B : Q
 *
 *   return Q
 *
 * Per bit this adds P when (s0[i], s1[i]) = (1, 0), adds -P when
 * (s0[i], s1[i]) = (0, 1) and adds nothing when both bits are equal, so the
 * accumulated result is (s0 - s1) * P without ever recombining the shares.
 *
 * Before the loop both shares are passed through `sc_blind` (input in w20,
 * result read back from [w17:w16]) and the loop then iterates over the 382
 * bits of the blinded shares, i.e. bitlen above is 382 in this
 * implementation.
 *
 * This routine runs in constant time.
 *
 * Flags: Flags have no meaning beyond the scope of this subroutine.
 *
 * @param[in]  w2:  s0, first scalar share, s0 < L
 * @param[in]  w4:  s1, second scalar share, s1 < L
 * @param[in]  w6:  input X1 (X1 < p)
 * @param[in]  w7:  input Y1 (Y1 < p)
 * @param[in]  w8:  input Z1 (Z1 < p)
 * @param[in]  w9:  input T1 (T1 < p)
 * @param[in]  w19: constant, w19 = 19
 * @param[in]  w29: constant, w29 = (2 * d) mod p, d = (-121665/121666) mod p
 * @param[in]  w31: all-zero
 * @param[in]  MOD: p, modulus = 2^255 - 19
 * @param[out] w10: output X2
 * @param[out] w11: output Y2
 * @param[out] w12: output Z2
 * @param[out] w13: output T2
 *
 * Clobbered registers: w2 to w18, w20 to w27
 * Clobbered flag groups: FG0, FG1
 */
ext_scmul_sca:
  /* Initialize the intermediate result Q to the origin point.
       [w13:w10] <= (0, 1, 1, 0) */
  bn.mov    w10, w31
  bn.addi   w11, w31, 1
  bn.addi   w12, w31, 1
  bn.mov    w13, w31

  /*
   * Blind both 253-bit scalar shares and expand them to 382-bit shares. Such
   * large blinding factors are necessary to protect them against powerful SCA
   * attacks.
   */

  /* [w3:w2] <= w2 + k * L = s0 + k * L. */
  bn.mov    w20, w2
  jal       x1, sc_blind
  bn.mov    w2, w16
  bn.mov    w3, w17

  /* [w5:w4] <= w4 + k' * L = s1 + k' * L. */
  bn.mov    w20, w4
  jal       x1, sc_blind
  bn.mov    w4, w16
  bn.mov    w5, w17

  /* Move the blinded 382-bit share s0 to the MSB position, i.e. shift the
     512-bit value [w3:w2] left by 512 - 382 = 130 bits. */
  bn.rshi   w3, w3, w2 >> 126
  bn.rshi   w2, w2, w31 >> 126

  /* Dummy instruction separating the handling of the two shares. */
  bn.xor    w31, w31, w31

  /* Move the blinded 382-bit share s1 to the MSB position. */
  bn.rshi   w5, w5, w4 >> 126
  bn.rshi   w4, w4, w31 >> 126

  /* Iterate over all scalar bits starting at the MSB.
     NOTE: the loop body must contain exactly 54 instructions. */
  loopi     382, 54
    /* Compute Q = 2 * Q.
         [w13:w10] <= [w13:w10] + [w13:w10] = 2 * Q */
    jal       x1, ext_double

    /* Save the value 2 * Q for later.
         [w17:w14] <= [w13:w10] = 2 * Q. */
    bn.mov    w14, w10
    bn.mov    w15, w11
    bn.mov    w16, w12
    bn.mov    w17, w13

    /*
     * First selection: A = s0[i] ? P : -P.
     *
     * It is important that the destination register of `bn.sel` is different
     * from the source register to avoid leaking the secret flag bit.
     */

    /* Randomize the destination registers. */
    bn.wsrr   w10, URND
    bn.wsrr   w11, URND
    bn.wsrr   w12, URND
    bn.wsrr   w13, URND

    /* Negate P.
         [w23:w20] <= (-P.X, P.Y, P.Z, -P.T). */
    bn.subm   w20, w31, w6
    bn.mov    w21, w7
    bn.mov    w22, w8
    bn.subm   w23, w31, w9

    /* Isolate the bit s0[381] which is s0[i] (MSB of w3 -> FG0.M). */
    bn.addi   w3, w3, 0, FG0

    /* Perform the selection A = s0[i] ? P : -P.
         [w13:w10] <= s0[i] ? P : -P. */
    bn.sel    w10, w6, w20, FG0.M
    bn.sel    w11, w7, w21, FG0.M
    bn.sel    w12, w8, w22, FG0.M
    bn.sel    w13, w9, w23, FG0.M

    /* Clear the flag. */
    bn.sub    w31, w31, w31, FG0

    /* Compute the addition B = Q + A.
         [w13:w10] <= [w13:w10] + [w17:w14] = A + Q. */
    jal       x1, ext_add

    /*
     * Second selection: Q = (s0[i] ^ s1[i]) ? B : Q.
     * The XOR (s0[i] ^ s1[i]) can be computed with 3 successive selections
     * such that
     *   (s0[i] ^ s1[i]) ? B : Q
     *     = s1[i] ? (s0[i] ? Q : B) : (s0[i] ? B : Q).
     */

    /* Randomize the destination registers. */
    bn.wsrr   w20, URND
    bn.wsrr   w21, URND
    bn.wsrr   w22, URND
    bn.wsrr   w23, URND
    bn.wsrr   w24, URND
    bn.wsrr   w25, URND
    bn.wsrr   w26, URND
    bn.wsrr   w27, URND

    /* Isolate the bit s0[381] which is s0[i] (MSB of w3 -> FG0.M). */
    bn.addi   w3, w3, 0, FG0

    /* [w23:w20] <= s0[i] ? B : Q. */
    bn.sel    w20, w10, w14, FG0.M
    bn.sel    w21, w11, w15, FG0.M
    bn.sel    w22, w12, w16, FG0.M
    bn.sel    w23, w13, w17, FG0.M

    /* [w27:w24] <= s0[i] ? Q : B. */
    bn.sel    w24, w14, w10, FG0.M
    bn.sel    w25, w15, w11, FG0.M
    bn.sel    w26, w16, w12, FG0.M
    bn.sel    w27, w17, w13, FG0.M

    /* Clear the flag. */
    bn.sub    w31, w31, w31, FG0

    /* Randomize the destination registers. */
    bn.wsrr   w10, URND
    bn.wsrr   w11, URND
    bn.wsrr   w12, URND
    bn.wsrr   w13, URND

    /* Isolate the bit s1[381] which is s1[i] (MSB of w5 -> FG1.M). */
    bn.addi   w5, w5, 0, FG1

    /* [w13:w10] <= s1[i] ? (s0[i] ? Q : B) : (s0[i] ? B : Q). */
    bn.sel    w10, w24, w20, FG1.M
    bn.sel    w11, w25, w21, FG1.M
    bn.sel    w12, w26, w22, FG1.M
    bn.sel    w13, w27, w23, FG1.M

    /* Clear the flag. The secret bit s1[i] lives in FG1 (FG0 has already
       been cleared above), so FG1 is the group that must be wiped here. */
    bn.sub    w31, w31, w31, FG1

    /* Shift both scalars one position to the left and pad with randomness. */
    bn.wsrr   w20, URND

    bn.rshi   w3, w3, w2 >> 255
    bn.rshi   w2, w2, w20 >> 255

    /* Dummy instruction separating the handling of the two shares. */
    bn.xor    w31, w31, w31

    bn.rshi   w5, w5, w4 >> 255
    bn.rshi   w4, w4, w20 >> 255
    /* End of loop */

  ret
1451+
12491452/ **
12501453 * Add a point to itself in extended twisted Edwards coordinates.
12511454 *
0 commit comments