Skip to content

Commit b4611fa

Browse files
committed
RISC-V ASM unaligned read/writes: alternative assembly
Not all RISC-V chips allow unaligned reads and writes with basic load/store instructions such as lw/sw. Add alternative assembly that is enabled by defining WOLFSSL_RISCV_ASM_NO_UNALIGNED.
1 parent 887f242 commit b4611fa

10 files changed

Lines changed: 377 additions & 25 deletions

File tree

.wolfssl_known_macro_extras

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,7 @@ WOLFSSL_RENESAS_RZN2L
887887
WOLFSSL_RENESAS_TLS
888888
WOLFSSL_RENESAS_TSIP_IAREWRX
889889
WOLFSSL_REQUIRE_TCA
890+
WOLFSSL_RISCV_ASM_NO_UNALIGNED
890891
WOLFSSL_RNG_USE_FULL_SEED
891892
WOLFSSL_RSA_CHECK_D_ON_DECRYPT
892893
WOLFSSL_RSA_DECRYPT_TO_0_LEN

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3859,7 +3859,7 @@ do
38593859
# FSL, FSR, FSRI, CMOV, CMIX - QEMU doesn't know about these instructions
38603860
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_BIT_MANIPULATION_TERNARY"
38613861
;;
3862-
zkn|zkned)
3862+
zkned)
38633863
# AES encrypt/decrypt, SHA-2
38643864
ENABLED_RISCV_ASM=yes
38653865
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"

wolfcrypt/src/port/riscv/riscv-64-aes.c

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,8 +1871,13 @@ int wc_AesSetKey(Aes* aes, const byte* key, word32 keyLen, const byte* iv,
18711871
static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
18721872
{
18731873
__asm__ __volatile__ (
1874+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
18741875
"ld t2, 0(%[in])\n\t"
18751876
"ld t3, 8(%[in])\n\t"
1877+
#else
1878+
UNALIGNED_LD(t2, 0, %[in], t0)
1879+
UNALIGNED_LD(t3, 8, %[in], t0)
1880+
#endif
18761881
"ld a3, 0(%[key])\n\t"
18771882
"ld a4, 8(%[key])\n\t"
18781883
"ld a5, 16(%[key])\n\t"
@@ -1897,8 +1902,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
18971902
AESENC_2_ROUNDS(208, 216, 224, 232)
18981903
"L_aes_encrypt_done:\n\t"
18991904
AESENC_LAST_ROUND()
1905+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19001906
"sd t2, 0(%[out])\n\t"
19011907
"sd t3, 8(%[out])\n\t"
1908+
#else
1909+
UNALIGNED_SD(t2, 0, %[out], t0)
1910+
UNALIGNED_SD(t3, 8, %[out], t0)
1911+
#endif
19021912
:
19031913
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
19041914
[rounds] "r" (aes->rounds)
@@ -1918,8 +1928,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
19181928
static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
19191929
{
19201930
__asm__ __volatile__ (
1931+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19211932
"ld t2, 0(%[in])\n\t"
19221933
"ld t3, 8(%[in])\n\t"
1934+
#else
1935+
UNALIGNED_LD(t2, 0, %[in], t0)
1936+
UNALIGNED_LD(t3, 8, %[in], t0)
1937+
#endif
19231938
"ld a3, 0(%[key])\n\t"
19241939
"ld a4, 8(%[key])\n\t"
19251940
"ld a5, 16(%[key])\n\t"
@@ -1944,8 +1959,13 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
19441959
AESDEC_2_ROUNDS(208, 216, 224, 232)
19451960
"L_aes_decrypt_done:\n\t"
19461961
AESDEC_LAST_ROUND()
1962+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19471963
"sd t2, 0(%[out])\n\t"
19481964
"sd t3, 8(%[out])\n\t"
1965+
#else
1966+
UNALIGNED_SD(t2, 0, %[out], t0)
1967+
UNALIGNED_SD(t3, 8, %[out], t0)
1968+
#endif
19491969
:
19501970
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
19511971
[rounds] "r" (aes->rounds)
@@ -3209,8 +3229,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
32093229
LOAD_WORD_REV(t2, 8, %[in])
32103230
LOAD_WORD_REV(t3, 12, %[in])
32113231
#else
3232+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
32123233
"ld t1, 0(%[in])\n\t"
32133234
"ld t3, 8(%[in])\n\t"
3235+
#else
3236+
UNALIGNED_LD(t1, 0, %[in], t0)
3237+
UNALIGNED_LD(t3, 8, %[in], t0)
3238+
#endif
32143239
REV8(REG_T1, REG_T1)
32153240
REV8(REG_T3, REG_T3)
32163241
"srli t0, t1, 32\n\t"
@@ -3376,16 +3401,26 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
33763401
REV8(REG_T1, REG_T1)
33773402
REV8(REG_T3, REG_T3)
33783403
/* Write encrypted block to output. */
3404+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
33793405
"sd t1, 0(%[out])\n\t"
33803406
"sd t3, 8(%[out])\n\t"
3407+
#else
3408+
UNALIGNED_SD(t1, 0, %[out], t0)
3409+
UNALIGNED_SD(t3, 8, %[out], t0)
3410+
#endif
33813411
#else
33823412
PACK(REG_T1, REG_A5, REG_A4)
33833413
PACK(REG_T3, REG_A7, REG_A6)
33843414
REV8(REG_T1, REG_T1)
33853415
REV8(REG_T3, REG_T3)
33863416
/* Write encrypted block to output. */
3417+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
33873418
"sd t1, 0(%[out])\n\t"
33883419
"sd t3, 8(%[out])\n\t"
3420+
#else
3421+
UNALIGNED_SD(t1, 0, %[out], t0)
3422+
UNALIGNED_SD(t3, 8, %[out], t0)
3423+
#endif
33893424
#endif
33903425

33913426
:
@@ -3641,8 +3676,13 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
36413676
LOAD_WORD_REV(t2, 8, %[in])
36423677
LOAD_WORD_REV(t3, 12, %[in])
36433678
#else
3679+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
36443680
"ld t1, 0(%[in])\n\t"
36453681
"ld t3, 8(%[in])\n\t"
3682+
#else
3683+
UNALIGNED_LD(t1, 0, %[in], t0)
3684+
UNALIGNED_LD(t3, 8, %[in], t0)
3685+
#endif
36463686
REV8(REG_T1, REG_T1)
36473687
REV8(REG_T3, REG_T3)
36483688
"srli t0, t1, 32\n\t"
@@ -3793,16 +3833,26 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
37933833
REV8(REG_T1, REG_T1)
37943834
REV8(REG_T3, REG_T3)
37953835
/* Write decrypted block to output. */
3836+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
37963837
"sd t1, 0(%[out])\n\t"
37973838
"sd t3, 8(%[out])\n\t"
3839+
#else
3840+
UNALIGNED_SD(t1, 0, %[out], t0)
3841+
UNALIGNED_SD(t3, 8, %[out], t0)
3842+
#endif
37983843
#else
37993844
PACK(REG_T1, REG_A5, REG_A4)
38003845
PACK(REG_T3, REG_A7, REG_A6)
38013846
REV8(REG_T1, REG_T1)
38023847
REV8(REG_T3, REG_T3)
38033848
/* Write decrypted block to output. */
3849+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
38043850
"sd t1, 0(%[out])\n\t"
38053851
"sd t3, 8(%[out])\n\t"
3852+
#else
3853+
UNALIGNED_SD(t1, 0, %[out], t0)
3854+
UNALIGNED_SD(t3, 8, %[out], t0)
3855+
#endif
38063856
#endif
38073857

38083858
:
@@ -4113,7 +4163,7 @@ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
41134163
*/
41144164
int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
41154165
{
4116-
byte scratch[WC_AES_BLOCK_SIZE];
4166+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
41174167
word32 processed;
41184168
int ret = 0;
41194169

@@ -4563,8 +4613,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
45634613
byte* s, word32 sSz)
45644614
{
45654615
if (gcm != NULL) {
4566-
byte x[WC_AES_BLOCK_SIZE];
4567-
byte scratch[WC_AES_BLOCK_SIZE];
4616+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
4617+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
45684618
byte* h = gcm->H;
45694619

45704620
__asm__ __volatile__ (
@@ -4896,8 +4946,8 @@ static void GMULT(byte* x, byte* y)
48964946
void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
48974947
byte* s, word32 sSz)
48984948
{
4899-
byte x[WC_AES_BLOCK_SIZE];
4900-
byte scratch[WC_AES_BLOCK_SIZE];
4949+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
4950+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
49014951
word32 blocks, partial;
49024952
byte* h;
49034953

@@ -5163,8 +5213,13 @@ static void ghash_blocks(byte* x, byte* y, const byte* in, word32 blocks)
51635213

51645214
"L_ghash_loop:\n\t"
51655215
/* Load input block. */
5216+
#ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
51665217
"ld t5, 0(%[in])\n\t"
51675218
"ld a5, 8(%[in])\n\t"
5219+
#else
5220+
UNALIGNED_LD(t5, 0, %[in], t4)
5221+
UNALIGNED_LD(a5, 8, %[in], t4)
5222+
#endif
51685223
/* Reverse bits to match x. */
51695224
#ifdef WOLFSSL_RISCV_BIT_MANIPULATION
51705225
BREV8(REG_T5, REG_T5)
@@ -5307,8 +5362,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
53075362
byte* s, word32 sSz)
53085363
{
53095364
if (gcm != NULL) {
5310-
byte x[WC_AES_BLOCK_SIZE];
5311-
byte scratch[WC_AES_BLOCK_SIZE];
5365+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
5366+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
53125367
word32 blocks, partial;
53135368
byte* h = gcm->H;
53145369

@@ -5388,8 +5443,8 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
53885443
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
53895444
const byte* aad, word32 aadSz)
53905445
{
5391-
byte counter[WC_AES_BLOCK_SIZE];
5392-
byte scratch[WC_AES_BLOCK_SIZE];
5446+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
5447+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
53935448
/* Noticed different optimization levels treated head of array differently.
53945449
* Some cases were stack pointer plus offset, others were a register containing
53955450
* address. To make uniform for passing in to inline assembly code am using
@@ -5886,8 +5941,8 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
58865941
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
58875942
const byte* aad, word32 aadSz)
58885943
{
5889-
byte counter[WC_AES_BLOCK_SIZE];
5890-
byte scratch[WC_AES_BLOCK_SIZE];
5944+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
5945+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
58915946
/* Noticed different optimization levels treated head of array differently.
58925947
* Some cases were stack pointer plus offset, others were a register containing
58935948
* address. To make uniform for passing in to inline assembly code am using
@@ -6398,8 +6453,8 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
63986453
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
63996454
const byte* aad, word32 aadSz)
64006455
{
6401-
byte counter[WC_AES_BLOCK_SIZE];
6402-
byte scratch[WC_AES_BLOCK_SIZE];
6456+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
6457+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
64036458
/* Noticed different optimization levels treated head of array differently.
64046459
* Some cases were stack pointer plus offset, others were a register containing
64056460
* address. To make uniform for passing in to inline assembly code am using
@@ -7003,8 +7058,8 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
70037058
const byte* aad, word32 aadSz)
70047059
{
70057060
int ret = 0;
7006-
byte counter[WC_AES_BLOCK_SIZE];
7007-
byte scratch[WC_AES_BLOCK_SIZE];
7061+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
7062+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
70087063
/* Noticed different optimization levels treated head of array differently.
70097064
* Some cases were stack pointer plus offset, others were a register containing
70107065
* address. To make uniform for passing in to inline assembly code am using
@@ -7512,8 +7567,8 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
75127567
const byte* aad, word32 aadSz)
75137568
{
75147569
int ret = 0;
7515-
byte counter[WC_AES_BLOCK_SIZE];
7516-
byte scratch[WC_AES_BLOCK_SIZE];
7570+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
7571+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
75177572
/* Noticed different optimization levels treated head of array differently.
75187573
* Some cases were stack pointer plus offset, others were a register containing
75197574
* address. To make uniform for passing in to inline assembly code am using
@@ -8035,8 +8090,8 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
80358090
const byte* aad, word32 aadSz)
80368091
{
80378092
int ret = 0;
8038-
byte counter[WC_AES_BLOCK_SIZE];
8039-
byte scratch[WC_AES_BLOCK_SIZE];
8093+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
8094+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
80408095
/* Noticed different optimization levels treated head of array differently.
80418096
* Some cases were stack pointer plus offset, others were a register containing
80428097
* address. To make uniform for passing in to inline assembly code am using
@@ -8733,8 +8788,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
87338788
byte* s, word32 sSz)
87348789
{
87358790
if (gcm != NULL) {
8736-
byte x[WC_AES_BLOCK_SIZE];
8737-
byte scratch[WC_AES_BLOCK_SIZE];
8791+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
8792+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
87388793
word32 blocks, partial;
87398794

87408795
XMEMSET(x, 0, WC_AES_BLOCK_SIZE);

0 commit comments

Comments
 (0)