@@ -1871,8 +1871,13 @@ int wc_AesSetKey(Aes* aes, const byte* key, word32 keyLen, const byte* iv,
18711871static void wc_AesEncrypt (Aes * aes , const byte * in , byte * out )
18721872{
18731873 __asm__ __volatile__ (
1874+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
18741875 "ld t2, 0(%[in])\n\t"
18751876 "ld t3, 8(%[in])\n\t"
1877+ #else
1878+ UNALIGNED_LD (t2 , 0 , %[in ], t0 )
1879+ UNALIGNED_LD (t3 , 8 , %[in ], t0 )
1880+ #endif
18761881 "ld a3, 0(%[key])\n\t"
18771882 "ld a4, 8(%[key])\n\t"
18781883 "ld a5, 16(%[key])\n\t"
@@ -1897,8 +1902,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
18971902 AESENC_2_ROUNDS (208 , 216 , 224 , 232 )
18981903 "L_aes_encrypt_done:\n\t"
18991904 AESENC_LAST_ROUND ()
1905+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19001906 "sd t2, 0(%[out])\n\t"
19011907 "sd t3, 8(%[out])\n\t"
1908+ #else
1909+ UNALIGNED_SD (t2 , 0 , %[out ], t0 )
1910+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
1911+ #endif
19021912 :
19031913 : [in ] "r" (in ), [out ] "r " (out), [key] " r " (aes->key),
19041914 [rounds ] "r " (aes->rounds)
@@ -1918,8 +1928,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
19181928static void wc_AesDecrypt (Aes * aes , const byte * in , byte * out )
19191929{
19201930 __asm__ __volatile__ (
1931+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19211932 "ld t2, 0(%[in])\n\t"
19221933 "ld t3, 8(%[in])\n\t"
1934+ #else
1935+ UNALIGNED_LD (t2 , 0 , %[in ], t0 )
1936+ UNALIGNED_LD (t3 , 8 , %[in ], t0 )
1937+ #endif
19231938 "ld a3, 0(%[key])\n\t"
19241939 "ld a4, 8(%[key])\n\t"
19251940 "ld a5, 16(%[key])\n\t"
@@ -1944,8 +1959,13 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
19441959 AESDEC_2_ROUNDS (208 , 216 , 224 , 232 )
19451960 "L_aes_decrypt_done:\n\t"
19461961 AESDEC_LAST_ROUND ()
1962+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
19471963 "sd t2, 0(%[out])\n\t"
19481964 "sd t3, 8(%[out])\n\t"
1965+ #else
1966+ UNALIGNED_SD (t2 , 0 , %[out ], t0 )
1967+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
1968+ #endif
19491969 :
19501970 : [in ] "r" (in ), [out ] "r " (out), [key] " r " (aes->key),
19511971 [rounds ] "r " (aes->rounds)
@@ -3209,8 +3229,13 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
32093229 LOAD_WORD_REV (t2 , 8 , %[in ])
32103230 LOAD_WORD_REV (t3 , 12 , %[in ])
32113231#else
3232+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
32123233 "ld t1, 0(%[in])\n\t"
32133234 "ld t3, 8(%[in])\n\t"
3235+ #else
3236+ UNALIGNED_LD (t1 , 0 , %[in ], t0 )
3237+ UNALIGNED_LD (t3 , 8 , %[in ], t0 )
3238+ #endif
32143239 REV8 (REG_T1 , REG_T1 )
32153240 REV8 (REG_T3 , REG_T3 )
32163241 "srli t0, t1, 32\n\t"
@@ -3376,16 +3401,26 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
33763401 REV8 (REG_T1 , REG_T1 )
33773402 REV8 (REG_T3 , REG_T3 )
33783403 /* Write encrypted block to output. */
3404+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
33793405 "sd t1, 0(%[out])\n\t"
33803406 "sd t3, 8(%[out])\n\t"
3407+ #else
3408+ UNALIGNED_SD (t1 , 0 , %[out ], t0 )
3409+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
3410+ #endif
33813411#else
33823412 PACK (REG_T1 , REG_A5 , REG_A4 )
33833413 PACK (REG_T3 , REG_A7 , REG_A6 )
33843414 REV8 (REG_T1 , REG_T1 )
33853415 REV8 (REG_T3 , REG_T3 )
33863416 /* Write encrypted block to output. */
3417+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
33873418 "sd t1, 0(%[out])\n\t"
33883419 "sd t3, 8(%[out])\n\t"
3420+ #else
3421+ UNALIGNED_SD (t1 , 0 , %[out ], t0 )
3422+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
3423+ #endif
33893424#endif
33903425
33913426 :
@@ -3641,8 +3676,13 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
36413676 LOAD_WORD_REV (t2 , 8 , %[in ])
36423677 LOAD_WORD_REV (t3 , 12 , %[in ])
36433678#else
3679+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
36443680 "ld t1, 0(%[in])\n\t"
36453681 "ld t3, 8(%[in])\n\t"
3682+ #else
3683+ UNALIGNED_LD (t1 , 0 , %[in ], t0 )
3684+ UNALIGNED_LD (t3 , 8 , %[in ], t0 )
3685+ #endif
36463686 REV8 (REG_T1 , REG_T1 )
36473687 REV8 (REG_T3 , REG_T3 )
36483688 "srli t0, t1, 32\n\t"
@@ -3793,16 +3833,26 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
37933833 REV8 (REG_T1 , REG_T1 )
37943834 REV8 (REG_T3 , REG_T3 )
37953835 /* Write decrypted block to output. */
3836+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
37963837 "sd t1, 0(%[out])\n\t"
37973838 "sd t3, 8(%[out])\n\t"
3839+ #else
3840+ UNALIGNED_SD (t1 , 0 , %[out ], t0 )
3841+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
3842+ #endif
37983843#else
37993844 PACK (REG_T1 , REG_A5 , REG_A4 )
38003845 PACK (REG_T3 , REG_A7 , REG_A6 )
38013846 REV8 (REG_T1 , REG_T1 )
38023847 REV8 (REG_T3 , REG_T3 )
38033848 /* Write decrypted block to output. */
3849+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
38043850 "sd t1, 0(%[out])\n\t"
38053851 "sd t3, 8(%[out])\n\t"
3852+ #else
3853+ UNALIGNED_SD (t1 , 0 , %[out ], t0 )
3854+ UNALIGNED_SD (t3 , 8 , %[out ], t0 )
3855+ #endif
38063856#endif
38073857
38083858 :
@@ -4113,7 +4163,7 @@ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
41134163 */
41144164int wc_AesCtrEncrypt (Aes * aes , byte * out , const byte * in , word32 sz )
41154165{
4116- byte scratch [WC_AES_BLOCK_SIZE ];
4166+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
41174167 word32 processed ;
41184168 int ret = 0 ;
41194169
@@ -4563,8 +4613,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
45634613 byte * s , word32 sSz )
45644614{
45654615 if (gcm != NULL ) {
4566- byte x [WC_AES_BLOCK_SIZE ];
4567- byte scratch [WC_AES_BLOCK_SIZE ];
4616+ ALIGN16 byte x [WC_AES_BLOCK_SIZE ];
4617+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
45684618 byte * h = gcm -> H ;
45694619
45704620 __asm__ __volatile__ (
@@ -4896,8 +4946,8 @@ static void GMULT(byte* x, byte* y)
48964946void GHASH (Gcm * gcm , const byte * a , word32 aSz , const byte * c , word32 cSz ,
48974947 byte * s , word32 sSz )
48984948{
4899- byte x [WC_AES_BLOCK_SIZE ];
4900- byte scratch [WC_AES_BLOCK_SIZE ];
4949+ ALIGN16 byte x [WC_AES_BLOCK_SIZE ];
4950+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
49014951 word32 blocks , partial ;
49024952 byte * h ;
49034953
@@ -5163,8 +5213,13 @@ static void ghash_blocks(byte* x, byte* y, const byte* in, word32 blocks)
51635213
51645214 "L_ghash_loop :\n \t "
51655215 /* Load input block. */
5216+ #ifndef WOLFSSL_RISCV_ASM_NO_UNALIGNED
51665217 "ld t5, 0(%[in])\n\t"
51675218 "ld a5, 8(%[in])\n\t"
5219+ #else
5220+ UNALIGNED_LD (t5 , 0 , %[in ], t4 )
5221+ UNALIGNED_LD (a5 , 8 , %[in ], t4 )
5222+ #endif
51685223 /* Reverse bits to match x. */
51695224#ifdef WOLFSSL_RISCV_BIT_MANIPULATION
51705225 BREV8 (REG_T5 , REG_T5 )
@@ -5307,8 +5362,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
53075362 byte * s , word32 sSz )
53085363{
53095364 if (gcm != NULL ) {
5310- byte x [WC_AES_BLOCK_SIZE ];
5311- byte scratch [WC_AES_BLOCK_SIZE ];
5365+ ALIGN16 byte x [WC_AES_BLOCK_SIZE ];
5366+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
53125367 word32 blocks , partial ;
53135368 byte * h = gcm -> H ;
53145369
@@ -5388,8 +5443,8 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
53885443 const byte * nonce , word32 nonceSz , byte * tag , word32 tagSz ,
53895444 const byte * aad , word32 aadSz )
53905445{
5391- byte counter [WC_AES_BLOCK_SIZE ];
5392- byte scratch [WC_AES_BLOCK_SIZE ];
5446+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
5447+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
53935448 /* Noticed that different optimization levels treated the head of the array differently.
53945449 * In some cases it was the stack pointer plus an offset, in others it was a register containing
53955450 * address. To make uniform for passing in to inline assembly code am using
@@ -5886,8 +5941,8 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
58865941 const byte * nonce , word32 nonceSz , byte * tag , word32 tagSz ,
58875942 const byte * aad , word32 aadSz )
58885943{
5889- byte counter [WC_AES_BLOCK_SIZE ];
5890- byte scratch [WC_AES_BLOCK_SIZE ];
5944+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
5945+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
58915946 /* Noticed that different optimization levels treated the head of the array differently.
58925947 * In some cases it was the stack pointer plus an offset, in others it was a register containing
58935948 * address. To make uniform for passing in to inline assembly code am using
@@ -6398,8 +6453,8 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
63986453 const byte * nonce , word32 nonceSz , byte * tag , word32 tagSz ,
63996454 const byte * aad , word32 aadSz )
64006455{
6401- byte counter [WC_AES_BLOCK_SIZE ];
6402- byte scratch [WC_AES_BLOCK_SIZE ];
6456+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
6457+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
64036458 /* Noticed that different optimization levels treated the head of the array differently.
64046459 * In some cases it was the stack pointer plus an offset, in others it was a register containing
64056460 * address. To make uniform for passing in to inline assembly code am using
@@ -7003,8 +7058,8 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
70037058 const byte * aad , word32 aadSz )
70047059{
70057060 int ret = 0 ;
7006- byte counter [WC_AES_BLOCK_SIZE ];
7007- byte scratch [WC_AES_BLOCK_SIZE ];
7061+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
7062+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
70087063 /* Noticed that different optimization levels treated the head of the array differently.
70097064 * In some cases it was the stack pointer plus an offset, in others it was a register containing
70107065 * address. To make uniform for passing in to inline assembly code am using
@@ -7512,8 +7567,8 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
75127567 const byte * aad , word32 aadSz )
75137568{
75147569 int ret = 0 ;
7515- byte counter [WC_AES_BLOCK_SIZE ];
7516- byte scratch [WC_AES_BLOCK_SIZE ];
7570+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
7571+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
75177572 /* Noticed that different optimization levels treated the head of the array differently.
75187573 * In some cases it was the stack pointer plus an offset, in others it was a register containing
75197574 * address. To make uniform for passing in to inline assembly code am using
@@ -8035,8 +8090,8 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
80358090 const byte * aad , word32 aadSz )
80368091{
80378092 int ret = 0 ;
8038- byte counter [WC_AES_BLOCK_SIZE ];
8039- byte scratch [WC_AES_BLOCK_SIZE ];
8093+ ALIGN16 byte counter [WC_AES_BLOCK_SIZE ];
8094+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
80408095 /* Noticed that different optimization levels treated the head of the array differently.
80418096 * In some cases it was the stack pointer plus an offset, in others it was a register containing
80428097 * address. To make uniform for passing in to inline assembly code am using
@@ -8733,8 +8788,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
87338788 byte * s , word32 sSz )
87348789{
87358790 if (gcm != NULL ) {
8736- byte x [WC_AES_BLOCK_SIZE ];
8737- byte scratch [WC_AES_BLOCK_SIZE ];
8791+ ALIGN16 byte x [WC_AES_BLOCK_SIZE ];
8792+ ALIGN16 byte scratch [WC_AES_BLOCK_SIZE ];
87388793 word32 blocks , partial ;
87398794
87408795 XMEMSET (x , 0 , WC_AES_BLOCK_SIZE );
0 commit comments