Skip to content

Commit 26c45cf

Browse files
committed
RISC-V ASM unaligned read/writes: alternative assembly
Not all RISC-V chips allow unaligned reads and writes with basic assembly instructions like lw/sw. Add alternative assembly that is turned on with: WOLFSSL_RISCV_ASM_NO_UNALIGNED.
1 parent 887f242 commit 26c45cf

10 files changed

Lines changed: 301 additions & 163 deletions

File tree

.wolfssl_known_macro_extras

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,7 @@ WOLFSSL_RENESAS_RZN2L
887887
WOLFSSL_RENESAS_TLS
888888
WOLFSSL_RENESAS_TSIP_IAREWRX
889889
WOLFSSL_REQUIRE_TCA
890+
WOLFSSL_RISCV_ASM_NO_UNALIGNED
890891
WOLFSSL_RNG_USE_FULL_SEED
891892
WOLFSSL_RSA_CHECK_D_ON_DECRYPT
892893
WOLFSSL_RSA_DECRYPT_TO_0_LEN

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3859,7 +3859,7 @@ do
38593859
# FSL, FSR, FSRI, CMOV, CMIX - QEMU doesn't know about these instructions
38603860
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_BIT_MANIPULATION_TERNARY"
38613861
;;
3862-
zkn|zkned)
3862+
zkned)
38633863
# AES encrypt/decrypt, SHA-2
38643864
ENABLED_RISCV_ASM=yes
38653865
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"

wolfcrypt/src/port/riscv/riscv-64-aes.c

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1871,8 +1871,8 @@ int wc_AesSetKey(Aes* aes, const byte* key, word32 keyLen, const byte* iv,
18711871
static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
18721872
{
18731873
__asm__ __volatile__ (
1874-
"ld t2, 0(%[in])\n\t"
1875-
"ld t3, 8(%[in])\n\t"
1874+
UNALIGNED_LD(t2, 0, %[in], t0)
1875+
UNALIGNED_LD(t3, 8, %[in], t0)
18761876
"ld a3, 0(%[key])\n\t"
18771877
"ld a4, 8(%[key])\n\t"
18781878
"ld a5, 16(%[key])\n\t"
@@ -1897,8 +1897,8 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
18971897
AESENC_2_ROUNDS(208, 216, 224, 232)
18981898
"L_aes_encrypt_done:\n\t"
18991899
AESENC_LAST_ROUND()
1900-
"sd t2, 0(%[out])\n\t"
1901-
"sd t3, 8(%[out])\n\t"
1900+
UNALIGNED_SD(t2, 0, %[out], t0)
1901+
UNALIGNED_SD(t3, 8, %[out], t0)
19021902
:
19031903
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
19041904
[rounds] "r" (aes->rounds)
@@ -1918,8 +1918,8 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
19181918
static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
19191919
{
19201920
__asm__ __volatile__ (
1921-
"ld t2, 0(%[in])\n\t"
1922-
"ld t3, 8(%[in])\n\t"
1921+
UNALIGNED_LD(t2, 0, %[in], t0)
1922+
UNALIGNED_LD(t3, 8, %[in], t0)
19231923
"ld a3, 0(%[key])\n\t"
19241924
"ld a4, 8(%[key])\n\t"
19251925
"ld a5, 16(%[key])\n\t"
@@ -1944,8 +1944,8 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
19441944
AESDEC_2_ROUNDS(208, 216, 224, 232)
19451945
"L_aes_decrypt_done:\n\t"
19461946
AESDEC_LAST_ROUND()
1947-
"sd t2, 0(%[out])\n\t"
1948-
"sd t3, 8(%[out])\n\t"
1947+
UNALIGNED_SD(t2, 0, %[out], t0)
1948+
UNALIGNED_SD(t3, 8, %[out], t0)
19491949
:
19501950
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
19511951
[rounds] "r" (aes->rounds)
@@ -3209,8 +3209,8 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
32093209
LOAD_WORD_REV(t2, 8, %[in])
32103210
LOAD_WORD_REV(t3, 12, %[in])
32113211
#else
3212-
"ld t1, 0(%[in])\n\t"
3213-
"ld t3, 8(%[in])\n\t"
3212+
UNALIGNED_LD(t1, 0, %[in], t0)
3213+
UNALIGNED_LD(t3, 8, %[in], t0)
32143214
REV8(REG_T1, REG_T1)
32153215
REV8(REG_T3, REG_T3)
32163216
"srli t0, t1, 32\n\t"
@@ -3376,16 +3376,16 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
33763376
REV8(REG_T1, REG_T1)
33773377
REV8(REG_T3, REG_T3)
33783378
/* Write encrypted block to output. */
3379-
"sd t1, 0(%[out])\n\t"
3380-
"sd t3, 8(%[out])\n\t"
3379+
UNALIGNED_SD(t1, 0, %[out], t0)
3380+
UNALIGNED_SD(t3, 8, %[out], t0)
33813381
#else
33823382
PACK(REG_T1, REG_A5, REG_A4)
33833383
PACK(REG_T3, REG_A7, REG_A6)
33843384
REV8(REG_T1, REG_T1)
33853385
REV8(REG_T3, REG_T3)
33863386
/* Write encrypted block to output. */
3387-
"sd t1, 0(%[out])\n\t"
3388-
"sd t3, 8(%[out])\n\t"
3387+
UNALIGNED_SD(t1, 0, %[out], t0)
3388+
UNALIGNED_SD(t3, 8, %[out], t0)
33893389
#endif
33903390

33913391
:
@@ -3641,8 +3641,8 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
36413641
LOAD_WORD_REV(t2, 8, %[in])
36423642
LOAD_WORD_REV(t3, 12, %[in])
36433643
#else
3644-
"ld t1, 0(%[in])\n\t"
3645-
"ld t3, 8(%[in])\n\t"
3644+
UNALIGNED_LD(t1, 0, %[in], t0)
3645+
UNALIGNED_LD(t3, 8, %[in], t0)
36463646
REV8(REG_T1, REG_T1)
36473647
REV8(REG_T3, REG_T3)
36483648
"srli t0, t1, 32\n\t"
@@ -3793,16 +3793,16 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
37933793
REV8(REG_T1, REG_T1)
37943794
REV8(REG_T3, REG_T3)
37953795
/* Write decrypted block to output. */
3796-
"sd t1, 0(%[out])\n\t"
3797-
"sd t3, 8(%[out])\n\t"
3796+
UNALIGNED_SD(t1, 0, %[out], t0)
3797+
UNALIGNED_SD(t3, 8, %[out], t0)
37983798
#else
37993799
PACK(REG_T1, REG_A5, REG_A4)
38003800
PACK(REG_T3, REG_A7, REG_A6)
38013801
REV8(REG_T1, REG_T1)
38023802
REV8(REG_T3, REG_T3)
38033803
/* Write decrypted block to output. */
3804-
"sd t1, 0(%[out])\n\t"
3805-
"sd t3, 8(%[out])\n\t"
3804+
UNALIGNED_SD(t1, 0, %[out], t0)
3805+
UNALIGNED_SD(t3, 8, %[out], t0)
38063806
#endif
38073807

38083808
:
@@ -4113,7 +4113,7 @@ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
41134113
*/
41144114
int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
41154115
{
4116-
byte scratch[WC_AES_BLOCK_SIZE];
4116+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
41174117
word32 processed;
41184118
int ret = 0;
41194119

@@ -4563,8 +4563,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
45634563
byte* s, word32 sSz)
45644564
{
45654565
if (gcm != NULL) {
4566-
byte x[WC_AES_BLOCK_SIZE];
4567-
byte scratch[WC_AES_BLOCK_SIZE];
4566+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
4567+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
45684568
byte* h = gcm->H;
45694569

45704570
__asm__ __volatile__ (
@@ -4896,8 +4896,8 @@ static void GMULT(byte* x, byte* y)
48964896
void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
48974897
byte* s, word32 sSz)
48984898
{
4899-
byte x[WC_AES_BLOCK_SIZE];
4900-
byte scratch[WC_AES_BLOCK_SIZE];
4899+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
4900+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
49014901
word32 blocks, partial;
49024902
byte* h;
49034903

@@ -5163,8 +5163,8 @@ static void ghash_blocks(byte* x, byte* y, const byte* in, word32 blocks)
51635163

51645164
"L_ghash_loop:\n\t"
51655165
/* Load input block. */
5166-
"ld t5, 0(%[in])\n\t"
5167-
"ld a5, 8(%[in])\n\t"
5166+
UNALIGNED_LD(t5, 0, %[in], t4)
5167+
UNALIGNED_LD(a5, 8, %[in], t4)
51685168
/* Reverse bits to match x. */
51695169
#ifdef WOLFSSL_RISCV_BIT_MANIPULATION
51705170
BREV8(REG_T5, REG_T5)
@@ -5307,8 +5307,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
53075307
byte* s, word32 sSz)
53085308
{
53095309
if (gcm != NULL) {
5310-
byte x[WC_AES_BLOCK_SIZE];
5311-
byte scratch[WC_AES_BLOCK_SIZE];
5310+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
5311+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
53125312
word32 blocks, partial;
53135313
byte* h = gcm->H;
53145314

@@ -5388,8 +5388,8 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
53885388
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
53895389
const byte* aad, word32 aadSz)
53905390
{
5391-
byte counter[WC_AES_BLOCK_SIZE];
5392-
byte scratch[WC_AES_BLOCK_SIZE];
5391+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
5392+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
53935393
/* Noticed different optimization levels treated head of array differently.
53945394
* In some cases it was stack pointer plus offset; in others, a register containing
53955395
* address. To make uniform for passing in to inline assembly code am using
@@ -5886,8 +5886,8 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
58865886
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
58875887
const byte* aad, word32 aadSz)
58885888
{
5889-
byte counter[WC_AES_BLOCK_SIZE];
5890-
byte scratch[WC_AES_BLOCK_SIZE];
5889+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
5890+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
58915891
/* Noticed different optimization levels treated head of array differently.
58925892
* In some cases it was stack pointer plus offset; in others, a register containing
58935893
* address. To make uniform for passing in to inline assembly code am using
@@ -6398,8 +6398,8 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
63986398
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
63996399
const byte* aad, word32 aadSz)
64006400
{
6401-
byte counter[WC_AES_BLOCK_SIZE];
6402-
byte scratch[WC_AES_BLOCK_SIZE];
6401+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
6402+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
64036403
/* Noticed different optimization levels treated head of array differently.
64046404
* In some cases it was stack pointer plus offset; in others, a register containing
64056405
* address. To make uniform for passing in to inline assembly code am using
@@ -7003,8 +7003,8 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
70037003
const byte* aad, word32 aadSz)
70047004
{
70057005
int ret = 0;
7006-
byte counter[WC_AES_BLOCK_SIZE];
7007-
byte scratch[WC_AES_BLOCK_SIZE];
7006+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
7007+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
70087008
/* Noticed different optimization levels treated head of array differently.
70097009
* In some cases it was stack pointer plus offset; in others, a register containing
70107010
* address. To make uniform for passing in to inline assembly code am using
@@ -7512,8 +7512,8 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
75127512
const byte* aad, word32 aadSz)
75137513
{
75147514
int ret = 0;
7515-
byte counter[WC_AES_BLOCK_SIZE];
7516-
byte scratch[WC_AES_BLOCK_SIZE];
7515+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
7516+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
75177517
/* Noticed different optimization levels treated head of array differently.
75187518
* In some cases it was stack pointer plus offset; in others, a register containing
75197519
* address. To make uniform for passing in to inline assembly code am using
@@ -8035,8 +8035,8 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
80358035
const byte* aad, word32 aadSz)
80368036
{
80378037
int ret = 0;
8038-
byte counter[WC_AES_BLOCK_SIZE];
8039-
byte scratch[WC_AES_BLOCK_SIZE];
8038+
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
8039+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
80408040
/* Noticed different optimization levels treated head of array differently.
80418041
* In some cases it was stack pointer plus offset; in others, a register containing
80428042
* address. To make uniform for passing in to inline assembly code am using
@@ -8733,8 +8733,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
87338733
byte* s, word32 sSz)
87348734
{
87358735
if (gcm != NULL) {
8736-
byte x[WC_AES_BLOCK_SIZE];
8737-
byte scratch[WC_AES_BLOCK_SIZE];
8736+
ALIGN16 byte x[WC_AES_BLOCK_SIZE];
8737+
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
87388738
word32 blocks, partial;
87398739

87408740
XMEMSET(x, 0, WC_AES_BLOCK_SIZE);

0 commit comments

Comments
 (0)