Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .wolfssl_known_macro_extras
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,7 @@ WOLFSSL_RENESAS_RZN2L
WOLFSSL_RENESAS_TLS
WOLFSSL_RENESAS_TSIP_IAREWRX
WOLFSSL_REQUIRE_TCA
WOLFSSL_RISCV_ASM_NO_UNALIGNED
WOLFSSL_RNG_USE_FULL_SEED
WOLFSSL_RSA_CHECK_D_ON_DECRYPT
WOLFSSL_RSA_DECRYPT_TO_0_LEN
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -3859,7 +3859,7 @@ do
# FSL, FSR, FSRI, CMOV, CMIX - QEMU doesn't know about these instructions
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_BIT_MANIPULATION_TERNARY"
;;
zkn|zkned)
zkned)
# AES encrypt/decrypt, SHA-2
ENABLED_RISCV_ASM=yes
AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_RISCV_SCALAR_CRYPTO_ASM"
Expand Down
89 changes: 39 additions & 50 deletions wolfcrypt/src/port/riscv/riscv-64-aes.c
Copy link
Copy Markdown

@EAlexJ EAlexJ May 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you missed some in

int wc_AesSetKey(Aes* aes, const byte* key, word32 keyLen, const byte* iv,
    int dir)
{

The pointer to key is not necessarily aligned (I found that out the hard way).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you saying then that none of the fields of Aes will be aligned?
If so, then I will need to change the access to the fields: key, reg and tmp.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That I do not know. The best solution for me was to align the buffers at their source, so I went into the ssl struct and corrected the placement of byte* key.
This was sufficient, so in this particular case only key seems not to be aligned by default.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, why did you choose to use ALIGN16? Is ALIGN8 not already sufficient?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because they are 16-byte buffers in AES, I aligned them on 16 bytes.
Looking into it, the vector instructions are 64-bit loads and stores, so changing to ALIGN8 will be fine.

Original file line number Diff line number Diff line change
Expand Up @@ -1871,8 +1871,7 @@ int wc_AesSetKey(Aes* aes, const byte* key, word32 keyLen, const byte* iv,
static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
{
__asm__ __volatile__ (
"ld t2, 0(%[in])\n\t"
"ld t3, 8(%[in])\n\t"
UNALIGNED_LD2(t2, t3, 0, %[in], t0)
"ld a3, 0(%[key])\n\t"
"ld a4, 8(%[key])\n\t"
"ld a5, 16(%[key])\n\t"
Expand All @@ -1897,8 +1896,7 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
AESENC_2_ROUNDS(208, 216, 224, 232)
"L_aes_encrypt_done:\n\t"
AESENC_LAST_ROUND()
"sd t2, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t2, t3, 0, %[out], t0)
:
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
[rounds] "r" (aes->rounds)
Expand All @@ -1918,8 +1916,7 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
{
__asm__ __volatile__ (
"ld t2, 0(%[in])\n\t"
"ld t3, 8(%[in])\n\t"
UNALIGNED_LD2(t2, t3, 0, %[in], t0)
"ld a3, 0(%[key])\n\t"
"ld a4, 8(%[key])\n\t"
"ld a5, 16(%[key])\n\t"
Expand All @@ -1944,8 +1941,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
AESDEC_2_ROUNDS(208, 216, 224, 232)
"L_aes_decrypt_done:\n\t"
AESDEC_LAST_ROUND()
"sd t2, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t2, t3, 0, %[out], t0)
:
: [in] "r" (in), [out] "r" (out), [key] "r" (aes->key),
[rounds] "r" (aes->rounds)
Expand Down Expand Up @@ -3209,8 +3205,7 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
LOAD_WORD_REV(t2, 8, %[in])
LOAD_WORD_REV(t3, 12, %[in])
#else
"ld t1, 0(%[in])\n\t"
"ld t3, 8(%[in])\n\t"
UNALIGNED_LD2(t1, t3, 0, %[in], t0)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
"srli t0, t1, 32\n\t"
Expand Down Expand Up @@ -3376,16 +3371,14 @@ static void wc_AesEncrypt(Aes* aes, const byte* in, byte* out)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
/* Write encrypted block to output. */
"sd t1, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t1, t3, 0, %[out], t0)
#else
PACK(REG_T1, REG_A5, REG_A4)
PACK(REG_T3, REG_A7, REG_A6)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
/* Write encrypted block to output. */
"sd t1, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t1, t3, 0, %[out], t0)
#endif

:
Expand Down Expand Up @@ -3641,8 +3634,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
LOAD_WORD_REV(t2, 8, %[in])
LOAD_WORD_REV(t3, 12, %[in])
#else
"ld t1, 0(%[in])\n\t"
"ld t3, 8(%[in])\n\t"
UNALIGNED_LD2(t1, t3, 0, %[in], t0)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
"srli t0, t1, 32\n\t"
Expand Down Expand Up @@ -3793,16 +3785,14 @@ static void wc_AesDecrypt(Aes* aes, const byte* in, byte* out)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
/* Write decrypted block to output. */
"sd t1, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t1, t3, 0, %[out], t0)
#else
PACK(REG_T1, REG_A5, REG_A4)
PACK(REG_T3, REG_A7, REG_A6)
REV8(REG_T1, REG_T1)
REV8(REG_T3, REG_T3)
/* Write decrypted block to output. */
"sd t1, 0(%[out])\n\t"
"sd t3, 8(%[out])\n\t"
UNALIGNED_SD2(t1, t3, 0, %[out], t0)
#endif

:
Expand Down Expand Up @@ -4113,7 +4103,7 @@ static WC_INLINE void IncrementAesCounter(byte* inOutCtr)
*/
int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
{
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
word32 processed;
int ret = 0;

Expand Down Expand Up @@ -4563,8 +4553,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
byte* s, word32 sSz)
{
if (gcm != NULL) {
byte x[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte x[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
byte* h = gcm->H;

__asm__ __volatile__ (
Expand Down Expand Up @@ -4896,8 +4886,8 @@ static void GMULT(byte* x, byte* y)
void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
byte* s, word32 sSz)
{
byte x[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte x[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
word32 blocks, partial;
byte* h;

Expand Down Expand Up @@ -5163,8 +5153,7 @@ static void ghash_blocks(byte* x, byte* y, const byte* in, word32 blocks)

"L_ghash_loop:\n\t"
/* Load input block. */
"ld t5, 0(%[in])\n\t"
"ld a5, 8(%[in])\n\t"
UNALIGNED_LD2(t5, a5, 0, %[in], t4)
/* Reverse bits to match x. */
#ifdef WOLFSSL_RISCV_BIT_MANIPULATION
BREV8(REG_T5, REG_T5)
Expand Down Expand Up @@ -5307,8 +5296,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
byte* s, word32 sSz)
{
if (gcm != NULL) {
byte x[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte x[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
word32 blocks, partial;
byte* h = gcm->H;

Expand Down Expand Up @@ -5388,8 +5377,8 @@ static void Aes128GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
const byte* aad, word32 aadSz)
{
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -5886,8 +5875,8 @@ static void Aes192GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
const byte* aad, word32 aadSz)
{
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -6398,8 +6387,8 @@ static void Aes256GcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* nonce, word32 nonceSz, byte* tag, word32 tagSz,
const byte* aad, word32 aadSz)
{
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -7003,8 +6992,8 @@ static int Aes128GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* aad, word32 aadSz)
{
int ret = 0;
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -7512,8 +7501,8 @@ static int Aes192GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* aad, word32 aadSz)
{
int ret = 0;
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -8035,8 +8024,8 @@ static int Aes256GcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
const byte* aad, word32 aadSz)
{
int ret = 0;
byte counter[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
/* Noticed different optimization levels treated head of array differently.
 * Some cases used stack pointer plus offset, others a register containing
* address. To make uniform for passing in to inline assembly code am using
Expand Down Expand Up @@ -8733,8 +8722,8 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c, word32 cSz,
byte* s, word32 sSz)
{
if (gcm != NULL) {
byte x[WC_AES_BLOCK_SIZE];
byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte x[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
word32 blocks, partial;

XMEMSET(x, 0, WC_AES_BLOCK_SIZE);
Expand Down Expand Up @@ -8834,9 +8823,9 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
word32 partial = sz % WC_AES_BLOCK_SIZE;
const byte* p = in;
byte* c = out;
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
ALIGN16 byte initialCounter[WC_AES_BLOCK_SIZE];
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte initialCounter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];

/* Validate parameters. */
if ((aes == NULL) || (nonce == NULL) || (nonceSz == 0) || (tag == NULL) ||
Expand Down Expand Up @@ -8934,10 +8923,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
word32 partial = sz % WC_AES_BLOCK_SIZE;
const byte* c = in;
byte* p = out;
ALIGN16 byte counter[WC_AES_BLOCK_SIZE];
ALIGN16 byte scratch[WC_AES_BLOCK_SIZE];
ALIGN16 byte Tprime[WC_AES_BLOCK_SIZE];
ALIGN16 byte EKY0[WC_AES_BLOCK_SIZE];
ALIGN8 byte counter[WC_AES_BLOCK_SIZE];
ALIGN8 byte scratch[WC_AES_BLOCK_SIZE];
ALIGN8 byte Tprime[WC_AES_BLOCK_SIZE];
ALIGN8 byte EKY0[WC_AES_BLOCK_SIZE];
sword32 res;

/* Validate parameters. */
Expand Down
Loading
Loading