Skip to content

Commit efe891c

Browse files
committed
AArch64 asm: add software fallback and CPU ID checks
cpuid.h: added CPUID_ASIMD flag and IS_AARCH64_ASIMD() macro (NEON detection). cpuid.c: added NEON/ASIMD detection; fixed FreeBSD/OpenBSD to use HWCAP_*. sha256.c: runtime dispatch SHA256-crypto → NEON → software. sha512.c: replaced the #error with the same crypto → NEON → software dispatch. chacha.c: add AArch64 runtime fallback to C. poly1305.c: add AArch64 runtime fallback to C. Fixes: test_tls.c: don't memcpy into buffer if length is too long. sha256.c: even if data is not NULL, return immediately when length is 0.
1 parent dd6da70 commit efe891c

8 files changed

Lines changed: 794 additions & 330 deletions

File tree

tests/api/test_tls.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,9 @@ int test_tls12_corrupted_finished(void)
18291829
}
18301830
else {
18311831
ExpectIntGE(finishedSz, finishedLen);
1832-
XMEMCPY(finishedMsg, test_ctx.s_buff + finishedOffInMsg, finishedLen);
1832+
if (EXPECT_SUCCESS()) {
1833+
XMEMCPY(finishedMsg, test_ctx.s_buff + finishedOffInMsg, finishedLen);
1834+
}
18331835
finishedSz = finishedLen;
18341836
ExpectIntEQ(test_memio_modify_message_len(&test_ctx, 0,
18351837
finishedMsgPos, finishedOffInMsg), 0);

wolfcrypt/src/chacha.c

Lines changed: 90 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,38 @@ Public domain.
109109
static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER;
110110
#endif
111111

112+
/* The aarch64 ChaCha assembly is NEON-only. When NEON might be absent, also
113+
* build the C implementation: dispatch on ASIMD at runtime when NEON is
114+
* compiled in, or use only the C path when NEON is disabled at build time. */
115+
#if defined(USE_ARM_CHACHA_SPEEDUP) && defined(__aarch64__)
116+
#ifdef WOLFSSL_ARMASM_NO_NEON
117+
#define WOLFSSL_ARM_CHACHA_C_ONLY
118+
#else
119+
#define WOLFSSL_ARM_CHACHA_NEON_FALLBACK
120+
#endif
121+
#endif
122+
#if defined(WOLFSSL_ARM_CHACHA_NEON_FALLBACK) || \
123+
defined(WOLFSSL_ARM_CHACHA_C_ONLY)
124+
#define WOLFSSL_ARM_CHACHA_NEED_C
125+
#endif
126+
127+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
128+
static cpuid_flags_t chacha_cpuid_flags = WC_CPUID_INITIALIZER;
129+
/* Return non-zero when NEON/ASIMD is present and the asm path should run. */
130+
static WC_INLINE int chacha_use_neon(void)
131+
{
132+
cpuid_get_flags_ex(&chacha_cpuid_flags);
133+
return IS_AARCH64_ASIMD(chacha_cpuid_flags);
134+
}
135+
#endif
136+
112137
/**
113138
* Set up iv(nonce). Earlier versions used 64 bits instead of 96, this version
114139
* uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
115140
*/
116141
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
117142
{
118-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
143+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
119144
word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
120145
#endif
121146

@@ -124,24 +149,31 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
124149

125150
ctx->left = 0; /* resets state */
126151

127-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
128-
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
129-
/* block counter */
130-
ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;
131-
/* fixed variable from nonce */
132-
ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]);
133-
/* counter from nonce */
134-
ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]);
135-
/* counter from nonce */
136-
ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]);
137-
#else
152+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
153+
if (chacha_use_neon())
154+
wc_chacha_setiv(ctx->X, inIv, counter);
155+
else
156+
#elif defined(USE_ARM_CHACHA_SPEEDUP) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY)
138157
wc_chacha_setiv(ctx->X, inIv, counter);
139158
#endif
159+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
160+
{
161+
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
162+
/* block counter */
163+
ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;
164+
/* fixed variable from nonce */
165+
ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]);
166+
/* counter from nonce */
167+
ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]);
168+
/* counter from nonce */
169+
ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]);
170+
}
171+
#endif
140172

141173
return 0;
142174
}
143175

144-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
176+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
145177
/* "expand 32-byte k" as unsigned 32 byte */
146178
static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
147179
/* "expand 16-byte k" as unsigned 16 byte */
@@ -153,7 +185,7 @@ static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
153185
*/
154186
int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
155187
{
156-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
188+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
157189
const word32* constants;
158190
const byte* k;
159191
#ifdef XSTREAM_ALIGN
@@ -167,7 +199,15 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
167199
if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
168200
return BAD_FUNC_ARG;
169201

170-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
202+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
203+
if (chacha_use_neon())
204+
wc_chacha_setkey(ctx->X, key, keySz);
205+
else
206+
#elif defined(USE_ARM_CHACHA_SPEEDUP) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY)
207+
wc_chacha_setkey(ctx->X, key, keySz);
208+
#endif
209+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
210+
{
171211
#ifdef XSTREAM_ALIGN
172212
if ((wc_ptr_t)key % 4) {
173213
WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
@@ -211,16 +251,16 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
211251
ctx->X[ 1] = constants[1];
212252
ctx->X[ 2] = constants[2];
213253
ctx->X[ 3] = constants[3];
214-
#else
215-
wc_chacha_setkey(ctx->X, key, keySz);
254+
}
216255
#endif
217256

218257
ctx->left = 0; /* resets state */
219258

220259
return 0;
221260
}
222261

223-
#if !defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)
262+
#if (!defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)) || \
263+
defined(WOLFSSL_ARM_CHACHA_NEED_C)
224264
/**
225265
* Converts word into bytes with rotations having been done.
226266
*/
@@ -267,7 +307,8 @@ extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
267307
#endif
268308

269309

270-
#if !defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)
310+
#if (!defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)) || \
311+
defined(WOLFSSL_ARM_CHACHA_NEED_C)
271312
/**
272313
* Encrypt a stream of bytes
273314
*/
@@ -366,23 +407,39 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
366407
return 0;
367408
}
368409
#elif defined(USE_ARM_CHACHA_SPEEDUP)
369-
/* Handle left over bytes from last block. */
370-
if ((msglen > 0) && (ctx->left > 0)) {
371-
byte* over = ((byte*)ctx->over) + CHACHA_CHUNK_BYTES - ctx->left;
372-
word32 l = min(msglen, ctx->left);
373-
374-
wc_chacha_use_over(over, output, input, l);
410+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
411+
if (chacha_use_neon())
412+
#endif
413+
#ifndef WOLFSSL_ARM_CHACHA_C_ONLY
414+
{
415+
/* Handle left over bytes from last block. */
416+
if ((msglen > 0) && (ctx->left > 0)) {
417+
byte* over = ((byte*)ctx->over) + CHACHA_CHUNK_BYTES - ctx->left;
418+
word32 l = min(msglen, ctx->left);
419+
420+
wc_chacha_use_over(over, output, input, l);
421+
422+
ctx->left -= l;
423+
input += l;
424+
output += l;
425+
msglen -= l;
426+
}
375427

376-
ctx->left -= l;
377-
input += l;
378-
output += l;
379-
msglen -= l;
428+
if (msglen != 0) {
429+
wc_chacha_crypt_bytes(ctx, output, input, msglen);
430+
}
431+
return 0;
380432
}
381-
382-
if (msglen != 0) {
383-
wc_chacha_crypt_bytes(ctx, output, input, msglen);
433+
#endif
434+
#ifdef WOLFSSL_ARM_CHACHA_NEED_C
435+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
436+
else
437+
#endif
438+
{
439+
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
440+
return 0;
384441
}
385-
return 0;
442+
#endif
386443
#else
387444
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
388445
return 0;

wolfcrypt/src/cpuid.c

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@
144144
#define CPUID_AARCH64_FEAT_SHA3 ((word64)1 << 32)
145145
#define CPUID_AARCH64_FEAT_SM3 ((word64)1 << 36)
146146
#define CPUID_AARCH64_FEAT_SM4 ((word64)1 << 40)
147+
#define CPUID_AARCH64_FEAT_ASMID ((word64)0xf << 20)
147148

148149
#ifdef WOLFSSL_AARCH64_PRIVILEGE_MODE
149150
/* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers
@@ -156,13 +157,27 @@
156157
old_cpuid_flags = WC_CPUID_INITIALIZER;
157158
word64 features;
158159

160+
#ifndef WOLFSSL_ARMASM_NO_NEON
161+
__asm__ __volatile (
162+
"mrs %[feat], ID_AA64PFR0_EL1\n"
163+
: [feat] "=r" (features)
164+
:
165+
:
166+
);
167+
168+
if ((features & CPUID_AARCH64_FEAT_ASMID) !=
169+
CPUID_AARCH64_FEAT_ASMID)
170+
new_cpuid_flags |= CPUID_ASIMD;
171+
#endif
172+
159173
__asm__ __volatile (
160174
"mrs %[feat], ID_AA64ISAR0_EL1\n"
161175
: [feat] "=r" (features)
162176
:
163177
:
164178
);
165179

180+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
166181
if (features & CPUID_AARCH64_FEAT_AES)
167182
new_cpuid_flags |= CPUID_AES;
168183
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
@@ -171,16 +186,27 @@
171186
}
172187
if (features & CPUID_AARCH64_FEAT_SHA256)
173188
new_cpuid_flags |= CPUID_SHA256;
189+
#endif
190+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
174191
if (features & CPUID_AARCH64_FEAT_SHA256_512)
175192
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
193+
#endif
194+
#if !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
176195
if (features & CPUID_AARCH64_FEAT_RDM)
177196
new_cpuid_flags |= CPUID_RDM;
197+
#endif
198+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
178199
if (features & CPUID_AARCH64_FEAT_SHA3)
179200
new_cpuid_flags |= CPUID_SHA3;
201+
#endif
202+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
180203
if (features & CPUID_AARCH64_FEAT_SM3)
181204
new_cpuid_flags |= CPUID_SM3;
205+
#endif
206+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
182207
if (features & CPUID_AARCH64_FEAT_SM4)
183208
new_cpuid_flags |= CPUID_SM4;
209+
#endif
184210

185211
(void)wolfSSL_Atomic_Uint_CompareExchange
186212
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -200,6 +226,11 @@
200226
old_cpuid_flags = WC_CPUID_INITIALIZER;
201227
word64 hwcaps = getauxval(AT_HWCAP);
202228

229+
#ifndef WOLFSSL_ARMASM_NO_NEON
230+
if (hwcaps & HWCAP_ASIMD)
231+
new_cpuid_flags |= CPUID_ASIMD;
232+
#endif
233+
203234
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
204235
if (hwcaps & HWCAP_AES)
205236
new_cpuid_flags |= CPUID_AES;
@@ -247,12 +278,18 @@
247278
old_cpuid_flags = WC_CPUID_INITIALIZER;
248279
word64 features = android_getCpuFeatures();
249280

281+
#ifndef WOLFSSL_ARMASM_NO_NEON
282+
/* All Android AArch64 chips support NEON. */
283+
new_cpuid_flags |= CPUID_ASIMD;
284+
#endif
285+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
250286
if (features & ANDROID_CPU_ARM_FEATURE_AES)
251287
new_cpuid_flags |= CPUID_AES;
252288
if (features & ANDROID_CPU_ARM_FEATURE_PMULL)
253289
new_cpuid_flags |= CPUID_PMULL;
254290
if (features & ANDROID_CPU_ARM_FEATURE_SHA2)
255291
new_cpuid_flags |= CPUID_SHA256;
292+
#endif
256293

257294
(void)wolfSSL_Atomic_Uint_CompareExchange
258295
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -279,18 +316,31 @@
279316
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
280317
cpuid_flags_t new_cpuid_flags = 0,
281318
old_cpuid_flags = WC_CPUID_INITIALIZER;
319+
320+
#ifndef WOLFSSL_ARMASM_NO_NEON
321+
/* All Mac AArch64 chips support NEON. */
322+
new_cpuid_flags |= CPUID_ASIMD;
323+
#endif
324+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
282325
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0)
283326
new_cpuid_flags |= CPUID_AES;
284327
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0)
285328
new_cpuid_flags |= CPUID_PMULL;
286329
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0)
287330
new_cpuid_flags |= CPUID_SHA256;
331+
#endif
332+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
288333
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0)
289334
new_cpuid_flags |= CPUID_SHA512;
335+
#endif
336+
#if !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
290337
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0)
291338
new_cpuid_flags |= CPUID_RDM;
339+
#endif
340+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
292341
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0)
293342
new_cpuid_flags |= CPUID_SHA3;
343+
#endif
294344
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
295345
new_cpuid_flags |= CPUID_SM3;
296346
#endif
@@ -316,24 +366,40 @@
316366

317367
elf_aux_info(AT_HWCAP, &features, sizeof(features));
318368

319-
if (features & CPUID_AARCH64_FEAT_AES)
320-
new_cpuid_flags |= CPUID_AES;
321-
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
369+
#ifndef WOLFSSL_ARMASM_NO_NEON
370+
if (features & HWCAP_ASIMD)
371+
new_cpuid_flags |= CPUID_ASIMD;
372+
#endif
373+
374+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
375+
if (features & HWCAP_AES)
322376
new_cpuid_flags |= CPUID_AES;
377+
if (features & HWCAP_PMULL)
323378
new_cpuid_flags |= CPUID_PMULL;
324-
}
325-
if (features & CPUID_AARCH64_FEAT_SHA256)
379+
if (features & HWCAP_SHA2)
326380
new_cpuid_flags |= CPUID_SHA256;
327-
if (features & CPUID_AARCH64_FEAT_SHA256_512)
328-
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
329-
if (features & CPUID_AARCH64_FEAT_RDM)
381+
#endif
382+
383+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
384+
if (features & HWCAP_SHA512)
385+
new_cpuid_flags |= CPUID_SHA512;
386+
#endif
387+
#if defined(HWCAP_ASIMDRDM) && !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
388+
if (features & HWCAP_ASIMDRDM)
330389
new_cpuid_flags |= CPUID_RDM;
331-
if (features & CPUID_AARCH64_FEAT_SHA3)
390+
#endif
391+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
392+
if (features & HWCAP_SHA3)
332393
new_cpuid_flags |= CPUID_SHA3;
333-
if (features & CPUID_AARCH64_FEAT_SM3)
394+
#endif
395+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
396+
if (features & HWCAP_SM3)
334397
new_cpuid_flags |= CPUID_SM3;
335-
if (features & CPUID_AARCH64_FEAT_SM4)
398+
#endif
399+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
400+
if (features & HWCAP_SM4)
336401
new_cpuid_flags |= CPUID_SM4;
402+
#endif
337403

338404
(void)wolfSSL_Atomic_Uint_CompareExchange
339405
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -345,6 +411,9 @@
345411
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
346412
cpuid_flags_t new_cpuid_flags = 0,
347413
old_cpuid_flags = WC_CPUID_INITIALIZER;
414+
#ifndef WOLFSSL_ARMASM_NO_NEON
415+
new_cpuid_flags |= CPUID_ASIMD;
416+
#endif
348417
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
349418
new_cpuid_flags |= CPUID_AES;
350419
new_cpuid_flags |= CPUID_PMULL;

0 commit comments

Comments
 (0)