Skip to content

Commit d390a98

Browse files
authored
Merge pull request #10754 from SparkiDev/arm64_asm_c_fallback
Aarch64 asm: Have software fallback and CPU id checks
2 parents 4de8190 + 6315f95 commit d390a98

8 files changed

Lines changed: 818 additions & 336 deletions

File tree

tests/api/test_tls.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1934,7 +1934,9 @@ int test_tls12_corrupted_finished(void)
19341934
}
19351935
else {
19361936
ExpectIntGE(finishedSz, finishedLen);
1937-
XMEMCPY(finishedMsg, test_ctx.s_buff + finishedOffInMsg, finishedLen);
1937+
if (EXPECT_SUCCESS()) {
1938+
XMEMCPY(finishedMsg, test_ctx.s_buff + finishedOffInMsg, finishedLen);
1939+
}
19381940
finishedSz = finishedLen;
19391941
ExpectIntEQ(test_memio_modify_message_len(&test_ctx, 0,
19401942
finishedMsgPos, finishedOffInMsg), 0);

wolfcrypt/src/chacha.c

Lines changed: 90 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,38 @@ Public domain.
109109
static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER;
110110
#endif
111111

112+
/* The aarch64 ChaCha assembly is NEON-only. When NEON might be absent, also
113+
* build the C implementation: dispatch on ASIMD at runtime when NEON is
114+
* compiled in, or use only the C path when NEON is disabled at build time. */
115+
#if defined(USE_ARM_CHACHA_SPEEDUP) && defined(__aarch64__)
116+
#ifdef WOLFSSL_ARMASM_NO_NEON
117+
#define WOLFSSL_ARM_CHACHA_C_ONLY
118+
#else
119+
#define WOLFSSL_ARM_CHACHA_NEON_FALLBACK
120+
#endif
121+
#endif
122+
#if defined(WOLFSSL_ARM_CHACHA_NEON_FALLBACK) || \
123+
defined(WOLFSSL_ARM_CHACHA_C_ONLY)
124+
#define WOLFSSL_ARM_CHACHA_NEED_C
125+
#endif
126+
127+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
128+
static cpuid_flags_t chacha_cpuid_flags = WC_CPUID_INITIALIZER;
129+
/* Return non-zero when NEON/ASIMD is present and the asm path should run.
 *
 * Calls cpuid_get_flags_ex() on the file-local chacha_cpuid_flags and then
 * tests that value with IS_AARCH64_ASIMD(). The SetIV, SetKey and Process
 * code paths use the result to choose between the NEON assembly routines and
 * the C implementation.
 * NOTE(review): presumably cpuid_get_flags_ex() only probes the CPU while the
 * flags still hold WC_CPUID_INITIALIZER, making repeat calls cheap - confirm.
 *
 * @return non-zero if ASIMD is reported in the CPU flags, 0 otherwise.
 */
130+
static WC_INLINE int chacha_use_neon(void)
131+
{
132+
cpuid_get_flags_ex(&chacha_cpuid_flags);
133+
return IS_AARCH64_ASIMD(chacha_cpuid_flags);
134+
}
135+
#endif
136+
112137
/**
113138
* Set up the IV (nonce). Earlier versions used 64 bits instead of 96; this version
114139
* uses the typical AEAD 96 bit nonce and can do record sizes of 256 GB.
115140
*/
116141
int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
117142
{
118-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
143+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
119144
word32 temp[CHACHA_IV_WORDS];/* used for alignment of memory */
120145
#endif
121146

@@ -124,24 +149,31 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
124149

125150
ctx->left = 0; /* resets state */
126151

127-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
128-
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
129-
/* block counter */
130-
ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;
131-
/* fixed variable from nonce */
132-
ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]);
133-
/* counter from nonce */
134-
ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]);
135-
/* counter from nonce */
136-
ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]);
137-
#else
152+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
153+
if (chacha_use_neon())
154+
wc_chacha_setiv(ctx->X, inIv, counter);
155+
else
156+
#elif defined(USE_ARM_CHACHA_SPEEDUP) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY)
138157
wc_chacha_setiv(ctx->X, inIv, counter);
139158
#endif
159+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
160+
{
161+
XMEMCPY(temp, inIv, CHACHA_IV_BYTES);
162+
/* block counter */
163+
ctx->X[CHACHA_MATRIX_CNT_IV+0] = counter;
164+
/* fixed variable from nonce */
165+
ctx->X[CHACHA_MATRIX_CNT_IV+1] = LITTLE32(temp[0]);
166+
/* counter from nonce */
167+
ctx->X[CHACHA_MATRIX_CNT_IV+2] = LITTLE32(temp[1]);
168+
/* counter from nonce */
169+
ctx->X[CHACHA_MATRIX_CNT_IV+3] = LITTLE32(temp[2]);
170+
}
171+
#endif
140172

141173
return 0;
142174
}
143175

144-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
176+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
145177
/* "expand 32-byte k" as four little-endian 32-bit words */
146178
static const word32 sigma[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
147179
/* "expand 16-byte k" as four little-endian 32-bit words */
@@ -153,7 +185,7 @@ static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
153185
*/
154186
int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
155187
{
156-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
188+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
157189
const word32* constants;
158190
const byte* k;
159191
#ifdef XSTREAM_ALIGN
@@ -167,7 +199,15 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
167199
if (keySz != (CHACHA_MAX_KEY_SZ/2) && keySz != CHACHA_MAX_KEY_SZ)
168200
return BAD_FUNC_ARG;
169201

170-
#if !defined(USE_ARM_CHACHA_SPEEDUP)
202+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
203+
if (chacha_use_neon())
204+
wc_chacha_setkey(ctx->X, key, keySz);
205+
else
206+
#elif defined(USE_ARM_CHACHA_SPEEDUP) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY)
207+
wc_chacha_setkey(ctx->X, key, keySz);
208+
#endif
209+
#if !defined(USE_ARM_CHACHA_SPEEDUP) || defined(WOLFSSL_ARM_CHACHA_NEED_C)
210+
{
171211
#ifdef XSTREAM_ALIGN
172212
if ((wc_ptr_t)key % 4) {
173213
WOLFSSL_MSG("wc_ChachaSetKey unaligned key");
@@ -211,16 +251,16 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
211251
ctx->X[ 1] = constants[1];
212252
ctx->X[ 2] = constants[2];
213253
ctx->X[ 3] = constants[3];
214-
#else
215-
wc_chacha_setkey(ctx->X, key, keySz);
254+
}
216255
#endif
217256

218257
ctx->left = 0; /* resets state */
219258

220259
return 0;
221260
}
222261

223-
#if !defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)
262+
#if (!defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)) || \
263+
defined(WOLFSSL_ARM_CHACHA_NEED_C)
224264
/**
225265
* Converts word into bytes with rotations having been done.
226266
*/
@@ -267,7 +307,8 @@ extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
267307
#endif
268308

269309

270-
#if !defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)
310+
#if (!defined(USE_INTEL_CHACHA_SPEEDUP) && !defined(USE_ARM_CHACHA_SPEEDUP)) || \
311+
defined(WOLFSSL_ARM_CHACHA_NEED_C)
271312
/**
272313
* Encrypt a stream of bytes
273314
*/
@@ -366,23 +407,39 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
366407
return 0;
367408
}
368409
#elif defined(USE_ARM_CHACHA_SPEEDUP)
369-
/* Handle left over bytes from last block. */
370-
if ((msglen > 0) && (ctx->left > 0)) {
371-
byte* over = ((byte*)ctx->over) + CHACHA_CHUNK_BYTES - ctx->left;
372-
word32 l = min(msglen, ctx->left);
373-
374-
wc_chacha_use_over(over, output, input, l);
410+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
411+
if (chacha_use_neon())
412+
#endif
413+
#ifndef WOLFSSL_ARM_CHACHA_C_ONLY
414+
{
415+
/* Handle left over bytes from last block. */
416+
if ((msglen > 0) && (ctx->left > 0)) {
417+
byte* over = ((byte*)ctx->over) + CHACHA_CHUNK_BYTES - ctx->left;
418+
word32 l = min(msglen, ctx->left);
419+
420+
wc_chacha_use_over(over, output, input, l);
421+
422+
ctx->left -= l;
423+
input += l;
424+
output += l;
425+
msglen -= l;
426+
}
375427

376-
ctx->left -= l;
377-
input += l;
378-
output += l;
379-
msglen -= l;
428+
if (msglen != 0) {
429+
wc_chacha_crypt_bytes(ctx, output, input, msglen);
430+
}
431+
return 0;
380432
}
381-
382-
if (msglen != 0) {
383-
wc_chacha_crypt_bytes(ctx, output, input, msglen);
433+
#endif
434+
#ifdef WOLFSSL_ARM_CHACHA_NEED_C
435+
#ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
436+
else
437+
#endif
438+
{
439+
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
440+
return 0;
384441
}
385-
return 0;
442+
#endif
386443
#else
387444
wc_Chacha_encrypt_bytes(ctx, input, output, msglen);
388445
return 0;

wolfcrypt/src/cpuid.c

Lines changed: 80 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
#define CPUID_AARCH64_FEAT_SHA3 ((word64)1 << 32)
147147
#define CPUID_AARCH64_FEAT_SM3 ((word64)1 << 36)
148148
#define CPUID_AARCH64_FEAT_SM4 ((word64)1 << 40)
149+
#define CPUID_AARCH64_FEAT_ASIMD ((word64)0xf << 20)
149150

150151
#ifdef WOLFSSL_AARCH64_PRIVILEGE_MODE
151152
/* https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers
@@ -158,13 +159,27 @@
158159
old_cpuid_flags = WC_CPUID_INITIALIZER;
159160
word64 features;
160161

162+
#ifndef WOLFSSL_ARMASM_NO_NEON
163+
__asm__ __volatile (
164+
"mrs %[feat], ID_AA64PFR0_EL1\n"
165+
: [feat] "=r" (features)
166+
:
167+
:
168+
);
169+
170+
if ((features & CPUID_AARCH64_FEAT_ASIMD) !=
171+
CPUID_AARCH64_FEAT_ASIMD)
172+
new_cpuid_flags |= CPUID_ASIMD;
173+
#endif
174+
161175
__asm__ __volatile (
162176
"mrs %[feat], ID_AA64ISAR0_EL1\n"
163177
: [feat] "=r" (features)
164178
:
165179
:
166180
);
167181

182+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
168183
if (features & CPUID_AARCH64_FEAT_AES)
169184
new_cpuid_flags |= CPUID_AES;
170185
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
@@ -173,16 +188,27 @@
173188
}
174189
if (features & CPUID_AARCH64_FEAT_SHA256)
175190
new_cpuid_flags |= CPUID_SHA256;
191+
#endif
192+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
176193
if (features & CPUID_AARCH64_FEAT_SHA256_512)
177194
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
195+
#endif
196+
#if !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
178197
if (features & CPUID_AARCH64_FEAT_RDM)
179198
new_cpuid_flags |= CPUID_RDM;
199+
#endif
200+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
180201
if (features & CPUID_AARCH64_FEAT_SHA3)
181202
new_cpuid_flags |= CPUID_SHA3;
203+
#endif
204+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
182205
if (features & CPUID_AARCH64_FEAT_SM3)
183206
new_cpuid_flags |= CPUID_SM3;
207+
#endif
208+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
184209
if (features & CPUID_AARCH64_FEAT_SM4)
185210
new_cpuid_flags |= CPUID_SM4;
211+
#endif
186212

187213
(void)wolfSSL_Atomic_Uint_CompareExchange
188214
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -202,6 +228,11 @@
202228
old_cpuid_flags = WC_CPUID_INITIALIZER;
203229
word64 hwcaps = getauxval(AT_HWCAP);
204230

231+
#ifndef WOLFSSL_ARMASM_NO_NEON
232+
if (hwcaps & HWCAP_ASIMD)
233+
new_cpuid_flags |= CPUID_ASIMD;
234+
#endif
235+
205236
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
206237
if (hwcaps & HWCAP_AES)
207238
new_cpuid_flags |= CPUID_AES;
@@ -249,12 +280,18 @@
249280
old_cpuid_flags = WC_CPUID_INITIALIZER;
250281
word64 features = android_getCpuFeatures();
251282

283+
#ifndef WOLFSSL_ARMASM_NO_NEON
284+
/* All Android AArch64 chips support NEON. */
285+
new_cpuid_flags |= CPUID_ASIMD;
286+
#endif
287+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
252288
if (features & ANDROID_CPU_ARM_FEATURE_AES)
253289
new_cpuid_flags |= CPUID_AES;
254290
if (features & ANDROID_CPU_ARM_FEATURE_PMULL)
255291
new_cpuid_flags |= CPUID_PMULL;
256292
if (features & ANDROID_CPU_ARM_FEATURE_SHA2)
257293
new_cpuid_flags |= CPUID_SHA256;
294+
#endif
258295

259296
(void)wolfSSL_Atomic_Uint_CompareExchange
260297
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -281,18 +318,31 @@
281318
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
282319
cpuid_flags_t new_cpuid_flags = 0,
283320
old_cpuid_flags = WC_CPUID_INITIALIZER;
321+
322+
#ifndef WOLFSSL_ARMASM_NO_NEON
323+
/* All Mac AArch64 chips support NEON. */
324+
new_cpuid_flags |= CPUID_ASIMD;
325+
#endif
326+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
284327
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_AES") != 0)
285328
new_cpuid_flags |= CPUID_AES;
286329
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_PMULL") != 0)
287330
new_cpuid_flags |= CPUID_PMULL;
288331
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA256") != 0)
289332
new_cpuid_flags |= CPUID_SHA256;
333+
#endif
334+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
290335
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA512") != 0)
291336
new_cpuid_flags |= CPUID_SHA512;
337+
#endif
338+
#if !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
292339
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_RDM") != 0)
293340
new_cpuid_flags |= CPUID_RDM;
341+
#endif
342+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
294343
if (cpuid_get_sysctlbyname("hw.optional.arm.FEAT_SHA3") != 0)
295344
new_cpuid_flags |= CPUID_SHA3;
345+
#endif
296346
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
297347
new_cpuid_flags |= CPUID_SM3;
298348
#endif
@@ -318,24 +368,40 @@
318368

319369
elf_aux_info(AT_HWCAP, &features, sizeof(features));
320370

321-
if (features & CPUID_AARCH64_FEAT_AES)
322-
new_cpuid_flags |= CPUID_AES;
323-
if (features & CPUID_AARCH64_FEAT_AES_PMULL) {
371+
#ifndef WOLFSSL_ARMASM_NO_NEON
372+
if (features & HWCAP_ASIMD)
373+
new_cpuid_flags |= CPUID_ASIMD;
374+
#endif
375+
376+
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
377+
if (features & HWCAP_AES)
324378
new_cpuid_flags |= CPUID_AES;
379+
if (features & HWCAP_PMULL)
325380
new_cpuid_flags |= CPUID_PMULL;
326-
}
327-
if (features & CPUID_AARCH64_FEAT_SHA256)
381+
if (features & HWCAP_SHA2)
328382
new_cpuid_flags |= CPUID_SHA256;
329-
if (features & CPUID_AARCH64_FEAT_SHA256_512)
330-
new_cpuid_flags |= CPUID_SHA256 | CPUID_SHA512;
331-
if (features & CPUID_AARCH64_FEAT_RDM)
383+
#endif
384+
385+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA512
386+
if (features & HWCAP_SHA512)
387+
new_cpuid_flags |= CPUID_SHA512;
388+
#endif
389+
#if defined(HWCAP_ASIMDRDM) && !defined(WOLFSSL_AARCH64_NO_SQRDMLSH)
390+
if (features & HWCAP_ASIMDRDM)
332391
new_cpuid_flags |= CPUID_RDM;
333-
if (features & CPUID_AARCH64_FEAT_SHA3)
392+
#endif
393+
#ifdef WOLFSSL_ARMASM_CRYPTO_SHA3
394+
if (features & HWCAP_SHA3)
334395
new_cpuid_flags |= CPUID_SHA3;
335-
if (features & CPUID_AARCH64_FEAT_SM3)
396+
#endif
397+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM3
398+
if (features & HWCAP_SM3)
336399
new_cpuid_flags |= CPUID_SM3;
337-
if (features & CPUID_AARCH64_FEAT_SM4)
400+
#endif
401+
#ifdef WOLFSSL_ARMASM_CRYPTO_SM4
402+
if (features & HWCAP_SM4)
338403
new_cpuid_flags |= CPUID_SM4;
404+
#endif
339405

340406
(void)wolfSSL_Atomic_Uint_CompareExchange
341407
(&cpuid_flags, &old_cpuid_flags, new_cpuid_flags);
@@ -347,6 +413,9 @@
347413
if (WOLFSSL_ATOMIC_LOAD(cpuid_flags) == WC_CPUID_INITIALIZER) {
348414
cpuid_flags_t new_cpuid_flags = 0,
349415
old_cpuid_flags = WC_CPUID_INITIALIZER;
416+
#ifndef WOLFSSL_ARMASM_NO_NEON
417+
new_cpuid_flags |= CPUID_ASIMD;
418+
#endif
350419
#ifndef WOLFSSL_ARMASM_NO_HW_CRYPTO
351420
new_cpuid_flags |= CPUID_AES;
352421
new_cpuid_flags |= CPUID_PMULL;

0 commit comments

Comments
 (0)