@@ -109,13 +109,38 @@ Public domain.
109109 static cpuid_flags_t cpuidFlags = WC_CPUID_INITIALIZER ;
110110#endif
111111
112+ /* The aarch64 ChaCha assembly is NEON-only. When NEON might be absent, also
113+ * build the C implementation: dispatch on ASIMD at runtime when NEON is
114+ * compiled in, or use only the C path when NEON is disabled at build time. */
115+ #if defined(USE_ARM_CHACHA_SPEEDUP ) && defined(__aarch64__ )
116+ #ifdef WOLFSSL_ARMASM_NO_NEON
117+ #define WOLFSSL_ARM_CHACHA_C_ONLY
118+ #else
119+ #define WOLFSSL_ARM_CHACHA_NEON_FALLBACK
120+ #endif
121+ #endif
122+ #if defined(WOLFSSL_ARM_CHACHA_NEON_FALLBACK ) || \
123+ defined(WOLFSSL_ARM_CHACHA_C_ONLY )
124+ #define WOLFSSL_ARM_CHACHA_NEED_C
125+ #endif
126+
127+ #ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
128+ static cpuid_flags_t chacha_cpuid_flags = WC_CPUID_INITIALIZER ;
129+ /* Fetch CPU flags via cpuid_get_flags_ex() and return the IS_AARCH64_ASIMD() result: non-zero selects the asm path in callers, zero the C path. */
130+ static WC_INLINE int chacha_use_neon (void )
131+ {
132+ cpuid_get_flags_ex (& chacha_cpuid_flags );
133+ return IS_AARCH64_ASIMD (chacha_cpuid_flags );
134+ }
135+ #endif
136+
112137/**
113138 * Set up the IV (nonce). Earlier versions used a 64-bit nonce; this version
114139 * uses the typical AEAD 96-bit nonce and supports record sizes up to 256 GB.
115140 */
116141int wc_Chacha_SetIV (ChaCha * ctx , const byte * inIv , word32 counter )
117142{
118- #if !defined(USE_ARM_CHACHA_SPEEDUP )
143+ #if !defined(USE_ARM_CHACHA_SPEEDUP ) || defined( WOLFSSL_ARM_CHACHA_NEED_C )
119144 word32 temp [CHACHA_IV_WORDS ];/* used for alignment of memory */
120145#endif
121146
@@ -124,24 +149,31 @@ int wc_Chacha_SetIV(ChaCha* ctx, const byte* inIv, word32 counter)
124149
125150 ctx -> left = 0 ; /* resets state */
126151
127- #if !defined(USE_ARM_CHACHA_SPEEDUP )
128- XMEMCPY (temp , inIv , CHACHA_IV_BYTES );
129- /* block counter */
130- ctx -> X [CHACHA_MATRIX_CNT_IV + 0 ] = counter ;
131- /* fixed variable from nonce */
132- ctx -> X [CHACHA_MATRIX_CNT_IV + 1 ] = LITTLE32 (temp [0 ]);
133- /* counter from nonce */
134- ctx -> X [CHACHA_MATRIX_CNT_IV + 2 ] = LITTLE32 (temp [1 ]);
135- /* counter from nonce */
136- ctx -> X [CHACHA_MATRIX_CNT_IV + 3 ] = LITTLE32 (temp [2 ]);
137- #else
152+ #ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
153+ if (chacha_use_neon ())
154+ wc_chacha_setiv (ctx -> X , inIv , counter );
155+ else
156+ #elif defined(USE_ARM_CHACHA_SPEEDUP ) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY )
138157 wc_chacha_setiv (ctx -> X , inIv , counter );
139158#endif
159+ #if !defined(USE_ARM_CHACHA_SPEEDUP ) || defined(WOLFSSL_ARM_CHACHA_NEED_C )
160+ {
161+ XMEMCPY (temp , inIv , CHACHA_IV_BYTES );
162+ /* block counter */
163+ ctx -> X [CHACHA_MATRIX_CNT_IV + 0 ] = counter ;
164+ /* fixed variable from nonce */
165+ ctx -> X [CHACHA_MATRIX_CNT_IV + 1 ] = LITTLE32 (temp [0 ]);
166+ /* counter from nonce */
167+ ctx -> X [CHACHA_MATRIX_CNT_IV + 2 ] = LITTLE32 (temp [1 ]);
168+ /* counter from nonce */
169+ ctx -> X [CHACHA_MATRIX_CNT_IV + 3 ] = LITTLE32 (temp [2 ]);
170+ }
171+ #endif
140172
141173 return 0 ;
142174}
143175
144- #if !defined(USE_ARM_CHACHA_SPEEDUP )
176+ #if !defined(USE_ARM_CHACHA_SPEEDUP ) || defined( WOLFSSL_ARM_CHACHA_NEED_C )
145177/* "expand 32-byte k" packed as four little-endian 32-bit words */
146178static const word32 sigma [4 ] = {0x61707865 , 0x3320646e , 0x79622d32 , 0x6b206574 };
147179/* "expand 16-byte k" packed as four little-endian 32-bit words */
@@ -153,7 +185,7 @@ static const word32 tau[4] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
153185 */
154186int wc_Chacha_SetKey (ChaCha * ctx , const byte * key , word32 keySz )
155187{
156- #if !defined(USE_ARM_CHACHA_SPEEDUP )
188+ #if !defined(USE_ARM_CHACHA_SPEEDUP ) || defined( WOLFSSL_ARM_CHACHA_NEED_C )
157189 const word32 * constants ;
158190 const byte * k ;
159191#ifdef XSTREAM_ALIGN
@@ -167,7 +199,15 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
167199 if (keySz != (CHACHA_MAX_KEY_SZ /2 ) && keySz != CHACHA_MAX_KEY_SZ )
168200 return BAD_FUNC_ARG ;
169201
170- #if !defined(USE_ARM_CHACHA_SPEEDUP )
202+ #ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
203+ if (chacha_use_neon ())
204+ wc_chacha_setkey (ctx -> X , key , keySz );
205+ else
206+ #elif defined(USE_ARM_CHACHA_SPEEDUP ) && !defined(WOLFSSL_ARM_CHACHA_C_ONLY )
207+ wc_chacha_setkey (ctx -> X , key , keySz );
208+ #endif
209+ #if !defined(USE_ARM_CHACHA_SPEEDUP ) || defined(WOLFSSL_ARM_CHACHA_NEED_C )
210+ {
171211#ifdef XSTREAM_ALIGN
172212 if ((wc_ptr_t )key % 4 ) {
173213 WOLFSSL_MSG ("wc_ChachaSetKey unaligned key" );
@@ -211,16 +251,16 @@ int wc_Chacha_SetKey(ChaCha* ctx, const byte* key, word32 keySz)
211251 ctx -> X [ 1 ] = constants [1 ];
212252 ctx -> X [ 2 ] = constants [2 ];
213253 ctx -> X [ 3 ] = constants [3 ];
214- #else
215- wc_chacha_setkey (ctx -> X , key , keySz );
254+ }
216255#endif
217256
218257 ctx -> left = 0 ; /* resets state */
219258
220259 return 0 ;
221260}
222261
223- #if !defined(USE_INTEL_CHACHA_SPEEDUP ) && !defined(USE_ARM_CHACHA_SPEEDUP )
262+ #if (!defined(USE_INTEL_CHACHA_SPEEDUP ) && !defined(USE_ARM_CHACHA_SPEEDUP )) || \
263+ defined(WOLFSSL_ARM_CHACHA_NEED_C )
224264/**
225265 * Converts word into bytes with rotations having been done.
226266 */
@@ -267,7 +307,8 @@ extern void chacha_encrypt_avx2(ChaCha* ctx, const byte* m, byte* c,
267307#endif
268308
269309
270- #if !defined(USE_INTEL_CHACHA_SPEEDUP ) && !defined(USE_ARM_CHACHA_SPEEDUP )
310+ #if (!defined(USE_INTEL_CHACHA_SPEEDUP ) && !defined(USE_ARM_CHACHA_SPEEDUP )) || \
311+ defined(WOLFSSL_ARM_CHACHA_NEED_C )
271312/**
272313 * Encrypt a stream of bytes
273314 */
@@ -366,23 +407,39 @@ int wc_Chacha_Process(ChaCha* ctx, byte* output, const byte* input,
366407 return 0 ;
367408 }
368409#elif defined(USE_ARM_CHACHA_SPEEDUP )
369- /* Handle left over bytes from last block. */
370- if ((msglen > 0 ) && (ctx -> left > 0 )) {
371- byte * over = ((byte * )ctx -> over ) + CHACHA_CHUNK_BYTES - ctx -> left ;
372- word32 l = min (msglen , ctx -> left );
373-
374- wc_chacha_use_over (over , output , input , l );
410+ #ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
411+ if (chacha_use_neon ())
412+ #endif
413+ #ifndef WOLFSSL_ARM_CHACHA_C_ONLY
414+ {
415+ /* Handle left over bytes from last block. */
416+ if ((msglen > 0 ) && (ctx -> left > 0 )) {
417+ byte * over = ((byte * )ctx -> over ) + CHACHA_CHUNK_BYTES - ctx -> left ;
418+ word32 l = min (msglen , ctx -> left );
419+
420+ wc_chacha_use_over (over , output , input , l );
421+
422+ ctx -> left -= l ;
423+ input += l ;
424+ output += l ;
425+ msglen -= l ;
426+ }
375427
376- ctx -> left -= l ;
377- input += l ;
378- output += l ;
379- msglen -= l ;
428+ if ( msglen != 0 ) {
429+ wc_chacha_crypt_bytes ( ctx , output , input , msglen ) ;
430+ }
431+ return 0 ;
380432 }
381-
382- if (msglen != 0 ) {
383- wc_chacha_crypt_bytes (ctx , output , input , msglen );
433+ #endif
434+ #ifdef WOLFSSL_ARM_CHACHA_NEED_C
435+ #ifdef WOLFSSL_ARM_CHACHA_NEON_FALLBACK
436+ else
437+ #endif
438+ {
439+ wc_Chacha_encrypt_bytes (ctx , input , output , msglen );
440+ return 0 ;
384441 }
385- return 0 ;
442+ #endif
386443#else
387444 wc_Chacha_encrypt_bytes (ctx , input , output , msglen );
388445 return 0 ;
0 commit comments