11/*
2- * This file defines string hash function using CRC32. It takes advantage of
3- * Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32
4- * computation. The hash functions try to compute CRC32 of length and up
5- * to 128 bytes of given string.
2+ * This file defines string hash function using CRC32.
3+ * On Intel architectures, this implementation takes advantage of hardware
4+ * support (CRC32 instruction, SSE 4.2) to speed up the CRC32 computation.
5+ * On ARM64 architectures, this implementation utilizes the ARMv8.1-A extension
6+ * which offers CRC32 instructions.
7+ * The hash functions try to compute CRC32 of length and up to 128 bytes of
8+ * the given string.
69 */
710
811#define lj_str_hash_c
1518#include <sys/types.h>
1619#include <unistd.h>
1720#include <time.h>
18- #include <smmintrin.h>
1921#include "lj_vm.h"
2022
23+ #if LUAJIT_TARGET == LUAJIT_ARCH_X64
24+ #include <smmintrin.h>
25+
26+ #define lj_crc32_u32 _mm_crc32_u32
27+ #define lj_crc32_u64 _mm_crc32_u64
28+
2129#ifndef F_CPU_SSE4_2
2230#define F_CPU_SSE4_2 (1 << 20)
2331#endif
2432
33+ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
34+ #include <sys/auxv.h>
35+ #include <arm_acle.h>
36+ #include <errno.h>
37+
38+ #define lj_crc32_u32 __crc32cw
39+ #define lj_crc32_u64 __crc32cd
40+
41+ #ifndef HWCAP_CRC32
42+ #define HWCAP_CRC32 (1 << 7)
43+ #endif
44+
45+ #else
46+ #error "LJ_OR_STRHASHCRC32 not supported on this architecture"
47+ #endif
48+
2549#ifdef __MINGW32__
2650#define random () ((long) rand())
2751#define srandom (seed ) srand(seed)
@@ -49,7 +73,7 @@ static LJ_NOINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len)
4973 v = (v << 8 ) | str [len >> 1 ];
5074 v = (v << 8 ) | str [len - 1 ];
5175 v = (v << 8 ) | len ;
52- return _mm_crc32_u32 (0 , v );
76+ return lj_crc32_u32 (0 , v );
5377#else
5478 uint32_t a , b , h = len ;
5579
@@ -79,9 +103,9 @@ static LJ_NOINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len)
79103 v2 = * cast_uint32p (str + len - 4 );
80104 }
81105
82- h = _mm_crc32_u32 (0 , len );
83- h = _mm_crc32_u64 (h , v1 );
84- h = _mm_crc32_u64 (h , v2 );
106+ h = lj_crc32_u32 (0 , len );
107+ h = lj_crc32_u64 (h , v1 );
108+ h = lj_crc32_u64 (h , v2 );
85109
86110 return h ;
87111}
@@ -92,18 +116,18 @@ static LJ_NOINLINE uint32_t lj_str_hash_16_128(const char* str, uint32_t len)
92116 uint64_t h1 , h2 ;
93117 uint32_t i ;
94118
95- h1 = _mm_crc32_u32 (0 , len );
119+ h1 = lj_crc32_u32 (0 , len );
96120 h2 = 0 ;
97121
98122 for (i = 0 ; i < len - 16 ; i += 16 ) {
99- h1 += _mm_crc32_u64 (h1 , * cast_uint64p (str + i ));
100- h2 += _mm_crc32_u64 (h2 , * cast_uint64p (str + i + 8 ));
123+ h1 += lj_crc32_u64 (h1 , * cast_uint64p (str + i ));
124+ h2 += lj_crc32_u64 (h2 , * cast_uint64p (str + i + 8 ));
101125 };
102126
103- h1 = _mm_crc32_u64 (h1 , * cast_uint64p (str + len - 16 ));
104- h2 = _mm_crc32_u64 (h2 , * cast_uint64p (str + len - 8 ));
127+ h1 = lj_crc32_u64 (h1 , * cast_uint64p (str + len - 16 ));
128+ h2 = lj_crc32_u64 (h2 , * cast_uint64p (str + len - 8 ));
105129
106- return _mm_crc32_u32 (h1 , h2 );
130+ return lj_crc32_u32 (h1 , h2 );
107131}
108132
109133/* **************************************************************************
@@ -167,32 +191,32 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str,
167191 pos1 = get_random_pos_unsafe (chunk_sz_log2 , 0 );
168192 pos2 = get_random_pos_unsafe (chunk_sz_log2 , 1 );
169193
170- h1 = _mm_crc32_u32 (0 , len );
194+ h1 = lj_crc32_u32 (0 , len );
171195 h2 = 0 ;
172196
173197 /* loop over 14 chunks, 2 chunks at a time */
174198 for (i = 0 , chunk_ptr = str ; i < (chunk_num / 2 - 1 );
175199 chunk_ptr += chunk_sz , i ++ ) {
176200
177201 v = * cast_uint64p (chunk_ptr + pos1 );
178- h1 = _mm_crc32_u64 (h1 , v );
202+ h1 = lj_crc32_u64 (h1 , v );
179203
180204 v = * cast_uint64p (chunk_ptr + chunk_sz + pos2 );
181- h2 = _mm_crc32_u64 (h2 , v );
205+ h2 = lj_crc32_u64 (h2 , v );
182206 }
183207
184208 /* the last two chunks */
185209 v = * cast_uint64p (chunk_ptr + pos1 );
186- h1 = _mm_crc32_u64 (h1 , v );
210+ h1 = lj_crc32_u64 (h1 , v );
187211
188212 v = * cast_uint64p (chunk_ptr + chunk_sz - 8 - pos2 );
189- h2 = _mm_crc32_u64 (h2 , v );
213+ h2 = lj_crc32_u64 (h2 , v );
190214
191215 /* process the trailing part */
192- h1 = _mm_crc32_u64 (h1 , * cast_uint64p (str ));
193- h2 = _mm_crc32_u64 (h2 , * cast_uint64p (str + len - 8 ));
216+ h1 = lj_crc32_u64 (h1 , * cast_uint64p (str ));
217+ h2 = lj_crc32_u64 (h2 , * cast_uint64p (str + len - 8 ));
194218
195- h1 = _mm_crc32_u32 (h1 , h2 );
219+ h1 = lj_crc32_u32 (h1 , h2 );
196220
197221 return h1 ;
198222}
@@ -233,8 +257,8 @@ static void lj_str_hash_init_random(void)
233257 }
234258
235259 /* Init seed */
236- seed = _mm_crc32_u32 (0 , getpid ());
237- seed = _mm_crc32_u32 (seed , time (NULL ));
260+ seed = lj_crc32_u32 (0 , getpid ());
261+ seed = lj_crc32_u32 (seed , time (NULL ));
238262 srandom (seed );
239263
240264 /* Now start to populate the random_pos[][]. */
@@ -266,9 +290,15 @@ static void lj_str_hash_init_random(void)
266290
267291LJ_FUNC unsigned char lj_check_crc32_support ()
268292{
293+ #if LUAJIT_TARGET == LUAJIT_ARCH_X64
269294 uint32_t features [4 ];
270295 if (lj_vm_cpuid (1 , features ))
271296 return (features [2 ] & F_CPU_SSE4_2 ) != 0 ;
297+ #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
298+ uint32_t hwcap = getauxval (AT_HWCAP );
299+ if (hwcap != ENOENT )
300+ return (hwcap & HWCAP_CRC32 ) != 0 ;
301+ #endif
272302 return 0 ;
273303}
274304
0 commit comments