Skip to content

Commit 450ea3a

Browse files
committed
Allow independent optimisation with type-specific CTZ compiler builtins.
1 parent e04017c commit 450ea3a

1 file changed

Lines changed: 54 additions & 23 deletions

File tree

src/quicktions.pyx

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -104,41 +104,67 @@ cdef extern from *:
104104
#endif
105105
106106
#ifdef __has_builtin
107-
#if __has_builtin(__builtin_ctz) && __has_builtin(__builtin_ctzl) && __has_builtin(__builtin_ctzll)
108-
#define __Quicktions_HAS_FAST_CTZ 1
109-
#define __Quicktions_trailing_zeros_uint(x) __builtin_ctz(x)
110-
#define __Quicktions_trailing_zeros_ulong(x) __builtin_ctzl(x)
111-
#define __Quicktions_trailing_zeros_ullong(x) __builtin_ctzll(x)
112-
#elif __has_builtin(__builtin_ctzg)
113-
#define __Quicktions_HAS_FAST_CTZ 1
107+
#if __has_builtin(__builtin_ctzg)
108+
#define __Quicktions_HAS_FAST_CTZ_uint 1
114109
#define __Quicktions_trailing_zeros_uint(x) __builtin_ctzg(x)
110+
#define __Quicktions_HAS_FAST_CTZ_ulong 1
115111
#define __Quicktions_trailing_zeros_ulong(x) __builtin_ctzg(x)
112+
#define __Quicktions_HAS_FAST_CTZ_ullong 1
116113
#define __Quicktions_trailing_zeros_ullong(x) __builtin_ctzg(x)
114+
#else
115+
#if __has_builtin(__builtin_ctz)
116+
#define __Quicktions_HAS_FAST_CTZ_uint 1
117+
#define __Quicktions_trailing_zeros_uint(x) __builtin_ctz(x)
118+
#endif
119+
#if __has_builtin(__builtin_ctzl)
120+
#define __Quicktions_HAS_FAST_CTZ_ulong 1
121+
#define __Quicktions_trailing_zeros_ulong(x) __builtin_ctzl(x)
122+
#endif
123+
#if __has_builtin(__builtin_ctzll)
124+
#define __Quicktions_HAS_FAST_CTZ_ullong 1
125+
#define __Quicktions_trailing_zeros_ullong(x) __builtin_ctzll(x)
126+
#endif
117127
#endif
118-
#elif defined(_MSC_VER) && defined(_WIN64) && SIZEOF_INT == 4 && SIZEOF_LONG_LONG == 8
128+
#elif defined(_MSC_VER) && SIZEOF_INT == 4
119129
/* Typical MSVC config with a 32-bit "int"; covers Win32 and Win64 (Win32 only lacks "_BitScanForward64"). */
120-
#define __Quicktions_HAS_FAST_CTZ 1
121-
#pragma intrinsic(_BitScanForward, _BitScanForward64)
130+
#define __Quicktions_HAS_FAST_CTZ_uint 1
131+
#pragma intrinsic(_BitScanForward)
122132
static CYTHON_INLINE int __Quicktions_trailing_zeros_uint(uint32_t x) {
123133
unsigned long bits;
124134
_BitScanForward(&bits, x);
125135
return (int) bits;
126136
}
127-
static CYTHON_INLINE int __Quicktions_trailing_zeros_ullong(uint64_t x) {
128-
unsigned long bits;
129-
_BitScanForward64(&bits, x);
130-
return (int) bits;
131-
}
132137
#if SIZEOF_LONG == 4
133-
#define __Quicktions_trailing_zeros_ulong(x) __Quicktions_trailing_zeros_uint(x)
134-
#else
138+
#define __Quicktions_HAS_FAST_CTZ_ulong 1
139+
#define __Quicktions_trailing_zeros_ulong(x) __Quicktions_trailing_zeros_uint(x)
140+
#endif
141+
142+
/* Win32 does not define "_BitScanForward64". */
143+
#if defined(_WIN64) && SIZEOF_LONG_LONG == 8
144+
#define __Quicktions_HAS_FAST_CTZ_ullong 1
145+
#pragma intrinsic(_BitScanForward64)
146+
static CYTHON_INLINE int __Quicktions_trailing_zeros_ullong(uint64_t x) {
147+
unsigned long bits;
148+
_BitScanForward64(&bits, x);
149+
return (int) bits;
150+
}
151+
#if SIZEOF_LONG == 8
152+
#define __Quicktions_HAS_FAST_CTZ_ulong 1
135153
#define __Quicktions_trailing_zeros_ulong(x) __Quicktions_trailing_zeros_ullong(x)
154+
#endif
136155
#endif
137156
#endif
138-
#if !defined(__Quicktions_HAS_FAST_CTZ)
139-
#define __Quicktions_HAS_FAST_CTZ 0
157+
158+
#if !defined(__Quicktions_HAS_FAST_CTZ_uint)
159+
#define __Quicktions_HAS_FAST_CTZ_uint 0
140160
#define __Quicktions_trailing_zeros_uint(x) (0)
161+
#endif
162+
#if !defined(__Quicktions_HAS_FAST_CTZ_ulong)
163+
#define __Quicktions_HAS_FAST_CTZ_ulong 0
141164
#define __Quicktions_trailing_zeros_ulong(x) (0)
165+
#endif
166+
#if !defined(__Quicktions_HAS_FAST_CTZ_ullong)
167+
#define __Quicktions_HAS_FAST_CTZ_ullong 0
142168
#define __Quicktions_trailing_zeros_ullong(x) (0)
143169
#endif
144170
"""
@@ -151,9 +177,11 @@ cdef extern from *:
151177
bint HAS_OLD_PYLONG_GCD "(CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000)"
152178
object _PyLong_GCD(object a, object b)
153179

154-
bint HAS_FAST_CTZ "__Quicktions_HAS_FAST_CTZ"
180+
bint HAS_FAST_CTZ_uint "__Quicktions_HAS_FAST_CTZ_uint"
155181
int trailing_zeros_uint "__Quicktions_trailing_zeros_uint" (unsigned int x)
182+
bint HAS_FAST_CTZ_ulong "__Quicktions_HAS_FAST_CTZ_ulong"
156183
int trailing_zeros_ulong "__Quicktions_trailing_zeros_ulong" (unsigned long x)
184+
bint HAS_FAST_CTZ_ullong "__Quicktions_HAS_FAST_CTZ_ullong"
157185
int trailing_zeros_ullong "__Quicktions_trailing_zeros_ullong" (unsigned long long x)
158186

159187

@@ -186,9 +214,12 @@ cdef ullong _abs(long long x):
186214

187215

188216
cdef cunumber _igcd(cunumber a, cunumber b):
189-
"""Euclid's GCD algorithm"""
190-
if HAS_FAST_CTZ:
191-
return _binary_gcd(a, b)
217+
if cunumber is ullong and HAS_FAST_CTZ_ullong:
218+
return _binary_gcd[ullong](a, b)
219+
elif cunumber is ulong and HAS_FAST_CTZ_ulong:
220+
return _binary_gcd[ulong](a, b)
221+
elif cunumber is uint and HAS_FAST_CTZ_uint:
222+
return _binary_gcd[uint](a, b)
192223
else:
193224
return _euclid_gcd(a, b)
194225

0 commit comments

Comments
 (0)