Skip to content

Commit 29de283

Browse files
committed
librt base64: use existing SIMD CPU dispatch by customizing build flags
Inspired by https://stackoverflow.com/a/68508804
1 parent 1b6ebb1 commit 29de283

File tree

5 files changed

+106
-27
lines changed

5 files changed

+106
-27
lines changed

mypyc/build.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ class ModDesc(NamedTuple):
7070
"base64/arch/neon64/codec.c",
7171
],
7272
[
73+
"base64/arch/avx/enc_loop_asm.c",
74+
"base64/arch/avx2/enc_loop_asm.c",
75+
"base64/arch/avx2/dec_loop.c",
76+
"base64/arch/avx2/dec_reshuffle.c",
7377
"base64/arch/generic/32/enc_loop.c",
7478
"base64/arch/generic/64/enc_loop.c",
7579
"base64/arch/generic/32/dec_loop.c",
@@ -118,6 +122,51 @@ class ModDesc(NamedTuple):
118122
else:
119123
from distutils import ccompiler, sysconfig
120124

125+
# Extra compiler flags, keyed first by the distutils ``compiler_type`` and then
# by a path fragment. ``spawn`` adds the flag when the fragment occurs in an
# object-file argument of the command it is about to run.
#
# Order matters: fragments are tried in insertion order with a substring test
# and the first hit wins, so "base64/arch/avx2" has to be listed before its
# prefix "base64/arch/avx".
#
# NOTE(review): flags are selected by path only, not by target architecture --
# confirm this is what non-x86 builds need.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
    "unix": {
        "base64/arch/ssse3": "-mssse3",
        "base64/arch/sse41": "-msse4.1",
        "base64/arch/sse42": "-msse4.2",
        "base64/arch/avx2": "-mavx2",
        "base64/arch/avx": "-mavx",
    },
    # No entries for ssse3/sse41 here, so those directories get no extra flag
    # when compiler_type is "msvc".
    "msvc": {
        "base64/arch/sse42": "/arch:SSE4.2",
        "base64/arch/avx2": "/arch:AVX2",
        "base64/arch/avx": "/arch:AVX",
    }
}
139+
140+
141+
def spawn(self: ccompiler.CCompiler, cmd: Iterable[str], **kwargs: Any) -> None:
    """Run a compiler command, adding per-directory SIMD flags when needed.

    Drop-in replacement for ``CCompiler.spawn``; the original method is
    expected to be available as ``CCompiler.__spawn``. Arguments that look
    like object files are matched against the path fragments in
    ``EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT`` and the flags of the
    matching fragments are added to the command line.

    Args:
        cmd: The command line (program followed by its arguments).
        **kwargs: Passed through unchanged to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type  # type: ignore[attr-defined]
    # Compiler types without an entry (e.g. "mingw32") get no extra flags
    # instead of failing with KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if extra_options:
        obj_extension = self.obj_extension  # type: ignore[attr-defined]
        # Collect first and modify the command afterwards, so the list is not
        # mutated while it is being iterated.
        extra_flags: list[str] = []
        # filenames are closer to the end of command line
        for argument in reversed(new_cmd):
            text = str(argument)
            # Check if argument contains a filename. We must check for all
            # possible extensions; checking for target extension is faster.
            if obj_extension and not text.endswith(obj_extension):
                continue

            # The table uses forward slashes; normalize so that
            # backslash-separated paths match as well.
            normalized = text.replace("\\", "/")
            for path, flag in extra_options.items():
                if path in normalized:
                    # A command naming several matching objects should not
                    # get the same flag more than once.
                    if flag not in extra_flags:
                        extra_flags.append(flag)
                    # path component is found, no need to try other fragments
                    break

        if extra_flags:
            if compiler_type == "bcpp":
                # Borland accepts a source file name at the end,
                # insert the options before it. Assign a list: assigning a
                # bare string to a slice would insert it character by
                # character.
                new_cmd[-1:-1] = extra_flags
            else:
                new_cmd.extend(extra_flags)
    self.__spawn(new_cmd, **kwargs)  # type: ignore[attr-defined]
165+
166+
167+
# Install the wrapper only once. If this ran a second time (module reload, or
# another module installing the same hook), the already patched ``spawn``
# would be saved as ``__spawn`` and the wrapper would end up calling itself
# forever.
if not hasattr(ccompiler.CCompiler, "__spawn"):
    ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn  # type: ignore[attr-defined]
    ccompiler.CCompiler.spawn = spawn  # type: ignore[method-assign]
169+
121170

122171
def get_extension() -> type[Extension]:
123172
# We can work with either setuptools or distutils, and pick setuptools
@@ -661,9 +710,6 @@ def mypycify(
661710
# See https://github.com/mypyc/mypyc/issues/956
662711
"-Wno-cpp",
663712
]
664-
if X86_64:
665-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
666-
cflags.append("-msse4.2")
667713
if log_trace:
668714
cflags.append("-DMYPYC_LOG_TRACE")
669715
if experimental_features:
@@ -692,10 +738,6 @@ def mypycify(
692738
# that we actually get the compilation speed and memory
693739
# use wins that multi-file mode is intended for.
694740
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
695-
if X86_64:
696-
# Enable SIMD extensions. All CPUs released since ~2010 support SSE4.2.
697-
# Also Windows 11 requires SSE4.2 since 24H2.
698-
cflags.append("/arch:SSE4.2")
699741
if log_trace:
700742
cflags.append("/DMYPYC_LOG_TRACE")
701743
if experimental_features:

mypyc/lib-rt/base64/arch/avx/codec.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#include "../ssse3/dec_loop.c"
2525

2626
#if BASE64_AVX_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
2929
# include "../ssse3/enc_translate.c"
3030
# include "../ssse3/enc_reshuffle.c"

mypyc/lib-rt/base64/arch/avx2/codec.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@
2020
# endif
2121
#endif
2222

23-
#include "dec_reshuffle.c"
24-
#include "dec_loop.c"
23+
#include "./dec_reshuffle.c"
24+
#include "./dec_loop.c"
2525

2626
#if BASE64_AVX2_USE_ASM
27-
# include "enc_loop_asm.c"
27+
# include "./enc_loop_asm.c"
2828
#else
29-
# include "enc_translate.c"
30-
# include "enc_reshuffle.c"
31-
# include "enc_loop.c"
29+
# include "./enc_translate.c"
30+
# include "./enc_reshuffle.c"
31+
# include "./enc_loop.c"
3232
#endif
3333

3434
#endif // HAVE_AVX2

mypyc/lib-rt/base64/config.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,19 @@
11
#ifndef BASE64_CONFIG_H
22
#define BASE64_CONFIG_H
33

4-
#define BASE64_WITH_SSSE3 0
4+
#define BASE64_WITH_SSSE3 1
55
#define HAVE_SSSE3 BASE64_WITH_SSSE3
66

7-
#define BASE64_WITH_SSE41 0
7+
#define BASE64_WITH_SSE41 1
88
#define HAVE_SSE41 BASE64_WITH_SSE41
99

10-
#if defined(__x86_64__) || defined(_M_X64)
1110
#define BASE64_WITH_SSE42 1
12-
#else
13-
#define BASE64_WITH_SSE42 0
14-
#endif
15-
1611
#define HAVE_SSE42 BASE64_WITH_SSE42
1712

18-
#define BASE64_WITH_AVX 0
13+
#define BASE64_WITH_AVX 1
1914
#define HAVE_AVX BASE64_WITH_AVX
2015

21-
#define BASE64_WITH_AVX2 0
16+
#define BASE64_WITH_AVX2 1
2217
#define HAVE_AVX2 BASE64_WITH_AVX2
2318

2419
#define BASE64_WITH_AVX512 0

mypyc/lib-rt/setup.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import platform
1010
import subprocess
1111
import sys
12+
from collections.abc import Iterable
1213
from distutils import ccompiler, sysconfig
1314
from typing import Any
1415

@@ -27,6 +28,51 @@
2728

2829
X86_64 = platform.machine() in ("x86_64", "AMD64", "amd64")
2930

31+
# Extra compiler flags, keyed first by the distutils ``compiler_type`` and then
# by a path fragment. ``spawn`` adds the flag when the fragment occurs in an
# object-file argument of the command it is about to run.
#
# Order matters: fragments are tried in insertion order with a substring test
# and the first hit wins, so "base64/arch/avx2" has to be listed before its
# prefix "base64/arch/avx".
#
# NOTE(review): flags are selected by path only, not by target architecture --
# confirm this is what non-x86 builds need.
EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT = {
    "unix": {
        "base64/arch/ssse3": "-mssse3",
        "base64/arch/sse41": "-msse4.1",
        "base64/arch/sse42": "-msse4.2",
        "base64/arch/avx2": "-mavx2",
        "base64/arch/avx": "-mavx",
    },
    # No entries for ssse3/sse41 here, so those directories get no extra flag
    # when compiler_type is "msvc".
    "msvc": {
        "base64/arch/sse42": "/arch:SSE4.2",
        "base64/arch/avx2": "/arch:AVX2",
        "base64/arch/avx": "/arch:AVX",
    }
}
45+
46+
47+
def spawn(self: ccompiler.CCompiler, cmd: Iterable[str], **kwargs: Any) -> None:
    """Run a compiler command, adding per-directory SIMD flags when needed.

    Drop-in replacement for ``CCompiler.spawn``; the original method is
    expected to be available as ``CCompiler.__spawn``. Arguments that look
    like object files are matched against the path fragments in
    ``EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT`` and the flags of the
    matching fragments are added to the command line.

    Args:
        cmd: The command line (program followed by its arguments).
        **kwargs: Passed through unchanged to the original ``spawn``.
    """
    compiler_type: str = self.compiler_type  # type: ignore[attr-defined]
    # Compiler types without an entry (e.g. "mingw32") get no extra flags
    # instead of failing with KeyError.
    extra_options = EXTRA_FLAGS_PER_COMPILER_TYPE_PER_PATH_COMPONENT.get(compiler_type)
    new_cmd = list(cmd)
    if extra_options:
        obj_extension = self.obj_extension  # type: ignore[attr-defined]
        # Collect first and modify the command afterwards, so the list is not
        # mutated while it is being iterated.
        extra_flags: list[str] = []
        # filenames are closer to the end of command line
        for argument in reversed(new_cmd):
            text = str(argument)
            # Check if argument contains a filename. We must check for all
            # possible extensions; checking for target extension is faster.
            if obj_extension and not text.endswith(obj_extension):
                continue

            # The table uses forward slashes; normalize so that
            # backslash-separated paths match as well.
            normalized = text.replace("\\", "/")
            for path, flag in extra_options.items():
                if path in normalized:
                    # A command naming several matching objects should not
                    # get the same flag more than once.
                    if flag not in extra_flags:
                        extra_flags.append(flag)
                    # path component is found, no need to try other fragments
                    break

        if extra_flags:
            if compiler_type == "bcpp":
                # Borland accepts a source file name at the end,
                # insert the options before it. Assign a list: assigning a
                # bare string to a slice would insert it character by
                # character.
                new_cmd[-1:-1] = extra_flags
            else:
                new_cmd.extend(extra_flags)
    self.__spawn(new_cmd, **kwargs)  # type: ignore[attr-defined]
71+
72+
73+
# Install the wrapper only once. If this ran a second time (module reload, or
# another module installing the same hook), the already patched ``spawn``
# would be saved as ``__spawn`` and the wrapper would end up calling itself
# forever.
if not hasattr(ccompiler.CCompiler, "__spawn"):
    ccompiler.CCompiler.__spawn = ccompiler.CCompiler.spawn  # type: ignore[attr-defined]
    ccompiler.CCompiler.spawn = spawn  # type: ignore[method-assign]
75+
3076

3177
class BuildExtGtest(build_ext):
3278
def get_library_names(self) -> list[str]:
@@ -82,12 +128,8 @@ def run(self) -> None:
82128
cflags: list[str] = []
83129
if compiler.compiler_type == "unix":
84130
cflags += ["-O3"]
85-
if X86_64:
86-
cflags.append("-msse4.2") # Enable SIMD (see also mypyc/build.py)
87131
elif compiler.compiler_type == "msvc":
88132
cflags += ["/O2"]
89-
if X86_64:
90-
cflags.append("/arch:SSE4.2") # Enable SIMD (see also mypyc/build.py)
91133

92134
setup(
93135
ext_modules=[

0 commit comments

Comments
 (0)