Skip to content

Commit 19a7550

Browse files
committed
Enable SSE4.2 selectively
1 parent b5af1bc commit 19a7550

2 files changed

Lines changed: 11 additions & 2 deletions

File tree

mypyc/build.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from mypy.util import write_junit_xml
3737
from mypyc.annotate import generate_annotated_html
3838
from mypyc.codegen import emitmodule
39-
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, shared_lib_name
39+
from mypyc.common import IS_FREE_THREADED, RUNTIME_C_FILES, X86_64, shared_lib_name
4040
from mypyc.errors import Errors
4141
from mypyc.ir.pprint import format_modules
4242
from mypyc.namegen import exported_name
@@ -645,7 +645,6 @@ def mypycify(
645645
cflags += [
646646
f"-O{opt_level}",
647647
f"-g{debug_level}",
648-
"-msse4.2", # TODO
649648
"-Werror",
650649
"-Wno-unused-function",
651650
"-Wno-unused-label",
@@ -659,6 +658,9 @@ def mypycify(
659658
# See https://github.com/mypyc/mypyc/issues/956
660659
"-Wno-cpp",
661660
]
661+
if X86_64:
662+
# Enable SIMD extensions. Nearly all x86-64 CPUs since ~2010 support SSE4.2.
663+
cflags.append("-msse4.2")
662664
if log_trace:
663665
cflags.append("-DMYPYC_LOG_TRACE")
664666
if experimental_features:
@@ -687,6 +689,10 @@ def mypycify(
687689
# that we actually get the compilation speed and memory
688690
# use wins that multi-file mode is intended for.
689691
cflags += ["/GL-", "/wd9025"] # warning about overriding /GL
692+
if X86_64:
693+
# Enable SIMD extensions. Nearly all x86-64 CPUs since ~2010 support SSE4.2.
694+
# Also, Windows 11 has required SSE4.2 since version 24H2.
695+
cflags.append("/arch:SSE4.2")
690696
if log_trace:
691697
cflags.append("/DMYPYC_LOG_TRACE")
692698
if experimental_features:

mypyc/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import platform
34
import sys
45
import sysconfig
56
from typing import Any, Final
@@ -44,6 +45,8 @@
4445

4546
IS_32_BIT_PLATFORM: Final = int(SIZEOF_SIZE_T) == 4
4647

48+
# True when the interpreter reports a 64-bit x86 machine. platform.machine()
# spells this "x86_64" on Linux/macOS but "AMD64" on Windows (and "amd64" on
# some BSDs), so normalize the case and accept both spellings; otherwise the
# flag would always be False on Windows.
X86_64: Final = platform.machine().lower() in ("x86_64", "amd64")
49+
4750
PLATFORM_SIZE = 4 if IS_32_BIT_PLATFORM else 8
4851

4952
# Maximum value for a short tagged integer.

0 commit comments

Comments
 (0)