Skip to content

Commit 9862d57

Browse files
committed
enable MSVC cl.exe build on Windows ARM64
1 parent 23282a2 commit 9862d57

File tree

4 files changed

+1119
-47
lines changed

4 files changed

+1119
-47
lines changed

.github/workflows/windows.yml

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ jobs:
8585
run: ./_build/test/test_xsimd
8686

8787
build-windows-arm64:
88-
name: 'MSVC arm64'
88+
name: 'MSVC ARM64'
8989
defaults:
9090
run:
9191
shell: bash {0}
@@ -94,7 +94,7 @@ jobs:
9494
- name: Setup compiler
9595
uses: ilammy/msvc-dev-cmd@v1
9696
with:
97-
arch: amd64
97+
arch: arm64
9898
- name: Setup Ninja
9999
run: |
100100
python3 -m pip install --upgrade pip setuptools wheel
@@ -107,3 +107,73 @@ jobs:
107107
run: cmake --build _build
108108
- name: Testing xsimd
109109
run: ./_build/test/test_xsimd
110+
111+
build-windows-arm64-msys2-clang:
112+
name: 'MSYS2 CLANG ARM64'
113+
runs-on: windows-11-arm
114+
defaults:
115+
run:
116+
shell: msys2 {0}
117+
steps:
118+
- name: Setup MSYS2 with Clang (ARM64)
119+
uses: msys2/setup-msys2@v2
120+
with:
121+
msystem: CLANGARM64
122+
update: true
123+
path-type: minimal
124+
pacboy: >-
125+
cc:p
126+
cmake:p
127+
ninja:p
128+
- name: Checkout xsimd
129+
uses: actions/checkout@v4
130+
- name: Configure
131+
run: |
132+
cmake -B _build \
133+
-DBUILD_TESTS=ON \
134+
-DDOWNLOAD_DOCTEST=ON \
135+
-DBUILD_BENCHMARK=ON \
136+
-DBUILD_EXAMPLES=ON \
137+
-DCMAKE_BUILD_TYPE=Release \
138+
-G Ninja
139+
- name: Build
140+
run: cmake --build _build
141+
- name: Testing xsimd
142+
run: ./_build/test/test_xsimd
143+
144+
build-windows-arm64-clang:
145+
name: 'LLVM CLANG ARM64'
146+
defaults:
147+
run:
148+
shell: bash {0}
149+
runs-on: windows-11-arm
150+
steps:
151+
- name: Install LLVM/Clang for Windows ARM64
152+
shell: pwsh
153+
run: |
154+
winget install --id LLVM.LLVM --accept-source-agreements --accept-package-agreements --silent
155+
# Add LLVM bin directory to PATH for subsequent steps
156+
echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
157+
- name: Setup Ninja
158+
run: |
159+
python3 -m pip install --upgrade pip setuptools wheel
160+
python3 -m pip install ninja
161+
- name: Checkout xsimd
162+
uses: actions/checkout@v4
163+
- name: Verify clang-cl version
164+
run: clang-cl --version
165+
- name: Configure
166+
run: |
167+
cmake -B _build \
168+
-DCMAKE_C_COMPILER=clang-cl \
169+
-DCMAKE_CXX_COMPILER=clang-cl \
170+
-DBUILD_TESTS=ON \
171+
-DDOWNLOAD_DOCTEST=ON \
172+
-DBUILD_BENCHMARK=ON \
173+
-DBUILD_EXAMPLES=ON \
174+
-DCMAKE_BUILD_TYPE=Release \
175+
-G Ninja
176+
- name: Build
177+
run: cmake --build _build
178+
- name: Testing xsimd
179+
run: ./_build/test/test_xsimd

include/xsimd/arch/common/xsimd_common_memory.hpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <algorithm>
1616
#include <array>
1717
#include <complex>
18+
#include <cstdio>
1819
#include <stdexcept>
1920

2021
#include "../../types/xsimd_batch_constant.hpp"
@@ -71,6 +72,9 @@ namespace xsimd
7172
for (size_t i = 0; i < sizeof...(Is); ++i)
7273
if ((bitmask >> i) & 1u)
7374
std::swap(mask_buffer[inserted++], mask_buffer[i]);
75+
// Fill the remaining positions with the last compressed index (or entry 0 when no lane is selected) to avoid undefined behavior
76+
for (size_t i = inserted; i < sizeof...(Is); ++i)
77+
mask_buffer[i] = mask_buffer[inserted > 0 ? inserted - 1 : 0];
7478
return batch<IT, A>::load_aligned(&mask_buffer[0]);
7579
}
7680
}
@@ -85,7 +89,12 @@ namespace xsimd
8589
auto bitmask = mask.mask();
8690
auto z = select(mask, x, batch<T, A>((T)0));
8791
auto compress_mask = detail::create_compress_swizzle_mask<IT, A>(bitmask, std::make_index_sequence<size>());
88-
return swizzle(z, compress_mask);
92+
alignas(A::alignment()) IT mask_out[size];
93+
compress_mask.store_aligned(&mask_out[0]);
94+
alignas(A::alignment()) T z_out[size];
95+
z.store_aligned(&z_out[0]);
96+
auto res = swizzle(z, compress_mask);
97+
return res;
8998
}
9099

91100
// expand

0 commit comments

Comments
 (0)