Skip to content

Commit 87a5730

Browse files
Merge pull request #574 from SKaiNET-developers/feature/native-cpu-multiarch
feat(native-cpu): cross-arch CI matrix + MSVC/Clang portability (PR 4 of 5)
2 parents 33a576c + 5b9c0f5 commit 87a5730

4 files changed

Lines changed: 142 additions & 5 deletions

File tree

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
name: Native CPU multi-arch build
2+
3+
# Cross-arch CI for the FFM native kernel provider
4+
# (skainet-backends/skainet-backend-native-cpu). The local Gradle build
5+
# only produces a host-arch .so/.dylib/.dll; this workflow proves the
6+
# CMake + Kotlin pipeline works on every supported host so consumers
7+
# on Apple Silicon, ARM Linux, and Windows aren't silently broken when
8+
# they pull a published JAR built on x86_64 Linux.
9+
#
10+
# Each matrix job runs the native module's jvmTest end-to-end (CMake
11+
# configure + build + bundle into JAR resources + parity tests via
12+
# FFM downcall). The built shared library is uploaded as an artifact
13+
# so a later "fat-JAR" aggregation step (deferred to a follow-up PR)
14+
# can stage all four arches into one publishable artifact.
15+
16+
on:
17+
push:
18+
branches: [main, develop]
19+
paths:
20+
- 'skainet-backends/skainet-backend-native-cpu/**'
21+
- 'skainet-backends/skainet-backend-api/src/jvmMain/kotlin/sk/ainet/backend/api/kernel/**'
22+
- '.github/workflows/native-cpu-multiarch.yml'
23+
pull_request:
24+
paths:
25+
- 'skainet-backends/skainet-backend-native-cpu/**'
26+
- 'skainet-backends/skainet-backend-api/src/jvmMain/kotlin/sk/ainet/backend/api/kernel/**'
27+
- '.github/workflows/native-cpu-multiarch.yml'
28+
29+
concurrency:
30+
group: ${{ github.workflow }}-${{ github.ref }}
31+
cancel-in-progress: true
32+
33+
jobs:
34+
native-build-test:
35+
name: ${{ matrix.arch_label }}
36+
strategy:
37+
fail-fast: false
38+
matrix:
39+
include:
40+
- os: ubuntu-latest
41+
arch_label: linux-x86_64
42+
lib_name: libskainet_kernels.so
43+
- os: ubuntu-24.04-arm
44+
arch_label: linux-arm64
45+
lib_name: libskainet_kernels.so
46+
- os: macos-14
47+
arch_label: macos-arm64
48+
lib_name: libskainet_kernels.dylib
49+
- os: windows-latest
50+
arch_label: windows-x86_64
51+
lib_name: skainet_kernels.dll
52+
runs-on: ${{ matrix.os }}
53+
timeout-minutes: 30
54+
55+
steps:
56+
- name: Checkout
57+
uses: actions/checkout@v6
58+
59+
- name: Copy CI gradle.properties (Unix)
60+
if: runner.os != 'Windows'
61+
run: |
62+
mkdir -p ~/.gradle
63+
cp .github/ci-gradle.properties ~/.gradle/gradle.properties
64+
65+
- name: Copy CI gradle.properties (Windows)
66+
if: runner.os == 'Windows'
67+
shell: pwsh
68+
run: |
69+
New-Item -ItemType Directory -Force -Path "$HOME\.gradle" | Out-Null
70+
Copy-Item .github\ci-gradle.properties "$HOME\.gradle\gradle.properties"
71+
72+
- name: Set up JDK 25
73+
uses: actions/setup-java@v5
74+
with:
75+
distribution: 'zulu'
76+
java-version: 25
77+
78+
- name: Verify cmake
79+
run: cmake --version
80+
81+
- name: Build + test native module (Unix)
82+
if: runner.os != 'Windows'
83+
env:
84+
GRADLE_OPTS: -Dorg.gradle.jvmargs=-Xmx4g -Dfile.encoding=UTF-8
85+
run: |
86+
./gradlew --no-daemon --stacktrace \
87+
:skainet-backends:skainet-backend-native-cpu:jvmTest \
88+
:skainet-backends:skainet-backend-native-cpu:jvmJar
89+
90+
- name: Build + test native module (Windows)
91+
if: runner.os == 'Windows'
92+
shell: pwsh
93+
env:
94+
GRADLE_OPTS: -Dorg.gradle.jvmargs=-Xmx4g -Dfile.encoding=UTF-8
95+
run: |
96+
.\gradlew.bat --no-daemon --stacktrace `
97+
:skainet-backends:skainet-backend-native-cpu:jvmTest `
98+
:skainet-backends:skainet-backend-native-cpu:jvmJar
99+
100+
- name: Upload native library
101+
if: success()
102+
uses: actions/upload-artifact@v7
103+
with:
104+
name: libskainet_kernels-${{ matrix.arch_label }}
105+
path: skainet-backends/skainet-backend-native-cpu/build/native/resources/native/${{ matrix.arch_label }}/${{ matrix.lib_name }}
106+
if-no-files-found: error
107+
retention-days: 14
108+
109+
- name: Upload test reports
110+
if: always()
111+
uses: actions/upload-artifact@v7
112+
with:
113+
name: native-cpu-test-reports-${{ matrix.arch_label }}
114+
path: |
115+
skainet-backends/skainet-backend-native-cpu/build/reports/tests/**
116+
skainet-backends/skainet-backend-native-cpu/build/test-results/**
117+
retention-days: 14

skainet-backends/skainet-backend-native-cpu/native/CMakeLists.txt

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ if(WIN32)
2424
set_target_properties(skainet_kernels PROPERTIES PREFIX "")
2525
endif()
2626

27-
# Hide non-exported symbols on ELF / Mach-O for a smaller surface area
28-
# and let the compiler auto-vectorize the Q4_K hot loop.
27+
# Per-compiler tuning. The Q4_K kernel hot loop is straight-line FP
28+
# arithmetic that auto-vectorizes cleanly under aggressive optimization
29+
# (AVX2 on x86_64, NEON on ARM64). Visibility is also handled here on
30+
# ELF / Mach-O; on Windows the SKAINET_API macro adds dllexport so we
31+
# don't need any visibility flags (DLL symbols are private unless exported).
2932
if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU")
3033
target_compile_options(skainet_kernels PRIVATE
3134
-fvisibility=hidden
@@ -35,4 +38,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "Clang|GNU")
3538
-funroll-loops
3639
)
3740
set_target_properties(skainet_kernels PROPERTIES C_VISIBILITY_PRESET hidden)
41+
elseif(CMAKE_C_COMPILER_ID MATCHES "MSVC")
42+
# MSVC tuning for the kernel library.
#   /O2      - optimize for speed. Applied to every configuration except
#              Debug: CMake's default MSVC Debug flags include /RTC1, and
#              cl.exe rejects /O2 together with /RTC1 (error D8016), which
#              would break Debug builds (the default configuration for
#              Visual Studio generators).
#   /fp:fast - relaxed floating-point semantics so the FP hot loop can be
#              reordered / vectorized.
#   /W3      - baseline warning level.
target_compile_options(skainet_kernels PRIVATE
    "$<$<NOT:$<CONFIG:Debug>>:/O2>"
    /fp:fast
    /W3
)
3847
endif()

skainet-backends/skainet-backend-native-cpu/native/include/skainet_kernels.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,17 @@
1111
# define SKAINET_API
1212
#endif
1313

14+
/* Portable "restrict" qualifier: GNU/Clang accept __restrict__,
15+
* MSVC accepts __restrict, and the C99 keyword `restrict` is
16+
* unavailable in C++ and in MSVC's default (pre-C11) C mode. */
17+
#if defined(__GNUC__) || defined(__clang__)
18+
# define SKAINET_RESTRICT __restrict__
19+
#elif defined(_MSC_VER)
20+
# define SKAINET_RESTRICT __restrict
21+
#else
22+
# define SKAINET_RESTRICT
23+
#endif
24+
1425
#ifdef __cplusplus
1526
extern "C" {
1627
#endif

skainet-backends/skainet-backend-native-cpu/native/src/q4k_matmul.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,13 @@ static inline void skainet_q4k_decode_scales(
7676
* codeSum/inputSum accumulators on AVX2/NEON.
7777
*/
7878
SKAINET_API void skainet_q4k_matmul(
79-
const float* __restrict__ input,
79+
const float* SKAINET_RESTRICT input,
8080
int32_t input_offset,
81-
const uint8_t* __restrict__ weight,
81+
const uint8_t* SKAINET_RESTRICT weight,
8282
int32_t weight_byte_offset,
8383
int32_t input_dim,
8484
int32_t output_dim,
85-
float* __restrict__ output,
85+
float* SKAINET_RESTRICT output,
8686
int32_t output_offset
8787
) {
8888
if (output_dim <= 0 || input_dim <= 0) return;

0 commit comments

Comments
 (0)