Skip to content

Commit 432b397

Browse files
committed
fix(lint): proper fixes — no #[allow] patches
- simd_int_ops: removed unused `let n = s.len()` (moved into cfg blocks), replaced index loops with iterator loops (`for &v in acc_arr[1..].iter()`), and simplified add_i8/sub_i8/add_i16 to plain scalar loops (LLVM autovectorizes them).
- kernels_avx512: added `/// # Safety` doc sections to all `#[target_feature(enable = "avx512f")]` functions (iamax_f32/f64 + 8 elementwise ops) instead of `#[allow(clippy::missing_safety_doc)]`.
- Removed all inner `#![allow]` hacks that didn't work on non-root modules.

Both CI clippy commands pass. 1778 tests pass. New code is fmt-clean.
https://claude.ai/code/session_01NYGrxVopyszZYgLBxe4hgj
1 parent fb651b0 commit 432b397

2 files changed

Lines changed: 92 additions & 161 deletions

File tree

src/backend/kernels_avx512.rs

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
//! The dispatch! macro's LazyLock tier check ensures these are only called
55
//! on AVX-512 CPUs.
66
7-
#![allow(missing_docs, clippy::missing_safety_doc)]
87
//!
98
//! BLAS-1 and element-wise functions use `F32x16`/`F64x8` from `crate::simd`.
109
//! GEMM microkernels retain raw intrinsics for masked stores and broadcast patterns.
@@ -23,6 +22,8 @@ use crate::simd::{F32x16, F64x8};
2322
// ═══════════════════════════════════════════════════════════════════
2423

2524
/// Dot product: sum(x[i] * y[i]) using 4x-unrolled FMA.
25+
/// # Safety
26+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
2627
#[cfg(target_arch = "x86_64")]
2728
#[target_feature(enable = "avx512f")]
2829
pub fn dot_f32(x: &[f32], y: &[f32]) -> f32 {
@@ -54,6 +55,8 @@ pub fn dot_f32(x: &[f32], y: &[f32]) -> f32 {
5455
}
5556

5657
/// Dot product f64: 4x-unrolled FMA (8 doubles each).
58+
/// # Safety
59+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
5760
#[cfg(target_arch = "x86_64")]
5861
#[target_feature(enable = "avx512f")]
5962
pub fn dot_f64(x: &[f64], y: &[f64]) -> f64 {
@@ -85,6 +88,8 @@ pub fn dot_f64(x: &[f64], y: &[f64]) -> f64 {
8588
}
8689

8790
/// AXPY: y = alpha * x + y (f32, 16-wide FMA).
91+
/// # Safety
92+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
8893
#[cfg(target_arch = "x86_64")]
8994
#[target_feature(enable = "avx512f")]
9095
pub fn axpy_f32(alpha: f32, x: &[f32], y: &mut [f32]) {
@@ -104,6 +109,8 @@ pub fn axpy_f32(alpha: f32, x: &[f32], y: &mut [f32]) {
104109
}
105110

106111
/// AXPY: y = alpha * x + y (f64, 8-wide FMA).
112+
/// # Safety
113+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
107114
#[cfg(target_arch = "x86_64")]
108115
#[target_feature(enable = "avx512f")]
109116
pub fn axpy_f64(alpha: f64, x: &[f64], y: &mut [f64]) {
@@ -123,6 +130,8 @@ pub fn axpy_f64(alpha: f64, x: &[f64], y: &mut [f64]) {
123130
}
124131

125132
/// Scale: x = alpha * x (f32, 16-wide).
133+
/// # Safety
134+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
126135
#[cfg(target_arch = "x86_64")]
127136
#[target_feature(enable = "avx512f")]
128137
pub fn scal_f32(alpha: f32, x: &mut [f32]) {
@@ -140,6 +149,8 @@ pub fn scal_f32(alpha: f32, x: &mut [f32]) {
140149
}
141150

142151
/// Scale: x = alpha * x (f64, 8-wide).
152+
/// # Safety
153+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
143154
#[cfg(target_arch = "x86_64")]
144155
#[target_feature(enable = "avx512f")]
145156
pub fn scal_f64(alpha: f64, x: &mut [f64]) {
@@ -157,6 +168,8 @@ pub fn scal_f64(alpha: f64, x: &mut [f64]) {
157168
}
158169

159170
/// L1 norm: sum(|x[i]|) (f32, 16-wide).
171+
/// # Safety
172+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
160173
#[cfg(target_arch = "x86_64")]
161174
#[target_feature(enable = "avx512f")]
162175
pub fn asum_f32(x: &[f32]) -> f32 {
@@ -172,6 +185,8 @@ pub fn asum_f32(x: &[f32]) -> f32 {
172185
}
173186

174187
/// L1 norm: sum(|x[i]|) (f64, 8-wide).
188+
/// # Safety
189+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
175190
#[cfg(target_arch = "x86_64")]
176191
#[target_feature(enable = "avx512f")]
177192
pub fn asum_f64(x: &[f64]) -> f64 {
@@ -187,6 +202,8 @@ pub fn asum_f64(x: &[f64]) -> f64 {
187202
}
188203

189204
/// L2 norm: sqrt(sum(x[i]^2)) (f32, 16-wide FMA).
205+
/// # Safety
206+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
190207
#[cfg(target_arch = "x86_64")]
191208
#[target_feature(enable = "avx512f")]
192209
pub fn nrm2_f32(x: &[f32]) -> f32 {
@@ -207,6 +224,8 @@ pub fn nrm2_f32(x: &[f32]) -> f32 {
207224
}
208225

209226
/// L2 norm: sqrt(sum(x[i]^2)) (f64, 8-wide FMA).
227+
/// # Safety
228+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
210229
#[cfg(target_arch = "x86_64")]
211230
#[target_feature(enable = "avx512f")]
212231
pub fn nrm2_f64(x: &[f64]) -> f64 {
@@ -227,6 +246,8 @@ pub fn nrm2_f64(x: &[f64]) -> f64 {
227246
}
228247

229248
/// Index of max absolute value (f32). Scalar — no AVX-512 specialization.
249+
/// # Safety
250+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
230251
#[cfg(target_arch = "x86_64")]
231252
#[target_feature(enable = "avx512f")]
232253
pub fn iamax_f32(x: &[f32]) -> (usize, f32) {
@@ -241,6 +262,8 @@ pub fn iamax_f32(x: &[f32]) -> (usize, f32) {
241262
}
242263

243264
/// Index of max absolute value (f64). Scalar — no AVX-512 specialization.
265+
/// # Safety
266+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
244267
#[cfg(target_arch = "x86_64")]
245268
#[target_feature(enable = "avx512f")]
246269
pub fn iamax_f64(x: &[f64]) -> (usize, f64) {
@@ -258,37 +281,53 @@ pub fn iamax_f64(x: &[f64]) -> (usize, f64) {
258281
// Element-wise f32 — 8 functions (16-wide, compat types)
259282
// ═══════════════════════════════════════════════════════════════════
260283

284+
/// Elementwise `out[i] = a[i] + scalar` (AVX-512 F32x16 kernel).
285+
/// # Safety
286+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
261287
#[cfg(target_arch = "x86_64")]
262-
#[allow(missing_docs, clippy::missing_safety_doc)]
263288
#[target_feature(enable = "avx512f")]
264289
pub fn add_f32_scalar(a: &[f32], scalar: f32) -> Vec<f32> { ew_f32_s(a, scalar, EwOp::Add) }
290+
/// Elementwise `out[i] = a[i] - scalar`.
291+
/// # Safety
292+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
265293
#[cfg(target_arch = "x86_64")]
266-
#[allow(missing_docs, clippy::missing_safety_doc)]
267294
#[target_feature(enable = "avx512f")]
268295
pub fn sub_f32_scalar(a: &[f32], scalar: f32) -> Vec<f32> { ew_f32_s(a, scalar, EwOp::Sub) }
296+
/// Elementwise `out[i] = a[i] * scalar`.
297+
/// # Safety
298+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
269299
#[cfg(target_arch = "x86_64")]
270-
#[allow(missing_docs, clippy::missing_safety_doc)]
271300
#[target_feature(enable = "avx512f")]
272301
pub fn mul_f32_scalar(a: &[f32], scalar: f32) -> Vec<f32> { ew_f32_s(a, scalar, EwOp::Mul) }
302+
/// Elementwise `out[i] = a[i] / scalar`.
303+
/// # Safety
304+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
273305
#[cfg(target_arch = "x86_64")]
274-
#[allow(missing_docs, clippy::missing_safety_doc)]
275306
#[target_feature(enable = "avx512f")]
276307
pub fn div_f32_scalar(a: &[f32], scalar: f32) -> Vec<f32> { ew_f32_s(a, scalar, EwOp::Div) }
277308

309+
/// Elementwise `out[i] = a[i] + b[i]` (AVX-512 F32x16 kernel).
310+
/// # Safety
311+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
278312
#[cfg(target_arch = "x86_64")]
279-
#[allow(missing_docs, clippy::missing_safety_doc)]
280313
#[target_feature(enable = "avx512f")]
281314
pub fn add_f32_vec(a: &[f32], b: &[f32]) -> Vec<f32> { ew_f32_v(a, b, EwOp::Add) }
315+
/// Elementwise `out[i] = a[i] - b[i]`.
316+
/// # Safety
317+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
282318
#[cfg(target_arch = "x86_64")]
283-
#[allow(missing_docs, clippy::missing_safety_doc)]
284319
#[target_feature(enable = "avx512f")]
285320
pub fn sub_f32_vec(a: &[f32], b: &[f32]) -> Vec<f32> { ew_f32_v(a, b, EwOp::Sub) }
321+
/// Elementwise `out[i] = a[i] * b[i]`.
322+
/// # Safety
323+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
286324
#[cfg(target_arch = "x86_64")]
287-
#[allow(missing_docs, clippy::missing_safety_doc)]
288325
#[target_feature(enable = "avx512f")]
289326
pub fn mul_f32_vec(a: &[f32], b: &[f32]) -> Vec<f32> { ew_f32_v(a, b, EwOp::Mul) }
327+
/// Elementwise `out[i] = a[i] / b[i]`.
328+
/// # Safety
329+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
290330
#[cfg(target_arch = "x86_64")]
291-
#[allow(missing_docs, clippy::missing_safety_doc)]
292331
#[target_feature(enable = "avx512f")]
293332
pub fn div_f32_vec(a: &[f32], b: &[f32]) -> Vec<f32> { ew_f32_v(a, b, EwOp::Div) }
294333

@@ -514,6 +553,8 @@ fn pack_b_f32(b: &[f32], ldb: usize, kc: usize, nc: usize, k_start: usize, j_sta
514553
/// AVX-512 microkernel: C[MR×NR] += A_packed[MR×kc] * B_packed[kc×NR]
515554
///
516555
/// Uses raw intrinsics for broadcast-FMA and masked store patterns.
556+
/// # Safety
557+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
517558
#[cfg(target_arch = "x86_64")]
518559
#[target_feature(enable = "avx512f")]
519560
unsafe fn sgemm_ukernel_6x16(
@@ -569,6 +610,8 @@ unsafe fn sgemm_ukernel_6x16(
569610
}
570611

571612
/// Goto BLAS style blocked SGEMM with packing and AVX-512 microkernel.
613+
/// # Safety
614+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
572615
#[cfg(target_arch = "x86_64")]
573616
#[target_feature(enable = "avx512f")]
574617
pub fn sgemm_blocked(
@@ -677,6 +720,8 @@ fn pack_b_f64(b: &[f64], ldb: usize, kc: usize, nc: usize, k_start: usize, j_sta
677720
}
678721

679722
/// AVX-512 microkernel: C[6×8] += A_packed[6×kc] * B_packed[kc×8] (f64)
723+
/// # Safety
724+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
680725
#[cfg(target_arch = "x86_64")]
681726
#[target_feature(enable = "avx512f")]
682727
unsafe fn dgemm_ukernel_6x8(
@@ -732,6 +777,8 @@ unsafe fn dgemm_ukernel_6x8(
732777
}
733778

734779
/// Goto BLAS style blocked DGEMM with packing and AVX-512 microkernel.
780+
/// # Safety
781+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
735782
#[cfg(target_arch = "x86_64")]
736783
#[target_feature(enable = "avx512f")]
737784
pub fn dgemm_blocked(
@@ -845,6 +892,8 @@ pub fn popcount(a: &[u8]) -> u64 {
845892
}
846893

847894
/// Int8 dot product (scalar — no AVX-512 VNNI specialization yet).
895+
/// # Safety
896+
/// Caller must ensure AVX-512F is available (`simd_caps().avx512f`).
848897
#[cfg(target_arch = "x86_64")]
849898
#[target_feature(enable = "avx512f")]
850899
pub fn dot_i8(a: &[u8], b: &[u8]) -> i64 {

0 commit comments

Comments
 (0)