Skip to content

Commit e30e15d

Browse files
committed
fix(simd_runtime): expose cpu_tier_for_cpu for cross-arch introspection (codex P2)
Codex flagged on PR #187 that `cpu_ops_for_cpu` is cfg-gated through `cpu_ops_for_tier`, so cross-arch lookups silently return `None`. For example, `cpu_ops_for_cpu("apple-m2")` on an x86_64 build maps "apple-m2" → "neon" via `cpu_to_tier`, but the `"neon"` arm of `cpu_ops_for_tier` is compiled out on that target because `CPU_OPS_NEON` is `cfg(target_arch = "aarch64")`. This broke the documented "what would this CPU pick?" introspection use case, which is supposed to work for deployment-planning tools and cross-target reports regardless of the build host. Fix: promote the previously-private `cpu_to_tier` to `pub fn cpu_tier_for_cpu`. It returns `Option<&'static str>` and is cfg-free, so `cpu_tier_for_cpu("apple-m2")` reliably returns `Some("neon")` on every build target. `cpu_ops_for_cpu` keeps its current semantics (current-arch only), but its docstring now says so explicitly and points cross-arch callers at `cpu_tier_for_cpu`. Returning a phantom `CpuOps` with scalar fn ptrs for cross-arch lookups would lie about behavior — it is better to return `None` and force callers to use the honest tier-name surface. Added regression test `cpu_tier_for_cpu_is_cross_arch`, which asserts that the cross-arch CPU names resolve on every build host.
1 parent 77955ed commit e30e15d

2 files changed

Lines changed: 63 additions & 33 deletions

File tree

src/simd_runtime/cpu_ops.rs

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -243,29 +243,42 @@ pub fn cpu_ops_for_tier(name: &str) -> Option<&'static CpuOps> {
243243
}
244244
}
245245

246-
/// Lookup by GCC CPU codename (e.g. `"sapphirerapids"`,
247-
/// `"neoverse-v2"`, `"apple-m2"`). Maps the canonical GCC name to the
248-
/// dispatch tier the CPU lands in, sourced from the scrape recorded
249-
/// in the matrix doc § M.
246+
/// Look up a [`CpuOps`] by GCC CPU codename (e.g. `"sapphirerapids"`,
247+
/// `"neoverse-v2"`, `"apple-m2"`) on the **current build host**.
250248
///
251-
/// Used for "what would this CPU pick?" introspection without
252-
/// touching CPUID on the running host — e.g. cross-compilation
253-
/// reports, deployment-planning tools, integration tests that want
254-
/// to assert tier selection for a named target without running on
255-
/// that silicon.
249+
/// Returns `Some(&'static CpuOps)` only when the named CPU's tier is
250+
/// reachable from the current `target_arch` (e.g. an x86_64 CPU name
251+
/// on an x86_64 build, an aarch64 CPU name on an aarch64 build).
252+
/// Cross-arch lookups — e.g. `cpu_ops_for_cpu("apple-m2")` on an
253+
/// x86_64 build — return `None` because the underlying NEON kernel
254+
/// fn pointers are compiled out and there is no honest `CpuOps` to
255+
/// return.
256+
///
257+
/// For pure introspection ("what tier would this CPU pick?", with no
258+
/// intent to call kernels), use [`cpu_tier_for_cpu`] instead — it is
259+
/// `cfg`-free and works on any build host.
256260
///
257261
/// Returns `None` for unknown CPU names. Only modern (V8.2-A+ on
258262
/// aarch64, AVX2+FMA or newer on x86_64) names are mapped — older
259263
/// silicon falls through to `cpu_ops_for_tier("scalar")` by
260264
/// convention if you really need it.
261265
pub fn cpu_ops_for_cpu(name: &str) -> Option<&'static CpuOps> {
262-
cpu_ops_for_tier(cpu_to_tier(name)?)
266+
cpu_ops_for_tier(cpu_tier_for_cpu(name)?)
263267
}
264268

265-
/// Maps a GCC CPU codename to the [`CpuOps`] tier it lands in. Data
266-
/// from the scrape recorded in `.claude/knowledge/agnostic-surface-cpu-matrix.md`
267-
/// § M (aarch64) plus the GCC i386 cpu definitions for x86_64.
268-
fn cpu_to_tier(cpu: &str) -> Option<&'static str> {
269+
/// Look up the dispatch tier name (e.g. `"amx_int8"`, `"avx512vnni"`,
270+
/// `"neon"`) for a GCC CPU codename. Data from the scrape recorded
271+
/// in `.claude/knowledge/agnostic-surface-cpu-matrix.md` § M
272+
/// (aarch64) plus the GCC i386 cpu definitions for x86_64.
273+
///
274+
/// `cfg`-free — works on any build host regardless of `target_arch`.
275+
/// This is the right entry point for cross-target introspection:
276+
/// deployment-planning tools, cross-compilation reports, integration
277+
/// tests that assert "apple-m2 lands at the neon tier" without
278+
/// actually building for that silicon.
279+
///
280+
/// Returns `None` for unknown CPU names.
281+
pub fn cpu_tier_for_cpu(cpu: &str) -> Option<&'static str> {
269282
Some(match cpu {
270283
// x86_64 — AMX-INT8 hosts
271284
"sapphirerapids" | "graniterapids" | "graniterapids-d" | "emeraldrapids" => "amx_int8",
@@ -336,24 +349,41 @@ mod tests {
336349
}
337350

338351
#[test]
339-
fn cpu_ops_for_cpu_data_driven_lookup() {
340-
// Spot-check the GCC-scraped mapping (matrix doc § M).
341-
assert_eq!(cpu_to_tier("sapphirerapids"), Some("amx_int8"));
342-
assert_eq!(cpu_to_tier("graniterapids"), Some("amx_int8"));
343-
assert_eq!(cpu_to_tier("cascadelake"), Some("avx512vnni"));
344-
assert_eq!(cpu_to_tier("znver4"), Some("avx512vnni"));
345-
assert_eq!(cpu_to_tier("znver5"), Some("avx512vnni"));
346-
assert_eq!(cpu_to_tier("alderlake"), Some("avxvnni"));
347-
assert_eq!(cpu_to_tier("arrowlake"), Some("avxvnni"));
348-
assert_eq!(cpu_to_tier("haswell"), Some("avx2_fma"));
349-
assert_eq!(cpu_to_tier("znver3"), Some("avx2_fma"));
350-
351-
assert_eq!(cpu_to_tier("apple-m2"), Some("neon"));
352-
assert_eq!(cpu_to_tier("neoverse-v2"), Some("neon"));
353-
assert_eq!(cpu_to_tier("oryon-1"), Some("neon"));
354-
assert_eq!(cpu_to_tier("grace"), Some("neon"));
355-
356-
assert_eq!(cpu_to_tier("totally-fake-cpu"), None);
352+
fn cpu_tier_for_cpu_data_driven_lookup() {
353+
// Spot-check the GCC-scraped mapping (matrix doc § M). This
354+
// function is cfg-free — every assertion must hold on every
355+
// build host, regardless of target_arch.
356+
assert_eq!(cpu_tier_for_cpu("sapphirerapids"), Some("amx_int8"));
357+
assert_eq!(cpu_tier_for_cpu("graniterapids"), Some("amx_int8"));
358+
assert_eq!(cpu_tier_for_cpu("cascadelake"), Some("avx512vnni"));
359+
assert_eq!(cpu_tier_for_cpu("znver4"), Some("avx512vnni"));
360+
assert_eq!(cpu_tier_for_cpu("znver5"), Some("avx512vnni"));
361+
assert_eq!(cpu_tier_for_cpu("alderlake"), Some("avxvnni"));
362+
assert_eq!(cpu_tier_for_cpu("arrowlake"), Some("avxvnni"));
363+
assert_eq!(cpu_tier_for_cpu("haswell"), Some("avx2_fma"));
364+
assert_eq!(cpu_tier_for_cpu("znver3"), Some("avx2_fma"));
365+
366+
assert_eq!(cpu_tier_for_cpu("apple-m2"), Some("neon"));
367+
assert_eq!(cpu_tier_for_cpu("neoverse-v2"), Some("neon"));
368+
assert_eq!(cpu_tier_for_cpu("oryon-1"), Some("neon"));
369+
assert_eq!(cpu_tier_for_cpu("grace"), Some("neon"));
370+
371+
assert_eq!(cpu_tier_for_cpu("totally-fake-cpu"), None);
372+
}
373+
374+
/// Regression for the cross-arch-introspection bug Codex flagged
375+
/// on PR #187: `cpu_tier_for_cpu` MUST return the same Some-string
376+
/// regardless of the build host. Previously, ARM CPU names like
377+
/// `"apple-m2"` would fall to `None` on an x86_64 build because the
378+
/// only public lookup, `cpu_ops_for_cpu`, piped through the cfg-gated `cpu_ops_for_tier`.
379+
#[test]
380+
fn cpu_tier_for_cpu_is_cross_arch() {
381+
// These four must resolve on EVERY build host (x86_64, aarch64,
382+
// wasm, etc.) — no cfg gating on this surface.
383+
assert_eq!(cpu_tier_for_cpu("apple-m2"), Some("neon"));
384+
assert_eq!(cpu_tier_for_cpu("sapphirerapids"), Some("amx_int8"));
385+
assert_eq!(cpu_tier_for_cpu("neoverse-v2"), Some("neon"));
386+
assert_eq!(cpu_tier_for_cpu("alderlake"), Some("avxvnni"));
357387
}
358388

359389
#[test]

src/simd_runtime/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,6 @@ pub mod vnni_dot;
9292
// consumers can `use crate::simd_runtime::*` and get every op flat.
9393
pub use add_mul::{add_mul_f32, add_mul_f64};
9494
pub use casts::{bf16_to_f32_batch, cast_f16_to_f32_batch, cast_f32_to_f16_batch, f32_to_bf16_batch_rne};
95-
pub use cpu_ops::{cpu_ops, cpu_ops_for_cpu, cpu_ops_for_tier, CpuOps};
95+
pub use cpu_ops::{cpu_ops, cpu_ops_for_cpu, cpu_ops_for_tier, cpu_tier_for_cpu, CpuOps};
9696
pub use matmul::{gemm_u8_i8, matmul_bf16_to_f32, matmul_f32, matmul_i8_to_i32};
9797
pub use vnni_dot::vnni_dot_u8_i8;

0 commit comments

Comments
 (0)