Skip to content

Commit 03b30e5

Browse files
committed
feat(examples): simd_profile_probe — hardware verification binary
Supports step 1 of the TEST-promotion checklist from .claude/knowledge/td-simd-cpu-dispatch-matrix.md § "TEST verification checklist": "Boot the binary on the silicon and confirm simd_profile() returns the expected variant." The probe prints: - Resolved SimdProfile variant + arch/family flags - Compile-time pinning status (and pinned variant if active) - Every CPUID-derived SimdCaps bit, ticked/unticked - ARM heuristic profile when running on aarch64 - Active compile-time target features (avx512f / avx2) - Per-variant matrix-doc cell summary (terse — matrix is source of truth) - Runs the same pinning_consistency invariant the unit test checks, so a probe deployed on real silicon flags regressions in the cfg cascade. First-hardware results on the build host (Sapphire Rapids): - simd_profile() resolves to SapphireRapids ✓ - amx_tile + amx_bf16 + avx512fp16 all set ✓ - amx_fp16 unset (correctly NOT promoting to GraniteRapids) ✓ - GNR-before-SPR ordering invariant verified end-to-end This is the first end-to-end pass of the e40f3a3 SimdProfile detect chain plus the 5a3a663 cpu-* pinning machinery on real silicon — confirms the dispatch axis is functional, not just doc-checked. Smoke-tested with --features cpu-zen4: probe correctly reports ACTIVE pinning and Zen4Avx512 as the resolved variant, even on SPR silicon (pinning intentionally overrides hardware detection).
1 parent b9a6fa0 commit 03b30e5

1 file changed

Lines changed: 181 additions & 0 deletions

File tree

examples/simd_profile_probe.rs

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
//! `simd_profile_probe` — boot-on-silicon diagnostic for the dispatch matrix.
2+
//!
3+
//! Step 1 of the TEST-promotion checklist from
4+
//! `.claude/knowledge/td-simd-cpu-dispatch-matrix.md` § "TEST verification
5+
//! checklist": *"Boot the binary on the silicon and confirm `simd_profile()`
6+
//! returns the expected variant."*
7+
//!
8+
//! Prints every CPUID-derived capability bit plus the resolved `SimdProfile`
9+
//! variant. Used to verify silicon → profile mapping when promoting DOC
10+
//! cells in the dispatch matrix to TEST.
11+
//!
12+
//! Usage:
13+
//! ```sh
14+
//! # Runtime detection (default — same binary on any silicon):
15+
//! cargo run --example simd_profile_probe --release
16+
//!
17+
//! # Compile-time pinned (the LazyLock is not linked in):
18+
//! cargo run --example simd_profile_probe --release --features cpu-spr
19+
//! ```
20+
21+
use ndarray::hpc::simd_caps::{simd_caps, ArmProfile, SimdCaps};
22+
use ndarray::hpc::simd_profile::{is_pinned, pinned_profile, simd_profile, SimdProfile};
23+
24+
fn main() {
25+
let caps = simd_caps();
26+
let profile = simd_profile();
27+
28+
println!("ndarray simd-profile probe");
29+
println!("==========================");
30+
println!();
31+
32+
// ── Dispatch identity ───────────────────────────────────────────
33+
println!("Resolved profile: {}", profile.name());
34+
println!(" is_x86: {}", profile.is_x86());
35+
println!(" is_aarch64: {}", profile.is_aarch64());
36+
println!(" has_avx512: {}", profile.has_avx512());
37+
println!(" has_amx: {}", profile.has_amx());
38+
println!();
39+
40+
// ── Pinning status ─────────────────────────────────────────────
41+
println!("Compile-time pinning: {}", if is_pinned() { "ACTIVE" } else { "off (runtime detection)" });
42+
if let Some(p) = pinned_profile() {
43+
println!(" Pinned variant: {}", p.name());
44+
}
45+
println!();
46+
47+
// ── Raw capability bits ────────────────────────────────────────
48+
println!("SimdCaps (raw bits):");
49+
print_caps(&caps);
50+
println!();
51+
52+
// ── ARM-specific sub-profile (heuristic; deployment-pragmatic) ──
53+
let arm = caps.arm_profile();
54+
if !matches!(arm, ArmProfile::NotArm) {
55+
println!("ARM profile (heuristic): {}", arm.name());
56+
println!(" est. tok/sec: {}", arm.estimated_tok_per_sec());
57+
println!(" eff. f32 lanes:{}", arm.effective_f32_lanes());
58+
println!();
59+
}
60+
61+
// ── Build configuration ─────────────────────────────────────────
62+
println!("Build:");
63+
println!(" target_arch: {}", std::env::consts::ARCH);
64+
println!(" target_os: {}", std::env::consts::OS);
65+
#[cfg(target_feature = "avx512f")]
66+
println!(" -Ctarget-feature avx512f: yes (compile-time)");
67+
#[cfg(not(target_feature = "avx512f"))]
68+
println!(" -Ctarget-feature avx512f: no (compile-time)");
69+
#[cfg(target_feature = "avx2")]
70+
println!(" -Ctarget-feature avx2: yes (compile-time)");
71+
#[cfg(not(target_feature = "avx2"))]
72+
println!(" -Ctarget-feature avx2: no (compile-time)");
73+
println!();
74+
75+
// ── TEST promotion guidance ────────────────────────────────────
76+
println!("Matrix-doc cells affected by this CPU:");
77+
matrix_cell_summary(profile);
78+
79+
// Sanity invariant: simd_profile() and pinned_profile() must agree
80+
// when pinning is active. This is the same check that
81+
// `pinning_consistency` runs as a unit test; we re-run it here so a
82+
// probe binary deployed on real silicon flags any future regression
83+
// in the cfg cascade.
84+
if let Some(p) = pinned_profile() {
85+
assert_eq!(
86+
profile, p,
87+
"INVARIANT VIOLATION: pinned_profile()={:?} disagrees with simd_profile()={:?}",
88+
p, profile
89+
);
90+
}
91+
}
92+
93+
fn print_caps(c: &SimdCaps) {
94+
let bits: &[(&str, bool)] = &[
95+
("avx2", c.avx2),
96+
("avx512f", c.avx512f),
97+
("avx512bw", c.avx512bw),
98+
("avx512vl", c.avx512vl),
99+
("avx512vnni", c.avx512vnni),
100+
("avx512vbmi", c.avx512vbmi),
101+
("avx512vpopcntdq", c.avx512vpopcntdq),
102+
("avx512bf16", c.avx512bf16),
103+
("avx512fp16", c.avx512fp16),
104+
("avx512vp2intersect", c.avx512vp2intersect),
105+
("avxvnniint8", c.avxvnniint8),
106+
("amx_tile", c.amx_tile),
107+
("amx_int8", c.amx_int8),
108+
("amx_bf16", c.amx_bf16),
109+
("amx_fp16", c.amx_fp16),
110+
("fma", c.fma),
111+
("sse41", c.sse41),
112+
("sse2", c.sse2),
113+
("neon", c.neon),
114+
("asimd_dotprod", c.asimd_dotprod),
115+
("fp16 (arm)", c.fp16),
116+
("aes", c.aes),
117+
("sha2", c.sha2),
118+
("crc32", c.crc32),
119+
];
120+
for (name, present) in bits {
121+
println!(" [{}] {}", if *present { "x" } else { " " }, name);
122+
}
123+
}
124+
125+
fn matrix_cell_summary(p: SimdProfile) {
126+
// Lifted from `td-simd-cpu-dispatch-matrix.md` § "Master matrix"
127+
// for each x86 profile. The summary is intentionally terse — the
128+
// matrix doc is the source of truth and should be consulted before
129+
// promoting any DOC cell to TEST.
130+
let summary: &[&str] = match p {
131+
SimdProfile::GraniteRapids => &[
132+
"F+CD+VL+DQ+BW+IFMA+VBMI+VBMI2+VNNI+BF16+FP16",
133+
"VPOPCNTDQ+BITALG+GFNI+VAES+VPCLMUL",
134+
"AMX-TILE+INT8+BF16+FP16 (FP16 is the GNR discriminator)",
135+
],
136+
SimdProfile::SapphireRapids => &[
137+
"F+CD+VL+DQ+BW+IFMA+VBMI+VBMI2+VNNI+BF16+FP16",
138+
"VPOPCNTDQ+BITALG+GFNI+VAES+VPCLMUL",
139+
"AMX-TILE+INT8+BF16 (no AMX-FP16 — that's GNR)",
140+
],
141+
SimdProfile::Zen4Avx512 => &[
142+
"F+CD+VL+DQ+BW+IFMA+VBMI+VBMI2+VNNI+BF16+FP16",
143+
"No AMX of any kind; 256-bit FPU double-pumped on Zen4, native 512-bit on Zen5",
144+
],
145+
SimdProfile::CooperLake => &[
146+
"F+CD+VL+DQ+BW+VNNI+BF16",
147+
"No VBMI, no FP16, no AMX — unique 'BF16 without VBMI'",
148+
],
149+
SimdProfile::TigerLakeU => &[
150+
"F+CD+VL+DQ+BW+IFMA+VBMI+VBMI2+VNNI+VP2INTERSECT",
151+
"VP2INTERSECT is the sole discriminator vs IceLakeSp",
152+
],
153+
SimdProfile::IceLakeSp => &[
154+
"F+CD+VL+DQ+BW+IFMA+VBMI+VBMI2+VNNI",
155+
"No BF16, no FP16, no AMX, no VP2INTERSECT",
156+
],
157+
SimdProfile::CascadeLake => &["F+CD+VL+DQ+BW+VNNI", "First Xeon with VNNI; no VBMI/BF16/FP16/AMX"],
158+
SimdProfile::SkylakeX => &["F+CD+VL+DQ+BW", "Founding AVX-512 baseline; everything since adds on top"],
159+
SimdProfile::ArrowLake => &[
160+
"No AVX-512 (hybrid CPU design)",
161+
"AVX-VNNI-INT8 + AVX-IFMA + AVX-NE-CONVERT (256-bit / VEX forms)",
162+
],
163+
SimdProfile::HaswellAvx2 => &["AVX2 + FMA + F16C + BMI1/2", "Haswell..Coffee Lake / Zen 1-3"],
164+
SimdProfile::A76DotProd => &[
165+
"NEON + dotprod + fp16 + bf16+ + i8mm",
166+
"Pi 5 (BCM2712), Orange Pi 5 (RK3588), Apple M1+",
167+
],
168+
SimdProfile::A72Fast => &[
169+
"NEON 128-bit + crypto (AES/SHA-2/CRC32)",
170+
"Pi 4 (BCM2711), Pi 3-with-crypto, Orange Pi 4 — HWCAP cannot distinguish A72 from A53-with-crypto",
171+
],
172+
SimdProfile::A53Baseline => &[
173+
"NEON 128-bit baseline",
174+
"Rare in the wild — QEMU / minimal aarch64 without crypto",
175+
],
176+
SimdProfile::Scalar => &["No SIMD ISA recognised", "Fallback: scalar reference kernels"],
177+
};
178+
for line in summary {
179+
println!(" - {}", line);
180+
}
181+
}

0 commit comments

Comments
 (0)