Skip to content

Commit 5a6a6f3

Browse files
committed
bench: exact-match --filter + system-info in Meta
The bench harness's --filter was substring-match, so a per-kernel CI matrix entry `--filter nbody` silently ran BOTH `nbody` and `nbody_ref` (the latter as a second pass on the already-warm runner). The published nbody_ref number was the thermally-pressured side-by-side pass — ~100 ms slower than running nbody_ref alone on Linux x86_64. Switching to exact-match means each GHA matrix entry runs exactly one kernel and the published number reflects an isolated runner, matching what `bench (nbody_ref)` alone already reports (~605 ms tiered, ~609 ms LLVM on x86_64). Meta now also carries cpu / cpu_cores / ram_gb / host fields so the public page can render full hardware context. The CPU and RAM probes use sysctl on macOS and /proc on Linux, the hostname comes from the `hostname` binary, and the core count uses available_parallelism so cgroup quotas (GHA runners) are honoured. The page renders the new fields as a second meta row below the existing date/commit/arch/os line; each field is omitted when its value is empty or missing, and the row is dropped entirely when none are present, so older results.json files keep rendering.
1 parent 8332193 commit 5a6a6f3

2 files changed

Lines changed: 134 additions & 2 deletions

File tree

crates/zynml/examples/bench_runner.rs

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,27 @@ struct Meta {
149149
/// render "median of N" accurately if we ever change the
150150
/// constant.
151151
runs: usize,
152+
/// CPU brand string (e.g. "Apple M1", "Intel(R) Xeon(R) Platinum
153+
/// 8370C CPU @ 2.80GHz"). Empty string if probing failed.
154+
/// Read from `sysctl -n machdep.cpu.brand_string` on macOS and
155+
/// `/proc/cpuinfo` on Linux.
156+
#[serde(default)]
157+
cpu: String,
158+
/// Logical CPU count (`thread::available_parallelism`). 0 if the
159+
/// probe failed.
160+
#[serde(default)]
161+
cpu_cores: usize,
162+
/// Total system RAM in GiB, integer-truncated. 0 if the probe
163+
/// failed. Read from `sysctl -n hw.memsize` on macOS and
164+
/// `/proc/meminfo` (MemTotal) on Linux.
165+
#[serde(default)]
166+
ram_gb: u64,
167+
/// `hostname` output. Public CI runners (GitHub Actions) give
168+
/// out unique-per-job names — useful for cross-referencing a
169+
/// noisy bench result with the specific runner instance it
170+
/// landed on.
171+
#[serde(default)]
172+
host: String,
152173
}
153174

154175
/// Each benchmark source lives at
@@ -381,13 +402,24 @@ fn main() {
381402
arch: env::consts::ARCH.to_string(),
382403
os: env::consts::OS.to_string(),
383404
runs,
405+
cpu: probe_cpu_brand(),
406+
cpu_cores: probe_cpu_cores(),
407+
ram_gb: probe_ram_gb(),
408+
host: probe_hostname(),
384409
},
385410
};
386411

387412
for kernel in KERNELS {
388413
let pretty = kernel.strip_prefix("bench_").unwrap_or(kernel);
389414
if let Some(f) = &kernel_filter {
390-
if !pretty.contains(f.as_str()) {
415+
// Exact-match against the stripped kernel name. Substring
416+
// was the old default; it silently matched `nbody` against
417+
// both `nbody` and `nbody_ref`, so the per-kernel GHA matrix
418+
// ran nbody_ref twice — once alone, once chained behind
419+
// nbody on the same runner — and the published number was
420+
// the thermally-throttled second pass (~100 ms worse than
421+
// the dedicated job).
422+
if pretty != f.as_str() {
391423
continue;
392424
}
393425
}
@@ -888,6 +920,95 @@ fn try_save_cached_hir(module: &HirModule, cache_key: &str, cache_dir: &Path) {
888920
}
889921
}
890922

923+
/// Extracts the CPU brand from the text of `/proc/cpuinfo`.
///
/// Returns the trimmed value of the first line that starts with
/// `model name` and contains a `:` separator, or `None` when no such
/// line exists.
fn parse_cpuinfo_model_name(cpuinfo: &str) -> Option<String> {
    cpuinfo.lines().find_map(|line| {
        let rest = line.strip_prefix("model name")?;
        // Split on the FIRST `:` only, so a brand string that itself
        // contains a colon is kept whole instead of being cut short at
        // the second colon.
        let (_, value) = rest.split_once(':')?;
        Some(value.trim().to_string())
    })
}

/// Best-effort CPU brand string.
///
/// On macOS this runs `sysctl -n machdep.cpu.brand_string`; on Linux
/// it reads the first `model name` entry of `/proc/cpuinfo`. The
/// result is whitespace-trimmed.
///
/// Returns an empty string when the probe fails or the platform is
/// neither macOS nor Linux, so callers can treat "" as "unknown"
/// instead of handling an error.
fn probe_cpu_brand() -> String {
    use std::process::Command;
    if cfg!(target_os = "macos") {
        let output = Command::new("sysctl")
            .args(["-n", "machdep.cpu.brand_string"])
            .output();
        if let Ok(o) = output {
            if o.status.success() {
                return String::from_utf8_lossy(&o.stdout).trim().to_string();
            }
        }
    } else if cfg!(target_os = "linux") {
        if let Ok(text) = fs::read_to_string("/proc/cpuinfo") {
            if let Some(brand) = parse_cpuinfo_model_name(&text) {
                return brand;
            }
        }
    }
    String::new()
}
951+
952+
/// Number of logical CPUs usable by this process, as reported by
/// `std::thread::available_parallelism`.
///
/// That std call is used rather than a raw hardware count because it
/// is meant to reflect limits such as cgroup CPU quotas (e.g. on GHA
/// runners), which is closer to what the bench actually gets
/// scheduled on. Returns 0 when the query fails.
fn probe_cpu_cores() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 0,
    }
}
961+
962+
/// Total installed RAM in whole GiB (truncated, not rounded — a
/// 15.6 GiB machine reports 15).
///
/// macOS: parses the byte count printed by `sysctl -n hw.memsize`.
/// Linux: parses the KiB figure on the `MemTotal:` line of
/// `/proc/meminfo`; if that line is present but its number cannot be
/// parsed, the result is 0.
///
/// Returns 0 when the probe fails or the platform is neither macOS
/// nor Linux.
fn probe_ram_gb() -> u64 {
    use std::process::Command;

    const BYTES_PER_GIB: u64 = 1024 * 1024 * 1024;
    const KIB_PER_GIB: u64 = 1024 * 1024;

    if cfg!(target_os = "macos") {
        let total_bytes = Command::new("sysctl")
            .args(["-n", "hw.memsize"])
            .output()
            .ok()
            .filter(|out| out.status.success())
            .and_then(|out| {
                String::from_utf8_lossy(&out.stdout)
                    .trim()
                    .parse::<u64>()
                    .ok()
            });
        if let Some(bytes) = total_bytes {
            return bytes / BYTES_PER_GIB;
        }
    } else if cfg!(target_os = "linux") {
        // An unreadable file behaves like an empty one: no line
        // matches and we fall through to the 0 default below.
        let meminfo = fs::read_to_string("/proc/meminfo").unwrap_or_default();
        let total_kib = meminfo
            .lines()
            .find_map(|line| line.strip_prefix("MemTotal:"))
            .map(|rest| {
                rest.split_whitespace()
                    .next()
                    .and_then(|tok| tok.parse::<u64>().ok())
                    .unwrap_or(0)
            });
        if let Some(kib) = total_kib {
            return kib / KIB_PER_GIB;
        }
    }
    0
}
993+
994+
/// Machine hostname, obtained by running the `hostname` binary and
/// trimming its stdout.
///
/// Shelling out keeps the bench example free of a libc-bindings
/// dependency. Returns an empty string if the command cannot be
/// spawned or exits unsuccessfully.
fn probe_hostname() -> String {
    use std::process::Command;
    match Command::new("hostname").output() {
        Ok(out) if out.status.success() => {
            String::from_utf8_lossy(&out.stdout).trim().to_string()
        }
        _ => String::new(),
    }
}
1011+
8911012
fn git_short_sha() -> String {
8921013
use std::process::Command;
8931014
Command::new("git")

website/benchmark/index.html

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,14 +647,25 @@ <h2>${title}</h2>
647647
if (data.meta) {
648648
const m = data.meta;
649649
const date = (m.date || '—').replace('T', ' ').replace('Z', ' UTC');
650+
// System info chips — rendered only when the corresponding
651+
// field is non-empty in the JSON (older results.json files
652+
// predate the probes and lack these keys).
653+
const sysChips = [];
654+
if (m.cpu) sysChips.push(`<span>cpu <code>${m.cpu}</code></span>`);
655+
if (m.cpu_cores) sysChips.push(`<span>cores <code>${m.cpu_cores}</code></span>`);
656+
if (m.ram_gb) sysChips.push(`<span>ram <code>${m.ram_gb} GiB</code></span>`);
657+
if (m.host) sysChips.push(`<span>host <code>${m.host}</code></span>`);
658+
const sysRow = sysChips.length
659+
? `<div class="meta" style="margin-top:0.4rem;">${sysChips.join('')}</div>`
660+
: '';
650661
metaEl.innerHTML = `
651662
<div class="meta">
652663
<span>updated <b style="color:var(--text-primary);">${date}</b></span>
653664
<span>commit <code>${m.commit || '—'}</code></span>
654665
<span>arch <code>${m.arch || '—'}</code></span>
655666
<span>os <code>${m.os || '—'}</code></span>
656667
<span>median of ${m.runs || 9} runs</span>
657-
</div>`;
668+
</div>${sysRow}`;
658669
}
659670
}
660671

0 commit comments

Comments
 (0)