Skip to content

Commit c4e266f

Browse files
adriangb and claude committed
ci(codspeed): discover benches via cargo metadata, declare skips in manifests
Replace the per-workflow awk/jq Cargo.toml parsing and hardcoded crate lists with a single shared script (.github/workflows/codspeed-matrix.sh) that discovers every [[bench]] target across the workspace via `cargo metadata`. New crates and bench targets are picked up automatically.

The known-broken exclusion list, previously duplicated as an EXCLUDED_BENCHES env in both workflows, now lives next to each bench in its crate's Cargo.toml:

    [package.metadata.codspeed.benches]
    merge_kernels = { skip = true }

cargo surfaces that table at .packages[].metadata.codspeed.benches and the script drops any target flagged `skip = true`.

Unknown `bench:<crate>` label suffixes now error against the workspace member list rather than a loose regex.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent bb75b1f commit c4e266f

8 files changed

Lines changed: 133 additions & 88 deletions

File tree

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
# Generate the CodSpeed benchmark matrix.
20+
#
21+
# Emits to stdout a compact JSON array of {"crate","bench"} objects — one per
22+
# `[[bench]]` target in the selected workspace crates. Each object becomes one
23+
# CodSpeed shard (`cargo codspeed run -p <crate> --bench <bench>`). Sharding
24+
# one job per bench target keeps every shard under CodSpeed's hard per-upload
25+
# limit of 1000 benchmarks, on the assumption that no single bench target
26+
# defines >1000 benchmark cases. If a target ever crosses that line, split the
27+
# bench source rather than reworking the sharding here.
28+
#
29+
# Targets are discovered structurally via `cargo metadata` (no Cargo.toml text
30+
# parsing, no hardcoded crate list), so new crates and new `[[bench]]` targets
31+
# are picked up automatically.
32+
#
33+
# A bench target is dropped from the matrix when its crate's Cargo.toml marks
34+
# it skipped:
35+
#
36+
# [package.metadata.codspeed.benches]
37+
# merge_kernels = { skip = true } # broken at runtime, fix and remove
38+
#
39+
# cargo surfaces that table at .packages[].metadata.codspeed.benches, so the
40+
# skip list lives next to the bench in the crate that owns it.
41+
#
42+
# Usage:
43+
# codspeed-matrix.sh # every workspace crate
44+
# codspeed-matrix.sh arrow parquet # only the named crates (must be members)
45+
46+
set -euo pipefail
47+
48+
metadata="$(cargo metadata --format-version 1 --no-deps)"
49+
50+
# Reject explicitly-requested crates that are not workspace members, so a typo
51+
# in a `bench:<crate>` label fails loudly instead of silently benching nothing.
52+
if [ "$#" -gt 0 ]; then
53+
members="$(jq -r '.packages[].name' <<<"$metadata")"
54+
for crate in "$@"; do
55+
if ! grep -qxF "$crate" <<<"$members"; then
56+
echo "::error::Unknown workspace crate '$crate'" >&2
57+
exit 1
58+
fi
59+
done
60+
fi
61+
62+
selected="$(printf '%s\n' "$@" | jq -Rsc 'split("\n") | map(select(length > 0))')"
63+
64+
jq -c \
65+
--argjson selected "$selected" '
66+
[ .packages[]
67+
| .name as $crate
68+
| (.metadata.codspeed.benches // {}) as $cfg
69+
| select(($selected | length) == 0 or ($selected | index($crate)))
70+
| .targets[]
71+
| select(.kind | index("bench"))
72+
| select(($cfg[.name].skip // false) | not)
73+
| { crate: $crate, bench: .name }
74+
]
75+
' <<<"$metadata"

.github/workflows/codspeed-pr.yml

Lines changed: 14 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -80,60 +80,27 @@ jobs:
8080
id: gen
8181
env:
8282
LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
83-
# Keep this list in sync with codspeed.yml — bench targets that
84-
# currently panic/error at runtime and should not be benched
85-
# until fixed in their respective crates.
86-
EXCLUDED_BENCHES: |
87-
arrow merge_kernels
88-
arrow buffer_bit_ops
89-
arrow buffer_create
90-
arrow sort_kernel
91-
arrow string_run_builder
92-
arrow primitive_run_accessor
93-
arrow-array union_array
94-
arrow-cast parse_date
95-
parquet row_selection_cursor
96-
parquet-variant-compute variant_kernels
83+
# Discovery lives in the shared codspeed-matrix.sh (also used by
84+
# codspeed.yml); the known-broken skip list lives in each crate's
85+
# Cargo.toml ([package.metadata.codspeed.benches]). `bench:all` passes
86+
# no crate args (every crate); otherwise each `bench:<crate>` suffix is
87+
# forwarded as an arg and validated against the workspace members.
9788
run: |
98-
all_crates="arrow arrow-array arrow-avro arrow-buffer arrow-cast arrow-ipc arrow-json arrow-schema parquet parquet-variant parquet-variant-compute"
89+
suffixes="$(jq -r '.[] | select(startswith("bench:")) | sub("^bench:"; "")' <<<"$LABELS")"
9990
100-
suffixes=$(jq -r '.[] | select(startswith("bench:")) | sub("^bench:"; "")' <<<"$LABELS")
101-
102-
if echo "$suffixes" | grep -qx "all"; then
103-
selected_crates="$all_crates"
91+
if grep -qx "all" <<<"$suffixes"; then
10492
scope="full workspace (bench:all)"
93+
matrix="$(bash .github/workflows/codspeed-matrix.sh)"
10594
else
106-
for pkg in $suffixes; do
107-
if ! [[ "$pkg" =~ ^[a-z][a-z0-9_-]*$ ]]; then
108-
echo "::error::Invalid bench label suffix 'bench:$pkg'"
109-
exit 1
110-
fi
111-
done
112-
selected_crates="$(echo $suffixes | tr '\n' ' ')"
113-
scope="$selected_crates"
95+
scope="$(echo $suffixes | tr '\n' ' ')"
96+
# Intentionally unquoted: each whitespace-separated suffix is a
97+
# separate crate argument.
98+
matrix="$(bash .github/workflows/codspeed-matrix.sh $suffixes)"
11499
fi
115100
116-
{
117-
for crate in $selected_crates; do
118-
if [ ! -f "$crate/Cargo.toml" ]; then
119-
echo "::warning::No Cargo.toml found for '$crate' (bench:$crate); skipping"
120-
continue
121-
fi
122-
awk -v crate="$crate" '
123-
/^\[\[bench\]\]/ { in_bench=1; next }
124-
/^\[/ { in_bench=0 }
125-
in_bench && /^name = / {
126-
sub(/^name = "/, ""); sub(/"$/, "");
127-
printf "%s %s\n", crate, $0
128-
}
129-
' "$crate/Cargo.toml"
130-
done
131-
} | grep -vxF -f <(printf '%s\n' "$EXCLUDED_BENCHES" | sed '/^$/d') \
132-
| jq -Rcs 'split("\n") | map(select(length>0) | split(" ") | {crate: .[0], bench: .[1]})' > matrix.json
133-
134-
echo "matrix=$(cat matrix.json)" >> "$GITHUB_OUTPUT"
101+
echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
135102
echo "scope=$scope" >> "$GITHUB_OUTPUT"
136-
echo "::notice::Scope: $scope ($(jq length matrix.json) bench shards after excluding known-broken targets)"
103+
echo "::notice::Scope: $scope ($(jq length <<<"$matrix") bench shards, known-broken targets excluded)"
137104
138105
build:
139106
# Gate on the same label condition as setup so we don't build when

.github/workflows/codspeed.yml

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@
2828
# Topology:
2929
#
3030
# setup ─┐
31-
# ├──→ bench (matrix, ~88 jobs)
31+
# ├──→ bench (matrix, ~78 jobs)
3232
# build ─┘
3333
#
34-
# `setup` parses every workspace Cargo.toml's `[[bench]]` entries and
35-
# emits a {crate, bench} matrix. `build` does the full-workspace
34+
# `setup` discovers every `[[bench]]` target via `cargo metadata` (see
35+
# codspeed-matrix.sh) and emits a {crate, bench} matrix. `build` does the
36+
# full-workspace
3637
# `cargo codspeed build` exactly once and uploads
3738
# `target/codspeed/<mode>/` as an artifact. `bench` shards download the
3839
# artifact and run a single bench target each via the CodSpeed action;
@@ -74,46 +75,15 @@ jobs:
7475
steps:
7576
- uses: actions/checkout@v6
7677

77-
- name: Generate matrix of {crate, bench} from workspace Cargo.tomls
78+
- name: Generate {crate, bench} matrix across the workspace
7879
id: gen
79-
env:
80-
# Bench targets known to panic / error at runtime as of writing.
81-
# These are pre-existing issues in the bench targets themselves,
82-
# not the CodSpeed integration; they should be fixed and removed
83-
# from this list one by one.
84-
# - arrow / merge_kernels: panics at arrow-data/src/transform/primitive.rs:31
85-
# - arrow / buffer_bit_ops, buffer_create, sort_kernel,
86-
# string_run_builder, primitive_run_accessor: fail at runtime
87-
# - arrow-array / union_array, arrow-cast / parse_date: fail at runtime
88-
# - parquet / row_selection_cursor: fails at runtime
89-
# - parquet-variant-compute / variant_kernels: intermittent
90-
EXCLUDED_BENCHES: |
91-
arrow merge_kernels
92-
arrow buffer_bit_ops
93-
arrow buffer_create
94-
arrow sort_kernel
95-
arrow string_run_builder
96-
arrow primitive_run_accessor
97-
arrow-array union_array
98-
arrow-cast parse_date
99-
parquet row_selection_cursor
100-
parquet-variant-compute variant_kernels
80+
# Discovery lives in the shared codspeed-matrix.sh (also used by
81+
# codspeed-pr.yml); the known-broken skip list lives in each crate's
82+
# Cargo.toml ([package.metadata.codspeed.benches]). No args = every workspace crate.
10183
run: |
102-
{
103-
for crate in arrow arrow-array arrow-avro arrow-buffer arrow-cast arrow-ipc arrow-json arrow-schema parquet parquet-variant parquet-variant-compute; do
104-
awk -v crate="$crate" '
105-
/^\[\[bench\]\]/ { in_bench=1; next }
106-
/^\[/ { in_bench=0 }
107-
in_bench && /^name = / {
108-
sub(/^name = "/, ""); sub(/"$/, "");
109-
printf "%s %s\n", crate, $0
110-
}
111-
' "$crate/Cargo.toml"
112-
done
113-
} | grep -vxF -f <(printf '%s\n' "$EXCLUDED_BENCHES" | sed '/^$/d') \
114-
| jq -Rcs 'split("\n") | map(select(length>0) | split(" ") | {crate: .[0], bench: .[1]})' > matrix.json
115-
echo "matrix=$(cat matrix.json)" >> "$GITHUB_OUTPUT"
116-
echo "::notice::Generated $(jq length matrix.json) bench shards (after excluding known-broken targets)"
84+
matrix="$(bash .github/workflows/codspeed-matrix.sh)"
85+
echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
86+
echo "::notice::Generated $(jq length <<<"$matrix") bench shards (one per bench target, known-broken targets excluded)"
11787
11888
build:
11989
name: Build workspace benchmarks

arrow-array/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ hashbrown = { version = "0.17.0", default-features = false }
6161
[package.metadata.docs.rs]
6262
all-features = true
6363

64+
[package.metadata.codspeed.benches]
65+
# Skipped on CodSpeed (read by .github/workflows/codspeed-matrix.sh):
66+
# currently fails at runtime. Fix the bench and remove this entry.
67+
union_array = { skip = true }
68+
6469
[features]
6570
async = ["dep:futures"]
6671
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]

arrow-cast/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ bench = false
3535
[package.metadata.docs.rs]
3636
all-features = true
3737

38+
[package.metadata.codspeed.benches]
39+
# Skipped on CodSpeed (read by .github/workflows/codspeed-matrix.sh):
40+
# currently fails at runtime. Fix the bench and remove this entry.
41+
parse_date = { skip = true }
42+
3843
[features]
3944
prettyprint = ["comfy-table"]
4045
force_validate = []

arrow/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ half = { version = "2.1", default-features = false, features = ["rand_distr"], o
6060
[package.metadata.docs.rs]
6161
all-features = true
6262

63+
[package.metadata.codspeed.benches]
64+
# Per-bench CodSpeed config, read by .github/workflows/codspeed-matrix.sh.
65+
# `skip = true` drops the bench target from the CodSpeed matrix; these
66+
# currently panic/error at runtime, so fix the bench and remove its entry,
67+
# one at a time. (A per-bench `mode = "simulation" | "walltime"` could live
68+
# here too, once the workflow builds both measurement modes.)
69+
merge_kernels = { skip = true }
70+
buffer_bit_ops = { skip = true }
71+
buffer_create = { skip = true }
72+
sort_kernel = { skip = true }
73+
string_run_builder = { skip = true }
74+
primitive_run_accessor = { skip = true }
75+
6376
[features]
6477
default = ["csv", "ipc", "json"]
6578
async = ["arrow-array/async"]

parquet-variant-compute/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ keywords = ["arrow", "parquet", "variant"]
2727
edition = { workspace = true }
2828
rust-version = { workspace = true }
2929

30+
[package.metadata.codspeed.benches]
31+
# Skipped on CodSpeed (read by .github/workflows/codspeed-matrix.sh):
32+
# intermittently fails at runtime. Fix the bench and remove this entry.
33+
variant_kernels = { skip = true }
34+
3035
[dependencies]
3136
arrow = { workspace = true, features = ["canonical_extension_types"] }
3237
arrow-schema = { workspace = true }

parquet/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ sysinfo = { version = "0.38.1", default-features = false, features = ["system"]
9898
[package.metadata.docs.rs]
9999
all-features = true
100100

101+
[package.metadata.codspeed.benches]
102+
# Skipped on CodSpeed (read by .github/workflows/codspeed-matrix.sh):
103+
# currently fails at runtime. Fix the bench and remove this entry.
104+
row_selection_cursor = { skip = true }
105+
101106
[features]
102107
default = ["arrow", "snap", "brotli", "flate2-zlib-rs", "lz4", "zstd", "base64", "simdutf8"]
103108
# Enable lz4

0 commit comments

Comments
 (0)