Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions .github/scripts/run-benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/env bash
# Benchmark comparison script for pull requests.
#
# Called by .github/workflows/benchmark.yml (run-benchmark job) after the repo
# has been checked out at the PR head. Writes the formatted markdown comparison
# to /tmp/bench-comment.md; the post-comment job picks it up and posts it.
#
# Expects the following environment variables:
#
# COMMENT - the /bench PR comment body
# BASE_REF - base branch ref (e.g. "main")
# HEAD_SHA - full SHA of the PR head commit

set -euo pipefail
shopt -s extglob

# ---------------------------------------------------------------------------
# 1. Parse the /bench comment
# Syntax: /bench [--tags <csv>] [--filter <regex>]
# --tags sets BENCH_TAGS (comma-separated tag list); defaults to "base"
# when the comment is just /bench
# --filter Criterion name regex passed as a positional arg to cargo bench
# ---------------------------------------------------------------------------

ARGS="${COMMENT#/bench}"
ARGS="${ARGS##+( )}"

TAGS=""
FILTER=""

if [[ -z "$ARGS" ]]; then
# Bare /bench with no args: default to the "base" tag
TAGS="base"
else
# Normalize: strip /bench prefix, collapse all whitespace (including newlines)
# to spaces, then strip to a safe allowlist before parsing
ARGS=$(printf '%s' "$ARGS" | tr '\n\r\t' ' ' | tr -s ' ' | tr -cd 'a-zA-Z0-9,_./|*+?()[]^$ -')
ARGS="${ARGS## }" # strip leading space
ARGS="${ARGS%% }" # strip trailing space

read -ra TOKENS <<< "$ARGS"
i=0
while [[ $i -lt ${#TOKENS[@]} ]]; do
case "${TOKENS[$i]}" in
--tags) i=$((i + 1)); TAGS="${TOKENS[$i]:-}" ;;
--filter) i=$((i + 1)); FILTER="${TOKENS[$i]:-}" ;;
*) echo "Unknown token: '${TOKENS[$i]}'" >&2; exit 1 ;;
esac
i=$((i + 1))
done
fi

# Default: if nothing was parsed, run with BENCH_TAGS=base
if [[ -z "$TAGS" && -z "$FILTER" ]]; then
TAGS="base"
fi

echo "Parsed tags: ${TAGS:-<none>}"
echo "Parsed filter: ${FILTER:-<none>}"

[[ -n "$TAGS" ]] && export BENCH_TAGS="$TAGS"

# ---------------------------------------------------------------------------
# 2. Benchmark the PR branch (already checked out by the workflow)
# ---------------------------------------------------------------------------
(cd benchmarks && cargo bench --locked --bench workload_bench -- --save-baseline changes "$FILTER")

# ---------------------------------------------------------------------------
# 3. Switch to the base branch and benchmark it
# The benchmarks/target/ directory is not tracked by git, so the
# "changes" baseline files are preserved across the branch switch.
# ---------------------------------------------------------------------------
git fetch origin -- "$BASE_REF"
git checkout FETCH_HEAD
(cd benchmarks && cargo bench --locked --bench workload_bench -- --save-baseline base "$FILTER")

# ---------------------------------------------------------------------------
# 4. Compare baselines with critcmp and format as a markdown table.
# - Parses actual duration values (not rank factors) for the % column
# - Bolds the faster duration and % cell when the difference is
# statistically significant (error bounds do not overlap)
# ---------------------------------------------------------------------------
# Use `critcmp` to compare the criterion output for `base` and `changes`. We use `critcmp` instead of manually
# parsing criterion outputs because criterion may update its output format. By using `critcmp`, we inherit all
# updated criterion output parsing.
COMPARISON=$((cd benchmarks && critcmp base changes) | python3 benchmarks/ci/parse_critcmp.py)

# ---------------------------------------------------------------------------
# 5. Write results to /tmp/bench-comment.md
# The post-comment job in benchmark.yml downloads this file as an artifact
# and posts it as a PR comment using a step that holds GH_TOKEN.
# ---------------------------------------------------------------------------
SHORT_SHA="${HEAD_SHA:0:7}"

SUMMARY=""
[[ -n "$TAGS" ]] && SUMMARY="tags: \`${TAGS}\`"
[[ -n "$FILTER" ]] && SUMMARY+="${SUMMARY:+ | }filter: \`${FILTER}\`"

{
echo "## Benchmark for ${SHORT_SHA}"
echo "<details>"
echo "<summary>${SUMMARY}</summary>"
echo ""
echo "$COMPARISON"
echo ""
echo "</details>"
} > /tmp/bench-comment.md
83 changes: 54 additions & 29 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,39 @@ on:
types: [created, edited]
name: Benchmarking PR performance
jobs:
runBenchmark:
run-benchmark:
name: Run benchmarks
if: >
github.event.issue.pull_request &&
(github.event.comment.body == '/bench' || startsWith(github.event.comment.body, '/bench '))
runs-on: ubuntu-latest
permissions:
pull-requests: write
contents: read
outputs:
pr_number: ${{ steps.pr.outputs.pr_number }}
steps:
- name: Parse benchmark tags
env:
COMMENT: ${{ github.event.comment.body }}
run: |
if [[ "$COMMENT" == "/bench" ]]; then
TAGS="base"
else
TAGS="${COMMENT#/bench }"
TAGS=$(echo "$TAGS" | tr -d '[:space:]')
fi
echo "BENCH_TAGS=$TAGS" >> "$GITHUB_ENV"
echo "Parsed tags: $TAGS"
- name: Get PR HEAD sha
- name: Get PR metadata
id: pr
run: |
PR_DATA=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.issue.number }})
echo "head_sha=$(echo "$PR_DATA" | jq -r .head.sha)" >> "$GITHUB_OUTPUT"
echo "base_ref=$(echo "$PR_DATA" | jq -r .base.ref)" >> "$GITHUB_OUTPUT"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.issue.number }}
run: |
PR_DATA=$(gh api "repos/$REPO/pulls/$PR_NUMBER")
HEAD_SHA=$(echo "$PR_DATA" | jq -r .head.sha)
BASE_REF=$(echo "$PR_DATA" | jq -r .base.ref)
[[ "$HEAD_SHA" == *$'\n'* || "$BASE_REF" == *$'\n'* ]] && { echo "Unexpected newline in API response" >&2; exit 1; }
[[ "$BASE_REF" =~ ^[a-zA-Z0-9/_.-]+$ ]] || { echo "Invalid BASE_REF: $BASE_REF" >&2; exit 1; }
printf 'head_sha=%s\n' "$HEAD_SHA" >> "$GITHUB_OUTPUT"
printf 'base_ref=%s\n' "$BASE_REF" >> "$GITHUB_OUTPUT"
printf 'pr_number=%s\n' "$PR_NUMBER" >> "$GITHUB_OUTPUT"
- name: Install critcmp
# Installed before checkout so the PR's .cargo/config.toml cannot
# redirect the registry to a malicious source. The runner's
# pre-installed Rust is sufficient -- no toolchain setup needed here.
# --locked is omitted for cargo install (same exemption as cargo miri
# setup); --version pins the top-level crate.
run: cargo install critcmp --version 0.1.8
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
with:
ref: ${{ steps.pr.outputs.head_sha }}
Expand All @@ -45,13 +49,34 @@ jobs:
- uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
with:
save-if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
# TODO: This action internally runs `cargo bench` without --locked, bypassing our
# supply chain lockfile policy (see build.yml top-level comment). Replace with
# manual cargo bench --locked + critcmp + gh pr comment steps.
# - uses: boa-dev/criterion-compare-action@adfd3a94634fe2041ce5613eb7df09d247555b87 # v3.2.4
# with:
# token: ${{ secrets.GITHUB_TOKEN }}
# branchName: ${{ steps.pr.outputs.base_ref }}
# cwd: benchmarks
# benchName: workload_bench
- run: echo "Benchmarking is temporarily disabled. See TODO above."
- name: Run benchmarks
# The comment is posted in the post-comment job after this job completes.
env:
COMMENT: ${{ github.event.comment.body }}
BASE_REF: ${{ steps.pr.outputs.base_ref }}
HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
run: bash .github/scripts/run-benchmarks.sh
- name: Upload benchmark comment
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: bench-comment
path: /tmp/bench-comment.md

post-comment:
name: Post benchmark results
needs: run-benchmark
runs-on: ubuntu-latest
permissions:
pull-requests: write
steps:
- name: Download benchmark comment
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: bench-comment
path: /tmp/
- name: Post results as PR comment
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ needs.run-benchmark.outputs.pr_number }}
REPO: ${{ github.repository }}
run: gh pr comment "$PR_NUMBER" --repo "$REPO" --body-file /tmp/bench-comment.md
24 changes: 20 additions & 4 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,28 @@ BENCH_TAGS=base,my-feature cargo bench -p delta_kernel_benchmarks

### Running benchmarking on a PR

To trigger benchmarks on a pull request, post a comment on the PR with one of the following:
To trigger benchmarks on a pull request, post a comment using the following syntax:

- `/bench` — runs benchmarks with `BENCH_TAGS=base`
- `/bench <tags>` — runs benchmarks with `BENCH_TAGS=<tags>` (e.g. `bench base,tag1`)
```
/bench [--tags <comma separated list of tags>] [--filter <regex>]
```

- `--tags` sets `BENCH_TAGS` (comma-separated), controlling which table groupings run.
- `--filter` is a single-token Criterion regex matched against benchmark names.
- Both flags are optional and independent; they can be given in any order.
- When both are specified, they apply as AND: only benchmarks from tables that match the tag filter AND whose name matches the regex are run.
- Running just `/bench` (with no flags) defaults to `BENCH_TAGS=base`. If neither flag is parsed, the same default applies.

Examples:
```
/bench # BENCH_TAGS=base, all benchmark names
/bench --tags base,my-tag # BENCH_TAGS=base,my-tag, all benchmark names
/bench --filter snapshotConstruction # no BENCH_TAGS set, only snapshotConstruction benchmarks
/bench --tags base --filter 101kAdds.*snapshotConstruction # only snapshotConstruction benchmarks from tables tagged "base"
/bench --filter 101kAdds|10kAdds # no BENCH_TAGS set, OR two table names
```

See [By tag (`BENCH_TAGS`)](#by-tag-bench_tags) for details on how tags work. Results are posted automatically as a PR comment, comparing the PR branch against the base branch.
See [By tag (`BENCH_TAGS`)](#by-tag-bench_tags) for how tags work and [By benchmark name](#by-benchmark-name) for regex pattern examples. Results are posted automatically as a PR comment, comparing the PR branch against the base branch.

## Workload data layout

Expand Down
68 changes: 68 additions & 0 deletions benchmarks/ci/parse_critcmp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import sys, re

def to_seconds(value, units):
u = units.strip()
if u == 's': return value
if u == 'ms': return value / 1e3
if u in ('µs', 'us', 'μs'): return value / 1e6
if u == 'ns': return value / 1e9
return value

def is_significant(chg_dur, chg_err, base_dur, base_err):
if chg_dur < base_dur:
return chg_dur + chg_err < base_dur or base_dur - base_err > chg_dur
else:
return chg_dur - chg_err > base_dur or base_dur + base_err < chg_dur

def parse_duration(s):
m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
if not m:
return None
return float(m.group(1)), float(m.group(2)), m.group(3).strip()

lines = sys.stdin.read().splitlines()
print("| Test | Base | PR | % |")
print("|------|--------------|------------------|---|")

for line in lines[2:]: # skip critcmp header rows
if not line.strip():
continue
# critcmp columns (split on 2+ spaces):
# with throughput: name, baseFactor, baseDuration, baseBandwidth, changesFactor, changesDuration, changesBandwidth
# without throughput: name, baseFactor, baseDuration, changesFactor, changesDuration
# Locate duration fields by the presence of "±" rather than hardcoding indices,
# so the script works correctly regardless of whether bandwidth columns are present.
fields = re.split(r' +', line)
name = fields[0].strip().replace('|', r'\|') if fields else ''
dur_fields = [f.strip() for f in fields[1:] if '±' in f]
base_dur_str = dur_fields[0] if len(dur_fields) > 0 else None
chg_dur_str = dur_fields[1] if len(dur_fields) > 1 else None

if not name and not base_dur_str and not chg_dur_str:
continue

base_display = base_dur_str or 'N/A'
chg_display = chg_dur_str or 'N/A'
difference = 'N/A'

if base_dur_str and chg_dur_str:
base_p = parse_duration(base_dur_str)
chg_p = parse_duration(chg_dur_str)
if base_p and chg_p:
base_secs = to_seconds(base_p[0], base_p[2])
base_err_secs = to_seconds(base_p[1], base_p[2])
chg_secs = to_seconds(chg_p[0], chg_p[2])
chg_err_secs = to_seconds(chg_p[1], chg_p[2])

pct = -(1 - chg_secs / base_secs) * 100
prefix = '' if chg_secs <= base_secs else '+'
difference = f'{prefix}{pct:.2f}%'

if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
if chg_secs < base_secs:
chg_display = f'**{chg_dur_str}**'
elif chg_secs > base_secs:
base_display = f'**{base_dur_str}**'
difference = f'**{difference}**'

print(f'| {name} | {base_display} | {chg_display} | {difference} |')
46 changes: 32 additions & 14 deletions kernel/src/log_segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,20 +298,38 @@ impl LogSegment {
.as_ref()
.and_then(|hint| hint.checkpoint_schema.clone());

let listed_files = match (checkpoint_hint, time_travel_version) {
(Some(cp), None) => {
LogSegmentFiles::list_with_checkpoint_hint(&cp, storage, &log_root, log_tail, None)?
}
(Some(cp), Some(end_version)) if cp.version <= end_version => {
LogSegmentFiles::list_with_checkpoint_hint(
&cp,
storage,
&log_root,
log_tail,
Some(end_version),
)?
}
_ => LogSegmentFiles::list(storage, &log_root, log_tail, None, time_travel_version)?,
// The end_version is the time_travel_version, if present
// TODO: When max catalog version is implemented, we would use that as end_version if
// time_travel_version is not present
let end_version = time_travel_version;

// Keep the hint only if it points at or before end_version, or if there is no end_version bound
let usable_hint = checkpoint_hint.filter(|cp| end_version.is_none_or(|v| cp.version <= v));

// Cases:
//
// 1. usable_hint present, end_version is Some --> list_with_checkpoint_hint from hint.version TO end_version
// 2. usable_hint present, end_version is None --> list_with_checkpoint_hint from hint.version unbounded
// 3. no usable_hint, end_version is Some --> backward-scan for checkpoint before end_version,
// list from that checkpoint TO end_version
// (falls back to v0 if no checkpoint found)
// 4. no usable_hint, end_version is None --> list from v0 unbounded

let listed_files = match (usable_hint, end_version) {
// Cases 1 and 2
(Some(cp), end_version) => LogSegmentFiles::list_with_checkpoint_hint(
&cp,
storage,
&log_root,
log_tail,
end_version,
)?,
// Case 3
(None, Some(end)) => LogSegmentFiles::list_with_backward_checkpoint_scan(
storage, &log_root, log_tail, end,
)?,
// Case 4
(None, None) => LogSegmentFiles::list(storage, &log_root, log_tail, None, None)?,
};

LogSegment::try_new(
Expand Down
Loading
Loading