Skip to content

Commit 16844fe

Browse files
committed
moved python to new file, refactoring
1 parent 5b20385 commit 16844fe

2 files changed

Lines changed: 88 additions & 97 deletions

File tree

.github/scripts/run-benchmarks.sh

Lines changed: 20 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -32,33 +32,25 @@ if [[ -z "$ARGS" ]]; then
3232
# Bare /bench with no args: default to the "base" tag
3333
TAGS="base"
3434
else
35-
# Parse --tags and --filter flags; each takes the next whitespace-delimited
36-
# token as its value. Unknown tokens are silently ignored.
37-
while [[ -n "$ARGS" ]]; do
38-
TOKEN="${ARGS%% *}"
39-
ARGS="${ARGS#"$TOKEN"}"
40-
ARGS="${ARGS##+( )}"
41-
42-
if [[ "$TOKEN" == "--tags" ]]; then
43-
TAGS="${ARGS%% *}"
44-
ARGS="${ARGS#"$TAGS"}"
45-
ARGS="${ARGS##+( )}"
46-
elif [[ "$TOKEN" == "--filter" ]]; then
47-
FILTER="${ARGS%% *}"
48-
ARGS="${ARGS#"$FILTER"}"
49-
ARGS="${ARGS##+( )}"
50-
fi
35+
# Normalize: strip /bench prefix, collapse all whitespace (including newlines)
36+
# to spaces, then strip to a safe allowlist before parsing
37+
ARGS=$(printf '%s' "$ARGS" | tr '\n\r\t' ' ' | tr -s ' ' | tr -cd 'a-zA-Z0-9,_./|*+?()[]^$ -')
38+
ARGS="${ARGS## }" # strip leading space
39+
ARGS="${ARGS%% }" # strip trailing space
40+
41+
read -ra TOKENS <<< "$ARGS"
42+
i=0
43+
while [[ $i -lt ${#TOKENS[@]} ]]; do
44+
case "${TOKENS[$i]}" in
45+
--tags) i=$((i + 1)); TAGS="${TOKENS[$i]:-}" ;;
46+
--filter) i=$((i + 1)); FILTER="${TOKENS[$i]:-}" ;;
47+
*) echo "Unknown token: '${TOKENS[$i]}'" >&2; exit 1 ;;
48+
esac
49+
i=$((i + 1))
5150
done
5251
fi
5352

54-
# Sanitize tags: strict allowlist (alphanumeric, comma, underscore, hyphen)
55-
TAGS=$(printf '%s' "$TAGS" | tr -cd 'a-zA-Z0-9,_-')
56-
57-
# Sanitize filter: strip control characters only, preserving regex metacharacters.
58-
# The filter is always passed double-quoted to cargo bench.
59-
FILTER=$(printf '%s' "$FILTER" | tr -d '\000-\037\177')
60-
61-
# If nothing was parsed (unrecognized tokens, typos, missing values), default to "base"
53+
# Default: if nothing was parsed, run with BENCH_TAGS=base
6254
if [[ -z "$TAGS" && -z "$FILTER" ]]; then
6355
TAGS="base"
6456
fi
@@ -84,83 +76,14 @@ git checkout FETCH_HEAD
8476

8577
# ---------------------------------------------------------------------------
8678
# 4. Compare baselines with critcmp and format as a markdown table.
87-
# Replicates criterion-compare-action's output:
8879
# - Parses actual duration values (not rank factors) for the % column
8980
# - Bolds the faster duration and % cell when the difference is
9081
# statistically significant (error bounds do not overlap)
9182
# ---------------------------------------------------------------------------
92-
cat > /tmp/parse_critcmp.py << 'PYEOF'
93-
import sys, re
94-
95-
def to_seconds(value, units):
96-
u = units.strip()
97-
if u == 's': return value
98-
if u == 'ms': return value / 1e3
99-
if u in ('µs', 'us', 'μs'): return value / 1e6
100-
if u == 'ns': return value / 1e9
101-
return value
102-
103-
def is_significant(chg_dur, chg_err, base_dur, base_err):
104-
if chg_dur < base_dur:
105-
return chg_dur + chg_err < base_dur or base_dur - base_err > chg_dur
106-
else:
107-
return chg_dur - chg_err > base_dur or base_dur + base_err < chg_dur
108-
109-
def parse_duration(s):
110-
m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
111-
if not m:
112-
return None
113-
return float(m.group(1)), float(m.group(2)), m.group(3).strip()
114-
115-
lines = sys.stdin.read().splitlines()
116-
print("| Test | Base | PR | % |")
117-
print("|------|--------------|------------------|---|")
118-
119-
for line in lines[2:]: # skip critcmp header rows
120-
if not line.strip():
121-
continue
122-
# critcmp columns (split on 2+ spaces):
123-
# with throughput: name, baseFactor, baseDuration, baseBandwidth, changesFactor, changesDuration, changesBandwidth
124-
# without throughput: name, baseFactor, baseDuration, changesFactor, changesDuration
125-
# Locate duration fields by the presence of "±" rather than hardcoding indices,
126-
# so the script works correctly regardless of whether bandwidth columns are present.
127-
fields = re.split(r' +', line)
128-
name = fields[0].strip().replace('|', r'\|') if fields else ''
129-
dur_fields = [f.strip() for f in fields[1:] if '±' in f]
130-
base_dur_str = dur_fields[0] if len(dur_fields) > 0 else None
131-
chg_dur_str = dur_fields[1] if len(dur_fields) > 1 else None
132-
133-
if not name and not base_dur_str and not chg_dur_str:
134-
continue
135-
136-
base_display = base_dur_str or 'N/A'
137-
chg_display = chg_dur_str or 'N/A'
138-
difference = 'N/A'
139-
140-
if base_dur_str and chg_dur_str:
141-
base_p = parse_duration(base_dur_str)
142-
chg_p = parse_duration(chg_dur_str)
143-
if base_p and chg_p:
144-
base_secs = to_seconds(base_p[0], base_p[2])
145-
base_err_secs = to_seconds(base_p[1], base_p[2])
146-
chg_secs = to_seconds(chg_p[0], chg_p[2])
147-
chg_err_secs = to_seconds(chg_p[1], chg_p[2])
148-
149-
pct = -(1 - chg_secs / base_secs) * 100
150-
prefix = '' if chg_secs <= base_secs else '+'
151-
difference = f'{prefix}{pct:.2f}%'
152-
153-
if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
154-
if chg_secs < base_secs:
155-
chg_display = f'**{chg_dur_str}**'
156-
elif chg_secs > base_secs:
157-
base_display = f'**{base_dur_str}**'
158-
difference = f'**{difference}**'
159-
160-
print(f'| {name} | {base_display} | {chg_display} | {difference} |')
161-
PYEOF
162-
163-
COMPARISON=$((cd benchmarks && critcmp base changes) | python3 /tmp/parse_critcmp.py)
83+
# Use `critcmp` to compare the criterion output for `base` and `changes`. We use `critcmp` instead of manually
84+
# parsing criterion outputs because criterion may update its output format. By using `critcmp`, we inherit all
85+
# updated criterion output parsing.
86+
COMPARISON=$((cd benchmarks && critcmp base changes) | python3 benchmarks/ci/parse_critcmp.py)
16487
16588
# ---------------------------------------------------------------------------
16689
# 5. Write results to /tmp/bench-comment.md

benchmarks/ci/parse_critcmp.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import sys, re
2+
3+
def to_seconds(value, units):
    """Convert a duration given in ``units`` into seconds.

    ``units`` is stripped of surrounding whitespace before it is matched.
    Milliseconds, nanoseconds and three spellings of microseconds are
    scaled down; ``s`` and any unrecognised suffix return ``value`` as-is.
    """
    divisors = {
        'ms': 1e3,
        'µs': 1e6,
        'us': 1e6,
        'μs': 1e6,
        'ns': 1e9,
    }
    scale = divisors.get(units.strip())
    # Seconds and unknown units are both passed through unchanged.
    return value if scale is None else value / scale
10+
11+
def is_significant(chg_dur, chg_err, base_dur, base_err):
    """Report whether the change and base durations differ meaningfully.

    The result is True when at least one measurement's error margin,
    applied in the direction of the other measurement, does not reach
    that other measurement's central value.
    """
    if chg_dur < base_dur:
        # Change is faster: look upward from change, downward from base.
        change_misses_base = chg_dur + chg_err < base_dur
        base_misses_change = base_dur - base_err > chg_dur
    else:
        # Change is slower or equal: look downward from change, upward from base.
        change_misses_base = chg_dur - chg_err > base_dur
        base_misses_change = base_dur + base_err < chg_dur
    return change_misses_base or base_misses_change
16+
17+
def parse_duration(s):
    """Split a ``<value>±<error><units>`` string into its components.

    Returns a ``(value, error, units)`` tuple with the two numeric parts as
    floats and the units stripped of surrounding whitespace. Returns None
    when the text does not have that shape or when a numeric part is not a
    valid float.
    """
    m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
    if not m:
        return None
    try:
        # The character class also admits text such as "1.2.3" or ".",
        # which float() rejects; treat those as unparseable.
        value, error = float(m.group(1)), float(m.group(2))
    except ValueError:
        return None
    return value, error, m.group(3).strip()
22+
23+
def _format_row(line):
    """Turn one critcmp data row into a markdown table row.

    Returns None when the row carries neither a name nor any duration.
    """
    # critcmp columns are split on runs of two or more spaces, so values
    # that contain a single space (names, bandwidth cells) stay intact:
    #   with throughput:    name, baseFactor, baseDuration, baseBandwidth,
    #                       changesFactor, changesDuration, changesBandwidth
    #   without throughput: name, baseFactor, baseDuration,
    #                       changesFactor, changesDuration
    fields = re.split(r' {2,}', line)
    name = fields[0].strip().replace('|', r'\|')

    # Locate duration cells by the "±" they contain rather than by index, so
    # the presence or absence of bandwidth columns does not matter.
    # NOTE(review): a row with a single duration reports it under Base;
    # confirm critcmp never emits a row that only has a `changes` value.
    durations = [cell.strip() for cell in fields[1:] if '±' in cell]
    base_dur_str = durations[0] if durations else None
    chg_dur_str = durations[1] if len(durations) > 1 else None

    if not name and not base_dur_str and not chg_dur_str:
        return None

    base_display = base_dur_str or 'N/A'
    chg_display = chg_dur_str or 'N/A'
    difference = 'N/A'

    if base_dur_str and chg_dur_str:
        base_p = parse_duration(base_dur_str)
        chg_p = parse_duration(chg_dur_str)
        if base_p and chg_p:
            base_secs = to_seconds(base_p[0], base_p[2])
            base_err_secs = to_seconds(base_p[1], base_p[2])
            chg_secs = to_seconds(chg_p[0], chg_p[2])
            chg_err_secs = to_seconds(chg_p[1], chg_p[2])

            # A relative change is undefined against a zero base duration;
            # keep 'N/A' instead of dividing by zero.
            has_pct = base_secs != 0
            if has_pct:
                pct = -(1 - chg_secs / base_secs) * 100
                prefix = '' if chg_secs <= base_secs else '+'
                difference = f'{prefix}{pct:.2f}%'

            if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
                # Bold whichever side is faster, plus the percentage cell.
                if chg_secs < base_secs:
                    chg_display = f'**{chg_dur_str}**'
                elif chg_secs > base_secs:
                    base_display = f'**{base_dur_str}**'
                if has_pct:
                    difference = f'**{difference}**'

    return f'| {name} | {base_display} | {chg_display} | {difference} |'


def main():
    """Read critcmp output from stdin and print it as a markdown table."""
    lines = sys.stdin.read().splitlines()
    print("| Test | Base | PR | % |")
    print("|------|--------------|------------------|---|")

    for line in lines[2:]:  # skip critcmp header rows
        if not line.strip():
            continue
        row = _format_row(line)
        if row is not None:
            print(row)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)