@@ -32,33 +32,25 @@ if [[ -z "$ARGS" ]]; then
3232 # Bare /bench with no args: default to the "base" tag
3333 TAGS=" base"
3434else
35- # Parse --tags and --filter flags; each takes the next whitespace-delimited
36- # token as its value. Unknown tokens are silently ignored.
37- while [[ -n " $ARGS " ]]; do
38- TOKEN=" ${ARGS%% * } "
39- ARGS=" ${ARGS# " $TOKEN " } "
40- ARGS=" ${ARGS## +( )} "
41-
42- if [[ " $TOKEN " == " --tags" ]]; then
43- TAGS=" ${ARGS%% * } "
44- ARGS=" ${ARGS# " $TAGS " } "
45- ARGS=" ${ARGS## +( )} "
46- elif [[ " $TOKEN " == " --filter" ]]; then
47- FILTER=" ${ARGS%% * } "
48- ARGS=" ${ARGS# " $FILTER " } "
49- ARGS=" ${ARGS## +( )} "
50- fi
35+ # Normalize: strip /bench prefix, collapse all whitespace (including newlines)
36+ # to single spaces, then drop every character outside a safe allowlist before parsing.
37+ ARGS=$( printf ' %s' " $ARGS " | tr ' \n\r\t' ' ' | tr -s ' ' | tr -cd ' a-zA-Z0-9,_./|*+?()[]^$ -' )
38+ ARGS=" ${ARGS## } " # strip leading space
39+ ARGS=" ${ARGS%% } " # strip trailing space
40+
41+ read -ra TOKENS <<< " $ARGS"
42+ i=0
43+ while [[ $i -lt ${# TOKENS[@]} ]]; do
44+ case " ${TOKENS[$i]} " in
45+ --tags) i=$(( i + 1 )) ; TAGS=" ${TOKENS[$i]:- } " ;;
46+ --filter) i=$(( i + 1 )) ; FILTER=" ${TOKENS[$i]:- } " ;;
47+ * ) echo " Unknown token: '${TOKENS[$i]} '" >&2 ; exit 1 ;;
48+ esac
49+ i=$(( i + 1 ))
5150 done
5251fi
5352
54- # Sanitize tags: strict allowlist (alphanumeric, comma, underscore, hyphen)
55- TAGS=$( printf ' %s' " $TAGS " | tr -cd ' a-zA-Z0-9,_-' )
56-
57- # Sanitize filter: strip control characters only, preserving regex metacharacters.
58- # The filter is always passed double-quoted to cargo bench.
59- FILTER=$( printf ' %s' " $FILTER " | tr -d ' \000-\037\177' )
60-
61- # If nothing was parsed (unrecognized tokens, typos, missing values), default to "base"
53+ # Default: if nothing was parsed, run with BENCH_TAGS=base
6254if [[ -z " $TAGS " && -z " $FILTER " ]]; then
6355 TAGS=" base"
6456fi
@@ -84,83 +76,14 @@ git checkout FETCH_HEAD
8476
8577# ---------------------------------------------------------------------------
8678# 4. Compare baselines with critcmp and format as a markdown table.
87- # Replicates criterion-compare-action's output:
8879# - Parses actual duration values (not rank factors) for the % column
8980# - Bolds the faster duration and % cell when the difference is
9081# statistically significant (error bounds do not overlap)
9182# ---------------------------------------------------------------------------
92- cat > /tmp/parse_critcmp.py << 'PYEOF '
93- import sys, re
94-
95- def to_seconds(value, units):
96- u = units.strip()
97- if u == 's': return value
98- if u == 'ms': return value / 1e3
99- if u in ('µs', 'us', 'μs'): return value / 1e6
100- if u == 'ns': return value / 1e9
101- return value
102-
103- def is_significant(chg_dur, chg_err, base_dur, base_err):
104- if chg_dur < base_dur:
105- return chg_dur + chg_err < base_dur or base_dur - base_err > chg_dur
106- else:
107- return chg_dur - chg_err > base_dur or base_dur + base_err < chg_dur
108-
109- def parse_duration(s):
110- m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
111- if not m:
112- return None
113- return float(m.group(1)), float(m.group(2)), m.group(3).strip()
114-
115- lines = sys.stdin.read().splitlines()
116- print("| Test | Base | PR | % |")
117- print("|------|--------------|------------------|---|")
118-
119- for line in lines[2:]: # skip critcmp header rows
120- if not line.strip():
121- continue
122- # critcmp columns (split on 2+ spaces):
123- # with throughput: name, baseFactor, baseDuration, baseBandwidth, changesFactor, changesDuration, changesBandwidth
124- # without throughput: name, baseFactor, baseDuration, changesFactor, changesDuration
125- # Locate duration fields by the presence of "±" rather than hardcoding indices,
126- # so the script works correctly regardless of whether bandwidth columns are present.
127- fields = re.split(r' +', line)
128- name = fields[0].strip().replace('|', r'\|') if fields else ''
129- dur_fields = [f.strip() for f in fields[1:] if '±' in f]
130- base_dur_str = dur_fields[0] if len(dur_fields) > 0 else None
131- chg_dur_str = dur_fields[1] if len(dur_fields) > 1 else None
132-
133- if not name and not base_dur_str and not chg_dur_str:
134- continue
135-
136- base_display = base_dur_str or 'N/A'
137- chg_display = chg_dur_str or 'N/A'
138- difference = 'N/A'
139-
140- if base_dur_str and chg_dur_str:
141- base_p = parse_duration(base_dur_str)
142- chg_p = parse_duration(chg_dur_str)
143- if base_p and chg_p:
144- base_secs = to_seconds(base_p[0], base_p[2])
145- base_err_secs = to_seconds(base_p[1], base_p[2])
146- chg_secs = to_seconds(chg_p[0], chg_p[2])
147- chg_err_secs = to_seconds(chg_p[1], chg_p[2])
148-
149- pct = -(1 - chg_secs / base_secs) * 100
150- prefix = '' if chg_secs <= base_secs else '+'
151- difference = f'{prefix}{pct:.2f}%'
152-
153- if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
154- if chg_secs < base_secs:
155- chg_display = f'**{chg_dur_str}**'
156- elif chg_secs > base_secs:
157- base_display = f'**{base_dur_str}**'
158- difference = f'**{difference}**'
159-
160- print(f'| {name} | {base_display} | {chg_display} | {difference} |')
161- PYEOF
162-
163- COMPARISON=$(( cd benchmarks && critcmp base changes) | python3 / tmp/ parse_critcmp.py)
83+ # Use `critcmp` to compare the criterion output for `base` and `changes`. We use `critcmp` instead of manually
84+ # parsing criterion output because criterion may change its output format. By relying on `critcmp`, we pick up
85+ # any updates to its criterion-output parsing automatically.
86+ COMPARISON=$(( cd benchmarks && critcmp base changes) | python3 benchmarks/ ci/ parse_critcmp.py)
16487
16588# ---------------------------------------------------------------------------
16689# 5. Write results to /tmp/bench-comment.md
0 commit comments