Move critcmp parsing script to benchmarks/ci/parse_critcmp.py; refactor /bench argument parsing

lorenarosati · lorenarosati · commit 16844fe2ff81 · 2026-03-30T19:34:00.000Z
diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh
@@ -32,33 +32,25 @@ if [[ -z "$ARGS" ]]; then
   # Bare /bench with no args: default to the "base" tag
   TAGS="base"
 else
-  # Parse --tags and --filter flags; each takes the next whitespace-delimited
-  # token as its value. Unknown tokens are silently ignored.
-  while [[ -n "$ARGS" ]]; do
-    TOKEN="${ARGS%% *}"
-    ARGS="${ARGS#"$TOKEN"}"
-    ARGS="${ARGS##+( )}"
-
-    if [[ "$TOKEN" == "--tags" ]]; then
-      TAGS="${ARGS%% *}"
-      ARGS="${ARGS#"$TAGS"}"
-      ARGS="${ARGS##+( )}"
-    elif [[ "$TOKEN" == "--filter" ]]; then
-      FILTER="${ARGS%% *}"
-      ARGS="${ARGS#"$FILTER"}"
-      ARGS="${ARGS##+( )}"
-    fi
+  # Normalize: turn newlines/CRs/tabs into spaces, squeeze repeated spaces,
+  # then delete every character outside a safe allowlist before parsing.
+  ARGS=$(printf '%s' "$ARGS" | tr '\n\r\t' ' ' | tr -s ' ' | tr -cd 'a-zA-Z0-9,_./|*+?()[]^$ -')
+  ARGS="${ARGS## }"   # strip one leading space
+  ARGS="${ARGS%% }"   # strip one trailing space
+
+  read -ra TOKENS <<< "$ARGS"   # split on IFS (whitespace by default) into an array of tokens
+  i=0
+  while [[ $i -lt ${#TOKENS[@]} ]]; do
+    case "${TOKENS[$i]}" in
+      --tags)   i=$((i + 1)); TAGS="${TOKENS[$i]:-}"   ;;   # value = next token; empty if there is none
+      --filter) i=$((i + 1)); FILTER="${TOKENS[$i]:-}" ;;   # NOTE(review): a flag as the next token is taken as the value
+      *)        echo "Unknown token: '${TOKENS[$i]}'" >&2; exit 1 ;;
+    esac
+    i=$((i + 1))   # step past the current token (or the value just consumed)
   done
 fi
 
-# Sanitize tags: strict allowlist (alphanumeric, comma, underscore, hyphen)
-TAGS=$(printf '%s' "$TAGS" | tr -cd 'a-zA-Z0-9,_-')
-
-# Sanitize filter: strip control characters only, preserving regex metacharacters.
-# The filter is always passed double-quoted to cargo bench.
-FILTER=$(printf '%s' "$FILTER" | tr -d '\000-\037\177')
-
-# If nothing was parsed (unrecognized tokens, typos, missing values), default to "base"
+# Default: if nothing was parsed, run with BENCH_TAGS=base
 if [[ -z "$TAGS" && -z "$FILTER" ]]; then
   TAGS="base"
 fi
@@ -84,83 +76,14 @@ git checkout FETCH_HEAD
 
 # ---------------------------------------------------------------------------
 # 4. Compare baselines with critcmp and format as a markdown table.
-#    Replicates criterion-compare-action's output:
 #      - Parses actual duration values (not rank factors) for the % column
 #      - Bolds the faster duration and % cell when the difference is
 #        statistically significant (error bounds do not overlap)
 # ---------------------------------------------------------------------------
-cat > /tmp/parse_critcmp.py << 'PYEOF'
-import sys, re
-
-def to_seconds(value, units):
-    u = units.strip()
-    if u == 's':   return value
-    if u == 'ms':  return value / 1e3
-    if u in ('µs', 'us', 'μs'): return value / 1e6
-    if u == 'ns':  return value / 1e9
-    return value
-
-def is_significant(chg_dur, chg_err, base_dur, base_err):
-    if chg_dur < base_dur:
-        return chg_dur + chg_err < base_dur or base_dur - base_err > chg_dur
-    else:
-        return chg_dur - chg_err > base_dur or base_dur + base_err < chg_dur
-
-def parse_duration(s):
-    m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
-    if not m:
-        return None
-    return float(m.group(1)), float(m.group(2)), m.group(3).strip()
-
-lines = sys.stdin.read().splitlines()
-print("| Test | Base         | PR               | % |")
-print("|------|--------------|------------------|---|")
-
-for line in lines[2:]:  # skip critcmp header rows
-    if not line.strip():
-        continue
-    # critcmp columns (split on 2+ spaces):
-    #   with throughput:    name, baseFactor, baseDuration, baseBandwidth, changesFactor, changesDuration, changesBandwidth
-    #   without throughput: name, baseFactor, baseDuration, changesFactor, changesDuration
-    # Locate duration fields by the presence of "±" rather than hardcoding indices,
-    # so the script works correctly regardless of whether bandwidth columns are present.
-    fields = re.split(r'  +', line)
-    name = fields[0].strip().replace('|', r'\|') if fields else ''
-    dur_fields = [f.strip() for f in fields[1:] if '±' in f]
-    base_dur_str = dur_fields[0] if len(dur_fields) > 0 else None
-    chg_dur_str  = dur_fields[1] if len(dur_fields) > 1 else None
-
-    if not name and not base_dur_str and not chg_dur_str:
-        continue
-
-    base_display = base_dur_str or 'N/A'
-    chg_display  = chg_dur_str  or 'N/A'
-    difference   = 'N/A'
-
-    if base_dur_str and chg_dur_str:
-        base_p = parse_duration(base_dur_str)
-        chg_p  = parse_duration(chg_dur_str)
-        if base_p and chg_p:
-            base_secs     = to_seconds(base_p[0], base_p[2])
-            base_err_secs = to_seconds(base_p[1], base_p[2])
-            chg_secs      = to_seconds(chg_p[0],  chg_p[2])
-            chg_err_secs  = to_seconds(chg_p[1],  chg_p[2])
-
-            pct    = -(1 - chg_secs / base_secs) * 100
-            prefix = '' if chg_secs <= base_secs else '+'
-            difference = f'{prefix}{pct:.2f}%'
-
-            if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
-                if chg_secs < base_secs:
-                    chg_display = f'**{chg_dur_str}**'
-                elif chg_secs > base_secs:
-                    base_display = f'**{base_dur_str}**'
-                difference = f'**{difference}**'
-
-    print(f'| {name} | {base_display} | {chg_display} | {difference} |')
-PYEOF
-
-COMPARISON=$((cd benchmarks && critcmp base changes) | python3 /tmp/parse_critcmp.py)
+# Use `critcmp` to compare the criterion results for `base` and `changes` rather than parsing criterion's
+# output ourselves: criterion may change its output format, and `critcmp` tracks those changes for us.
+# The space after `$(` keeps the shell from reading `$((` as the start of an arithmetic expansion.
+COMPARISON=$( (cd benchmarks && critcmp base changes) | python3 benchmarks/ci/parse_critcmp.py)
 
 # ---------------------------------------------------------------------------
 # 5. Write results to /tmp/bench-comment.md
diff --git a/benchmarks/ci/parse_critcmp.py b/benchmarks/ci/parse_critcmp.py
@@ -0,0 +1,104 @@
+"""Render `critcmp` comparison output (stdin) as a markdown table (stdout).
+
+The % column is computed from the parsed durations, and the faster duration
+and the % cell are bolded when one measurement lies outside the other's
+error band.
+"""
+import sys, re
+
+
+def to_seconds(value, units):
+    """Convert `value` in `units` (s, ms, µs/us/μs, ns) to seconds.
+
+    Values with an unrecognized unit are returned unchanged.
+    """
+    u = units.strip()
+    if u == 's':   return value
+    if u == 'ms':  return value / 1e3
+    if u in ('µs', 'us', 'μs'): return value / 1e6
+    if u == 'ns':  return value / 1e9
+    return value
+
+
+def is_significant(chg_dur, chg_err, base_dur, base_err):
+    """Return True when either duration lies outside the other's error band."""
+    if chg_dur < base_dur:
+        return chg_dur + chg_err < base_dur or base_dur - base_err > chg_dur
+    else:
+        return chg_dur - chg_err > base_dur or base_dur + base_err < chg_dur
+
+
+def parse_duration(s):
+    """Parse "<value>±<error><unit>" into (value, error, unit); None if malformed."""
+    m = re.match(r'([0-9.]+)±([0-9.]+)(.+)', s.strip())
+    if not m:
+        return None
+    try:
+        return float(m.group(1)), float(m.group(2)), m.group(3).strip()
+    except ValueError:
+        # "[0-9.]+" also matches non-numbers such as "1.2.3" or "."
+        return None
+
+
+def format_row(line):
+    """Turn one critcmp data line into a markdown table row, or None to skip it."""
+    # critcmp columns (split on 2+ spaces):
+    #   with throughput:    name, baseFactor, baseDuration, baseBandwidth, changesFactor, changesDuration, changesBandwidth
+    #   without throughput: name, baseFactor, baseDuration, changesFactor, changesDuration
+    # Locate duration fields by the presence of "±" rather than hardcoding indices,
+    # so the script works correctly regardless of whether bandwidth columns are present.
+    fields = re.split(r'  +', line)
+    name = fields[0].strip().replace('|', r'\|')  # keep '|' in names from splitting the table cell
+    dur_fields = [f.strip() for f in fields[1:] if '±' in f]
+    base_dur_str = dur_fields[0] if len(dur_fields) > 0 else None
+    chg_dur_str  = dur_fields[1] if len(dur_fields) > 1 else None
+
+    if not name and not base_dur_str and not chg_dur_str:
+        return None
+
+    base_display = base_dur_str or 'N/A'
+    chg_display  = chg_dur_str  or 'N/A'
+    difference   = 'N/A'
+
+    if base_dur_str and chg_dur_str:
+        base_p = parse_duration(base_dur_str)
+        chg_p  = parse_duration(chg_dur_str)
+        if base_p and chg_p:
+            base_secs     = to_seconds(base_p[0], base_p[2])
+            base_err_secs = to_seconds(base_p[1], base_p[2])
+            chg_secs      = to_seconds(chg_p[0],  chg_p[2])
+            chg_err_secs  = to_seconds(chg_p[1],  chg_p[2])
+
+            # A zero base duration has no meaningful relative change (and would
+            # divide by zero), so such rows keep 'N/A' in the % column.
+            if base_secs > 0:
+                pct    = -(1 - chg_secs / base_secs) * 100
+                prefix = '' if chg_secs <= base_secs else '+'
+                difference = f'{prefix}{pct:.2f}%'
+
+                if is_significant(chg_secs, chg_err_secs, base_secs, base_err_secs):
+                    if chg_secs < base_secs:
+                        chg_display = f'**{chg_dur_str}**'
+                    elif chg_secs > base_secs:
+                        base_display = f'**{base_dur_str}**'
+                    difference = f'**{difference}**'
+
+    return f'| {name} | {base_display} | {chg_display} | {difference} |'
+
+
+def main():
+    """Read critcmp output from stdin and print the markdown table to stdout."""
+    lines = sys.stdin.read().splitlines()
+    print("| Test | Base         | PR               | % |")
+    print("|------|--------------|------------------|---|")
+
+    for line in lines[2:]:  # skip critcmp header rows
+        if not line.strip():
+            continue
+        row = format_row(line)
+        if row is not None:
+            print(row)
+
+
+if __name__ == '__main__':
+    main()