diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 13313f8a7..e785a9ed4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -37,3 +37,17 @@ jobs:
 
       - name: Build
         run: npm run bundle-all
+
+      - name: Verify no audit-harness markers leaked into build output
+        # Delegates to scripts/marker-scan.sh so the live CI scan and the
+        # self-test below exercise the same code path and cannot drift.
+        shell: bash
+        run: bash scripts/marker-scan.sh dist commonjs es typings languages
+
+      - name: Self-test marker-scan logic against synthetic fixtures
+        # Run once per OS (the script is platform-agnostic and only takes <1s).
+        # Validates that scripts/marker-scan.sh catches markers in dist/*.js,
+        # dist/*.js.map (sourcesContent), commonjs/*.js, es/*.mjs and typings/*.d.ts.
+        if: matrix.node-version == 22 && matrix.install-command == 'ci'
+        shell: bash
+        run: bash scripts/test-marker-scan.sh
diff --git a/scripts/marker-scan.sh b/scripts/marker-scan.sh
new file mode 100755
index 000000000..cd304ce6d
--- /dev/null
+++ b/scripts/marker-scan.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+# Shared audit-harness marker scan used by both the CI verify step in
+# .github/workflows/build.yml and the self-test in scripts/test-marker-scan.sh.
+#
+# Centralizing the scan in one script keeps the live CI logic and its self-test
+# from drifting independently — a workflow-only edit would otherwise miss the
+# self-test fixture coverage.
+#
+# Why this scan exists:
+#   Greps build output for audit-harness citation markers and the `§AuditSources`
+#   footer — internal spec-drafting tokens that must never ship in compiled
+#   output. Markers are the current lowercase prefixed form `[vrf_1]`/`[dec_3]`/
+#   `[con_2]`/`[que_5]`/`[wrg_7]`/`[crf_4]` (parser `^\[[a-z][a-z0-9_]*\]$`) plus
+#   the legacy `[V<n>]` form. Source comments leak through THREE surfaces and
+#   ALL are covered here:
+#     1) commonjs/*.js and es/*.mjs (babel preserves comments)
+#     2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves
+#        comments in the development build)
+#     3) dist/*.js.map (`sourcesContent` embeds full original source, so markers
+#        survive in source-maps even if stripped from .js)
+#   `grep -rn` over dist/ catches .map files because they are plain JSON.
+#
+# Usage:
+#   bash scripts/marker-scan.sh <dir> [<dir> ...]
+#
+# Exit codes:
+#   0  — scan ran cleanly, no markers found (or no requested dirs exist)
+#   1  — markers found in at least one scanned file
+#   2+ — grep failed with an I/O / scan error; the caller must surface this
+#        (2 is also used for a usage error: no directory arguments given)
+
+set -u
+
+if [ "$#" -lt 1 ]; then
+  echo "usage: $0 <dir> [<dir> ...]" >&2
+  exit 2
+fi
+
+paths=()
+for dir in "$@"; do
+  if [ -d "$dir" ]; then
+    paths+=("$dir")
+  fi
+done
+
+if [ ${#paths[@]} -eq 0 ]; then
+  echo "No build output directories found ($*); skipping marker scan."
+  exit 0
+fi
+
+echo "Scanning ${paths[*]} for audit-harness markers..."
+
+# grep exit codes: 0=match, 1=no-match, 2+=scan/IO error. A bare `if grep`
+# collapses 1 and 2 into the same branch, silently green-lighting on read
+# errors. Capture rc with `|| rc=$?` and branch on it explicitly. errexit is
+# never enabled in this script (only `set -u`), so no `set +e`/`set -e`
+# toggling is needed. `-e PATTERN --` keeps a directory name that starts with
+# `-` from being parsed as a grep option.
+rc=0
+grep -rnE -e '\[(V[0-9]+|(vrf|dec|con|que|wrg|crf)_[0-9]+)\]|§[[:space:]]*AuditSources' -- "${paths[@]}" || rc=$?
+
+case "$rc" in
+  0)
+    echo ""
+    echo "ERROR: audit-harness markers ([V<n>]/[vrf_n] or §AuditSources) found in build output."
+    echo "These markers are an internal spec-drafting convention and must never"
+    echo "ship in compiled JS. Strip them from the source comments/strings above."
+    exit 1
+    ;;
+  1)
+    echo "OK: no audit-harness markers found in build output."
+    exit 0
+    ;;
+  *)
+    echo "ERROR: grep exited with rc=$rc while scanning ${paths[*]} — aborting scan." >&2
+    exit "$rc"
+    ;;
+esac
diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh
new file mode 100755
index 000000000..e330a0e73
--- /dev/null
+++ b/scripts/test-marker-scan.sh
@@ -0,0 +1,196 @@
+#!/usr/bin/env bash
+# Self-test for the audit-harness marker scan used in .github/workflows/build.yml.
+#
+# Why this exists:
+#   The CI step in build.yml invokes
+#   `scripts/marker-scan.sh dist commonjs es typings languages` to grep the build
+#   output for `[V<n>]`/`[vrf_n]` citation markers and the `§AuditSources` footer — internal
+#   spec-drafting tokens that must never ship in compiled JS. Empirically
+#   (probed 2026-05-25 by planting `// [V99] test marker` in src/index.ts and
+#   running `npm run bundle-all`), markers leak through THREE distinct surfaces:
+#     1) babel-transpiled commonjs/*.js and es/*.mjs preserve source comments
+#     2) webpack-bundled dist/hyperformula.js and dist/hyperformula.full.js
+#        preserve source comments (development build, no comment-stripping)
+#     3) dist/*.js.map source-maps embed full original source in
+#        `sourcesContent`, so comments survive into the map even when stripped
+#        from the .js itself (not applicable here, but defends future configs)
+#   This script exercises the SAME scripts/marker-scan.sh that CI runs, against
+#   synthetic fixtures covering all three surfaces — so the workflow step and
+#   the self-test cannot drift independently.
+#
+# Exit code: 0 on all assertions pass, non-zero on any failure.
+
+set -uo pipefail
+
+# `readonly VAR="$(cmd)"` would hide cmd's exit status, so assign first, check,
+# then freeze. An empty TMP_ROOT must never be used: the fixtures below would
+# be created directly under `/`.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 2
+MARKER_SCAN="$SCRIPT_DIR/marker-scan.sh"
+TMP_ROOT="$(mktemp -d -t hf-marker-scan-XXXXXX)" || {
+  echo "ERROR: mktemp failed; cannot create the fixture root." >&2
+  exit 2
+}
+readonly SCRIPT_DIR MARKER_SCAN TMP_ROOT
+trap 'rm -rf "$TMP_ROOT"' EXIT
+
+# ---- Adapter: drives the shared scan against a synthetic fixture root ------
+# Invokes scripts/marker-scan.sh — the same script CI runs — passing whichever
+# of the fixture's dist/commonjs/es/typings/languages subdirectories exist.
+# This is the only place where the self-test couples to the scan; the scan
+# logic itself lives in marker-scan.sh.
+#
+# Args:    $1 — fixture root; its dist/commonjs/es/typings/languages subdirs are scanned
+# Returns: the exit status of marker-scan.sh, passed through unchanged
+run_marker_scan() {
+  local root="$1"
+  local paths=()
+  local dir
+  # Mirror the build.yml invocation exactly so the self-test and CI cannot drift.
+  for dir in dist commonjs es typings languages; do
+    if [ -d "$root/$dir" ]; then
+      paths+=("$root/$dir")
+    fi
+  done
+  if [ ${#paths[@]} -eq 0 ]; then
+    # No fixture dirs exist: pass one nonexistent path so the script still gets
+    # an argument and reaches its own "no dirs" branch (rc=0).
+    paths=("$root/__missing__")
+  fi
+  # The function's status is that of its last command, so the script's rc
+  # reaches the caller without any remapping.
+  bash "$MARKER_SCAN" "${paths[@]}"
+}
+
+# ---- Fixture builders -------------------------------------------------------
+# Baseline tree: dist/, commonjs/ and es/ populated with marker-free files.
+make_clean_fixture() {
+  local root="$1"
+  mkdir -p "$root/dist" "$root/commonjs" "$root/es"
+  cat >"$root/dist/hyperformula.js" <<'EOF'
+// HyperFormula bundle (synthetic, clean)
+const HyperFormula = function() { return 42; };
+module.exports = HyperFormula;
+EOF
+  cat >"$root/dist/hyperformula.js.map" <<'EOF'
+{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["const HyperFormula = function() { return 42; };\nmodule.exports = HyperFormula;\n"],"mappings":"AAAA"}
+EOF
+  cat >"$root/commonjs/index.js" <<'EOF'
+"use strict";
+exports.foo = 1;
+EOF
+  cat >"$root/es/index.mjs" <<'EOF'
+export const foo = 1;
+EOF
+}
+
+# Variant: marker in dist .js file (e.g. preserved source comment).
+make_marker_in_dist_js() {
+  local root="$1"
+  make_clean_fixture "$root"
+  cat >>"$root/dist/hyperformula.js" <<'EOF'
+// [V12] internal citation marker — must not ship
+EOF
+}
+
+# Variant: marker in source-map sourcesContent only (stripped from .js).
+make_marker_in_sourcemap() {
+  local root="$1"
+  make_clean_fixture "$root"
+  cat >"$root/dist/hyperformula.js.map" <<'EOF'
+{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["// [V7] citation that survived into sourcesContent\nconst HyperFormula = function() { return 42; };\n"],"mappings":"AAAA"}
+EOF
+}
+
+# Variant: §AuditSources footer leaked into commonjs output.
+make_marker_in_commonjs() {
+  local root="$1"
+  make_clean_fixture "$root"
+  cat >>"$root/commonjs/index.js" <<'EOF'
+// §AuditSources: internal/spec.md
+EOF
+}
+
+# Variant: marker in es/*.mjs.
+make_marker_in_es() {
+  local root="$1"
+  make_clean_fixture "$root"
+  cat >>"$root/es/index.mjs" <<'EOF'
+// [V42] another internal token
+EOF
+}
+
+# Variant: CURRENT lowercase prefixed marker (e.g. [vrf_3]) in commonjs output.
+# The pre-2026-05-21 [V<n>] form is legacy; real specs emit [vrf_/dec_/con_/
+# que_/wrg_/crf_]_<n>. Isolates the scan-PATTERN coverage.
+make_marker_in_commonjs_current() {
+  local root="$1"
+  make_clean_fixture "$root"
+  cat >>"$root/commonjs/index.js" <<'EOF'
+// [vrf_3] current-grammar citation marker — must not ship
+EOF
+}
+
+# Variant: marker leaked into typings/*.d.ts. `bundle:typings` (tsc -d) and
+# `bundle:languages` are build outputs that preserve source comments, so they
+# are leak surfaces too. Uses a legacy [V<n>] marker to isolate the DIRECTORY-
+# coverage defect from the pattern defect.
+make_marker_in_typings() {
+  local root="$1"
+  make_clean_fixture "$root"
+  mkdir -p "$root/typings"
+  cat >"$root/typings/index.d.ts" <<'EOF'
+/** Public API surface. [V12] citation leaked into declarations. */
+export declare const foo: number;
+EOF
+}
+
+# ---- Assertion harness ------------------------------------------------------
+# assert_scan NAME EXPECTED ROOT — scan ROOT, then tally PASS/FAIL vs EXPECTED.
+PASS_COUNT=0
+FAIL_COUNT=0
+
+assert_scan() {
+  local name="$1"
+  local expected="$2" # "clean" -> expect rc 0 ; "dirty" -> expect rc 1
+  local root="$3"
+  local rc=0
+  # Capture under TMP_ROOT (unique per run, removed by the EXIT trap), not /tmp.
+  run_marker_scan "$root" >"$TMP_ROOT/scan-out" 2>&1 || rc=$?
+  local got
+  case "$rc" in
+    0) got="clean" ;;
+    1) got="dirty" ;;
+    *) got="error($rc)" ;;
+  esac
+
+  if [ "$got" = "$expected" ]; then
+    echo "PASS $name (expected=$expected, got=$got)"
+    PASS_COUNT=$((PASS_COUNT + 1))
+  else
+    echo "FAIL $name (expected=$expected, got=$got)"
+    echo " scan output:"
+    sed 's/^/ /' "$TMP_ROOT/scan-out"
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+  fi
+}
+
+# ---- Test cases -------------------------------------------------------------
+echo "=== audit-marker scan self-test ==="
+echo "Fixture root: $TMP_ROOT"
+echo ""
+
+f="$TMP_ROOT/case-clean"; make_clean_fixture "$f"; assert_scan "clean build (no markers)" "clean" "$f"
+f="$TMP_ROOT/case-dist-js"; make_marker_in_dist_js "$f"; assert_scan "marker in dist/*.js comment" "dirty" "$f"
+f="$TMP_ROOT/case-dist-map"; make_marker_in_sourcemap "$f"; assert_scan "marker in dist/*.js.map (sourcesContent)" "dirty" "$f"
+f="$TMP_ROOT/case-commonjs"; make_marker_in_commonjs "$f"; assert_scan "§AuditSources in commonjs/*.js" "dirty" "$f"
+f="$TMP_ROOT/case-es"; make_marker_in_es "$f"; assert_scan "marker in es/*.mjs" "dirty" "$f"
+f="$TMP_ROOT/case-commonjs-current"; make_marker_in_commonjs_current "$f"; assert_scan "current [vrf_n] marker in commonjs/*.js" "dirty" "$f"
+f="$TMP_ROOT/case-typings"; make_marker_in_typings "$f"; assert_scan "marker in typings/*.d.ts" "dirty" "$f"
+
+echo ""
+echo "=== summary: $PASS_COUNT passed, $FAIL_COUNT failed ==="
+
+if [ "$FAIL_COUNT" -gt 0 ]; then
+  exit 1
+fi