Skip to content
Open
14 changes: 14 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,17 @@ jobs:

- name: Build
run: npm run bundle-all

- name: Verify no audit-harness markers leaked into build output
# Delegates to scripts/marker-scan.sh so the live CI scan and the
# self-test below exercise the same code path and cannot drift.
shell: bash
run: bash scripts/marker-scan.sh dist commonjs es typings languages

- name: Self-test marker-scan logic against synthetic fixtures
# Run once per OS (the script is platform-agnostic and only takes <1s).
# Validates that scripts/marker-scan.sh catches markers in dist/*.js,
# dist/*.js.map (sourcesContent), commonjs/*.js, es/*.mjs, and
# typings/*.d.ts surfaces.
if: matrix.node-version == 22 && matrix.install-command == 'ci'
shell: bash
run: bash scripts/test-marker-scan.sh
Comment thread
cursor[bot] marked this conversation as resolved.
76 changes: 76 additions & 0 deletions scripts/marker-scan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
# Shared audit-harness marker scan used by both the CI verify step in
# .github/workflows/build.yml and the self-test in scripts/test-marker-scan.sh.
#
# Centralizing the scan in one script keeps the live CI logic and its self-test
# from drifting independently — a workflow-only edit would otherwise miss the
# self-test fixture coverage.
#
# Why this scan exists:
# Greps build output for audit-harness citation markers and the `§AuditSources`
# footer — internal spec-drafting tokens that must never ship in compiled
# output. Markers are the current lowercase prefixed form `[vrf_1]`/`[dec_3]`/
# `[con_2]`/`[que_5]`/`[wrg_7]`/`[crf_4]` (parser `^\[[a-z][a-z0-9_]*\]$`) plus
# the legacy `[V<n>]` form. The scan pattern below only matches the six
# prefixes listed here followed by `_<digits>`, not every token the parser
# grammar would accept.
#
# Source comments leak through several surfaces; every directory passed on the
# command line is scanned recursively, which covers:
#   1) commonjs/*.js and es/*.mjs (babel preserves comments)
#   2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves
#      comments in the development build)
#   3) dist/*.js.map (`sourcesContent` embeds full original source, so markers
#      survive in source-maps even if stripped from .js)
#   4) any other output directory the caller names (CI also passes `typings`
#      and `languages`)
# `grep -rn` over dist/ catches .map files because they are plain JSON.
#
# Usage:
#   bash scripts/marker-scan.sh <path> [<path> ...]
#
# Arguments that are not existing directories are silently dropped, so the
# same invocation works for builds that do not produce every output dir.
#
# Exit codes:
#   0  — scan ran cleanly, no markers found (or no requested dirs exist)
#   1  — markers found in at least one scanned file
#   2  — usage error (no paths given)
#   2+ — grep failed with an I/O / scan error; the caller must surface this

set -u

if [ "$#" -lt 1 ]; then
  echo "usage: $0 <path> [<path> ...]" >&2
  exit 2
fi

# Keep only the arguments that exist as directories.
paths=()
for dir in "$@"; do
  if [ -d "$dir" ]; then
    paths+=("$dir")
  fi
done

if [ "${#paths[@]}" -eq 0 ]; then
  echo "No build output directories found ($*); skipping marker scan."
  exit 0
fi

echo "Scanning ${paths[*]} for audit-harness markers..."

# grep exit codes: 0=match, 1=no-match, 2+=scan/IO error. A bare `if grep`
# collapses 1 and 2 into the same branch, silently green-lighting on read
# errors, so capture the status and branch on it explicitly.
#
# The status is captured with `|| rc=$?` rather than a `set +e` / `set -e`
# bracket: this script never enables errexit, and re-"enabling" it after the
# grep would switch it on for the remainder of the script as a side effect.
# `--` stops option parsing so a directory name starting with `-` is still
# treated as a path.
rc=0
grep -rnE '\[(V[0-9]+|(vrf|dec|con|que|wrg|crf)_[0-9]+)\]|§[[:space:]]*AuditSources' -- "${paths[@]}" || rc=$?

case "$rc" in
  0)
    # grep already printed every offending file:line above this message.
    echo ""
    echo "ERROR: audit-harness markers ([V<n>]/[vrf_n] or §AuditSources) found in build output."
    echo "These markers are an internal spec-drafting convention and must never"
    echo "ship in compiled JS. Strip them from the source comments/strings above."
    exit 1
    ;;
  1)
    echo "OK: no audit-harness markers found in build output."
    exit 0
    ;;
  *)
    echo "ERROR: grep exited with rc=$rc while scanning ${paths[*]} — aborting scan." >&2
    exit "$rc"
    ;;
esac
189 changes: 189 additions & 0 deletions scripts/test-marker-scan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#!/usr/bin/env bash
# Self-test for the audit-harness marker scan used in .github/workflows/build.yml.
#
# Why this exists:
# The CI step in build.yml invokes
# `scripts/marker-scan.sh dist commonjs es typings languages` to grep the build
# output for `[V<n>]`/`[vrf_n]` citation markers and the `§AuditSources` footer — internal
# spec-drafting tokens that must never ship in compiled JS. Empirically
# (probed 2026-05-25 by planting `// [V99] test marker` in src/index.ts and
# running `npm run bundle-all`), markers leak through THREE distinct surfaces:
# 1) babel-transpiled commonjs/*.js and es/*.mjs preserve source comments
# 2) webpack-bundled dist/hyperformula.js and dist/hyperformula.full.js
# preserve source comments (development build, no comment-stripping)
# 3) dist/*.js.map source-maps embed full original source in
# `sourcesContent`, so comments survive into the map even when stripped
# from the .js itself (not applicable here, but defends future configs)
# This script exercises the SAME scripts/marker-scan.sh that CI runs, against
# synthetic fixtures covering all three surfaces plus typings/*.d.ts
# declarations — so the workflow step and the self-test cannot drift
# independently.
#
# Exit code: 0 on all assertions pass, non-zero on any failure.

# No `-e`: the assertion harness deliberately captures non-zero scan statuses.
set -uo pipefail

# Resolve the directory holding this script so the self-test works from any
# working directory. Assignment is kept separate from `readonly` because
# `readonly VAR="$(cmd)"` reports the status of `readonly`, hiding a failure
# of the command substitution.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "ERROR: cannot resolve the directory of ${BASH_SOURCE[0]}" >&2
  exit 2
}
readonly SCRIPT_DIR
readonly MARKER_SCAN="$SCRIPT_DIR/marker-scan.sh"

# All fixtures live under one private temp directory. Abort if it cannot be
# created: an empty TMP_ROOT would make fixture paths resolve under `/`.
TMP_ROOT="$(mktemp -d -t hf-marker-scan-XXXXXX)" || {
  echo "ERROR: mktemp failed; cannot create fixture root" >&2
  exit 2
}
readonly TMP_ROOT

# Remove every fixture on any exit path; `--` guards against a temp path that
# starts with `-`.
trap 'rm -rf -- "$TMP_ROOT"' EXIT

# ---- Adapter: drives the shared scan against a synthetic fixture root ------
# Invokes scripts/marker-scan.sh — the same script CI runs — passing whichever
# of the fixture's dist/commonjs/es/typings/languages subdirectories exist.
# This is the only place where the self-test couples to the scan; the scan
# logic itself lives in marker-scan.sh.
run_marker_scan() {
  # $1 - fixture root directory.
  # Runs "$MARKER_SCAN" over the fixture's build-output subdirectories and
  # returns the scanner's exit status unchanged
  # (0 = clean, 1 = markers found, anything else = scan error).
  local fixture_root="$1"
  local -a scan_dirs=()
  local name

  # Same directory list, in the same order, as the build.yml invocation so
  # the self-test and CI cannot drift.
  for name in dist commonjs es typings languages; do
    [ -d "$fixture_root/$name" ] && scan_dirs+=("$fixture_root/$name")
  done

  # With nothing to scan, hand the scanner a single non-existent path so its
  # own "no dirs" branch decides the (clean) outcome.
  if [ "${#scan_dirs[@]}" -eq 0 ]; then
    scan_dirs=("$fixture_root/__missing__")
  fi

  local status=0
  bash "$MARKER_SCAN" "${scan_dirs[@]}" || status=$?
  return "$status"
}

# ---- Fixture builders -------------------------------------------------------
make_clean_fixture() {
  # $1 - fixture root directory (created if missing).
  # Writes a marker-free synthetic build tree: a dist bundle and its
  # source-map, one commonjs module, and one es module.
  local fixture_root="$1"
  mkdir -p "$fixture_root/dist" "$fixture_root/commonjs" "$fixture_root/es"

  printf '%s\n' \
    '// HyperFormula bundle (synthetic, clean)' \
    'const HyperFormula = function() { return 42; };' \
    'module.exports = HyperFormula;' \
    >"$fixture_root/dist/hyperformula.js"

  # The \n sequences are literal JSON escapes; printf only interprets its
  # format string, not the %s argument.
  printf '%s\n' \
    '{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["const HyperFormula = function() { return 42; };\nmodule.exports = HyperFormula;\n"],"mappings":"AAAA"}' \
    >"$fixture_root/dist/hyperformula.js.map"

  printf '%s\n' \
    '"use strict";' \
    'exports.foo = 1;' \
    >"$fixture_root/commonjs/index.js"

  printf '%s\n' 'export const foo = 1;' >"$fixture_root/es/index.mjs"
}

# Dirty fixture: clean tree plus a legacy [V<n>] marker appended to the dist
# bundle, as a preserved source comment would appear.
make_marker_in_dist_js() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  printf '%s\n' '// [V12] internal citation marker — must not ship' \
    >>"$fixture_root/dist/hyperformula.js"
}

# Dirty fixture: the dist .js stays clean, but the source-map is overwritten
# with one whose sourcesContent carries a legacy [V<n>] marker.
make_marker_in_sourcemap() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  # Replaces (does not append to) the clean map. The \n sequences are literal
  # JSON escapes because printf leaves %s arguments uninterpreted.
  printf '%s\n' \
    '{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["// [V7] citation that survived into sourcesContent\nconst HyperFormula = function() { return 42; };\n"],"mappings":"AAAA"}' \
    >"$fixture_root/dist/hyperformula.js.map"
}

# Dirty fixture: clean tree plus a §AuditSources footer appended to the
# commonjs module.
make_marker_in_commonjs() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  printf '%s\n' '// §AuditSources: internal/spec.md' \
    >>"$fixture_root/commonjs/index.js"
}

# Dirty fixture: clean tree plus a legacy [V<n>] marker appended to the
# es module.
make_marker_in_es() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  printf '%s\n' '// [V42] another internal token' \
    >>"$fixture_root/es/index.mjs"
}

# Dirty fixture: clean tree plus a CURRENT-grammar lowercase marker ([vrf_3])
# appended to the commonjs module. The pre-2026-05-21 [V<n>] form is legacy;
# real specs emit [vrf_/dec_/con_/que_/wrg_/crf_]_<digits>. This case
# isolates the scan-PATTERN coverage.
make_marker_in_commonjs_current() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  printf '%s\n' '// [vrf_3] current-grammar citation marker — must not ship' \
    >>"$fixture_root/commonjs/index.js"
}

# Dirty fixture: clean tree plus a typings/ directory whose declaration file
# carries a marker. `bundle:typings` (tsc -d) and `bundle:languages` are build
# outputs that preserve source comments, so they are leak surfaces too. A
# legacy [V<n>] marker is used to isolate the DIRECTORY-coverage defect from
# the pattern defect.
make_marker_in_typings() {
  local fixture_root="$1"
  make_clean_fixture "$fixture_root"
  mkdir -p "$fixture_root/typings"
  printf '%s\n' \
    '/** Public API surface. [V12] citation leaked into declarations. */' \
    'export declare const foo: number;' \
    >"$fixture_root/typings/index.d.ts"
}

# ---- Assertion harness ------------------------------------------------------
# Running totals, updated by assert_scan and read by the final summary.
PASS_COUNT=0
FAIL_COUNT=0

#######################################
# Runs the marker scan against one fixture and records whether the outcome
# matches the expectation.
# Globals:   PASS_COUNT, FAIL_COUNT (one of them is incremented)
# Arguments: $1 - human-readable case name
#            $2 - expected outcome: "clean" (scan rc 0) or "dirty" (scan rc 1)
#            $3 - fixture root passed to run_marker_scan
# Outputs:   one PASS/FAIL line on stdout; on FAIL, the captured scan output
# Returns:   0 always; failures are reported through FAIL_COUNT
#######################################
assert_scan() {
  local name="$1"
  local expected="$2"
  local root="$3"

  # Capture the scan's stdout+stderr in a variable instead of a fixed file in
  # /tmp: a hardcoded path is not removed by the script's EXIT trap and would
  # be shared (and clobbered) between concurrent runs.
  local rc=0
  local scan_output
  scan_output="$(run_marker_scan "$root" 2>&1)" || rc=$?

  # Map the scanner's exit status onto the vocabulary used by $expected. Any
  # status other than 0/1 is a scan error and can never equal an expectation.
  local got
  case "$rc" in
    0) got="clean" ;;
    1) got="dirty" ;;
    *) got="error($rc)" ;;
  esac

  if [ "$got" = "$expected" ]; then
    echo "PASS $name (expected=$expected, got=$got)"
    PASS_COUNT=$((PASS_COUNT + 1))
  else
    echo "FAIL $name (expected=$expected, got=$got)"
    echo " scan output:"
    # Indent the captured output so it reads as belonging to this failure.
    if [ -n "$scan_output" ]; then
      printf '%s\n' "$scan_output" | sed 's/^/ /'
    fi
    FAIL_COUNT=$((FAIL_COUNT + 1))
  fi
}

# ---- Test cases -------------------------------------------------------------
echo "=== audit-marker scan self-test ==="
echo "Fixture root: $TMP_ROOT"
echo ""

# Each case builds its own fixture under TMP_ROOT, then asserts the scan
# outcome for it. Only the first case is expected to be clean.

# Baseline: nothing leaked.
f="$TMP_ROOT/case-clean"
make_clean_fixture "$f"
assert_scan "clean build (no markers)" "clean" "$f"

# Legacy marker in the webpack bundle.
f="$TMP_ROOT/case-dist-js"
make_marker_in_dist_js "$f"
assert_scan "marker in dist/*.js comment" "dirty" "$f"

# Legacy marker only in the source-map.
f="$TMP_ROOT/case-dist-map"
make_marker_in_sourcemap "$f"
assert_scan "marker in dist/*.js.map (sourcesContent)" "dirty" "$f"

# Footer token in babel commonjs output.
f="$TMP_ROOT/case-commonjs"
make_marker_in_commonjs "$f"
assert_scan "§AuditSources in commonjs/*.js" "dirty" "$f"

# Legacy marker in babel es output.
f="$TMP_ROOT/case-es"
make_marker_in_es "$f"
assert_scan "marker in es/*.mjs" "dirty" "$f"

# Current-grammar marker in commonjs output.
f="$TMP_ROOT/case-commonjs-current"
make_marker_in_commonjs_current "$f"
assert_scan "current [vrf_n] marker in commonjs/*.js" "dirty" "$f"

# Legacy marker in generated declarations.
f="$TMP_ROOT/case-typings"
make_marker_in_typings "$f"
assert_scan "marker in typings/*.d.ts" "dirty" "$f"

echo ""
echo "=== summary: $PASS_COUNT passed, $FAIL_COUNT failed ==="

# Exit non-zero only when at least one assertion failed.
[ "$FAIL_COUNT" -gt 0 ] || exit 0
exit 1
Loading