From 803f11aafd04fcbe11922c972b9262a612737862 Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Mon, 25 May 2026 08:22:48 +0000 Subject: [PATCH 1/7] CI: fail build when audit-harness citation markers leak into compiled output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a post-build scan step to `.github/workflows/build.yml` that greps `dist/`, `commonjs/`, and `es/` for two internal-only marker patterns: - `\[V[0-9]+\]` — audit-harness citation markers used in spec drafts - `§[[:space:]]*Sources` — section heading used in audit-harness footers Both are conventions from the audit-harness tooling and belong in internal docs/prompts only. If they ever appear in compiled JS it means a comment or string literal slipped through from a spec draft into shipped output — the scan fails the workflow with the offending file path and line number. The step runs after `npm run bundle-all` (which produces the three output directories) and skips gracefully if a directory is missing, so unrelated build failures aren't masked by this guardrail. Manual verification: - Synthesized `dist/foo.js` containing both markers — grep matched both lines and exited 1 with a clear message. - Repeated with clean JS — grep exited 0. - Repeated with no output dirs — step exited 0 (skip path). --- .github/workflows/build.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 13313f8a7..0c17ef897 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,3 +37,30 @@ jobs: - name: Build run: npm run bundle-all + + - name: Verify no audit-harness markers leaked into build output + shell: bash + run: | + # Fails the build if internal audit-harness citation markers ([V]) + # or `§Sources` footers leak from spec drafts/comments into compiled output. + # Markers belong only in internal docs/prompts, never in shipped JS. 
+ set -u + paths=() + for dir in dist commonjs es; do + if [ -d "$dir" ]; then + paths+=("$dir") + fi + done + if [ ${#paths[@]} -eq 0 ]; then + echo "No build output directories found (dist/, commonjs/, es/); skipping marker scan." + exit 0 + fi + echo "Scanning ${paths[*]} for audit-harness markers..." + if grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}"; then + echo "" + echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." + echo "These markers are an internal spec-drafting convention and must never" + echo "ship in compiled JS. Strip them from the source comments/strings above." + exit 1 + fi + echo "OK: no audit-harness markers found in build output." From 2a20b008ddd322537d1711f766140b87a2fbb569 Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Mon, 25 May 2026 13:12:51 +0000 Subject: [PATCH 2/7] CI: branch grep on rc to surface scan errors instead of masking them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugbot review #3296952334 flagged that the `if grep ...` form treats grep's exit code 2 (scan/IO error) identically to exit code 1 (no matches) — so a permission or read error on dist/, commonjs/, or es/ would silently green- light the step. Split the rc into 0/1/other and fail the step explicitly on any non-zero, non-1 result. --- .github/workflows/build.yml | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0c17ef897..7287d1c41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,11 +56,26 @@ jobs: exit 0 fi echo "Scanning ${paths[*]} for audit-harness markers..." - if grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}"; then - echo "" - echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." - echo "These markers are an internal spec-drafting convention and must never" - echo "ship in compiled JS. 
Strip them from the source comments/strings above." - exit 1 - fi - echo "OK: no audit-harness markers found in build output." + # grep exit codes: 0=match, 1=no-match, 2+=scan/IO error. A bare + # `if grep` collapses 1 and 2 into the same branch, silently green- + # lighting the step on a read error. Branch on rc explicitly. + set +e + grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" + rc=$? + set -e + case "$rc" in + 0) + echo "" + echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." + echo "These markers are an internal spec-drafting convention and must never" + echo "ship in compiled JS. Strip them from the source comments/strings above." + exit 1 + ;; + 1) + echo "OK: no audit-harness markers found in build output." + ;; + *) + echo "ERROR: grep exited with rc=$rc while scanning ${paths[*]} — aborting CI step." >&2 + exit "$rc" + ;; + esac From 1083965744ea676597f849e564c99ad5c4831646 Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Mon, 25 May 2026 15:16:24 +0000 Subject: [PATCH 3/7] CI: add integration test for audit-marker scan covering source-maps Validates the build.yml marker-scan step against synthetic fixtures: clean build, marker in dist/*.js, marker in dist/*.js.map (sourcesContent), marker in commonjs/*.js, marker in es/*.mjs. Wired as a single self-test step in build.yml that runs once per OS (node 22, ci install). Empirically confirmed (probed by planting a marker in src/index.ts and running `npm run bundle-all`) that source comments survive into: - commonjs/index.js and es/index.mjs (babel preserves comments) - dist/hyperformula{,.full}.js (webpack development build preserves comments) - dist/hyperformula.js.map (`sourcesContent` embeds full original source) All three surfaces are inside the existing `grep -rn dist commonjs es` scope, so the scan already covers source-maps. The new self-test pins this behavior so a future bundler/comment-stripping change cannot silently erode coverage. 
--- .github/workflows/build.yml | 17 ++++ scripts/test-marker-scan.sh | 154 ++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100755 scripts/test-marker-scan.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7287d1c41..2254a0124 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,6 +44,15 @@ jobs: # Fails the build if internal audit-harness citation markers ([V]) # or `§Sources` footers leak from spec drafts/comments into compiled output. # Markers belong only in internal docs/prompts, never in shipped JS. + # + # Empirical scope notes (probed 2026-05-25, see scripts/test-marker-scan.sh): + # Source comments leak through THREE surfaces and ALL are covered here: + # 1) commonjs/*.js and es/*.mjs (babel preserves comments) + # 2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves + # comments in the development build) + # 3) dist/*.js.map (`sourcesContent` embeds full original source, so + # markers survive in source-maps even if stripped from .js) + # `grep -rn` over dist/ catches .map files because they are plain JSON. set -u paths=() for dir in dist commonjs es; do @@ -79,3 +88,11 @@ jobs: exit "$rc" ;; esac + + - name: Self-test marker-scan logic against synthetic fixtures + # Run once per OS (the script is platform-agnostic and only takes <1s). + # Validates that the inline scan above catches markers in dist/*.js, + # dist/*.js.map (sourcesContent), commonjs/*.js, and es/*.mjs surfaces. + if: matrix.node-version == 22 && matrix.install-command == 'ci' + shell: bash + run: bash scripts/test-marker-scan.sh diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh new file mode 100755 index 000000000..c0ea2d35f --- /dev/null +++ b/scripts/test-marker-scan.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash +# Self-test for the audit-harness marker scan used in .github/workflows/build.yml. 
+# +# Why this exists: +# The CI step in build.yml greps the build output (dist/, commonjs/, es/) for +# `[V]` and `§Sources` markers — internal spec-drafting tokens that must +# never ship in compiled JS. Empirically (probed 2026-05-25 by planting +# `// [V99] test marker` in src/index.ts and running `npm run bundle-all`), +# markers leak through THREE distinct surfaces: +# 1) babel-transpiled commonjs/*.js and es/*.mjs preserve source comments +# 2) webpack-bundled dist/hyperformula.js and dist/hyperformula.full.js +# preserve source comments (development build, no comment-stripping) +# 3) dist/*.js.map source-maps embed full original source in +# `sourcesContent`, so comments survive into the map even when stripped +# from the .js itself (not applicable here, but defends future configs) +# This script asserts the grep logic catches markers in all three surfaces. +# +# Exit code: 0 on all assertions pass, non-zero on any failure. + +set -uo pipefail + +readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly TMP_ROOT="$(mktemp -d -t hf-marker-scan-XXXXXX)" +trap 'rm -rf "$TMP_ROOT"' EXIT + +# ---- Mirror of the CI scan logic from .github/workflows/build.yml ---------- +# Keep this in sync with build.yml. The script-under-test must behave +# identically to the YAML inline script. +run_marker_scan() { + local root="$1" + local paths=() + local dir + for dir in dist commonjs es; do + if [ -d "$root/$dir" ]; then + paths+=("$root/$dir") + fi + done + if [ ${#paths[@]} -eq 0 ]; then + echo "No build output directories found; skipping marker scan." + return 0 + fi + local rc=0 + grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" || rc=$? 
+ case "$rc" in + 0) return 1 ;; # markers found -> CI would fail + 1) return 0 ;; # no markers -> CI would pass + *) return "$rc" ;; + esac +} + +# ---- Fixture builders ------------------------------------------------------- +make_clean_fixture() { + local root="$1" + mkdir -p "$root/dist" "$root/commonjs" "$root/es" + cat >"$root/dist/hyperformula.js" <<'EOF' +// HyperFormula bundle (synthetic, clean) +const HyperFormula = function() { return 42; }; +module.exports = HyperFormula; +EOF + cat >"$root/dist/hyperformula.js.map" <<'EOF' +{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["const HyperFormula = function() { return 42; };\nmodule.exports = HyperFormula;\n"],"mappings":"AAAA"} +EOF + cat >"$root/commonjs/index.js" <<'EOF' +"use strict"; +exports.foo = 1; +EOF + cat >"$root/es/index.mjs" <<'EOF' +export const foo = 1; +EOF +} + +# Variant: marker in dist .js file (e.g. preserved source comment). +make_marker_in_dist_js() { + local root="$1" + make_clean_fixture "$root" + cat >>"$root/dist/hyperformula.js" <<'EOF' +// [V12] internal citation marker — must not ship +EOF +} + +# Variant: marker in source-map sourcesContent only (stripped from .js). +make_marker_in_sourcemap() { + local root="$1" + make_clean_fixture "$root" + cat >"$root/dist/hyperformula.js.map" <<'EOF' +{"version":3,"sources":["webpack:///./src/index.ts"],"sourcesContent":["// [V7] citation that survived into sourcesContent\nconst HyperFormula = function() { return 42; };\n"],"mappings":"AAAA"} +EOF +} + +# Variant: §Sources footer leaked into commonjs output. +make_marker_in_commonjs() { + local root="$1" + make_clean_fixture "$root" + cat >>"$root/commonjs/index.js" <<'EOF' +// §Sources: internal/spec.md +EOF +} + +# Variant: marker in es/*.mjs. 
+make_marker_in_es() { + local root="$1" + make_clean_fixture "$root" + cat >>"$root/es/index.mjs" <<'EOF' +// [V42] another internal token +EOF +} + +# ---- Assertion harness ------------------------------------------------------ +PASS_COUNT=0 +FAIL_COUNT=0 + +assert_scan() { + local name="$1" + local expected="$2" # "clean" -> expect rc 0 ; "dirty" -> expect rc 1 + local root="$3" + + local rc=0 + run_marker_scan "$root" >/tmp/scan-out 2>&1 || rc=$? + + local got + case "$rc" in + 0) got="clean" ;; + 1) got="dirty" ;; + *) got="error($rc)" ;; + esac + + if [ "$got" = "$expected" ]; then + echo "PASS $name (expected=$expected, got=$got)" + PASS_COUNT=$((PASS_COUNT + 1)) + else + echo "FAIL $name (expected=$expected, got=$got)" + echo " scan output:" + sed 's/^/ /' /tmp/scan-out + FAIL_COUNT=$((FAIL_COUNT + 1)) + fi +} + +# ---- Test cases ------------------------------------------------------------- +echo "=== audit-marker scan self-test ===" +echo "Fixture root: $TMP_ROOT" +echo "" + +f="$TMP_ROOT/case-clean"; make_clean_fixture "$f"; assert_scan "clean build (no markers)" "clean" "$f" +f="$TMP_ROOT/case-dist-js"; make_marker_in_dist_js "$f"; assert_scan "marker in dist/*.js comment" "dirty" "$f" +f="$TMP_ROOT/case-dist-map"; make_marker_in_sourcemap "$f"; assert_scan "marker in dist/*.js.map (sourcesContent)" "dirty" "$f" +f="$TMP_ROOT/case-commonjs"; make_marker_in_commonjs "$f"; assert_scan "§Sources in commonjs/*.js" "dirty" "$f" +f="$TMP_ROOT/case-es"; make_marker_in_es "$f"; assert_scan "marker in es/*.mjs" "dirty" "$f" + +echo "" +echo "=== summary: $PASS_COUNT passed, $FAIL_COUNT failed ===" + +if [ "$FAIL_COUNT" -gt 0 ]; then + exit 1 +fi From b9a7ba2ea09418e5ec855c978600dc942e51e3cf Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Tue, 26 May 2026 02:54:20 +0000 Subject: [PATCH 4/7] CI: extract marker-scan into shared script so workflow step and self-test cannot drift The verify step in build.yml previously inlined the audit-marker grep logic 
while scripts/test-marker-scan.sh kept its own duplicate copy. A workflow-only edit could silently desynchronize the live scan from the self-test fixtures that are supposed to guard it. Move the scan into scripts/marker-scan.sh as a single parameterized entry point (accepts paths as $@, exit 0=clean, 1=dirty, 2+=error). The workflow step now invokes `bash scripts/marker-scan.sh dist commonjs es`, and the self-test drives the SAME script against synthetic fixture roots. --- .github/workflows/build.yml | 53 ++------------------------- scripts/marker-scan.sh | 73 +++++++++++++++++++++++++++++++++++++ scripts/test-marker-scan.sh | 34 ++++++++++------- 3 files changed, 97 insertions(+), 63 deletions(-) create mode 100755 scripts/marker-scan.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2254a0124..b1dcb4ead 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,59 +39,14 @@ jobs: run: npm run bundle-all - name: Verify no audit-harness markers leaked into build output + # Delegates to scripts/marker-scan.sh so the live CI scan and the + # self-test below exercise the same code path and cannot drift. shell: bash - run: | - # Fails the build if internal audit-harness citation markers ([V]) - # or `§Sources` footers leak from spec drafts/comments into compiled output. - # Markers belong only in internal docs/prompts, never in shipped JS. - # - # Empirical scope notes (probed 2026-05-25, see scripts/test-marker-scan.sh): - # Source comments leak through THREE surfaces and ALL are covered here: - # 1) commonjs/*.js and es/*.mjs (babel preserves comments) - # 2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves - # comments in the development build) - # 3) dist/*.js.map (`sourcesContent` embeds full original source, so - # markers survive in source-maps even if stripped from .js) - # `grep -rn` over dist/ catches .map files because they are plain JSON. 
- set -u - paths=() - for dir in dist commonjs es; do - if [ -d "$dir" ]; then - paths+=("$dir") - fi - done - if [ ${#paths[@]} -eq 0 ]; then - echo "No build output directories found (dist/, commonjs/, es/); skipping marker scan." - exit 0 - fi - echo "Scanning ${paths[*]} for audit-harness markers..." - # grep exit codes: 0=match, 1=no-match, 2+=scan/IO error. A bare - # `if grep` collapses 1 and 2 into the same branch, silently green- - # lighting the step on a read error. Branch on rc explicitly. - set +e - grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" - rc=$? - set -e - case "$rc" in - 0) - echo "" - echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." - echo "These markers are an internal spec-drafting convention and must never" - echo "ship in compiled JS. Strip them from the source comments/strings above." - exit 1 - ;; - 1) - echo "OK: no audit-harness markers found in build output." - ;; - *) - echo "ERROR: grep exited with rc=$rc while scanning ${paths[*]} — aborting CI step." >&2 - exit "$rc" - ;; - esac + run: bash scripts/marker-scan.sh dist commonjs es - name: Self-test marker-scan logic against synthetic fixtures # Run once per OS (the script is platform-agnostic and only takes <1s). - # Validates that the inline scan above catches markers in dist/*.js, + # Validates that scripts/marker-scan.sh catches markers in dist/*.js, # dist/*.js.map (sourcesContent), commonjs/*.js, and es/*.mjs surfaces. if: matrix.node-version == 22 && matrix.install-command == 'ci' shell: bash diff --git a/scripts/marker-scan.sh b/scripts/marker-scan.sh new file mode 100755 index 000000000..e64b30ec4 --- /dev/null +++ b/scripts/marker-scan.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Shared audit-harness marker scan used by both the CI verify step in +# .github/workflows/build.yml and the self-test in scripts/test-marker-scan.sh. 
+# +# Centralizing the scan in one script keeps the live CI logic and its self-test +# from drifting independently — a workflow-only edit would otherwise miss the +# self-test fixture coverage. +# +# Why this scan exists: +# Greps build output for `[V]` and `§Sources` markers — internal +# spec-drafting tokens that must never ship in compiled JS. Source comments +# leak through THREE surfaces and ALL are covered here: +# 1) commonjs/*.js and es/*.mjs (babel preserves comments) +# 2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves +# comments in the development build) +# 3) dist/*.js.map (`sourcesContent` embeds full original source, so markers +# survive in source-maps even if stripped from .js) +# `grep -rn` over dist/ catches .map files because they are plain JSON. +# +# Usage: +# bash scripts/marker-scan.sh [ ...] +# +# Exit codes: +# 0 — scan ran cleanly, no markers found (or no requested dirs exist) +# 1 — markers found in at least one scanned file +# 2+ — grep failed with an I/O / scan error; the caller must surface this + +set -u + +if [ "$#" -lt 1 ]; then + echo "usage: $0 [ ...]" >&2 + exit 2 +fi + +paths=() +for dir in "$@"; do + if [ -d "$dir" ]; then + paths+=("$dir") + fi +done + +if [ ${#paths[@]} -eq 0 ]; then + echo "No build output directories found ($*); skipping marker scan." + exit 0 +fi + +echo "Scanning ${paths[*]} for audit-harness markers..." + +# grep exit codes: 0=match, 1=no-match, 2+=scan/IO error. A bare `if grep` +# collapses 1 and 2 into the same branch, silently green-lighting on read +# errors. Branch on rc explicitly. +set +e +grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" +rc=$? +set -e + +case "$rc" in + 0) + echo "" + echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." + echo "These markers are an internal spec-drafting convention and must never" + echo "ship in compiled JS. Strip them from the source comments/strings above." 
+ exit 1 + ;; + 1) + echo "OK: no audit-harness markers found in build output." + exit 0 + ;; + *) + echo "ERROR: grep exited with rc=$rc while scanning ${paths[*]} — aborting scan." >&2 + exit "$rc" + ;; +esac diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh index c0ea2d35f..4b1e0f2ab 100755 --- a/scripts/test-marker-scan.sh +++ b/scripts/test-marker-scan.sh @@ -2,30 +2,34 @@ # Self-test for the audit-harness marker scan used in .github/workflows/build.yml. # # Why this exists: -# The CI step in build.yml greps the build output (dist/, commonjs/, es/) for -# `[V]` and `§Sources` markers — internal spec-drafting tokens that must -# never ship in compiled JS. Empirically (probed 2026-05-25 by planting -# `// [V99] test marker` in src/index.ts and running `npm run bundle-all`), -# markers leak through THREE distinct surfaces: +# The CI step in build.yml invokes `scripts/marker-scan.sh dist commonjs es` +# to grep the build output for `[V]` and `§Sources` markers — internal +# spec-drafting tokens that must never ship in compiled JS. Empirically +# (probed 2026-05-25 by planting `// [V99] test marker` in src/index.ts and +# running `npm run bundle-all`), markers leak through THREE distinct surfaces: # 1) babel-transpiled commonjs/*.js and es/*.mjs preserve source comments # 2) webpack-bundled dist/hyperformula.js and dist/hyperformula.full.js # preserve source comments (development build, no comment-stripping) # 3) dist/*.js.map source-maps embed full original source in # `sourcesContent`, so comments survive into the map even when stripped # from the .js itself (not applicable here, but defends future configs) -# This script asserts the grep logic catches markers in all three surfaces. +# This script exercises the SAME scripts/marker-scan.sh that CI runs, against +# synthetic fixtures covering all three surfaces — so the workflow step and +# the self-test cannot drift independently. 
# # Exit code: 0 on all assertions pass, non-zero on any failure. set -uo pipefail readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly MARKER_SCAN="$SCRIPT_DIR/marker-scan.sh" readonly TMP_ROOT="$(mktemp -d -t hf-marker-scan-XXXXXX)" trap 'rm -rf "$TMP_ROOT"' EXIT -# ---- Mirror of the CI scan logic from .github/workflows/build.yml ---------- -# Keep this in sync with build.yml. The script-under-test must behave -# identically to the YAML inline script. +# ---- Adapter: drives the shared scan against a synthetic fixture root ------ +# Invokes scripts/marker-scan.sh — the same script CI runs — passing the +# fixture's dist/commonjs/es subdirectories. This is the only place where the +# self-test couples to the scan; the scan logic itself lives in marker-scan.sh. run_marker_scan() { local root="$1" local paths=() @@ -36,14 +40,16 @@ run_marker_scan() { fi done if [ ${#paths[@]} -eq 0 ]; then - echo "No build output directories found; skipping marker scan." - return 0 + # Mirror the script's "no dirs" branch so the assertion harness treats this + # as a clean (rc=0) outcome. + bash "$MARKER_SCAN" "$root/__missing__" + return $? fi local rc=0 - grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" || rc=$? + bash "$MARKER_SCAN" "${paths[@]}" || rc=$? case "$rc" in - 0) return 1 ;; # markers found -> CI would fail - 1) return 0 ;; # no markers -> CI would pass + 0) return 0 ;; # no markers -> CI would pass + 1) return 1 ;; # markers found -> CI would fail *) return "$rc" ;; esac } From e6a8c1994b7c657902348a04621480b891e8947d Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Sun, 31 May 2026 21:51:41 +0000 Subject: [PATCH 5/7] fix(ci): scan for current lowercase markers and cover typings/languages output marker-scan grepped only the legacy [V] form, so current markers ([vrf_1], [dec_3], ...) passed the gate; it also scanned only dist/commonjs/es, missing the typings/ and languages/ bundle outputs (both preserve source comments). 
Extend the grep to the lowercase prefix+_digits grammar, add typings+languages to the CI invocation and the self-test dir list, and add fixtures for both gaps. --- .github/workflows/build.yml | 2 +- scripts/marker-scan.sh | 11 +++++++---- scripts/test-marker-scan.sh | 30 +++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b1dcb4ead..e785a9ed4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: # Delegates to scripts/marker-scan.sh so the live CI scan and the # self-test below exercise the same code path and cannot drift. shell: bash - run: bash scripts/marker-scan.sh dist commonjs es + run: bash scripts/marker-scan.sh dist commonjs es typings languages - name: Self-test marker-scan logic against synthetic fixtures # Run once per OS (the script is platform-agnostic and only takes <1s). diff --git a/scripts/marker-scan.sh b/scripts/marker-scan.sh index e64b30ec4..9fa792962 100755 --- a/scripts/marker-scan.sh +++ b/scripts/marker-scan.sh @@ -7,9 +7,12 @@ # self-test fixture coverage. # # Why this scan exists: -# Greps build output for `[V]` and `§Sources` markers — internal -# spec-drafting tokens that must never ship in compiled JS. Source comments -# leak through THREE surfaces and ALL are covered here: +# Greps build output for audit-harness citation markers and the `§Sources` +# footer — internal spec-drafting tokens that must never ship in compiled +# output. Markers are the current lowercase prefixed form `[vrf_1]`/`[dec_3]`/ +# `[con_2]`/`[que_5]`/`[wrg_7]`/`[crf_4]` (parser `^\[[a-z][a-z0-9_]*\]$`) plus +# the legacy `[V]` form. 
Source comments leak through THREE surfaces and +# ALL are covered here: # 1) commonjs/*.js and es/*.mjs (babel preserves comments) # 2) dist/hyperformula.js and dist/hyperformula.full.js (webpack preserves # comments in the development build) @@ -50,7 +53,7 @@ echo "Scanning ${paths[*]} for audit-harness markers..." # collapses 1 and 2 into the same branch, silently green-lighting on read # errors. Branch on rc explicitly. set +e -grep -rnE '\[V[0-9]+\]|§[[:space:]]*Sources' "${paths[@]}" +grep -rnE '\[(V[0-9]+|(vrf|dec|con|que|wrg|crf)_[0-9]+)\]|§[[:space:]]*Sources' "${paths[@]}" rc=$? set -e diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh index 4b1e0f2ab..500771e8f 100755 --- a/scripts/test-marker-scan.sh +++ b/scripts/test-marker-scan.sh @@ -34,7 +34,8 @@ run_marker_scan() { local root="$1" local paths=() local dir - for dir in dist commonjs es; do + # Mirror the build.yml invocation exactly so the self-test and CI cannot drift. + for dir in dist commonjs es typings languages; do if [ -d "$root/$dir" ]; then paths+=("$root/$dir") fi @@ -111,6 +112,31 @@ make_marker_in_es() { EOF } +# Variant: CURRENT lowercase prefixed marker (e.g. [vrf_3]) in commonjs output. +# The pre-2026-05-21 [V] form is legacy; real specs emit [vrf_/dec_/con_/ +# que_/wrg_/crf_]_. Isolates the scan-PATTERN coverage. +make_marker_in_commonjs_current() { + local root="$1" + make_clean_fixture "$root" + cat >>"$root/commonjs/index.js" <<'EOF' +// [vrf_3] current-grammar citation marker — must not ship +EOF +} + +# Variant: marker leaked into typings/*.d.ts. `bundle:typings` (tsc -d) and +# `bundle:languages` are build outputs that preserve source comments, so they +# are leak surfaces too. Uses a legacy [V] marker to isolate the DIRECTORY- +# coverage defect from the pattern defect. +make_marker_in_typings() { + local root="$1" + make_clean_fixture "$root" + mkdir -p "$root/typings" + cat >"$root/typings/index.d.ts" <<'EOF' +/** Public API surface. 
[V12] citation leaked into declarations. */ +export declare const foo: number; +EOF +} + # ---- Assertion harness ------------------------------------------------------ PASS_COUNT=0 FAIL_COUNT=0 @@ -151,6 +177,8 @@ f="$TMP_ROOT/case-dist-js"; make_marker_in_dist_js "$f"; assert_scan f="$TMP_ROOT/case-dist-map"; make_marker_in_sourcemap "$f"; assert_scan "marker in dist/*.js.map (sourcesContent)" "dirty" "$f" f="$TMP_ROOT/case-commonjs"; make_marker_in_commonjs "$f"; assert_scan "§Sources in commonjs/*.js" "dirty" "$f" f="$TMP_ROOT/case-es"; make_marker_in_es "$f"; assert_scan "marker in es/*.mjs" "dirty" "$f" +f="$TMP_ROOT/case-commonjs-current"; make_marker_in_commonjs_current "$f"; assert_scan "current [vrf_n] marker in commonjs/*.js" "dirty" "$f" +f="$TMP_ROOT/case-typings"; make_marker_in_typings "$f"; assert_scan "marker in typings/*.d.ts" "dirty" "$f" echo "" echo "=== summary: $PASS_COUNT passed, $FAIL_COUNT failed ===" From 25a566a6716f0d90fb6c628319bcccd69f8ea25d Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Tue, 2 Jun 2026 07:01:11 +0000 Subject: [PATCH 6/7] =?UTF-8?q?fix(ci):=20rename=20audit-harness=20footer?= =?UTF-8?q?=20marker=20=C2=A7Sources=20to=20=C2=A7AuditSources=20in=20buil?= =?UTF-8?q?d-output=20scan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/marker-scan.sh | 6 +++--- scripts/test-marker-scan.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/marker-scan.sh b/scripts/marker-scan.sh index 9fa792962..cd304ce6d 100755 --- a/scripts/marker-scan.sh +++ b/scripts/marker-scan.sh @@ -7,7 +7,7 @@ # self-test fixture coverage. # # Why this scan exists: -# Greps build output for audit-harness citation markers and the `§Sources` +# Greps build output for audit-harness citation markers and the `§AuditSources` # footer — internal spec-drafting tokens that must never ship in compiled # output. 
Markers are the current lowercase prefixed form `[vrf_1]`/`[dec_3]`/ # `[con_2]`/`[que_5]`/`[wrg_7]`/`[crf_4]` (parser `^\[[a-z][a-z0-9_]*\]$`) plus @@ -53,14 +53,14 @@ echo "Scanning ${paths[*]} for audit-harness markers..." # collapses 1 and 2 into the same branch, silently green-lighting on read # errors. Branch on rc explicitly. set +e -grep -rnE '\[(V[0-9]+|(vrf|dec|con|que|wrg|crf)_[0-9]+)\]|§[[:space:]]*Sources' "${paths[@]}" +grep -rnE '\[(V[0-9]+|(vrf|dec|con|que|wrg|crf)_[0-9]+)\]|§[[:space:]]*AuditSources' "${paths[@]}" rc=$? set -e case "$rc" in 0) echo "" - echo "ERROR: audit-harness markers ([V] or §Sources) found in build output." + echo "ERROR: audit-harness markers ([V]/[vrf_n] or §AuditSources) found in build output." echo "These markers are an internal spec-drafting convention and must never" echo "ship in compiled JS. Strip them from the source comments/strings above." exit 1 diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh index 500771e8f..1390e06e4 100755 --- a/scripts/test-marker-scan.sh +++ b/scripts/test-marker-scan.sh @@ -94,12 +94,12 @@ make_marker_in_sourcemap() { EOF } -# Variant: §Sources footer leaked into commonjs output. +# Variant: §AuditSources footer leaked into commonjs output. 
make_marker_in_commonjs() { local root="$1" make_clean_fixture "$root" cat >>"$root/commonjs/index.js" <<'EOF' -// §Sources: internal/spec.md +// §AuditSources: internal/spec.md EOF } @@ -175,7 +175,7 @@ echo "" f="$TMP_ROOT/case-clean"; make_clean_fixture "$f"; assert_scan "clean build (no markers)" "clean" "$f" f="$TMP_ROOT/case-dist-js"; make_marker_in_dist_js "$f"; assert_scan "marker in dist/*.js comment" "dirty" "$f" f="$TMP_ROOT/case-dist-map"; make_marker_in_sourcemap "$f"; assert_scan "marker in dist/*.js.map (sourcesContent)" "dirty" "$f" -f="$TMP_ROOT/case-commonjs"; make_marker_in_commonjs "$f"; assert_scan "§Sources in commonjs/*.js" "dirty" "$f" +f="$TMP_ROOT/case-commonjs"; make_marker_in_commonjs "$f"; assert_scan "§AuditSources in commonjs/*.js" "dirty" "$f" f="$TMP_ROOT/case-es"; make_marker_in_es "$f"; assert_scan "marker in es/*.mjs" "dirty" "$f" f="$TMP_ROOT/case-commonjs-current"; make_marker_in_commonjs_current "$f"; assert_scan "current [vrf_n] marker in commonjs/*.js" "dirty" "$f" f="$TMP_ROOT/case-typings"; make_marker_in_typings "$f"; assert_scan "marker in typings/*.d.ts" "dirty" "$f" From 542062c1e9a03515c10531745eda6ea6d206b4fc Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Tue, 2 Jun 2026 11:25:37 +0000 Subject: [PATCH 7/7] docs(ci): fix stale marker references in marker-scan self-test header --- scripts/test-marker-scan.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/test-marker-scan.sh b/scripts/test-marker-scan.sh index 1390e06e4..e330a0e73 100755 --- a/scripts/test-marker-scan.sh +++ b/scripts/test-marker-scan.sh @@ -2,8 +2,9 @@ # Self-test for the audit-harness marker scan used in .github/workflows/build.yml. 
# # Why this exists: -# The CI step in build.yml invokes `scripts/marker-scan.sh dist commonjs es` -# to grep the build output for `[V]` and `§Sources` markers — internal +# The CI step in build.yml invokes +# `scripts/marker-scan.sh dist commonjs es typings languages` to grep the build +# output for `[V<n>]`/`[vrf_n]` citation markers and the `§AuditSources` footer — internal # spec-drafting tokens that must never ship in compiled JS. Empirically # (probed 2026-05-25 by planting `// [V99] test marker` in src/index.ts and # running `npm run bundle-all`), markers leak through THREE distinct surfaces: