From ba9aa8858dbfe8d79063a5ef845fa359d1381223 Mon Sep 17 00:00:00 2001 From: Aaron Lewter <77890109+lewta@users.noreply.github.com> Date: Sat, 20 Jun 2026 09:09:31 -0400 Subject: [PATCH 1/3] fix(SatisfactionCapture): skip signal on inference failure instead of defaulting to 5 A failed/timed-out classification was being written to ratings.jsonl as a neutral rating:5 row. These placeholder rows accumulate (in one running install, ~27% of all signals) and drag every today/week/month average toward 5, making the satisfaction trend meaningless. Both failure branches now log the error for observability and write no rating. --- .../.claude/hooks/SatisfactionCapture.hook.ts | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts b/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts index 42c584537..242c5e622 100755 --- a/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts +++ b/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts @@ -460,30 +460,17 @@ async function main() { } } } else { - // Inference failed — default to 5 (neutral) + // Inference failed — skip the signal instead of fabricating a neutral 5. + // A failed classification is not a real satisfaction signal; writing a + // default-5 row pollutes ratings.jsonl and drags every running average + // toward the mushy middle. Log for observability, write nothing. const errorReason = result.error || 'unknown'; - console.error(`[SatisfactionCapture] Inference failed: ${errorReason} — defaulting to 5`); - writeRating({ - timestamp: getISOTimestamp(), - rating: 5, - session_id: sessionId, - source: 'implicit', - sentiment_summary: `Inference failed: ${errorReason.slice(0, 80)}`, - confidence: 0.3, - }); + console.error(`[SatisfactionCapture] Inference failed: ${errorReason} — skipping signal (no rating written)`); } } catch (err) { - // Inference errored — default to 5 (neutral) + // Inference errored — skip the signal (see note above). Do not fabricate a 5. const errMsg = err instanceof Error ? err.message : String(err); - console.error(`[SatisfactionCapture] Inference error: ${errMsg} — defaulting to 5`); - writeRating({ - timestamp: getISOTimestamp(), - rating: 5, - session_id: sessionId, - source: 'implicit', - sentiment_summary: `Inference error: ${errMsg.slice(0, 80)}`, - confidence: 0.3, - }); + console.error(`[SatisfactionCapture] Inference error: ${errMsg} — skipping signal (no rating written)`); } process.exit(0); From 66adb7fc7712e047dde01d8ed4d0c07f41837f79 Mon Sep 17 00:00:00 2001 From: Aaron Lewter <77890109+lewta@users.noreply.github.com> Date: Sat, 20 Jun 2026 09:27:27 -0400 Subject: [PATCH 2/3] test(SatisfactionCapture): add regression test for inference-failure skip Reproducibly proves the failure path writes no ratings row (forces an inference failure by removing claude from PATH, asserts 0 rows + skip log). Writes only to a throwaway temp PAI_DIR; never touches a real ~/.claude tree. --- .../SatisfactionCapture.failure-path.test.sh | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 Releases/v5.0.0/.claude/hooks/tests/SatisfactionCapture.failure-path.test.sh diff --git a/Releases/v5.0.0/.claude/hooks/tests/SatisfactionCapture.failure-path.test.sh b/Releases/v5.0.0/.claude/hooks/tests/SatisfactionCapture.failure-path.test.sh new file mode 100755 index 000000000..9ac6587e9 --- /dev/null +++ b/Releases/v5.0.0/.claude/hooks/tests/SatisfactionCapture.failure-path.test.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Regression test for SatisfactionCapture.hook.ts inference-failure handling. +# +# Asserts: when the satisfaction classifier fails (here forced by making the +# `claude` executable unavailable so Inference's spawn returns failure), the +# hook writes NO row to ratings.jsonl and logs that it skipped the signal. +# +# Before the fix, the failure branch wrote a placeholder {"rating":5,...} row; +# those accumulate and flatten every running average. This test guards against +# that regression. +# +# Safe by construction: all writes go to a throwaway PAI_DIR under a temp dir; +# the real ~/.claude tree is never touched. +# +# Usage: bash SatisfactionCapture.failure-path.test.sh (exit 0 = PASS) +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +HOOK="$SCRIPT_DIR/../SatisfactionCapture.hook.ts" +BUN="$(command -v bun || true)" +[ -z "$BUN" ] && { echo "SKIP: bun not found on PATH"; exit 0; } +[ -f "$HOOK" ] && : || { echo "FAIL: hook not found at $HOOK"; exit 1; } + +TMP="$(mktemp -d)" +PAIDIR="$TMP/PAI" +RATINGS="$PAIDIR/MEMORY/LEARNING/SIGNALS/ratings.jsonl" +mkdir -p "$PAIDIR" +trap 'rm -rf "$TMP"' EXIT + +# Long, non-praise, non-explicit-rating prompt → reaches the inference path. +STDIN='{"prompt":"this is still broken and not what I asked for at all","session_id":"test-failure-path","transcript_path":"/nonexistent-transcript"}' + +# env -i with a PATH that excludes the `claude` binary → spawn() fails ENOENT +# → Inference returns success:false → exercises the failure branch deterministically. +printf '%s' "$STDIN" | env -i HOME="$HOME" PATH="/usr/bin:/bin" PAI_DIR="$PAIDIR" "$BUN" "$HOOK" >"$TMP/stdout.log" 2>"$TMP/stderr.log" + +ROWS=0 +[ -f "$RATINGS" ] && ROWS="$(wc -l < "$RATINGS" | tr -d ' ')" + +echo "rows written on inference failure: $ROWS" +echo "stderr:"; grep -i "inference" "$TMP/stderr.log" | sed 's/^/ /' || true + +if [ "$ROWS" = "0" ] && grep -q "skipping signal" "$TMP/stderr.log"; then + echo "PASS — failure path wrote no rating and logged skip." + exit 0 +else + echo "FAIL — expected 0 rows + a 'skipping signal' log; got rows=$ROWS" + [ -f "$RATINGS" ] && { echo "unexpected row:"; sed 's/^/ /' "$RATINGS"; } + exit 1 +fi From a589c1e0322db451adebeb870b13270d37a65d70 Mon Sep 17 00:00:00 2001 From: Aaron Lewter <77890109+lewta@users.noreply.github.com> Date: Sat, 20 Jun 2026 18:42:52 -0400 Subject: [PATCH 3/3] fix(SatisfactionCapture): skip success-path rating when model returns no valid 1-10 value The success branch defaulted a missing/garbled rating to 5 (`? r.rating : 5`), the same fabrication the failure branches did. Extract isValidRating() as the single source of truth and only persist when the model returned a real 1-10 rating; otherwise log and skip. Guards main() behind import.meta.main so the helper can be unit-tested without executing the hook. Adds isValidRating unit test. --- .../.claude/hooks/SatisfactionCapture.hook.ts | 24 +++++++++++++++---- .../.claude/hooks/tests/isValidRating.test.ts | 21 ++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 Releases/v5.0.0/.claude/hooks/tests/isValidRating.test.ts diff --git a/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts b/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts index 242c5e622..39c52bfca 100755 --- a/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts +++ b/Releases/v5.0.0/.claude/hooks/SatisfactionCapture.hook.ts @@ -153,6 +153,19 @@ const SYSTEM_TEXT_PATTERNS = [ /^Note:.*was read before/i, ]; +// ── Rating Validity ── + +/** + * A satisfaction rating is only meaningful when the model returns an actual + * value in the 1-10 band. Anything else (missing, null, out of range, NaN, + * non-number) is the ABSENCE of a signal — it must not be coerced to a neutral + * 5, which would pollute every running average (the aggregator flat-means + * `.rating` and ignores confidence). + */ +export function isValidRating(x: unknown): boolean { + return typeof x === 'number' && Number.isFinite(x) && x >= 1 && x <= 10; +} + // ── Rating Writer ── function writeRating(entry: RatingEntry): void { @@ -422,10 +435,9 @@ async function main() { level: 'fast', }); - if (result.success && result.parsed) { + if (result.success && result.parsed && isValidRating((result.parsed as SentimentResult).rating)) { const r = result.parsed as SentimentResult; - // Clamp rating to 1-10, default 5 if missing - const rating = (r.rating != null && r.rating >= 1 && r.rating <= 10) ? r.rating : 5; + const rating = r.rating; const confidence = r.confidence || 0.5; console.error(`[SatisfactionCapture] Implicit: ${rating}/10 (${confidence}) - ${r.summary || 'no summary'}`); @@ -459,6 +471,10 @@ async function main() { }).catch((err) => console.error(`[SatisfactionCapture] Failure capture error: ${err}`)); } } + } else if (result.success) { + // Inference succeeded but returned no usable 1-10 rating (missing/garbled + // JSON). That's the absence of a signal, not a neutral 5 — skip the write. + console.error(`[SatisfactionCapture] Inference returned no valid rating — skipping signal (no rating written)`); } else { // Inference failed — skip the signal instead of fabricating a neutral 5. // A failed classification is not a real satisfaction signal; writing a @@ -480,4 +496,4 @@ async function main() { } } -main(); +if (import.meta.main) main(); diff --git a/Releases/v5.0.0/.claude/hooks/tests/isValidRating.test.ts b/Releases/v5.0.0/.claude/hooks/tests/isValidRating.test.ts new file mode 100644 index 000000000..3d9711cc4 --- /dev/null +++ b/Releases/v5.0.0/.claude/hooks/tests/isValidRating.test.ts @@ -0,0 +1,21 @@ +// Unit test for SatisfactionCapture's rating-validity guard. +// +// The success path must only persist a rating the model actually returned in +// the 1-10 band. Anything else is the absence of a signal and must NOT be +// coerced into a neutral 5 (which pollutes every running average). +// +// Run: bun test isValidRating.test.ts +import { test, expect } from "bun:test"; +import { isValidRating } from "../SatisfactionCapture.hook.ts"; + +test("accepts valid 1-10 ratings", () => { + for (const v of [1, 5, 10, 7.5]) { + expect(isValidRating(v)).toBe(true); + } +}); + +test("rejects values that must not become a fabricated 5", () => { + for (const v of [null, undefined, 0, 0.9, 11, -1, NaN, Infinity, -Infinity, "5", {}, [], true]) { + expect(isValidRating(v as unknown)).toBe(false); + } +});