-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest-thinking-display.sh
More file actions
executable file
·65 lines (58 loc) · 2.34 KB
/
Copy pathtest-thinking-display.sh
File metadata and controls
executable file
·65 lines (58 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env bash
# A/B test: does Opus 4.8 return populated thinking summaries with
# --thinking-display summarized vs. relying on the showThinkingSummaries setting?
# Sends 2 small live requests through your logged-in account (uses tokens).
#
# Expected on Opus 4.7 / 4.8: run A (flag) is populated, run B (no flag) is empty.
# Strict mode: stop on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Locate the CLI. Precedence: a non-empty CLAUDE_REAL_BIN, then whatever
# `claude` resolves to on PATH, then the ~/.local/bin/claude location.
if [[ -n "${CLAUDE_REAL_BIN:-}" ]]; then
  CLAUDE="$CLAUDE_REAL_BIN"
elif CLAUDE="$(command -v claude 2>/dev/null)"; then
  :
else
  CLAUDE="$HOME/.local/bin/claude"
fi
if [[ ! -x "$CLAUDE" ]]; then
  echo "could not find 'claude'; set CLAUDE_REAL_BIN" >&2
  exit 1
fi

# Small syllogism prompt that asks for step-by-step reasoning; both runs send
# exactly this text.
PROMPT='If all Bloops are Razzies, and all Razzies are Lazzies, are all Bloops necessarily Lazzies? Reason through it carefully step by step, then give a one-word answer.'
# Scratch files that receive the stream-json output of run A and run B.
# They start out empty so cleanup is safe to run at any point, including
# before (or between) the mktemp calls below.
A_JSONL=""
B_JSONL=""

# Remove whichever scratch files have been created so far.
# Globals: A_JSONL, B_JSONL (read). Always returns 0.
cleanup() {
  local path
  for path in "$A_JSONL" "$B_JSONL"; do
    if [[ -n "$path" ]]; then
      rm -f -- "$path"
    fi
  done
  return 0
}

# Install the trap BEFORE creating anything: if the second mktemp fails, the
# first file is still removed on exit instead of being leaked.
trap cleanup EXIT

A_JSONL="$(mktemp "${TMPDIR:-/tmp}/cc-thinking-a.XXXXXX")"
B_JSONL="$(mktemp "${TMPDIR:-/tmp}/cc-thinking-b.XXXXXX")"
# Run a command under a time limit when a timeout utility is installed.
#   $1  = limit, passed through unchanged to the timeout utility
#   $2… = the command and its arguments
# Prefers `timeout`, then `gtimeout`; when neither is on PATH the command is
# run directly with no limit. The exit status is that of whatever was run.
run_with_timeout() {
  local limit="$1" tool
  shift
  for tool in timeout gtimeout; do
    if command -v "$tool" >/dev/null 2>&1; then
      "$tool" "$limit" "$@"
      return
    fi
  done
  # No timeout utility available: run the command unguarded.
  "$@"
}
# Summarize the content blocks found in a stream-json capture.
#   $1 = path to a JSONL file (one JSON value per line)
# For every event whose `message.content` (or top-level `content`) is a list,
# prints one line per `thinking` block (length plus an 80-character preview)
# and one line per `text` block (length only). Prints a placeholder line when
# no thinking block was seen at all.
# Blank lines, lines that are not valid JSON, and JSON values that are not
# objects are skipped rather than treated as errors.
inspect() { # $1 = jsonl file
  python3 - "$1" <<'PY'
import json, sys

found = False
# Decode as UTF-8 regardless of locale; replace undecodable bytes so one bad
# byte cannot abort the whole summary.
with open(sys.argv[1], encoding="utf-8", errors="replace") as fh:
    for ln in fh:
        ln = ln.strip()
        if not ln:
            continue
        try:
            ev = json.loads(ln)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if not isinstance(ev, dict):
            # Valid JSON but not an object (list, string, number): no content.
            continue
        msg = ev.get('message') or ev
        content = msg.get('content') if isinstance(msg, dict) else None
        if not isinstance(content, list):
            continue
        for b in content:
            if not isinstance(b, dict):
                continue
            if b.get('type') == 'thinking':
                t = b.get('thinking')
                if not isinstance(t, str):
                    t = ''  # null / missing / non-string counts as empty
                found = True
                print(f" THINKING len={len(t)} preview={t[:80]!r}")
            elif b.get('type') == 'text':
                txt = b.get('text')
                if not isinstance(txt, str):
                    txt = ''
                print(f" TEXT len={len(txt)}")
if not found:
    print(" (no thinking block emitted)")
PY
}
# Run one arm of the A/B test and summarize what came back.
#   $1  = heading printed before the run
#   $2  = file that receives the stream-json output
#   $3… = extra CLI flags for this arm (may be none)
# Returns non-zero when the CLI run or the summary step failed.
run_case() {
  local heading="$1" out="$2" rc=0
  shift 2
  echo "$heading"
  # stderr is discarded to keep the report readable, so the exit status is the
  # only failure signal left. Capture it instead of letting `set -e` abort the
  # script silently right after the heading.
  run_with_timeout 150 "$CLAUDE" -p "$PROMPT" --model opus "$@" \
    --output-format stream-json --verbose --max-turns 1 </dev/null >"$out" 2>/dev/null || rc=$?
  if (( rc != 0 )); then
    echo " claude exited with status $rc (124 usually means the timeout fired); results below may be incomplete" >&2
  fi
  # Still summarize whatever was captured: partial output is useful evidence.
  inspect "$out" || rc=$?
  return "$rc"
}

failures=0
run_case "### A: --thinking-display summarized" "$A_JSONL" --thinking-display summarized \
  || failures=$((failures + 1))
run_case "### B: no flag (relies on showThinkingSummaries setting)" "$B_JSONL" \
  || failures=$((failures + 1))
echo
echo "Expected: A populated (len>0), B empty (len=0) on Opus 4.7 / 4.8."
# Surface a failed arm through the script's exit status.
if (( failures > 0 )); then
  exit 1
fi