Skip to content

Commit e0e0681

Browse files
illeatmyhat and claude committed
test/hardening: robustify Bob cwd-hash + cover codex/pwd-fallback locators
Follow-ups from an adversarial review of the Bob provenance work:

- audit_recall._bob_session_id now also tries $PWD (which preserves the symlinked path the user cd'd through) alongside os.getcwd() (which returns the resolved path), so a symlinked workspace can't silently miss the chat dir and fall back to minting a new session id.
- Add the missing native-Codex transcript locator test (there was none).
- Add a Bob $PWD-fallback test (nothing seeded under the getcwd hash; only the $PWD hash matches).

Gate green, 272 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 5885157 commit e0e0681

7 files changed

Lines changed: 114 additions & 40 deletions

File tree

platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,19 @@ def _bob_session_id() -> str | None:
4343
if not os.environ.get("BOBSHELL_CLI"):
4444
return None
4545
try:
46-
project_hash = hashlib.sha256(os.getcwd().encode()).hexdigest()
47-
chats = Path.home() / ".bob" / "tmp" / project_hash / "chats"
48-
files = sorted(
49-
chats.glob("session-*.json"),
50-
key=lambda p: p.stat().st_mtime,
51-
reverse=True,
52-
)
53-
for chat in files:
46+
# Bob hashes the project path it was launched in. os.getcwd() returns
47+
# the resolved (symlink-free) path, but Bob may have captured the
48+
# symlinked path the user cd'd through; $PWD preserves that. Try both
49+
# candidate hashes and pick the newest chat across them.
50+
chats = []
51+
seen_paths: set[str] = set()
52+
for raw in (os.getcwd(), os.environ.get("PWD")):
53+
if not raw or raw in seen_paths:
54+
continue
55+
seen_paths.add(raw)
56+
project_hash = hashlib.sha256(raw.encode()).hexdigest()
57+
chats.extend((Path.home() / ".bob" / "tmp" / project_hash / "chats").glob("session-*.json"))
58+
for chat in sorted(chats, key=lambda p: p.stat().st_mtime, reverse=True):
5459
try:
5560
sid = json.loads(chat.read_text(encoding="utf-8")).get("sessionId")
5661
except (OSError, json.JSONDecodeError):

platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,19 @@ def _bob_session_id() -> str | None:
4343
if not os.environ.get("BOBSHELL_CLI"):
4444
return None
4545
try:
46-
project_hash = hashlib.sha256(os.getcwd().encode()).hexdigest()
47-
chats = Path.home() / ".bob" / "tmp" / project_hash / "chats"
48-
files = sorted(
49-
chats.glob("session-*.json"),
50-
key=lambda p: p.stat().st_mtime,
51-
reverse=True,
52-
)
53-
for chat in files:
46+
# Bob hashes the project path it was launched in. os.getcwd() returns
47+
# the resolved (symlink-free) path, but Bob may have captured the
48+
# symlinked path the user cd'd through; $PWD preserves that. Try both
49+
# candidate hashes and pick the newest chat across them.
50+
chats = []
51+
seen_paths: set[str] = set()
52+
for raw in (os.getcwd(), os.environ.get("PWD")):
53+
if not raw or raw in seen_paths:
54+
continue
55+
seen_paths.add(raw)
56+
project_hash = hashlib.sha256(raw.encode()).hexdigest()
57+
chats.extend((Path.home() / ".bob" / "tmp" / project_hash / "chats").glob("session-*.json"))
58+
for chat in sorted(chats, key=lambda p: p.stat().st_mtime, reverse=True):
5459
try:
5560
sid = json.loads(chat.read_text(encoding="utf-8")).get("sessionId")
5661
except (OSError, json.JSONDecodeError):

platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,19 @@ def _bob_session_id() -> str | None:
4343
if not os.environ.get("BOBSHELL_CLI"):
4444
return None
4545
try:
46-
project_hash = hashlib.sha256(os.getcwd().encode()).hexdigest()
47-
chats = Path.home() / ".bob" / "tmp" / project_hash / "chats"
48-
files = sorted(
49-
chats.glob("session-*.json"),
50-
key=lambda p: p.stat().st_mtime,
51-
reverse=True,
52-
)
53-
for chat in files:
46+
# Bob hashes the project path it was launched in. os.getcwd() returns
47+
# the resolved (symlink-free) path, but Bob may have captured the
48+
# symlinked path the user cd'd through; $PWD preserves that. Try both
49+
# candidate hashes and pick the newest chat across them.
50+
chats = []
51+
seen_paths: set[str] = set()
52+
for raw in (os.getcwd(), os.environ.get("PWD")):
53+
if not raw or raw in seen_paths:
54+
continue
55+
seen_paths.add(raw)
56+
project_hash = hashlib.sha256(raw.encode()).hexdigest()
57+
chats.extend((Path.home() / ".bob" / "tmp" / project_hash / "chats").glob("session-*.json"))
58+
for chat in sorted(chats, key=lambda p: p.stat().st_mtime, reverse=True):
5459
try:
5560
sid = json.loads(chat.read_text(encoding="utf-8")).get("sessionId")
5661
except (OSError, json.JSONDecodeError):

platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,19 @@ def _bob_session_id() -> str | None:
4343
if not os.environ.get("BOBSHELL_CLI"):
4444
return None
4545
try:
46-
project_hash = hashlib.sha256(os.getcwd().encode()).hexdigest()
47-
chats = Path.home() / ".bob" / "tmp" / project_hash / "chats"
48-
files = sorted(
49-
chats.glob("session-*.json"),
50-
key=lambda p: p.stat().st_mtime,
51-
reverse=True,
52-
)
53-
for chat in files:
46+
# Bob hashes the project path it was launched in. os.getcwd() returns
47+
# the resolved (symlink-free) path, but Bob may have captured the
48+
# symlinked path the user cd'd through; $PWD preserves that. Try both
49+
# candidate hashes and pick the newest chat across them.
50+
chats = []
51+
seen_paths: set[str] = set()
52+
for raw in (os.getcwd(), os.environ.get("PWD")):
53+
if not raw or raw in seen_paths:
54+
continue
55+
seen_paths.add(raw)
56+
project_hash = hashlib.sha256(raw.encode()).hexdigest()
57+
chats.extend((Path.home() / ".bob" / "tmp" / project_hash / "chats").glob("session-*.json"))
58+
for chat in sorted(chats, key=lambda p: p.stat().st_mtime, reverse=True):
5459
try:
5560
sid = json.loads(chat.read_text(encoding="utf-8")).get("sessionId")
5661
except (OSError, json.JSONDecodeError):

plugin-source/lib/audit_recall.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,19 @@ def _bob_session_id() -> str | None:
4343
if not os.environ.get("BOBSHELL_CLI"):
4444
return None
4545
try:
46-
project_hash = hashlib.sha256(os.getcwd().encode()).hexdigest()
47-
chats = Path.home() / ".bob" / "tmp" / project_hash / "chats"
48-
files = sorted(
49-
chats.glob("session-*.json"),
50-
key=lambda p: p.stat().st_mtime,
51-
reverse=True,
52-
)
53-
for chat in files:
46+
# Bob hashes the project path it was launched in. os.getcwd() returns
47+
# the resolved (symlink-free) path, but Bob may have captured the
48+
# symlinked path the user cd'd through; $PWD preserves that. Try both
49+
# candidate hashes and pick the newest chat across them.
50+
chats = []
51+
seen_paths: set[str] = set()
52+
for raw in (os.getcwd(), os.environ.get("PWD")):
53+
if not raw or raw in seen_paths:
54+
continue
55+
seen_paths.add(raw)
56+
project_hash = hashlib.sha256(raw.encode()).hexdigest()
57+
chats.extend((Path.home() / ".bob" / "tmp" / project_hash / "chats").glob("session-*.json"))
58+
for chat in sorted(chats, key=lambda p: p.stat().st_mtime, reverse=True):
5459
try:
5560
sid = json.loads(chat.read_text(encoding="utf-8")).get("sessionId")
5661
except (OSError, json.JSONDecodeError):

tests/platform_integrations/test_audit_recall.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,3 +180,28 @@ def test_env_session_id_beats_bob_lookup(self, tmp_path):
180180

181181
rows = _read_rows(proj / ".evolve" / "audit.log")
182182
assert rows[0]["session_id"] == "codex-wins"
183+
184+
def test_bob_recovers_via_pwd_when_getcwd_differs(self, tmp_path):
185+
"""If Bob captured a symlinked path (preserved in $PWD) that differs from
186+
os.getcwd()'s resolved path, the $PWD-based hash still finds the chat —
187+
even though nothing is seeded under sha256(os.getcwd())."""
188+
import hashlib
189+
190+
home = tmp_path / "home"
191+
proj = tmp_path / "proj"
192+
proj.mkdir()
193+
symlink_path = "/some/symlinked/workspace" # what $PWD would carry
194+
sid = "d6484b2c-24f4-474c-8f43-36544e2dbcd8"
195+
chats = home / ".bob" / "tmp" / hashlib.sha256(symlink_path.encode()).hexdigest() / "chats"
196+
chats.mkdir(parents=True)
197+
(chats / "session-2026-06-10T21-12-d6484b2c.json").write_text(json.dumps({"sessionId": sid}), encoding="utf-8")
198+
199+
result = _run(
200+
proj,
201+
["project/baz"],
202+
{"BOBSHELL_CLI": "1", "PWD": symlink_path, "HOME": str(home), "USERPROFILE": str(home)},
203+
)
204+
205+
rows = _read_rows(proj / ".evolve" / "audit.log")
206+
assert rows[0]["session_id"] == sid
207+
assert "evolve-session:" not in result.stdout

tests/platform_integrations/test_provenance.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,30 @@ def test_locates_native_claude_transcript(self, tmp_path):
118118
assert "missing" not in cand
119119

120120

121+
class TestCandidatesCodexTranscript:
122+
"""Codex writes ~/.codex/sessions/<Y>/<M>/<D>/rollout-<ts>-<sid>.jsonl; the
123+
locator finds it by a recursive glob on the thread id."""
124+
125+
def test_locates_native_codex_transcript(self, tmp_path):
126+
home = tmp_path / "home"
127+
evolve_dir = tmp_path / "proj" / ".evolve"
128+
evolve_dir.mkdir(parents=True)
129+
sid = "019eb34f-f827-7311-b775-b749ae4fae72"
130+
write_audit(evolve_dir, [{"event": "recall", "session_id": sid, "entities": ["project/baz"]}])
131+
write_entity(evolve_dir, "project/baz", body="baz guidance")
132+
rollout = home / ".codex" / "sessions" / "2026" / "06" / "10" / f"rollout-2026-06-10T12-00-{sid}.jsonl"
133+
rollout.parent.mkdir(parents=True)
134+
rollout.write_text('{"x":1}\n', encoding="utf-8")
135+
136+
result = run_provenance("candidates", evolve_dir=evolve_dir, home=home)
137+
assert result.returncode == 0, result.stderr
138+
candidates = parse_jsonl(result.stdout)
139+
assert len(candidates) == 1
140+
assert candidates[0]["entity_id"] == "project/baz"
141+
assert candidates[0]["trajectory_path"] == str(rollout)
142+
assert "missing" not in candidates[0]
143+
144+
121145
class TestCandidatesBobTranscript:
122146
"""Bob writes ~/.bob/tmp/<projecthash>/chats/session-<ts>-<sid8>.json with a
123147
real ``sessionId`` field; the locator matches the chat file by that id."""

0 commit comments

Comments
 (0)