Skip to content

Commit 0f12fde

Browse files
sriumcp and claude authored
fix(isolation): always emit worktree_uncommitted_writes in findings.json (AI-native-Systems-Research#235) (AI-native-Systems-Research#237)
Before this change, a clean iteration produced findings.json without the `worktree_uncommitted_writes` key — the recorder silently no-op'd when the tripwire saw nothing. Operators (and a future regression test) could not distinguish:

- ✅ tripwire ran, saw nothing → key absent
- ❌ tripwire silently disappeared in a refactor → key absent

The absence of evidence was being conflated with evidence of absence — exactly what a tripwire is designed to prevent.

After this change, presence of the key signals "the tripwire ran" and the value signals "what it found":

- empty array → clean run (declared everything written, or wrote nothing)
- non-empty → paths the executor wrote without declaring; lost on cleanup
- absent → findings.json was never produced (execute-incomplete branch) or was corrupt at write time. Either of these is itself a real regression signal in an otherwise-successful run.

The schema description is updated accordingly. The recorder still no-ops when findings.json is missing (the execute-incomplete branch surfaces the data via retry_log.jsonl) and still refuses to write into a corrupt findings.json (logged at ERROR with the lost paths).

Tests now express the parametric table from the issue: empty undeclared writes the empty list (was: noop), schema validates the empty case, and the missing-findings no-raise path is exercised for both empty and non-empty inputs.

Closes AI-native-Systems-Research#235.
Refs AI-native-Systems-Research#228 (isolation tracker), AI-native-Systems-Research#230 (worktree tripwire).

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 76ed590 commit 0f12fde

3 files changed

Lines changed: 43 additions & 9 deletions

File tree

orchestrator/iteration.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,13 @@ def _record_undeclared_writes_in_findings(
9595
) -> None:
9696
"""Merge ``worktree_uncommitted_writes`` into ``findings.json`` (#230).
9797
98+
Always writes the (sorted, de-duplicated) list — including the empty
99+
case (#235). Presence of the key signals "the tripwire ran"; the
100+
value signals "what it found". Absence is reserved for the failure
101+
modes below (findings missing or corrupt) so an absent key in an
102+
otherwise-successful iteration is a real regression signal, not
103+
silent success.
104+
98105
No-op if findings.json is missing — the cleanup may be running in
99106
the execute-incomplete branch where findings was never produced
100107
(the caller surfaces the data via retry_log there instead).
@@ -104,7 +111,7 @@ def _record_undeclared_writes_in_findings(
104111
returns without writing — modifying a corrupt JSON file would
105112
only make recovery harder.
106113
"""
107-
if not undeclared or not findings_path.exists():
114+
if not findings_path.exists():
108115
return
109116
try:
110117
findings = json.loads(findings_path.read_text())
@@ -1136,14 +1143,16 @@ def _max_turns_for(phase_key: str) -> int:
11361143
# worktree is removed below. Persist into findings.json so the
11371144
# design agent on iter-N+1 can see what to declare in
11381145
# ``code_changes``. Tripwire only — never blocks cleanup.
1146+
# #235: emit unconditionally (even when the list is empty) so
1147+
# an absent key in findings.json is a real regression signal,
1148+
# not silent success.
11391149
if repo_path and experiment_id and experiment_dir is not None:
11401150
undeclared = _detect_undeclared_writes_for_iter(
11411151
iter_dir, experiment_dir,
11421152
)
1143-
if undeclared:
1144-
_record_undeclared_writes_in_findings(
1145-
iter_dir / "findings.json", undeclared,
1146-
)
1153+
_record_undeclared_writes_in_findings(
1154+
iter_dir / "findings.json", undeclared,
1155+
)
11471156
# Clean up worktree only on success
11481157
if repo_path and experiment_id:
11491158
remove_experiment_worktree(Path(repo_path), experiment_id)

orchestrator/schemas/findings.schema.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
"type": "array",
3030
"items": { "type": "string", "minLength": 1 },
3131
"uniqueItems": true,
32-
"description": "#230 — paths the executor wrote inside the experiment worktree without declaring them in the bundle's `code_changes`. Surfaced just before worktree cleanup; logged at WARNING. Empty array (or absent) means the executor declared everything it wrote, or wrote nothing untracked. The orchestrator does not block cleanup on undeclared writes — this is a tripwire, not a gate."
32+
"description": "#230 — paths the executor wrote inside the experiment worktree without declaring them in the bundle's `code_changes`. Surfaced just before worktree cleanup; logged at WARNING. #235: written unconditionally — presence of the key means the tripwire ran, value is what it found (empty list = clean run; non-empty = paths that will be lost on cleanup). Absent only when findings.json was never produced (execute-incomplete branch surfaces the data via `retry_log.jsonl` instead) or when findings.json was corrupt at write time. The orchestrator does not block cleanup on undeclared writes — this is a tripwire, not a gate."
3333
}
3434
},
3535
"$defs": {

tests/test_worktree.py

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -491,17 +491,42 @@ def test_findings_remains_schema_valid(self, tmp_path):
491491
schema = json.loads(schema_path.read_text())
492492
jsonschema.validate(loaded, schema) # raises on failure
493493

494-
def test_empty_undeclared_is_noop(self, tmp_path):
494+
def test_empty_undeclared_writes_empty_list(self, tmp_path):
495+
# #235: empty list is written explicitly so "I checked, saw nothing"
496+
# is distinguishable from "I never ran". Absence of evidence ≠
497+
# evidence of absence.
495498
findings_path = self._make_findings(tmp_path)
496-
original = findings_path.read_text()
497499
_record_undeclared_writes_in_findings(findings_path, [])
498-
assert findings_path.read_text() == original
500+
loaded = json.loads(findings_path.read_text())
501+
assert "worktree_uncommitted_writes" in loaded
502+
assert loaded["worktree_uncommitted_writes"] == []
503+
# Other keys preserved.
504+
assert loaded["experiment_valid"] is True
505+
506+
def test_empty_undeclared_keeps_findings_schema_valid(self, tmp_path):
507+
# #235: the empty-list case must still validate against the schema.
508+
import jsonschema
509+
findings_path = self._make_findings(tmp_path)
510+
_record_undeclared_writes_in_findings(findings_path, [])
511+
loaded = json.loads(findings_path.read_text())
512+
schema_path = (
513+
Path(__file__).resolve().parent.parent
514+
/ "orchestrator" / "schemas" / "findings.schema.json"
515+
)
516+
schema = json.loads(schema_path.read_text())
517+
jsonschema.validate(loaded, schema)
499518

500519
def test_missing_findings_no_raise(self, tmp_path):
501520
# If findings.json wasn't produced (bad iteration), don't blow up.
521+
# Applies whether undeclared is empty or non-empty — the recorder
522+
# has no findings to merge into either way.
502523
_record_undeclared_writes_in_findings(
503524
tmp_path / "missing.json", ["a.py"],
504525
)
526+
_record_undeclared_writes_in_findings(
527+
tmp_path / "missing.json", [],
528+
)
529+
assert not (tmp_path / "missing.json").exists()
505530

506531
def test_malformed_findings_no_raise(self, tmp_path):
507532
findings_path = tmp_path / "findings.json"

0 commit comments

Comments
 (0)