Skip to content

Commit 3996a2e

Browse files
authored
Revert "feat(claude-plugin): record guideline usage per session in audit.log …" (#248)
This reverts commit 6cc2a5b.
1 parent a2ab28c commit 3996a2e

5 files changed

Lines changed: 1 addition & 370 deletions

File tree

platform-integrations/claude/plugins/evolve-lite/skills/learn/SKILL.md

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -116,38 +116,6 @@ The script will:
116116
- Deduplicate against existing entities
117117
- Display confirmation with the total count
118118

119-
### Step 5: Assess Influence of Recalled Entities
120-
121-
Regardless of whether Step 4 saved new entities, judge whether the guidelines the recall hook served to *this* session were actually followed, contradicted, or simply irrelevant. This closes the provenance loop: the recall hook records *what* was served; this step records *what effect* it had.
122-
123-
1. Derive this session's `session_id` from the `saved_trajectory_path` extracted in Step 0: strip the directory prefix and the `claude-transcript_` / `.jsonl` affixes. For `.evolve/trajectories/claude-transcript_abc-123.jsonl` the `session_id` is `abc-123`.
124-
125-
2. Read `.evolve/audit.log` (JSONL, one object per line). Find every line where `event == "recall"` and `session_id` matches. Take the union of their `entities` arrays — that is the set of guideline identifiers served to this session. Each identifier is a relative path from `.evolve/entities/` without the `.md` suffix (e.g. `guideline/foo` for a local entity, or `subscribed/alice/guideline/foo` for a subscribed one), so it unambiguously names one file. If the set is empty, skip this step.
126-
127-
3. For each identifier, open `.evolve/entities/<id>.md` with the Read tool. Read its content and trigger — together they express the guideline's intent. If the file is not found, skip the identifier (log it as an assessment-less entry).
128-
129-
4. Compare against the transcript loaded in Step 0. For each identifier, pick one verdict:
130-
- `followed` — the agent's actual actions are consistent with the guideline's recommendation.
131-
- `contradicted` — the guideline's trigger matched the task but the agent did the opposite, or hit the dead end the guideline would have prevented.
132-
- `not_applicable` — the guideline's trigger didn't match what this session was about.
133-
134-
Keep `evidence` to one short sentence citing a specific action or tool call from the transcript.
135-
136-
5. Emit one JSON payload and pipe it to the helper:
137-
138-
```bash
139-
echo '{
140-
"session_id": "<session-id>",
141-
"assessments": [
142-
{"entity": "guideline/<slug>", "verdict": "followed", "evidence": "Agent imported struct and parsed APP1 directly"}
143-
]
144-
}' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/learn/scripts/log_influence.py
145-
```
146-
147-
The `entity` value must match exactly what appeared in the recall event — include the `subscribed/<source>/` prefix if the entity came from a subscribed repo.
148-
149-
Emit zero assessments (empty `assessments` list) when no recall events exist for this session.
150-
151119
## Quality Gate
152120

153121
Before saving, review each entity against this checklist:

platform-integrations/claude/plugins/evolve-lite/skills/learn/scripts/log_influence.py

Lines changed: 0 additions & 79 deletions
This file was deleted.

platform-integrations/claude/plugins/evolve-lite/skills/recall/scripts/retrieve_entities.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88

99
# Add lib to path so we can import entity_io
1010
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent / "lib"))
11-
from entity_io import find_recall_entity_dirs, get_evolve_dir, markdown_to_entity, log as _log
12-
import audit
11+
from entity_io import find_recall_entity_dirs, markdown_to_entity, log as _log
1312

1413

1514
def log(message):
@@ -83,13 +82,6 @@ def load_entities_with_source(entities_dir):
8382
entity = markdown_to_entity(md)
8483
if not entity.get("content"):
8584
continue
86-
# Record the on-disk path relative to entities_dir (without the
87-
# .md suffix) as a qualified identifier. This distinguishes
88-
# same-named entities in different trees — e.g.
89-
# "guideline/foo" (local) vs "subscribed/alice/guideline/foo"
90-
# (from a subscribed repo) — so downstream auditing doesn't
91-
# collapse them into one.
92-
entity["_id"] = str(md.relative_to(entities_dir).with_suffix(""))
9385
# Detect subscribed entities by path: .../entities/subscribed/{name}/...
9486
parts = md.parts
9587
try:
@@ -137,24 +129,6 @@ def main():
137129
print(output)
138130
log(f"Output {len(output)} chars to stdout")
139131

140-
# Audit: record which entities were served to which session. Must not
141-
# fail the hook if logging errors — recall is the user-visible path.
142-
try:
143-
transcript_path = input_data.get("transcript_path", "")
144-
session_id = Path(transcript_path).stem if transcript_path else None
145-
entity_ids = sorted({e["_id"] for e in entities if e.get("_id")})
146-
if session_id and entity_ids:
147-
project_root = get_evolve_dir().resolve().parent
148-
audit.append(
149-
project_root=str(project_root),
150-
event="recall",
151-
session_id=session_id,
152-
entities=entity_ids,
153-
)
154-
log(f"Audit: recall session_id={session_id} entities={len(entity_ids)}")
155-
except Exception as exc:
156-
log(f"Audit append failed (non-fatal): {exc}")
157-
158132

159133
if __name__ == "__main__":
160134
main()

tests/e2e/test_sandbox_learn_recall.py

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -164,36 +164,3 @@ def test_learn_then_recall_flow(sandbox_ready, sandbox_workspace):
164164
# pip-installed). Other libraries (PIL, piexif, exifread) may appear in a
165165
# valid guideline as "install via pip and use", so we don't ban them.
166166
assert not re.search(r"\bexiftool\b", joined), "session 2 invoked exiftool despite recall guideline:\n" + "\n".join(commands)
167-
168-
# --- Usage provenance: audit.log should record recall + influence ---
169-
audit_log = sandbox_workspace / ".evolve" / "audit.log"
170-
assert audit_log.is_file(), f"{audit_log} was not created — recall did not append audit events"
171-
172-
events = []
173-
for line in audit_log.read_text().splitlines():
174-
line = line.strip()
175-
if not line:
176-
continue
177-
events.append(json.loads(line))
178-
179-
session2_id = session2_transcript.stem.removeprefix("claude-transcript_")
180-
# Recall audit records qualified ids — path relative to .evolve/entities/
181-
# without the .md suffix — so we match session 1's entities the same way.
182-
session1_ids = {str(p.relative_to(entities_dir).with_suffix("")) for p in entity_files}
183-
184-
recall_events = [e for e in events if e.get("event") == "recall" and e.get("session_id") == session2_id]
185-
assert recall_events, f"no recall audit event for session 2 ({session2_id}). all events: {events}"
186-
recalled_ids = {eid for e in recall_events for eid in e.get("entities", [])}
187-
assert recalled_ids & session1_ids, f"recall event entities {recalled_ids} did not include any id from session 1 ({session1_ids})"
188-
log.info(f"session 2: audit recorded recall of {recalled_ids}")
189-
190-
influence_events = [e for e in events if e.get("event") == "influence" and e.get("session_id") == session2_id]
191-
assert influence_events, (
192-
f"no influence audit event for session 2 ({session2_id}). recall events exist but learn did not emit assessments."
193-
)
194-
for ie in influence_events:
195-
assert ie.get("verdict") in {"followed", "contradicted", "not_applicable"}, f"influence event has invalid verdict: {ie}"
196-
log.info(
197-
f"session 2: audit recorded {len(influence_events)} influence assessment(s): "
198-
f"{[(e['entity'], e['verdict']) for e in influence_events]}"
199-
)

0 commit comments

Comments
 (0)