Skip to content

Commit 34715cc

Browse files
Shahinyanm and claude committed
feat(v0.7.1): PreCompact transcript catch-up
Read transcript JSONL on PreCompact, enqueue entries newer than the active task's last event timestamp as pending v2 chunks, spawn the classify-worker. Closes the gap between the final PostToolUse and the compaction event, where chunks were previously lost. Also fix plugin/hooks/hooks.json PostToolUse template — was passing $TOOL_OUTPUT (an env var Claude Code never sets), so the plugin install path always fed the classifier empty text. Stdin parse is the correct wiring, matching install-hooks. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 30cbfa0 commit 34715cc

10 files changed

Lines changed: 265 additions & 18 deletions

File tree

CHANGELOG.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.7.1] - 2026-05-17
11+
12+
PreCompact closes the gap before compaction — the synchronous hook only
13+
fires on `PostToolUse`, so any reasoning between the final tool call and
14+
the compact event used to vanish. v0.7.1 reads the transcript JSONL on
15+
`PreCompact`, enqueues entries newer than the active task's last event
16+
timestamp as pending v2 chunks, and spawns the classify-worker. The
17+
boundary marker still lands as before; the catch-up is additive.
18+
19+
### Added
20+
- PreCompact transcript catch-up — `ingest-hook --kind=PreCompact` now
21+
reads `transcript_path` from the hook stdin payload, walks the
22+
session JSONL, and enqueues user/assistant entries newer than the
23+
task's last event timestamp as pending v2 chunks (`UserPromptSubmit`
24+
/ `PreCompactChunk`). The classify-worker picks them up after the
25+
hook returns. Best-effort: missing or unreadable transcript falls
26+
through to the marker-only path.
27+
- Plugin `hooks.json` now wires `PreCompact` (was previously only
28+
installed via `install-hooks`). Plugin users get the catch-up
29+
without re-running the installer.
30+
- `TJ_DISABLE_CLASSIFY_SPAWN` env var — when set (e.g. to `1`), skips the classify-worker
31+
spawn after enqueueing. Test-only; not documented as public.
32+
33+
### Fixed
34+
- Plugin `hooks.json` PostToolUse template — was passing
35+
`--text="$TOOL_OUTPUT"` (an env var Claude Code never sets), feeding
36+
the classifier empty text and dropping every PostToolUse event in
37+
the plugin install path. Now reads the hook payload from stdin like
38+
`install-hooks` already does. Plugin users may see a sudden
39+
uplift in captured events — by design.
40+
1041
## [0.7.0] - 2026-05-10
1142

1243
Reasoning-chain ergonomics: surface the journal in the Claude Code

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ members = [
77
]
88

99
[workspace.package]
10-
version = "0.7.0"
10+
version = "0.7.1"
1111
edition = "2021"
1212
rust-version = "1.88"
1313
license = "MIT"

crates/tj-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ name = "task-journal"
1616
path = "src/main.rs"
1717

1818
[dependencies]
19-
tj-core = { package = "task-journal-core", version = "0.7.0", path = "../tj-core" }
19+
tj-core = { package = "task-journal-core", version = "0.7.1", path = "../tj-core" }
2020
anyhow = { workspace = true }
2121
clap = { workspace = true }
2222
tracing = { workspace = true }

crates/tj-cli/src/main.rs

Lines changed: 93 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,8 +1514,8 @@ fn main() -> Result<()> {
15141514
// `$CLAUDE_HOOK_TEXT` env vars that Claude Code does NOT set,
15151515
// so production was always called with empty text and every
15161516
// event ended up rejected — see claude-memory-rsw.
1517-
let (kind, text) = match (kind, text) {
1518-
(Some(k), Some(t)) => (k, t),
1517+
let (kind, text, payload) = match (kind, text) {
1518+
(Some(k), Some(t)) => (k, t, serde_json::Value::Null),
15191519
_ => parse_hook_stdin()?,
15201520
};
15211521

@@ -1566,10 +1566,14 @@ fn main() -> Result<()> {
15661566
}
15671567

15681568
// PreCompact: Claude Code is about to compact the conversation.
1569-
// Drop a marker decision event on the most-recent open task so
1570-
// the post-compact agent sees a clear boundary in the journal.
1571-
// The marker is intentionally minimal — a future v0.7.x may
1572-
// synthesize a real summary if CC starts exposing one on stdin.
1569+
// Two responsibilities:
1570+
// 1. Catch-up ingest — read the transcript JSONL tail (entries
1571+
// newer than the active task's last event timestamp) and
1572+
// enqueue them as pending v2 chunks for the classify-worker.
1573+
// Closes the gap between the last PostToolUse hook and the
1574+
// compaction event, where chunks would otherwise be lost.
1575+
// 2. Boundary marker — synthetic decision event so the
1576+
// post-compact agent sees a clear cut in the journal.
15731577
if kind == "PreCompact" {
15741578
if !events_path.exists() {
15751579
return Ok(());
@@ -1582,6 +1586,38 @@ fn main() -> Result<()> {
15821586
let Some(tc) = recent.into_iter().next() else {
15831587
return Ok(());
15841588
};
1589+
1590+
// (1) Catch-up ingest. Best-effort: missing transcript_path
1591+
// or unreadable JSONL falls through to the marker only.
1592+
let last_event_ts: Option<String> = conn
1593+
.query_row(
1594+
"SELECT timestamp FROM events_index WHERE task_id=?1 \
1595+
ORDER BY timestamp DESC LIMIT 1",
1596+
rusqlite::params![&tc.task_id],
1597+
|r| r.get::<_, String>(0),
1598+
)
1599+
.ok();
1600+
let transcript_path = payload
1601+
.get("transcript_path")
1602+
.and_then(|x| x.as_str())
1603+
.map(std::path::PathBuf::from);
1604+
if let Some(tp) = transcript_path.as_ref() {
1605+
if tp.exists() {
1606+
let enq = precompact_enqueue_transcript_chunks(
1607+
tp,
1608+
&events_path,
1609+
&project_hash,
1610+
&backend,
1611+
last_event_ts.as_deref(),
1612+
)
1613+
.unwrap_or(0);
1614+
if enq > 0 && std::env::var("TJ_DISABLE_CLASSIFY_SPAWN").is_err() {
1615+
let _ = spawn_classify_worker(&backend);
1616+
}
1617+
}
1618+
}
1619+
1620+
// (2) Boundary marker.
15851621
let now = chrono::Utc::now()
15861622
.to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
15871623
let marker_text = format!(
@@ -2801,6 +2837,54 @@ fn persist_pending_v2(
28012837
Ok(path)
28022838
}
28032839

2840+
/// PreCompact catch-up: parse the transcript JSONL and enqueue text
2841+
/// chunks newer than `last_event_ts` as pending v2 entries. The
2842+
/// classify-worker picks them up afterwards. Returns the number of
2843+
/// chunks queued. Errors are absorbed — best-effort, never fatal.
2844+
fn precompact_enqueue_transcript_chunks(
2845+
transcript_path: &std::path::Path,
2846+
events_path: &std::path::Path,
2847+
project_hash: &str,
2848+
backend: &str,
2849+
last_event_ts: Option<&str>,
2850+
) -> anyhow::Result<usize> {
2851+
use tj_core::session::parser::{
2852+
extract_assistant_texts, extract_user_text, parse_session, SessionEntry,
2853+
};
2854+
let parsed = match parse_session(transcript_path) {
2855+
Ok(p) => p,
2856+
Err(_) => return Ok(0),
2857+
};
2858+
let mut count = 0usize;
2859+
for entry in &parsed.entries {
2860+
let (ts, text, kind) = match entry {
2861+
SessionEntry::User(u) => {
2862+
let text = extract_user_text(u).unwrap_or_default();
2863+
(u.timestamp.clone(), text, "UserPromptSubmit")
2864+
}
2865+
SessionEntry::Assistant(a) => {
2866+
let texts = extract_assistant_texts(a);
2867+
if texts.is_empty() {
2868+
continue;
2869+
}
2870+
(a.timestamp.clone(), texts.join("\n"), "PreCompactChunk")
2871+
}
2872+
_ => continue,
2873+
};
2874+
if text.trim().len() < 20 {
2875+
continue;
2876+
}
2877+
if let Some(last) = last_event_ts {
2878+
if ts.as_str() <= last {
2879+
continue;
2880+
}
2881+
}
2882+
persist_pending_v2(events_path, kind, &text, project_hash, backend)?;
2883+
count += 1;
2884+
}
2885+
Ok(count)
2886+
}
2887+
28042888
/// Spawn the classify-worker as a detached child. We deliberately drop
28052889
/// the `Child` handle so the parent (the actual Claude Code hook child)
28062890
/// can exit without waiting; the worker re-parents to init on Linux.
@@ -3164,13 +3248,13 @@ fn drain_pending(
31643248
/// piping), we silently return ("Stop", "") so the hook becomes a no-op
31653249
/// instead of erroring — matches the `|| true` safety net in the
31663250
/// installed hook command.
3167-
fn parse_hook_stdin() -> anyhow::Result<(String, String)> {
3251+
fn parse_hook_stdin() -> anyhow::Result<(String, String, serde_json::Value)> {
31683252
let mut buf = String::new();
31693253
std::io::Read::read_to_string(&mut std::io::stdin(), &mut buf)
31703254
.context("read hook payload from stdin")?;
31713255
let buf = buf.trim();
31723256
if buf.is_empty() {
3173-
return Ok(("Stop".into(), String::new()));
3257+
return Ok(("Stop".into(), String::new(), serde_json::Value::Null));
31743258
}
31753259
let v: serde_json::Value =
31763260
serde_json::from_str(buf).with_context(|| format!("parse hook payload JSON: {buf}"))?;
@@ -3209,7 +3293,7 @@ fn parse_hook_stdin() -> anyhow::Result<(String, String)> {
32093293
_ => String::new(),
32103294
};
32113295

3212-
Ok((kind, text))
3296+
Ok((kind, text, v))
32133297
}
32143298

32153299
fn parse_event_type(s: &str) -> anyhow::Result<tj_core::event::EventType> {

crates/tj-cli/tests/cli.rs

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2482,6 +2482,127 @@ fn precompact_hook_with_no_open_task_writes_nothing() {
24822482
.stdout("");
24832483
}
24842484

2485+
#[test]
fn precompact_ingests_transcript_tail_into_pending_v2() {
    // Isolated data home plus a project directory to run the CLI from.
    let dir = assert_fs::TempDir::new().unwrap();
    let workdir = dir.path().join("proj");
    std::fs::create_dir_all(&workdir).unwrap();

    // Create the open task that the PreCompact hook will attach to.
    let create_stdout = Command::cargo_bin("task-journal")
        .unwrap()
        .env("XDG_DATA_HOME", dir.path())
        .current_dir(&workdir)
        .args(["create", "Compactable thing"])
        .assert()
        .success()
        .get_output()
        .stdout
        .clone();
    let _task_id = String::from_utf8(create_stdout)
        .unwrap()
        .trim()
        .to_string();

    // Two transcript entries stamped in 2099, i.e. later than anything
    // `task-journal create` could have written, so the catch-up walk must
    // treat both as newer than the task's last event.
    let line_user = r#"{"type":"user","uuid":"u1","timestamp":"2099-01-01T00:00:00.000Z","sessionId":"s1","message":{"content":"I think the auth middleware drops the token at the refresh boundary"}}"#;
    let line_assistant = r#"{"type":"assistant","uuid":"a1","timestamp":"2099-01-01T00:00:05.000Z","sessionId":"s1","message":{"content":[{"type":"text","text":"Confirmed: src/auth/refresh.rs uses < instead of <= at the expiry comparison."}]}}"#;
    let transcript = workdir.join("session.jsonl");
    std::fs::write(&transcript, format!("{line_user}\n{line_assistant}\n")).unwrap();

    // Hook payload as it would arrive on stdin.
    let stdin_payload = serde_json::json!({
        "hook_event_name": "PreCompact",
        "transcript_path": transcript.to_str().unwrap(),
    })
    .to_string();

    // Run the hook with the worker spawn disabled so the queue stays intact.
    Command::cargo_bin("task-journal")
        .unwrap()
        .env("XDG_DATA_HOME", dir.path())
        .env("TJ_DISABLE_CLASSIFY_SPAWN", "1")
        .current_dir(&workdir)
        .args(["ingest-hook", "--backend", "cli"])
        .write_stdin(stdin_payload)
        .assert()
        .success();

    let pending_dir = dir.path().join("task-journal").join("pending");
    let queued: Vec<_> = std::fs::read_dir(&pending_dir)
        .expect("pending dir must exist after PreCompact ingest")
        .filter_map(|e| e.ok())
        .collect();
    assert_eq!(
        queued.len(),
        2,
        "expected 2 pending v2 chunks (user + assistant), got {}",
        queued.len()
    );

    // Every queued file must be a v2 record; one must hold the user text
    // and the other the assistant text, each tagged with the right kind.
    let chunks: Vec<serde_json::Value> = queued
        .iter()
        .map(|file| {
            let raw = std::fs::read_to_string(file.path()).unwrap();
            serde_json::from_str(&raw).unwrap()
        })
        .collect();

    let mut saw_user = false;
    let mut saw_assistant = false;
    for chunk in &chunks {
        assert_eq!(chunk["schema"], "v2");
        let text = chunk["text"].as_str().unwrap_or("");
        if text.contains("auth middleware drops the token") {
            saw_user = true;
            assert_eq!(chunk["kind"], "UserPromptSubmit");
        }
        if text.contains("uses < instead of <=") {
            saw_assistant = true;
            assert_eq!(chunk["kind"], "PreCompactChunk");
        }
    }
    assert!(
        saw_user && saw_assistant,
        "missing one of the chunks: user={saw_user} assistant={saw_assistant}"
    );
}
2562+
2563+
#[test]
fn precompact_skips_transcript_entries_older_than_last_event() {
    // Isolated data home plus a project directory to run the CLI from.
    let dir = assert_fs::TempDir::new().unwrap();
    let workdir = dir.path().join("proj");
    std::fs::create_dir_all(&workdir).unwrap();

    // Create the open task whose create event sets the cutoff timestamp.
    Command::cargo_bin("task-journal")
        .unwrap()
        .env("XDG_DATA_HOME", dir.path())
        .current_dir(&workdir)
        .args(["create", "Already covered"])
        .assert()
        .success();

    // A single transcript entry from the year 2000 — strictly older than
    // the task's create event. Catch-up must skip it, leaving the pending
    // queue empty.
    let line_old = r#"{"type":"user","uuid":"u1","timestamp":"2000-01-01T00:00:00.000Z","sessionId":"s1","message":{"content":"ancient chatter that classifier already processed"}}"#;
    let transcript = workdir.join("session.jsonl");
    std::fs::write(&transcript, format!("{line_old}\n")).unwrap();

    let stdin_payload = serde_json::json!({
        "hook_event_name": "PreCompact",
        "transcript_path": transcript.to_str().unwrap(),
    })
    .to_string();

    Command::cargo_bin("task-journal")
        .unwrap()
        .env("XDG_DATA_HOME", dir.path())
        .env("TJ_DISABLE_CLASSIFY_SPAWN", "1")
        .current_dir(&workdir)
        .args(["ingest-hook", "--backend", "cli"])
        .write_stdin(stdin_payload)
        .assert()
        .success();

    // A missing pending directory counts as "nothing queued".
    let pending_dir = dir.path().join("task-journal").join("pending");
    let queued_count = match std::fs::read_dir(&pending_dir) {
        Ok(listing) => listing.count(),
        Err(_) => 0,
    };
    assert_eq!(queued_count, 0, "no chunks must be queued for ancient transcript");
}
2605+
24852606
#[test]
24862607
fn rewind_prompt_appends_correction_event() {
24872608
let dir = assert_fs::TempDir::new().unwrap();

crates/tj-mcp/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ name = "task-journal-mcp"
1616
path = "src/main.rs"
1717

1818
[dependencies]
19-
tj-core = { package = "task-journal-core", version = "0.7.0", path = "../tj-core" }
19+
tj-core = { package = "task-journal-core", version = "0.7.1", path = "../tj-core" }
2020
anyhow = { workspace = true }
2121
tokio = { workspace = true }
2222
tracing = { workspace = true }

plugin/.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "task-journal",
3-
"version": "0.7.0",
3+
"version": "0.7.1",
44
"description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
55
"author": {
66
"name": "Mher Shahinyan"

plugin/hooks/hooks.json

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,18 @@
66
"hooks": [
77
{
88
"type": "command",
9-
"command": "task-journal ingest-hook --kind=PostToolUse --text=\"$TOOL_OUTPUT\" --backend=cli 2>/dev/null || true"
9+
"command": "task-journal ingest-hook --backend=cli 2>/dev/null || true"
10+
}
11+
]
12+
}
13+
],
14+
"PreCompact": [
15+
{
16+
"matcher": "",
17+
"hooks": [
18+
{
19+
"type": "command",
20+
"command": "task-journal ingest-hook --backend=cli 2>/dev/null || true"
1021
}
1122
]
1223
}

0 commit comments

Comments
 (0)