Merge pull request #44 from Digital-Threads/feat/compaction-distiller-subagent

Shahinyanm · web-flow · commit 266aef4a323f · 2026-06-13T23:25:13.000+04:00
feat: in-session compaction distiller subagent + advisory (0.25.0)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.25.0] - 2026-06-13
+
+### Added
+- **In-session compaction distiller.** A new `task-journal-distiller` subagent
+  (Haiku, `background: true`) reads a just-compacted conversation segment from
+  the transcript file and backfills the decisions / rejections / findings that
+  weren't logged yet for the active task — via the journal MCP, never closing a
+  task. Because it runs as an in-session subagent it costs no separate `claude
+  -p` call (~5k token overhead vs ~46k) and doesn't block the main chat. After a
+  compaction, the `SessionStart` hook now adds a short advisory suggesting the
+  main agent delegate the segment to it (the platform doesn't let a hook spawn a
+  subagent, so this is advisory; the existing deterministic catch-up remains the
+  guaranteed safety net). Disable the hint with `TJ_DISTILLER_HINT=0`.
+
+### Changed
+- **Cheaper, honest `complete` stats.** One-shot `claude -p` calls now pass
+  `--disallowed-tools` (we never use tools), keeping the built-in tool schemas
+  out of the prompt and roughly halving the harness overhead. The stats line now
+  leads with the real dollar cost for `claude -p` (whose token counts are muddy —
+  a big prompt lands in `cache_creation`, not `input_tokens`) and shows clean
+  token counts only for API backends; token sizes scale to `M`. When a
+  cost-reporting backend is used, a one-line tip points at `--backend anthropic`
+  (direct Haiku API, ~50× cheaper per task by skipping Claude Code's overhead)
+  or `--backend ollama` (free, local).
+
 ## [0.24.0] - 2026-06-13
 
 ### Added
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -7,7 +7,7 @@ members = [
 ]
 
 [workspace.package]
-version = "0.24.0"
+version = "0.25.0"
 edition = "2021"
 rust-version = "1.88"
 license = "MIT"
diff --git a/crates/tj-cli/Cargo.toml b/crates/tj-cli/Cargo.toml
@@ -23,7 +23,7 @@ default = ["embed"]
 embed = ["tj-core/embed"]
 
 [dependencies]
-tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.25.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 clap = { workspace = true }
 tracing = { workspace = true }
diff --git a/crates/tj-cli/src/main.rs b/crates/tj-cli/src/main.rs
@@ -2140,6 +2140,25 @@ fn main() -> Result<()> {
                         bundle.push_str(&reminder);
                         bundle.push_str("\n\n");
                     }
+                    // Advisory (the hook can't force it): suggest the main agent
+                    // delegate the just-compacted segment to the in-session
+                    // distiller subagent, which backfills missed reasoning from
+                    // the transcript file (which survives compaction) for the
+                    // active task(s). Background → never blocks. Gated off by
+                    // TJ_DISTILLER_HINT=0 for users who don't want it.
+                    if std::env::var("TJ_DISTILLER_HINT").as_deref() != Ok("0") {
+                        let transcript_hint = payload
+                            .get("transcript_path")
+                            .and_then(|v| v.as_str())
+                            .map(|p| format!(" (transcript: {p})"))
+                            .unwrap_or_default();
+                        bundle.push_str(&format!(
+                            "[task-journal] A compaction just occurred. If decisions, rejections, \
+or findings from before it are not yet in the journal for the active task(s) above, delegate to \
+the `task-journal-distiller` subagent to capture them from the transcript{transcript_hint}. It \
+runs in the background and won't block you; it only fills gaps and never closes tasks.\n\n"
+                        ));
+                    }
                 }
                 for tc in &recent {
                     let pack = tj_core::pack::assemble(
@@ -4149,31 +4168,34 @@ fn compute_savings(
     })
 }
 
-/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88k".
+/// Format a token count compactly: 980 → "980", 3_240 → "3.2k", 88_000 → "88.0k",
+/// 250_000 → "250k", 2_760_000 → "2.8M".
 fn fmt_tokens(n: u64) -> String {
     if n < 1_000 {
         n.to_string()
     } else if n < 100_000 {
         format!("{:.1}k", n as f64 / 1_000.0)
-    } else {
+    } else if n < 1_000_000 {
         format!("{}k", n / 1_000)
+    } else {
+        format!("{:.1}M", n as f64 / 1_000_000.0)
     }
 }
 
 /// Human cost-or-spent/saved suffix for a finalize line, e.g.
 /// " | cost $0.0012 · saved ~88.0k→1.5k tok (59×)" or " | spent 3.2k tok".
 fn stats_suffix(spent: &tj_core::llm::LlmUsage, saved: &Option<Savings>) -> String {
     let mut parts = Vec::new();
-    if spent.total_tokens() > 0 {
-        let cost = match spent.cost_usd {
-            Some(c) if c > 0.0 => format!(" (${c:.4})"),
-            _ => String::new(),
-        };
-        parts.push(format!(
-            "spent {} tok{}",
-            fmt_tokens(spent.total_tokens()),
-            cost
-        ));
+    // claude -p reports a (notional) dollar cost but muddy token counts — its
+    // big prompt lands in `cache_creation`, not `input_tokens` — so lead with
+    // the cost there. API backends report no cost but clean tokens, so show
+    // those instead.
+    match spent.cost_usd {
+        Some(c) if c > 0.0 => parts.push(format!("cost ${c:.4}")),
+        _ if spent.total_tokens() > 0 => {
+            parts.push(format!("spent {} tok", fmt_tokens(spent.total_tokens())))
+        }
+        _ => {}
     }
     if let Some(s) = saved {
         if s.pack_tokens > 0 && s.raw_tokens > s.pack_tokens {
@@ -4395,6 +4417,21 @@ fn finalize_one_task(
     Ok(out)
 }
 
+/// A one-line nudge shown when a cost-reporting backend (claude -p) was used:
+/// the same Haiku via a direct API skips Claude Code's harness overhead. Only
+/// claude -p reports a non-zero `cost_usd`, so this fires for it alone.
+fn backend_cost_tip(cost: Option<f64>) -> Option<String> {
+    match cost {
+        Some(c) if c > 0.0 => Some(
+            "tip: that cost is claude -p's Claude Code overhead (notional under a \
+subscription). For ~50× cheaper per task, use --backend anthropic (direct Haiku API, \
+needs ANTHROPIC_API_KEY) — or --backend ollama for free, local."
+                .to_string(),
+        ),
+        _ => None,
+    }
+}
+
 /// Human-readable one-liner for a finalize result.
 fn print_finalize_outcome(task_id: &str, out: &FinalizeOutcome) {
     if out.skipped_no_backend {
@@ -4458,6 +4495,9 @@ fn run_complete_single(
     };
     let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
     print_finalize_outcome(task_id, &out);
+    if let Some(tip) = backend_cost_tip(out.spent.cost_usd) {
+        eprintln!("{tip}");
+    }
     Ok(())
 }
 
@@ -4604,6 +4644,9 @@ fn run_complete_batch(
             totals.trim_start_matches(" | ")
         );
     }
+    if let Some(tip) = backend_cost_tip(total_spent.cost_usd) {
+        eprintln!("{tip}");
+    }
 
     if !left_open.is_empty() {
         println!("\nLeft open ({}):", left_open.len());
@@ -5682,10 +5725,26 @@ mod inline_tests {
             pack_tokens: 1_500,
         });
         let s = stats_suffix(&spent, &saved);
-        assert!(s.contains("spent 1.5k tok ($0.0012)"), "{s}");
+        // Cost-reporting backend (claude -p) → lead with cost, not muddy tokens.
+        assert!(s.contains("cost $0.0012"), "{s}");
         assert!(s.contains("saved ~90.0k→1.5k tok (60×)"), "{s}");
     }
 
+    #[test]
+    fn stats_suffix_shows_tokens_for_costless_backend() {
+        // API backend reports clean tokens, no cost → show the token count.
+        let spent = tj_core::llm::LlmUsage {
+            input_tokens: 1800,
+            output_tokens: 200,
+            cost_usd: None,
+        };
+        assert_eq!(
+            stats_suffix(&spent, &None),
+            " | spent 2.0k tok",
+            "API backend should show tokens"
+        );
+    }
+
     #[test]
     fn stats_suffix_empty_when_nothing_to_report() {
         let spent = tj_core::llm::LlmUsage::default();
diff --git a/crates/tj-cli/tests/cli.rs b/crates/tj-cli/tests/cli.rs
@@ -4516,6 +4516,10 @@ fn session_start_compact_prepends_active_task_reminder() {
         ctx.contains("Must ship before Friday"),
         "reminder must include the in-force constraint: {ctx}"
     );
+    assert!(
+        ctx.contains("task-journal-distiller"),
+        "compact SessionStart must advise delegating to the distiller subagent: {ctx}"
+    );
 }
 
 #[test]
@@ -4525,6 +4529,10 @@ fn session_start_startup_has_no_reminder() {
         !ctx.contains("[Active task after compaction]"),
         "non-compact SessionStart must NOT inject the reminder: {ctx}"
     );
+    assert!(
+        !ctx.contains("task-journal-distiller"),
+        "non-compact SessionStart must NOT advise the distiller: {ctx}"
+    );
 }
 
 /// Recursively collect file names under `dir` that match a predicate.
@@ -5621,7 +5629,7 @@ fn complete_retitles_and_closes_via_fake_backend() {
         .args(["complete", &task_id])
         .assert()
         .success()
-        .stdout(contains("spent 1.5k tok ($0.0012)"))
+        .stdout(contains("cost $0.0012"))
         .stdout(contains("retitled"))
         .stdout(contains("closed"));
 
diff --git a/crates/tj-core/src/classifier/agent_sdk.rs b/crates/tj-core/src/classifier/agent_sdk.rs
@@ -54,10 +54,21 @@ fn base_claude_command(model: &str) -> Command {
         .arg("--output-format")
         .arg("json")
         .arg("--strict-mcp-config")
+        // We never use tools in these one-shot text calls — denying the
+        // built-in toolset keeps their schemas out of the prompt, roughly
+        // halving the harness overhead. (The cache-creation cost floor
+        // remains; for true pennies use a direct API backend.)
+        .arg("--disallowed-tools")
+        .arg(DISABLED_TOOLS)
         .env(IN_CLASSIFIER_ENV, "1");
     cmd
 }
 
+/// Built-in tools denied in our one-shot `claude -p` calls (we only want a text
+/// completion, never tool use). Listed explicitly because there is no wildcard.
+const DISABLED_TOOLS: &str = "Bash Read Edit Write Glob Grep Task WebFetch \
+WebSearch NotebookEdit TodoWrite BashOutput KillBash";
+
 /// Production runner: invokes the local `claude` binary in print mode, pinned
 /// to the given model, asking for the JSON envelope and an isolated MCP config
 /// (`--strict-mcp-config` keeps the project's own MCP servers — including this
@@ -259,10 +270,6 @@ struct EnvelopeUsage {
     input_tokens: u64,
     #[serde(default)]
     output_tokens: u64,
-    #[serde(default)]
-    cache_creation_input_tokens: u64,
-    #[serde(default)]
-    cache_read_input_tokens: u64,
 }
 
 impl Classifier for ClaudeCliClassifier {
@@ -307,8 +314,14 @@ pub fn run_claude_json_usage(
     }
     let u = envelope.usage.unwrap_or_default();
     let usage = crate::llm::LlmUsage {
-        // Count cache reads/writes as input so the total reflects real context.
-        input_tokens: u.input_tokens + u.cache_creation_input_tokens + u.cache_read_input_tokens,
+        // Only our *fresh* prompt tokens — NOT the cached Claude Code system
+        // prompt + tool schemas (cache_read/creation), which are harness
+        // overhead, not work the user asked for. The dollar `cost` below still
+        // reflects everything (claude computes it with the cache discount), so
+        // these token counts understate what claude -p really processed; the
+        // CLI stats line therefore leads with the cost whenever one is
+        // reported. Switch to a direct API backend to avoid the overhead.
+        input_tokens: u.input_tokens,
         output_tokens: u.output_tokens,
         cost_usd: envelope.total_cost_usd,
     };
diff --git a/crates/tj-mcp/Cargo.toml b/crates/tj-mcp/Cargo.toml
@@ -17,7 +17,7 @@ path = "src/main.rs"
 
 [dependencies]
 # Lean: the MCP server doesn't embed yet, so it skips the model2vec backend.
-tj-core = { package = "task-journal-core", version = "0.24.0", path = "../tj-core", default-features = false }
+tj-core = { package = "task-journal-core", version = "0.25.0", path = "../tj-core", default-features = false }
 anyhow = { workspace = true }
 tokio = { workspace = true }
 tracing = { workspace = true }
diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "task-journal",
-  "version": "0.24.0",
+  "version": "0.25.0",
   "description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",
   "author": {
     "name": "Mher Shahinyan"
diff --git a/plugin/agents/task-journal-distiller.md b/plugin/agents/task-journal-distiller.md
@@ -0,0 +1,49 @@
+---
+name: task-journal-distiller
+description: Distills a conversation segment into task-journal memory. Use when a compaction just happened (or is about to), or when asked to "capture what we just did" — it reads the segment from the transcript, finds the decisions / rejections / findings that were NOT yet logged for the active task, and records them via the task-journal MCP. Runs in the background so it never blocks the main chat. Does NOT close tasks.
+model: haiku
+background: true
+tools: Read, Bash, Grep, Glob, mcp__plugin_task-journal_task-journal__task_search, mcp__plugin_task-journal_task-journal__task_pack, mcp__plugin_task-journal_task-journal__event_add
+---
+
+You are the **task-journal distiller**. A segment of a coding conversation is
+about to be (or has just been) compacted away. Your one job: make sure the
+**reasoning** from that segment is preserved in the task journal as typed
+events, so nothing is lost and the task does not later look "interrupted".
+
+You are dispatched with: the active **task id(s)**, the **transcript path**
+(a JSONL file), and optionally a **boundary timestamp** (the start of the
+segment — usually the task's last recorded event, or the previous compaction).
+
+## Procedure
+
+1. **Know what's already recorded.** For the task, call
+   `task_pack` (or `task_search`) and read its existing events. You will NOT
+   re-record anything already represented there.
+2. **Read the segment.** Read the transcript JSONL file (use `Read`; for large
+   files read the tail or grep for the boundary timestamp and read forward).
+   Focus on the assistant/user turns AFTER the boundary timestamp.
+3. **Extract only SIGNIFICANT, NOT-yet-logged reasoning** for the task:
+   - `decision` — a committed choice. Pass `alternatives` (the options weighed).
+   - `rejection` — an approach ruled out, and why.
+   - `finding` — a fact verified from code/logs (cite file:line, ids, names).
+   - `evidence` — a test/benchmark that proved something.
+   - `constraint` — an external limit discovered.
+   Skip chatter, restated tool output, greetings, and anything already in the
+   existing events. When in doubt, leave it out — precision over volume.
+4. **Record** each via `event_add(task_id, event_type, text, ...)`. Write in the
+   user's language, terse and specific. Append-only — never edit.
+
+## Hard rules
+
+- **Never close** a task and **never** mark it done — you only fill gaps.
+- **Never create** a new task unless the segment clearly pursued a *distinct*
+  objective with no matching open task; prefer attaching to the given task id.
+- **De-dupe ruthlessly** — if the substance is already an event, skip it.
+- If the transcript is unreadable or the segment holds nothing new, do nothing
+  and say so. Doing nothing is a valid, correct outcome.
+
+## Output
+
+One terse line: `distilled <N> event(s) into <task_id>: <comma-separated types>`
+(or `nothing new to record`). The main agent only needs this summary back.

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ members = [`
`7`	`7`	`]`
`8`	`8`
`9`	`9`	`[workspace.package]`
`10`		`-version = "0.24.0"`
	`10`	`+version = "0.25.0"`
`11`	`11`	`edition = "2021"`
`12`	`12`	`rust-version = "1.88"`
`13`	`13`	`license = "MIT"`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "task-journal",`
`3`		`- "version": "0.24.0",`
	`3`	`+ "version": "0.25.0",`
`4`	`4`	`"description": "Append-only journal of AI-coding task reasoning chains: hypotheses, decisions, rejections, evidence. Renders compact resume packs so an agent can pick up a 2-week-old task with full context.",`
`5`	`5`	`"author": {`
`6`	`6`	`"name": "Mher Shahinyan"`