Skip to content

Commit 2d7171e

Browse files
authored
Merge pull request #41 from Digital-Threads/fix/enrich-nonjson-resilience
feat: complete judge-only by default, --enrich opt-in + enrich hardening (0.23.0)
2 parents 552ae5b + 6c5cd64 commit 2d7171e

11 files changed

Lines changed: 238 additions & 45 deletions

File tree

CHANGELOG.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,46 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.23.0] - 2026-06-13
11+
12+
Finalize, retuned after running `complete` on real 12-session tasks: the fast,
13+
reliable judge-only path is now the default, and the slow session-enrich pass is
14+
opt-in.
15+
16+
### Changed
17+
- **`complete` is judge-only by default; enrich is opt-in via `--enrich`.**
18+
Finalizing through the model's judgment (retitle + close + outcome) takes
19+
seconds and is what gives ~90% of the value. The session-backfill pass — one
20+
`claude -p` call per session, minutes on a big multi-session task — proved too
21+
slow to be the default, so it now runs only with `--enrich`. (The old `--quick`
22+
flag is gone: its behaviour is the default. Replace `complete <id> --quick`
23+
with `complete <id>`, and `complete <id>` with `complete <id> --enrich` if you
24+
want the old full behaviour.)
25+
26+
### Fixed
27+
- **`complete` survives a non-JSON enrich reply.** When the backfill model
28+
answered with prose instead of the requested JSON array — e.g. continuing the
29+
transcript's own dialogue ("Контекст в норме… Что дальше?") — the parse error
30+
aborted the whole `complete`, losing the retitle and close. Backfill now skips
31+
an unparseable chunk reply (with a warning), the parser extracts a JSON array
32+
even when wrapped in prose, and the prompt re-asserts "output ONLY the JSON
33+
array, do not continue the transcript" after the transcript.
34+
- **Enrich chunks are sized for `claude -p`'s overhead.** `claude -p` is a full
35+
Claude Code instance whose system prompt + tool definitions cost ~113k tokens
36+
before our content, so the earlier 360k-char chunk still 400'd at ~204k total.
37+
The per-call transcript budget drops to 150k chars (~37k tokens), and **any**
38+
per-chunk failure (over-budget 400, transient error, non-JSON) is skipped
39+
rather than aborting — a genuinely broken backend still surfaces at the judge
40+
step.
41+
- **No more apparent hang.** A big task makes many sequential `claude -p` calls;
42+
without a timeout one wedged call hung the whole command with no output. Each
43+
call now has a wall-clock timeout (90s, `TJ_CLAUDE_TIMEOUT_SECS`) that kills a
44+
stuck `claude` (pipes drained in threads to avoid buffer deadlock), and enrich
45+
prints an "enriching N session(s)…" progress line that says how to skip the pass.
46+
- **Legible `claude -p` errors** (carried from the same investigation): a
47+
non-zero exit now surfaces the JSON error claude prints on stdout, so failures
48+
read as "Prompt is too long · ~204261 tokens" instead of a bare "exit 1".
49+
1050
## [0.22.1] - 2026-06-13
1151

1252
### Fixed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ members = [
77
]
88

99
[workspace.package]
10-
version = "0.22.1"
10+
version = "0.23.0"
1111
edition = "2021"
1212
rust-version = "1.88"
1313
license = "MIT"

crates/tj-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ default = ["embed"]
2323
embed = ["tj-core/embed"]
2424

2525
[dependencies]
26-
tj-core = { package = "task-journal-core", version = "0.22.1", path = "../tj-core", default-features = false }
26+
tj-core = { package = "task-journal-core", version = "0.23.0", path = "../tj-core", default-features = false }
2727
anyhow = { workspace = true }
2828
clap = { workspace = true }
2929
tracing = { workspace = true }

crates/tj-cli/src/main.rs

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -871,21 +871,22 @@ enum Commands {
871871
#[arg(long)]
872872
backend: Option<String>,
873873
},
874-
/// Finalize a task: enrich its memory from the sessions it touched, fix a
875-
/// junk auto-title, and close it IF the events clearly show it is done —
876-
/// the model decides from the content. Omit the id to finalize every open
877-
/// task in the project (batch, with a reviewable list). One LLM call per
878-
/// session for enrich + one judge call per task, via the chosen backend
879-
/// (free with `--backend ollama`).
874+
/// Finalize a task: fix a junk auto-title and close it IF the events
875+
/// clearly show it is done — the model decides from the content, in
876+
/// seconds. Omit the id to finalize every open task (batch, with a
877+
/// reviewable list). Add `--enrich` to also re-read the task's sessions and
878+
/// backfill missed events first — thorough but slow (one `claude -p` call
879+
/// per session; minutes on a big multi-session task).
880880
Complete {
881881
/// The task id to finalize. Omit to finalize all open tasks (batch).
882882
task: Option<String>,
883883
/// Show scope and planned actions without calling the model or writing.
884884
#[arg(long)]
885885
dry_run: bool,
886-
/// Skip the (heavy) enrich pass; judge/retitle/close from stored events only.
886+
/// Also backfill missed events from the task's sessions before judging.
887+
/// Thorough but slow (one `claude -p` call per session).
887888
#[arg(long)]
888-
quick: bool,
889+
enrich: bool,
889890
/// Required for batch finalize when stdin is not an interactive terminal.
890891
#[arg(long)]
891892
yes: bool,
@@ -2784,12 +2785,12 @@ fn main() -> Result<()> {
27842785
Commands::Complete {
27852786
task,
27862787
dry_run,
2787-
quick,
2788+
enrich,
27882789
yes,
27892790
backend,
27902791
} => match task {
2791-
Some(id) => run_complete_single(&id, dry_run, quick, backend.as_deref())?,
2792-
None => run_complete_batch(dry_run, quick, yes, backend.as_deref())?,
2792+
Some(id) => run_complete_single(&id, dry_run, enrich, backend.as_deref())?,
2793+
None => run_complete_batch(dry_run, enrich, yes, backend.as_deref())?,
27932794
},
27942795
Commands::Export {
27952796
format,
@@ -4153,6 +4154,14 @@ fn enrich_task(
41534154
if sessions.is_empty() {
41544155
return Ok(0);
41554156
}
4157+
// Enrich is the slow part — one (or more, for big transcripts) `claude -p`
4158+
// call per session. Announce it so a multi-minute run doesn't look hung;
4159+
// `--quick` skips this entirely.
4160+
eprintln!(
4161+
"complete: enriching {} session(s) via {} — can take a few minutes (or use --quick to skip)…",
4162+
sessions.len(),
4163+
llm.name()
4164+
);
41564165
let run_id = ulid::Ulid::new().to_string();
41574166
let dream_backend = tj_core::dream::llm_backend::LlmDreamBackend::new(llm);
41584167
let opts = tj_core::dream::DreamOptions {
@@ -4206,7 +4215,7 @@ fn task_event_lines(conn: &rusqlite::Connection, task_id: &str) -> anyhow::Resul
42064215
fn finalize_one_task(
42074216
ctx: &ProjectCtx<'_>,
42084217
task_id: &str,
4209-
quick: bool,
4218+
enrich: bool,
42104219
dry_run: bool,
42114220
backend: Option<&str>,
42124221
) -> anyhow::Result<FinalizeOutcome> {
@@ -4215,8 +4224,9 @@ fn finalize_one_task(
42154224
let events_path = ctx.events_path;
42164225
let project_hash = ctx.project_hash;
42174226

4218-
// 1. Enrich (unless quick / dry-run) — needs sessions and a backend.
4219-
if !quick && !dry_run {
4227+
// 1. Enrich (only when asked, and not on a dry-run) — needs sessions and a
4228+
// backend. Off by default because it is slow (one claude -p per session).
4229+
if enrich && !dry_run {
42204230
if let Some(dir) = ctx.project_dir {
42214231
if let Some(llm) = tj_core::llm::backend_from_env(backend)? {
42224232
out.enriched = enrich_task(conn, events_path, project_hash, dir, task_id, llm)?;
@@ -4331,7 +4341,7 @@ PATH; or pick one via --backend / TJ_BACKEND: anthropic, openai, ollama (free, l
43314341
fn run_complete_single(
43324342
task_id: &str,
43334343
dry_run: bool,
4334-
quick: bool,
4344+
enrich: bool,
43354345
backend: Option<&str>,
43364346
) -> anyhow::Result<()> {
43374347
let cwd = std::env::current_dir()?;
@@ -4352,7 +4362,7 @@ fn run_complete_single(
43524362
project_hash: &project_hash,
43534363
project_dir: project_dir.as_deref(),
43544364
};
4355-
let out = finalize_one_task(&ctx, task_id, quick, dry_run, backend)?;
4365+
let out = finalize_one_task(&ctx, task_id, enrich, dry_run, backend)?;
43564366
print_finalize_outcome(task_id, &out);
43574367
Ok(())
43584368
}
@@ -4361,7 +4371,7 @@ fn run_complete_single(
43614371
/// user can prune before confirming. Refuses without a TTY unless `--yes`.
43624372
fn run_complete_batch(
43634373
dry_run: bool,
4364-
quick: bool,
4374+
enrich: bool,
43654375
yes: bool,
43664376
backend: Option<&str>,
43674377
) -> anyhow::Result<()> {
@@ -4417,7 +4427,7 @@ fn run_complete_batch(
44174427
if dry_run {
44184428
println!();
44194429
for (id, _) in &open {
4420-
finalize_one_task(&ctx, id, quick, true, backend)?;
4430+
finalize_one_task(&ctx, id, enrich, true, backend)?;
44214431
}
44224432
return Ok(());
44234433
}
@@ -4457,7 +4467,11 @@ fn run_complete_batch(
44574467
println!(
44584468
"\nWill finalize {} task(s){}. Proceed? [y/N]",
44594469
targets.len(),
4460-
if quick { " (quick: no enrich)" } else { "" }
4470+
if enrich {
4471+
" (with --enrich: slow, reads sessions)"
4472+
} else {
4473+
""
4474+
}
44614475
);
44624476
let mut buf = String::new();
44634477
std::io::stdin().read_line(&mut buf)?;
@@ -4469,7 +4483,7 @@ fn run_complete_batch(
44694483

44704484
let mut left_open: Vec<(String, String)> = Vec::new();
44714485
for (id, _) in &targets {
4472-
let out = finalize_one_task(&ctx, id, quick, false, backend)?;
4486+
let out = finalize_one_task(&ctx, id, enrich, false, backend)?;
44734487
print_finalize_outcome(id, &out);
44744488
if out.skipped_no_backend {
44754489
println!("complete: stopping batch — no LLM backend available.");

crates/tj-cli/tests/cli.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5549,10 +5549,10 @@ fn complete_batch_dry_run_lists_open_tasks() {
55495549
/// `claude` on PATH returning a canned judgment. Proves the wiring: junk
55505550
/// title → Rename, done verdict → Close with a persisted outcome. Unix-only
55515551
/// (shell-script stub); the logic itself is covered cross-platform by the
5552-
/// finalize.rs unit tests.
5552+
/// finalize.rs unit tests. Default mode (judge-only, no `--enrich`).
55535553
#[cfg(unix)]
55545554
#[test]
5555-
fn complete_quick_retitles_and_closes_via_fake_backend() {
5555+
fn complete_retitles_and_closes_via_fake_backend() {
55565556
use std::os::unix::fs::PermissionsExt;
55575557

55585558
let dir = assert_fs::TempDir::new().unwrap();
@@ -5609,14 +5609,14 @@ fn complete_quick_retitles_and_closes_via_fake_backend() {
56095609
.trim()
56105610
.to_string();
56115611

5612-
// --quick: skip enrich (no sessions), exercise judge → retitle → close.
5612+
// Default mode (judge-only): exercise judge → retitle → close.
56135613
Command::cargo_bin("task-journal")
56145614
.unwrap()
56155615
.current_dir(proj.path())
56165616
.env("XDG_DATA_HOME", dir.path())
56175617
.env("PATH", &path_env)
56185618
.env_remove("ANTHROPIC_API_KEY")
5619-
.args(["complete", &task_id, "--quick"])
5619+
.args(["complete", &task_id])
56205620
.assert()
56215621
.success()
56225622
.stdout(contains("retitled"))

crates/tj-core/src/classifier/agent_sdk.rs

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,71 @@ fn claude_exit_error(
9393
anyhow!("`claude -p` exited with {status}: {detail}")
9494
}
9595

96+
/// Per-call wall-clock ceiling for a `claude -p` invocation.
///
/// A wedged `claude` process is killed once this much time has passed, so a
/// multi-chunk enrich can't hang the whole `complete`.
///
/// The value is read from `TJ_CLAUDE_TIMEOUT_SECS` (whole seconds). Surrounding
/// whitespace is ignored so values sourced from `.env` files or quoted shell
/// exports (`"120 "`, `"120\n"`) are honoured instead of being silently
/// dropped. An unset, non-numeric, or zero value falls back to 90 seconds.
fn claude_timeout() -> std::time::Duration {
    /// Fallback used when the override is absent or unusable.
    const DEFAULT_SECS: u64 = 90;

    let secs = std::env::var("TJ_CLAUDE_TIMEOUT_SECS")
        .ok()
        // `u64::from_str` rejects leading/trailing whitespace, so trim first.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // A zero timeout would kill every call immediately; treat it as unset.
        .filter(|&n| n > 0)
        .unwrap_or(DEFAULT_SECS);
    std::time::Duration::from_secs(secs)
}
108+
109+
/// Wait for `child` up to `timeout`, draining stdout/stderr concurrently so a
110+
/// full pipe can't deadlock the wait. On timeout the child is killed and an
111+
/// error returned; otherwise the captured output is handed back.
112+
fn wait_with_timeout(
113+
mut child: std::process::Child,
114+
timeout: std::time::Duration,
115+
) -> anyhow::Result<std::process::Output> {
116+
use std::io::Read;
117+
let mut out_pipe = child.stdout.take();
118+
let mut err_pipe = child.stderr.take();
119+
let so = std::thread::spawn(move || {
120+
let mut b = Vec::new();
121+
if let Some(p) = out_pipe.as_mut() {
122+
let _ = p.read_to_end(&mut b);
123+
}
124+
b
125+
});
126+
let se = std::thread::spawn(move || {
127+
let mut b = Vec::new();
128+
if let Some(p) = err_pipe.as_mut() {
129+
let _ = p.read_to_end(&mut b);
130+
}
131+
b
132+
});
133+
let start = std::time::Instant::now();
134+
let status = loop {
135+
if let Some(status) = child.try_wait()? {
136+
break status;
137+
}
138+
if start.elapsed() >= timeout {
139+
let _ = child.kill();
140+
let _ = child.wait();
141+
anyhow::bail!("`claude -p` timed out after {}s", timeout.as_secs());
142+
}
143+
std::thread::sleep(std::time::Duration::from_millis(150));
144+
};
145+
Ok(std::process::Output {
146+
status,
147+
stdout: so.join().unwrap_or_default(),
148+
stderr: se.join().unwrap_or_default(),
149+
})
150+
}
151+
96152
impl CommandRunner for ClaudeBinaryRunner {
97153
fn run(&self, model: &str, prompt: &str) -> anyhow::Result<String> {
98-
let output = base_claude_command(model)
154+
let child = base_claude_command(model)
99155
.arg(prompt)
100-
.output()
156+
.stdout(std::process::Stdio::piped())
157+
.stderr(std::process::Stdio::piped())
158+
.spawn()
101159
.context("failed to spawn `claude` (is Claude Code installed and on PATH?)")?;
160+
let output = wait_with_timeout(child, claude_timeout())?;
102161
if !output.status.success() {
103162
return Err(claude_exit_error(
104163
output.status,
@@ -135,9 +194,7 @@ impl CommandRunner for ClaudeBinaryStdinRunner {
135194
.context("claude stdin was not captured")?
136195
.write_all(prompt.as_bytes())
137196
.context("failed to write prompt to claude stdin")?;
138-
let output = child
139-
.wait_with_output()
140-
.context("failed to wait for `claude`")?;
197+
let output = wait_with_timeout(child, claude_timeout())?;
141198
if !output.status.success() {
142199
return Err(claude_exit_error(
143200
output.status,

0 commit comments

Comments
 (0)