Skip to content

Commit 0ba5fba

Browse files
Shahinyanm and claude authored
feat: pack honesty score + artifact drift + gap-fill (0.28.0) (#56)
Port 3 mechanisms from mex (github.com/theDakshJaitly/mex) into the deterministic, zero-LLM completeness layer:
- Artifact drift checkers: MissingFile / DeadCommit / BrokenLink over a task's aggregated Artifacts. Pure check_artifacts core + git/FS wrapper assess_artifacts, gated on is_git_repo so there is no false drift outside the project repo. Wired into the pack.
- Honesty score 0-100: GapKind::weight() (error -10, warn -3, info -1) + CompletenessReport::score(); rendered as "honesty score: N/100".
- Gap-fill: build_gap_fill_prompt() emits a targeted prompt embedding the pack (like `mex sync --dry-run`); no LLM call in the binary.

Surfaces: `task-journal check <id> [--json]`, `task-journal gaps <id> [--fill]`, and the `task_check` MCP tool.

Minor bump -> inter-crate version pins (tj-core in tj-cli/tj-mcp) bumped to 0.28.0 so the CI registry-resolve passes.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 627037b commit 0ba5fba

11 files changed

Lines changed: 694 additions & 8 deletions

File tree

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.28.0] - 2026-06-18
11+
12+
### Added
13+
- **Pack honesty score + artifact drift detection** (inspired by
14+
[mex](https://github.com/theDakshJaitly/mex)). Deterministic, zero-LLM
15+
checks that a task's recorded artifacts still match reality:
16+
- New `GapKind`s in `completeness`: `MissingFile` (a referenced file is gone
17+
from disk), `DeadCommit` (a referenced commit hash is unknown to git),
18+
`BrokenLink` (a local-file link is missing). A pure
19+
`check_artifacts(arts, file_exists, commit_alive)` core plus a git/FS
20+
wrapper `assess_artifacts`; wired into the pack via `artifact_gaps_for_cwd`
21+
(silent outside the project's git repo, so no false drift).
22+
- **Honesty score 0–100** — `CompletenessReport::score()` deducts each gap's
23+
weight (error −10, warn −3, info −1). Rendered in the pack's Completeness
24+
section as `honesty score: N/100`.
25+
- **`task-journal check <id> [--json]`** — print a task's honesty score and
26+
gaps (like `mex check`).
27+
- **`task-journal gaps <id> [--fill]`** — list gaps, or with `--fill` emit a
28+
targeted, deterministic gap-fill prompt embedding the current pack (like
29+
`mex sync --dry-run`) for the in-session agent to close — no LLM call in the
30+
binary.
31+
- **`task_check` MCP tool** — returns `{score, gaps:[{kind, severity, detail}]}`
32+
so an in-session agent can self-assess a task before closing.
33+
1034
## [0.27.0] - 2026-06-17
1135

1236
### Added

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ members = [
77
]
88

99
[workspace.package]
10-
version = "0.27.0"
10+
version = "0.28.0"
1111
edition = "2021"
1212
rust-version = "1.88"
1313
license = "MIT"

crates/tj-cli/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ default = ["embed"]
2323
embed = ["tj-core/embed"]
2424

2525
[dependencies]
26-
tj-core = { package = "task-journal-core", version = "0.27.0", path = "../tj-core", default-features = false }
26+
tj-core = { package = "task-journal-core", version = "0.28.0", path = "../tj-core", default-features = false }
2727
anyhow = { workspace = true }
2828
clap = { workspace = true }
2929
tracing = { workspace = true }

crates/tj-cli/src/main.rs

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,6 +1053,21 @@ enum Commands {
10531053
/// Why-this-approach, Verification, Affected). Reuses event log +
10541054
/// artifacts; introduces no new tables.
10551055
ExportPr { task_id: String },
1056+
/// Print a task's honesty score (0–100) and completeness gaps —
1057+
/// deterministic, zero-LLM (like `mex check`). `--json` for machines.
1058+
Check {
1059+
task_id: String,
1060+
#[arg(long)]
1061+
json: bool,
1062+
},
1063+
/// Print a targeted gap-fill prompt for a task (deterministic, zero-LLM,
1064+
/// like `mex sync --dry-run`); the in-session agent runs it to close gaps.
1065+
Gaps {
1066+
task_id: String,
1067+
/// Emit the full fix prompt (default: just list gaps + score).
1068+
#[arg(long)]
1069+
fill: bool,
1070+
},
10561071
/// Export task knowledge as Claude-memory frontmatter files (feeds native dream).
10571072
ExportMemory {
10581073
/// Export a single task by id.
@@ -3351,6 +3366,12 @@ runs in the background and won't block you; it only fills gaps and never closes
33513366
Commands::ExportPr { task_id } => {
33523367
run_export_pr(&task_id)?;
33533368
}
3369+
Commands::Check { task_id, json } => {
3370+
run_check(&task_id, json)?;
3371+
}
3372+
Commands::Gaps { task_id, fill } => {
3373+
run_gaps(&task_id, fill)?;
3374+
}
33543375
Commands::ExportMemory {
33553376
task,
33563377
all_closed,
@@ -3766,6 +3787,109 @@ fn run_export_pr(task_id: &str) -> Result<()> {
37663787
Ok(())
37673788
}
37683789

3790+
/// Severity label for a gap weight (10/3/1 → error/warn/info).
3791+
fn severity_for(weight: u32) -> &'static str {
3792+
match weight {
3793+
10 => "error",
3794+
3 => "warn",
3795+
_ => "info",
3796+
}
3797+
}
3798+
3799+
/// Open the project DB, ingest new events, and build the full completeness
3800+
/// report — structural gaps plus artifact honesty drift — for `task_id`.
3801+
/// Exits(1) when the task is unknown.
3802+
fn assess_task(
3803+
task_id: &str,
3804+
) -> Result<(
3805+
rusqlite::Connection,
3806+
tj_core::completeness::CompletenessReport,
3807+
)> {
3808+
let cwd = std::env::current_dir()?;
3809+
let project_hash = tj_core::project_hash::from_path(&cwd)?;
3810+
let events_path = tj_core::paths::events_dir()?.join(format!("{project_hash}.jsonl"));
3811+
let state_path = tj_core::paths::state_dir()?.join(format!("{project_hash}.sqlite"));
3812+
let conn = tj_core::db::open(&state_path)?;
3813+
if events_path.exists() {
3814+
tj_core::db::ingest_new_events(&conn, &events_path, &project_hash)?;
3815+
}
3816+
3817+
// Typed not-found exit, mirroring run_export_pr.
3818+
if let Err(rusqlite::Error::QueryReturnedNoRows) = conn.query_row(
3819+
"SELECT 1 FROM tasks WHERE task_id = ?1",
3820+
rusqlite::params![task_id],
3821+
|r| r.get::<_, i64>(0),
3822+
) {
3823+
eprintln!("Error: task not found: {task_id}");
3824+
std::process::exit(1);
3825+
}
3826+
3827+
let mut report =
3828+
tj_core::completeness::assess(&conn, task_id, tj_core::completeness::pending_count())?;
3829+
let arts = tj_core::db::task_artifacts(&conn, task_id)?;
3830+
report
3831+
.gaps
3832+
.extend(tj_core::completeness::artifact_gaps_for_cwd(&arts));
3833+
Ok((conn, report))
3834+
}
3835+
3836+
/// `task-journal check <id> [--json]` — print honesty score + gaps.
3837+
fn run_check(task_id: &str, json: bool) -> Result<()> {
3838+
let (_conn, report) = assess_task(task_id)?;
3839+
if json {
3840+
let gaps: Vec<_> = report
3841+
.gaps
3842+
.iter()
3843+
.map(|g| {
3844+
serde_json::json!({
3845+
"kind": format!("{:?}", g.kind),
3846+
"severity": severity_for(g.kind.weight()),
3847+
"detail": g.detail,
3848+
})
3849+
})
3850+
.collect();
3851+
let out = serde_json::json!({
3852+
"task_id": task_id,
3853+
"score": report.score(),
3854+
"gaps": gaps,
3855+
});
3856+
println!("{}", serde_json::to_string_pretty(&out)?);
3857+
} else {
3858+
println!(
3859+
"honesty score: {}/100 ({} gap(s))",
3860+
report.score(),
3861+
report.gaps.len()
3862+
);
3863+
for g in &report.gaps {
3864+
println!("- [{}] {}", severity_for(g.kind.weight()), g.detail);
3865+
}
3866+
}
3867+
Ok(())
3868+
}
3869+
3870+
/// `task-journal gaps <id> [--fill]` — list gaps, or with `--fill` print the
3871+
/// deterministic gap-fill prompt (embedding the current pack) for the agent.
3872+
fn run_gaps(task_id: &str, fill: bool) -> Result<()> {
3873+
let (conn, report) = assess_task(task_id)?;
3874+
if !fill {
3875+
println!(
3876+
"honesty score: {}/100 ({} gap(s))",
3877+
report.score(),
3878+
report.gaps.len()
3879+
);
3880+
for g in &report.gaps {
3881+
println!("- [{}] {}", severity_for(g.kind.weight()), g.detail);
3882+
}
3883+
return Ok(());
3884+
}
3885+
let pack = tj_core::pack::assemble(&conn, task_id, tj_core::pack::PackMode::Full)?;
3886+
match tj_core::completeness::build_gap_fill_prompt(task_id, &report, &pack.text) {
3887+
Some(prompt) => print!("{prompt}"),
3888+
None => println!("honesty score: 100/100 — no gaps to fill"),
3889+
}
3890+
Ok(())
3891+
}
3892+
37693893
fn run_export_memory(task: Option<&str>, _all_closed: bool, dry_run: bool) -> Result<()> {
37703894
const MAX_ITEMS: usize = 10;
37713895

crates/tj-cli/tests/cli.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,87 @@ fn close_warns_on_completeness_gap() {
143143
.stdout(contains("status: closed"));
144144
}
145145

146+
#[test]
147+
fn check_reports_honesty_score_and_gaps() {
148+
let dir = assert_fs::TempDir::new().unwrap();
149+
let task_id = String::from_utf8(
150+
Command::cargo_bin("task-journal")
151+
.unwrap()
152+
.env("XDG_DATA_HOME", dir.path())
153+
.args(["create", "Check me", "--goal", "ship it"])
154+
.assert()
155+
.success()
156+
.get_output()
157+
.stdout
158+
.clone(),
159+
)
160+
.unwrap()
161+
.trim()
162+
.to_string();
163+
164+
// A decision with no evidence → DecisionNoEvidence (warn, −3) → 97/100.
165+
Command::cargo_bin("task-journal")
166+
.unwrap()
167+
.env("XDG_DATA_HOME", dir.path())
168+
.args(["event", &task_id, "--type", "decision", "--text", "Adopt X"])
169+
.assert()
170+
.success();
171+
172+
Command::cargo_bin("task-journal")
173+
.unwrap()
174+
.env("XDG_DATA_HOME", dir.path())
175+
.args(["check", &task_id])
176+
.assert()
177+
.success()
178+
.stdout(contains("honesty score: 97/100"))
179+
.stdout(contains("decisions unverified"));
180+
181+
Command::cargo_bin("task-journal")
182+
.unwrap()
183+
.env("XDG_DATA_HOME", dir.path())
184+
.args(["check", &task_id, "--json"])
185+
.assert()
186+
.success()
187+
.stdout(contains("\"score\": 97"))
188+
.stdout(contains("\"severity\": \"warn\""));
189+
}
190+
191+
#[test]
192+
fn gaps_fill_emits_targeted_prompt() {
193+
let dir = assert_fs::TempDir::new().unwrap();
194+
let task_id = String::from_utf8(
195+
Command::cargo_bin("task-journal")
196+
.unwrap()
197+
.env("XDG_DATA_HOME", dir.path())
198+
.args(["create", "Fill me", "--goal", "ship it"])
199+
.assert()
200+
.success()
201+
.get_output()
202+
.stdout
203+
.clone(),
204+
)
205+
.unwrap()
206+
.trim()
207+
.to_string();
208+
209+
Command::cargo_bin("task-journal")
210+
.unwrap()
211+
.env("XDG_DATA_HOME", dir.path())
212+
.args(["event", &task_id, "--type", "decision", "--text", "Adopt X"])
213+
.assert()
214+
.success();
215+
216+
Command::cargo_bin("task-journal")
217+
.unwrap()
218+
.env("XDG_DATA_HOME", dir.path())
219+
.args(["gaps", &task_id, "--fill"])
220+
.assert()
221+
.success()
222+
.stdout(contains("Close ONLY these"))
223+
.stdout(contains("Add an evidence event"))
224+
.stdout(contains("```markdown"));
225+
}
226+
146227
#[test]
147228
fn doctor_exits_zero_on_fresh_install() {
148229
let dir = assert_fs::TempDir::new().unwrap();

0 commit comments

Comments
 (0)