Skip to content

Commit 4ff462a

Browse files
Shahinyanm and claude committed
feat: add backfill command — import tasks from Claude Code session history
Parses ~/.claude/projects/<project>/*.jsonl files and extracts task-journal events retroactively using heuristics (no LLM needed):

- Session → task (open/close)
- Bash test commands → evidence events
- Git commits → evidence events
- Decision/rejection/constraint keywords → typed events
- File modifications → finding events

Usage: task-journal backfill [--dry-run] [--limit N] [--project path]

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee7092a commit 4ff462a

6 files changed

Lines changed: 1026 additions & 0 deletions

File tree

crates/tj-cli/src/main.rs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,19 @@ enum Commands {
8686
},
8787
/// Show local classifier and journal statistics.
8888
Stats,
89+
/// Import task-journal events from existing Claude Code session history.
90+
/// Parses JSONL session files and creates tasks retroactively.
91+
Backfill {
92+
/// Dry run: show what would be imported without writing.
93+
#[arg(long)]
94+
dry_run: bool,
95+
/// Limit to N most recent sessions (default: all).
96+
#[arg(long)]
97+
limit: Option<usize>,
98+
/// Project path override (default: current directory).
99+
#[arg(long)]
100+
project: Option<String>,
101+
},
89102
/// Hook entry point: ingest a chat chunk through the classifier.
90103
IngestHook {
91104
/// Hook kind: UserPromptSubmit | PostToolUse | Stop | SessionStart.
@@ -560,6 +573,159 @@ fn main() -> Result<()> {
560573
}
561574
}
562575
}
576+
Commands::Backfill {
577+
dry_run,
578+
limit,
579+
project,
580+
} => {
581+
use tj_core::session::{discovery, extractor, parser};
582+
583+
let project_path = match project {
584+
Some(p) => std::path::PathBuf::from(p),
585+
None => std::env::current_dir()?,
586+
};
587+
588+
let project_hash = tj_core::project_hash::from_path(&project_path)?;
589+
let events_dir = tj_core::paths::events_dir()?;
590+
let events_path = events_dir.join(format!("{project_hash}.jsonl"));
591+
592+
// Find the Claude Code project directory for this path.
593+
let proj_dir = discovery::find_project_dir(&project_path)?;
594+
let proj_dir = match proj_dir {
595+
Some(d) => d,
596+
None => {
597+
eprintln!(
598+
"No Claude Code sessions found for: {}",
599+
project_path.display()
600+
);
601+
eprintln!(
602+
"Looked in: {}",
603+
discovery::projects_dir()
604+
.map(|p| p.display().to_string())
605+
.unwrap_or_else(|_| "?".into())
606+
);
607+
return Ok(());
608+
}
609+
};
610+
611+
// List available sessions.
612+
let mut sessions = discovery::list_sessions(&proj_dir)?;
613+
if let Some(max) = limit {
614+
sessions.truncate(max);
615+
}
616+
617+
if sessions.is_empty() {
618+
eprintln!("No session JSONL files found in: {}", proj_dir.display());
619+
return Ok(());
620+
}
621+
622+
eprintln!(
623+
"Found {} session(s) for {}",
624+
sessions.len(),
625+
project_path.display()
626+
);
627+
628+
// Check which sessions are already imported (idempotent).
629+
let already_imported = if events_path.exists() {
630+
let content = std::fs::read_to_string(&events_path).unwrap_or_default();
631+
sessions
632+
.iter()
633+
.filter_map(|p| p.file_stem().and_then(|s| s.to_str()).map(String::from))
634+
.filter(|sid| content.contains(sid))
635+
.collect::<std::collections::HashSet<_>>()
636+
} else {
637+
std::collections::HashSet::new()
638+
};
639+
640+
let mut total_tasks = 0;
641+
let mut total_events = 0;
642+
643+
for session_path in &sessions {
644+
let session_id = session_path
645+
.file_stem()
646+
.and_then(|s| s.to_str())
647+
.unwrap_or("?")
648+
.to_string();
649+
650+
if already_imported.contains(&session_id) {
651+
eprintln!(" ⊘ {} — already imported, skipping", &session_id[..8.min(session_id.len())]);
652+
continue;
653+
}
654+
655+
// Parse the session JSONL.
656+
let parsed = match parser::parse_session(session_path) {
657+
Ok(p) => p,
658+
Err(e) => {
659+
eprintln!(
660+
" ✗ {} — parse error: {}",
661+
&session_id[..8.min(session_id.len())],
662+
e
663+
);
664+
continue;
665+
}
666+
};
667+
668+
// Extract events.
669+
let task = match extractor::extract_from_session(&parsed) {
670+
Some(t) => t,
671+
None => {
672+
eprintln!(
673+
" ⊘ {} — too small ({} msgs), skipping",
674+
&session_id[..8.min(session_id.len())],
675+
parsed.user_message_count()
676+
);
677+
continue;
678+
}
679+
};
680+
681+
if dry_run {
682+
eprintln!(
683+
" ▸ {} → task {} \"{}\" ({} events)",
684+
&session_id[..8.min(session_id.len())],
685+
task.task_id,
686+
task.title.chars().take(60).collect::<String>(),
687+
task.events.len()
688+
);
689+
for ev in &task.events {
690+
let etype = serde_json::to_value(ev.event_type)
691+
.ok()
692+
.and_then(|v| v.as_str().map(String::from))
693+
.unwrap_or_else(|| "?".into());
694+
eprintln!(
695+
" {:12} {}",
696+
etype,
697+
ev.text.chars().take(80).collect::<String>()
698+
);
699+
}
700+
} else {
701+
// Write events to JSONL.
702+
std::fs::create_dir_all(&events_dir)?;
703+
let mut writer = tj_core::storage::JsonlWriter::open(&events_path)?;
704+
for event in &task.events {
705+
writer.append(event)?;
706+
}
707+
writer.flush_durable()?;
708+
709+
eprintln!(
710+
" ✓ {} → {} \"{}\" ({} events)",
711+
&session_id[..8.min(session_id.len())],
712+
task.task_id,
713+
task.title.chars().take(60).collect::<String>(),
714+
task.events.len()
715+
);
716+
}
717+
718+
total_tasks += 1;
719+
total_events += task.events.len();
720+
}
721+
722+
if dry_run {
723+
eprintln!("\nDry run: would create {total_tasks} task(s) with {total_events} event(s).");
724+
eprintln!("Run without --dry-run to import.");
725+
} else {
726+
eprintln!("\nImported {total_tasks} task(s) with {total_events} event(s).");
727+
}
728+
}
563729
}
564730
Ok(())
565731
}

crates/tj-core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ pub mod event;
88
pub mod pack;
99
pub mod paths;
1010
pub mod project_hash;
11+
pub mod session;
1112
pub mod storage;
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
//! Discover Claude Code session JSONL files for a project.
2+
//!
3+
//! Sessions live at `~/.claude/projects/<encoded-path>/<uuid>.jsonl`.
4+
//! The encoded path replaces non-alphanumeric chars (except `-`) with `-`.
5+
6+
use std::path::{Path, PathBuf};
7+
8+
/// Resolve the Claude Code config directory.
9+
/// Uses `CLAUDE_CONFIG_DIR` env if set, otherwise `~/.claude`.
10+
pub fn claude_config_dir() -> anyhow::Result<PathBuf> {
11+
if let Ok(custom) = std::env::var("CLAUDE_CONFIG_DIR") {
12+
if !custom.is_empty() {
13+
return Ok(PathBuf::from(custom));
14+
}
15+
}
16+
let home = dirs_home()?;
17+
Ok(home.join(".claude"))
18+
}
19+
20+
/// Get the projects directory where session files live.
21+
pub fn projects_dir() -> anyhow::Result<PathBuf> {
22+
Ok(claude_config_dir()?.join("projects"))
23+
}
24+
25+
/// Encode a filesystem path into the Claude Code project directory name format.
///
/// Every character other than an ASCII letter, an ASCII digit, or `-` is
/// replaced with `-`. The output has exactly one character per input character.
///
/// Only ASCII alphanumerics are kept: Unicode letters and digits (e.g. `ü`,
/// `п`) are replaced as well, so the result consists purely of `[A-Za-z0-9-]`.
/// NOTE(review): this assumes Claude Code's own encoder is ASCII-only — confirm
/// against a real `~/.claude/projects` entry created for a non-ASCII path.
pub fn encode_project_path(path: &str) -> String {
    path.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || c == '-' {
                c
            } else {
                '-'
            }
        })
        .collect()
}
38+
39+
/// Find the project directory for a given filesystem path.
40+
/// Tries exact match first, then prefix match for worktree variants.
41+
pub fn find_project_dir(project_path: &Path) -> anyhow::Result<Option<PathBuf>> {
42+
let projects = projects_dir()?;
43+
if !projects.exists() {
44+
return Ok(None);
45+
}
46+
47+
let encoded = encode_project_path(&project_path.to_string_lossy());
48+
49+
// Try exact match first.
50+
let exact = projects.join(&encoded);
51+
if exact.is_dir() {
52+
return Ok(Some(exact));
53+
}
54+
55+
// Try case-insensitive match (WSL paths can differ in case).
56+
let encoded_lower = encoded.to_lowercase();
57+
if let Ok(entries) = std::fs::read_dir(&projects) {
58+
for entry in entries.flatten() {
59+
let name = entry.file_name().to_string_lossy().to_string();
60+
if name.to_lowercase() == encoded_lower && entry.path().is_dir() {
61+
return Ok(Some(entry.path()));
62+
}
63+
}
64+
}
65+
66+
Ok(None)
67+
}
68+
69+
/// List all session JSONL files in a project directory.
70+
/// Excludes agent files (starting with `agent-`).
71+
/// Returns files sorted by modification time (newest first).
72+
pub fn list_sessions(project_dir: &Path) -> anyhow::Result<Vec<PathBuf>> {
73+
let mut sessions: Vec<(PathBuf, std::time::SystemTime)> = Vec::new();
74+
75+
for entry in std::fs::read_dir(project_dir)? {
76+
let entry = entry?;
77+
let path = entry.path();
78+
let name = entry.file_name().to_string_lossy().to_string();
79+
80+
if !name.ends_with(".jsonl") {
81+
continue;
82+
}
83+
// Skip agent sessions.
84+
if name.starts_with("agent-") {
85+
continue;
86+
}
87+
88+
let mtime = entry
89+
.metadata()
90+
.and_then(|m| m.modified())
91+
.unwrap_or(std::time::UNIX_EPOCH);
92+
93+
sessions.push((path, mtime));
94+
}
95+
96+
// Sort newest first.
97+
sessions.sort_by(|a, b| b.1.cmp(&a.1));
98+
Ok(sessions.into_iter().map(|(p, _)| p).collect())
99+
}
100+
101+
/// List all project directories in Claude Code config.
102+
pub fn list_all_projects() -> anyhow::Result<Vec<(String, PathBuf)>> {
103+
let projects = projects_dir()?;
104+
if !projects.exists() {
105+
return Ok(vec![]);
106+
}
107+
108+
let mut result = Vec::new();
109+
for entry in std::fs::read_dir(&projects)? {
110+
let entry = entry?;
111+
if entry.path().is_dir() {
112+
let name = entry.file_name().to_string_lossy().to_string();
113+
// Decode the project name back to a readable path.
114+
let decoded = decode_project_path(&name);
115+
result.push((decoded, entry.path()));
116+
}
117+
}
118+
result.sort_by(|a, b| a.0.cmp(&b.0));
119+
Ok(result)
120+
}
121+
122+
/// Turn an encoded project directory name into a display label.
///
/// The encoding is lossy (a `-` may stand for a path separator or for a
/// literal dash), so no decoding is attempted: the name is returned unchanged
/// as an owned `String`.
fn decode_project_path(encoded: &str) -> String {
    String::from(encoded)
}
129+
130+
fn dirs_home() -> anyhow::Result<PathBuf> {
131+
directories::BaseDirs::new()
132+
.map(|d| d.home_dir().to_path_buf())
133+
.ok_or_else(|| anyhow::anyhow!("could not resolve home directory"))
134+
}
135+
136+
#[cfg(test)]
mod tests {
    use super::encode_project_path;

    /// Each Unix path separator is rewritten to a dash.
    #[test]
    fn encode_path_replaces_separators() {
        assert_eq!(
            encode_project_path("/home/user/project"),
            "-home-user-project"
        );
    }

    /// Dashes already present in the path are kept as-is.
    #[test]
    fn encode_preserves_dashes() {
        assert_eq!(encode_project_path("/home/my-project"), "-home-my-project");
    }

    /// Backslashes and dots in a WSL UNC path are rewritten to dashes.
    #[test]
    fn encode_wsl_path() {
        let unc = "\\\\wsl.localhost\\ubuntu\\home\\user\\project";
        assert_eq!(
            encode_project_path(unc),
            "--wsl-localhost-ubuntu-home-user-project"
        );
    }
}

0 commit comments

Comments
 (0)