|
1 | 1 | //! Memory consolidation (Pillar C): distil a project's recurring decisions and |
2 | | -//! constraints into a handful of durable semantic/procedural facts via a direct |
3 | | -//! Anthropic Haiku API call. |
| 2 | +//! constraints into a handful of durable semantic/procedural facts with a |
| 3 | +//! single LLM call. |
4 | 4 | //! |
5 | | -//! Direct API, not `claude -p`: post-2026-06-15 both bill as extra usage, but |
6 | | -//! `claude -p` also boots the whole user environment (~tens of k tokens) on |
7 | | -//! every call, while the direct API sends only our ~7k-token prompt — roughly |
8 | | -//! 1c per run versus 5-10c. This is a MANUAL command (one call per run, only |
9 | | -//! when the user asks), so it never resembles the per-prompt classifier burn. |
10 | | -//! No `ANTHROPIC_API_KEY` → the caller skips cleanly; we never fall back to a |
| 5 | +//! Two backends, picked by [`summarize`]: the **direct Anthropic Haiku API** |
| 6 | +//! when `ANTHROPIC_API_KEY` is set (cheapest — only our ~7k-token prompt, |
| 7 | +//! ~1c/run), otherwise the local **`claude -p`** binary (subscription auth, no |
| 8 | +//! API key needed, but it boots the whole environment per call so it's |
| 9 | +//! pricier). With neither, the caller skips cleanly — we never fall back to a |
11 | 10 | //! heuristic, which would manufacture low-trust "facts". |
| 11 | +//! |
| 12 | +//! Either way this is a MANUAL command: one call per run, only when the user |
| 13 | +//! asks, never wired to a hook — so it never resembles the per-prompt |
| 14 | +//! classifier burn. |
12 | 15 |
|
13 | 16 | use anyhow::{anyhow, Context}; |
14 | 17 | use serde::{Deserialize, Serialize}; |
@@ -91,6 +94,50 @@ impl Consolidator { |
91 | 94 | } |
92 | 95 | } |
93 | 96 |
|
| 97 | +/// Run the first available summarisation backend and return `(label, facts)`.
| 98 | +/// Order: (1) `ANTHROPIC_API_KEY` set → direct Haiku API, label `"haiku-api"`
| 99 | +/// (cheapest, ~1c/run); (2) else `claude` on PATH → local `claude -p`, label `"claude -p"`
| 100 | +/// (subscription auth, no API key, heavier per-call boot); (3) else `Ok(None)`, so the
| 101 | +/// caller skips with a message — never a heuristic. An error from the chosen backend is
| 102 | +/// returned as `Err`, not retried on the next one. `TJ_CONSOLIDATE_BACKEND=none` forces (3) (disable / tests).
| 103 | +pub fn summarize(
| 104 | + events: &[String],
| 105 | + max_facts: usize,
| 106 | +) -> anyhow::Result<Option<(&'static str, Vec<ConsolidatedFact>)>> {
| 107 | + if std::env::var("TJ_CONSOLIDATE_BACKEND").as_deref() == Ok("none") { // exact, case-sensitive match; checked before any backend
| 108 | + return Ok(None);
| 109 | + }
| 110 | + if std::env::var("ANTHROPIC_API_KEY").is_ok() { // NOTE(review): also true for an empty-but-set key — confirm `from_env` rejects it
| 111 | + let c = Consolidator::from_env(max_facts)?;
| 112 | + return Ok(Some(("haiku-api", c.consolidate(events)?))); // `?` propagates; no fall-through to `claude -p`
| 113 | + }
| 114 | + if crate::classifier::agent_sdk::claude_on_path() {
| 115 | + return Ok(Some(("claude -p", consolidate_via_cli(events, max_facts)?)));
| 116 | + }
| 117 | + Ok(None) // neither backend available
| 118 | +}
| 119 | + |
| 120 | +/// Summarise via the local `claude -p` binary (subscription auth). Reuses the classifier's
| 121 | +/// command plumbing — including the recursion guard set by `base_claude_command` — and unwraps
| 122 | +/// the `--output-format json` envelope. Empty `events` yields no facts; runner errors propagate.
| 123 | +fn consolidate_via_cli(
| 124 | + events: &[String],
| 125 | + max_facts: usize,
| 126 | +) -> anyhow::Result<Vec<ConsolidatedFact>> {
| 127 | + if events.is_empty() { // nothing to summarise — return before building a prompt or calling the runner
| 128 | + return Ok(Vec::new());
| 129 | + }
| 130 | + let prompt = build_prompt(events, max_facts);
| 131 | + let model = std::env::var("TJ_CONSOLIDATE_MODEL") // optional model override
| 132 | + .unwrap_or_else(|_| crate::classifier::agent_sdk::DEFAULT_MODEL.to_string()); // unset/unreadable → classifier's default model
| 133 | + let text = crate::classifier::agent_sdk::run_claude_json(
| 134 | + &crate::classifier::agent_sdk::ClaudeBinaryStdinRunner,
| 135 | + &model,
| 136 | + &prompt,
| 137 | + )?;
| 138 | + Ok(parse_facts(&text)) // `parse_facts` result is wrapped as-is; it has no error path here
| 139 | +}
| 140 | + |
94 | 141 | /// The summarisation prompt. Deliberately strict: durable-only, fixed line |
95 | 142 | /// format, "output nothing" escape hatch so the model doesn't pad. |
96 | 143 | pub fn build_prompt(events: &[String], max_facts: usize) -> String { |
|
0 commit comments