Skip to content

Commit ccaaebd

Browse files
var-gg and claude committed
feat(cache): repos lifecycle schema for tiered discovery (v0.1.1 prep 1/6)
Foundation for the multi-tier discovery overhaul. No behaviour change yet — every existing query keeps working — but the schema is now ready for the orchestrator to write source provenance, learned roots, tombstones, path aliases, and run audit rows.

The repos table gains: canonical_path, gitdir_path, status, user_state, primary_source, confidence, first_seen_at, last_verified_at, missing_since, removed_at, last_emit_at, repo_kind. Existing rows backfill canonical_path=path so the new unique index can be built.

upsert_repos now also restores status='active' if a path that was previously marked 'missing' shows up again — needed for the relink flow in commit 6.

New tables:
- repo_sources: per-tier provenance + decay state
- discovery_roots: learned scan roots with score/cooldown/budget
- discovery_tombstones: user-hidden repos that must not auto-rediscover
- path_aliases: observed→canonical so symlinks dedupe
- discovery_runs: per-tier audit log
- meta: app-wide flags (e.g. initial_scan_completed), placed in cache.db so wiping cache resets them

PRAGMA user_version = 2 is set as a future-migration marker.

All ALTERs follow the existing idempotent pattern (errors swallowed on warm DBs); the CREATE TABLE/INDEX statements use IF NOT EXISTS so a fresh DB and an upgraded DB end up in the same state.

Rust tests 20/20.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 56825cc commit ccaaebd

1 file changed

Lines changed: 185 additions & 3 deletions

File tree

src-tauri/src/cache.rs

Lines changed: 185 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,143 @@ pub fn open(app: &AppHandle) -> Result<Connection> {
9595
"ALTER TABLE commits ADD COLUMN remote_tip_extra_count INTEGER NOT NULL DEFAULT 0",
9696
[],
9797
);
98+
99+
// v0.1.1 discovery lifecycle migration. `repos` gains identity +
100+
// status + provenance fields so the tiered scanner can express
101+
// "this row came from VS Code recents, confidence 80, last verified
102+
// 3 minutes ago, currently missing on disk." All ALTERs are
103+
// idempotent ("duplicate column" errors swallowed on warm DBs).
104+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN canonical_path TEXT", []);
105+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN gitdir_path TEXT", []);
106+
let _ = conn.execute(
107+
"ALTER TABLE repos ADD COLUMN status TEXT NOT NULL DEFAULT 'active'",
108+
[],
109+
);
110+
let _ = conn.execute(
111+
"ALTER TABLE repos ADD COLUMN user_state TEXT NOT NULL DEFAULT 'normal'",
112+
[],
113+
);
114+
let _ = conn.execute(
115+
"ALTER TABLE repos ADD COLUMN primary_source TEXT NOT NULL DEFAULT 'unknown'",
116+
[],
117+
);
118+
let _ = conn.execute(
119+
"ALTER TABLE repos ADD COLUMN confidence INTEGER NOT NULL DEFAULT 50",
120+
[],
121+
);
122+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN first_seen_at INTEGER", []);
123+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN last_verified_at INTEGER", []);
124+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN missing_since INTEGER", []);
125+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN removed_at INTEGER", []);
126+
let _ = conn.execute("ALTER TABLE repos ADD COLUMN last_emit_at INTEGER", []);
127+
let _ = conn.execute(
128+
"ALTER TABLE repos ADD COLUMN repo_kind TEXT NOT NULL DEFAULT 'workdir'",
129+
[],
130+
);
131+
132+
// Backfill canonical_path for rows from older versions so the
133+
// unique index has something to grab onto.
134+
let _ = conn.execute(
135+
"UPDATE repos SET canonical_path = path WHERE canonical_path IS NULL",
136+
[],
137+
);
138+
139+
conn.execute_batch(
140+
r#"
141+
CREATE UNIQUE INDEX IF NOT EXISTS idx_repos_canonical_path
142+
ON repos(canonical_path);
143+
CREATE INDEX IF NOT EXISTS idx_repos_paint_order
144+
ON repos(user_state, status, confidence, last_seen_at);
145+
146+
-- Per-source provenance: a repo can be known to multiple sources
147+
-- (VS Code recents AND fs walk AND manual), each with its own
148+
-- confidence + last-seen so source-level decay/learning works.
149+
CREATE TABLE IF NOT EXISTS repo_sources (
150+
repo_canonical_path TEXT NOT NULL,
151+
source TEXT NOT NULL,
152+
source_path TEXT,
153+
source_mtime INTEGER,
154+
raw_hint TEXT,
155+
confidence INTEGER NOT NULL,
156+
first_seen_at INTEGER NOT NULL,
157+
last_seen_at INTEGER NOT NULL,
158+
last_success_at INTEGER,
159+
fail_count INTEGER NOT NULL DEFAULT 0,
160+
PRIMARY KEY(repo_canonical_path, source)
161+
);
162+
163+
-- Learned scan roots. When we find a repo at C:\k2\keymall\workspace
164+
-- we learn C:\k2\keymall (score 0.95) and conservatively C:\k2
165+
-- (score 0.65), so next pass finds siblings without making the
166+
-- user re-enter paths.
167+
CREATE TABLE IF NOT EXISTS discovery_roots (
168+
root_path TEXT PRIMARY KEY,
169+
root_kind TEXT NOT NULL,
170+
created_from_repo TEXT,
171+
score REAL NOT NULL,
172+
max_depth INTEGER NOT NULL,
173+
entry_budget INTEGER NOT NULL,
174+
repo_hits INTEGER NOT NULL DEFAULT 0,
175+
miss_count INTEGER NOT NULL DEFAULT 0,
176+
last_scan_at INTEGER,
177+
cooldown_until INTEGER,
178+
enabled INTEGER NOT NULL DEFAULT 1,
179+
created_at INTEGER NOT NULL,
180+
updated_at INTEGER NOT NULL
181+
);
182+
CREATE INDEX IF NOT EXISTS idx_discovery_roots_priority
183+
ON discovery_roots(enabled, cooldown_until, score DESC, last_scan_at);
184+
185+
-- "User hid this repo, do not auto-rediscover it." Survives even if
186+
-- a tier source still reports the path next scan.
187+
CREATE TABLE IF NOT EXISTS discovery_tombstones (
188+
canonical_path TEXT PRIMARY KEY,
189+
removed_at INTEGER NOT NULL,
190+
reason TEXT NOT NULL,
191+
last_known_name TEXT,
192+
last_known_source TEXT
193+
);
194+
195+
-- observed path → canonical path mapping, so symlinks/case
196+
-- variants from different IDEs don't double-count.
197+
CREATE TABLE IF NOT EXISTS path_aliases (
198+
observed_path TEXT NOT NULL,
199+
canonical_path TEXT NOT NULL,
200+
source TEXT NOT NULL,
201+
first_seen_at INTEGER NOT NULL,
202+
last_seen_at INTEGER NOT NULL,
203+
PRIMARY KEY(observed_path, source)
204+
);
205+
206+
-- Per-tier-run audit log. Mostly for debugging "why didn't my
207+
-- repo appear?" — keep tiny, GC the oldest entries periodically.
208+
CREATE TABLE IF NOT EXISTS discovery_runs (
209+
run_id TEXT PRIMARY KEY,
210+
tier TEXT NOT NULL,
211+
started_at INTEGER NOT NULL,
212+
finished_at INTEGER,
213+
budget_ms INTEGER,
214+
candidates_seen INTEGER NOT NULL DEFAULT 0,
215+
candidates_valid INTEGER NOT NULL DEFAULT 0,
216+
repos_emitted INTEGER NOT NULL DEFAULT 0,
217+
cancelled INTEGER NOT NULL DEFAULT 0
218+
);
219+
220+
-- One-row table for app-wide flags (e.g. first-run scan completion).
221+
-- Lives in cache.db rather than settings.json so a wiped cache
222+
-- correctly re-triggers the first-run scan.
223+
CREATE TABLE IF NOT EXISTS meta (
224+
key TEXT PRIMARY KEY,
225+
value TEXT NOT NULL,
226+
updated_at INTEGER NOT NULL
227+
);
228+
"#,
229+
)?;
230+
231+
// PRAGMA user_version = 2 marks schema generation 2 (post v0.1.0).
232+
// We don't gate behaviour on this yet, but future migrations can.
233+
let _ = conn.pragma_update(None, "user_version", 2_i64);
234+
98235
Ok(conn)
99236
}
100237

@@ -119,11 +256,24 @@ pub fn upsert_repos(conn: &mut Connection, repos: &[Repo]) -> Result<()> {
119256
{
120257
let mut stmt = tx.prepare(
121258
r#"
122-
INSERT INTO repos (path, name, discovered_at, last_seen_at)
123-
VALUES (?1, ?2, ?3, ?3)
259+
INSERT INTO repos (
260+
path, name, discovered_at, last_seen_at,
261+
canonical_path, first_seen_at, last_verified_at
262+
)
263+
VALUES (?1, ?2, ?3, ?3, ?1, ?3, ?3)
124264
ON CONFLICT(path) DO UPDATE SET
125265
name = excluded.name,
126-
last_seen_at = excluded.last_seen_at
266+
last_seen_at = excluded.last_seen_at,
267+
last_verified_at = excluded.last_verified_at,
268+
-- Restore from missing if the path showed up again
269+
status = CASE
270+
WHEN repos.status = 'missing' THEN 'active'
271+
ELSE repos.status
272+
END,
273+
missing_since = CASE
274+
WHEN repos.status = 'missing' THEN NULL
275+
ELSE repos.missing_since
276+
END
127277
"#,
128278
)?;
129279
for r in repos {
@@ -134,6 +284,38 @@ pub fn upsert_repos(conn: &mut Connection, repos: &[Repo]) -> Result<()> {
134284
Ok(())
135285
}
136286

287+
// ----- meta (app-wide flags live in cache.db so wiping cache resets them) -----
288+
289+
/// Read a value from the meta table. None if the key has never been set.
290+
#[allow(dead_code)] // wired up by orchestrator
291+
pub fn meta_get(conn: &Connection, key: &str) -> Result<Option<String>> {
292+
let res = conn.query_row(
293+
"SELECT value FROM meta WHERE key = ?1",
294+
params![key],
295+
|r| r.get::<_, String>(0),
296+
);
297+
match res {
298+
Ok(v) => Ok(Some(v)),
299+
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
300+
Err(e) => Err(e.into()),
301+
}
302+
}
303+
304+
/// Upsert a value into the meta table.
305+
#[allow(dead_code)] // wired up by orchestrator
306+
pub fn meta_set(conn: &Connection, key: &str, value: &str) -> Result<()> {
307+
conn.execute(
308+
r#"
309+
INSERT INTO meta (key, value, updated_at) VALUES (?1, ?2, ?3)
310+
ON CONFLICT(key) DO UPDATE SET
311+
value = excluded.value,
312+
updated_at = excluded.updated_at
313+
"#,
314+
params![key, value, unix_now()],
315+
)?;
316+
Ok(())
317+
}
318+
137319
// ----- commits -----
138320

139321
pub fn upsert_commits(conn: &mut Connection, commits: &[CommitSummary]) -> Result<()> {

0 commit comments

Comments
 (0)