Skip to content

Commit 511c235

Browse files
joshwilhelmiclaude
andcommitted
[gobby-cli-#460] fix: lock-free read-time freshness pre-gate in gcode
Every project-scoped gcode read ran ensure_project_fresh, which took the per-project advisory lock and unconditionally re-hashed every discovered file before comparing against stored hashes. Under concurrent agent sessions these lock-holds overlapped, the 150ms brief-try window lost, and reads printed "gcode index refresh already running" despite nothing having changed. Add a lock-free, hash-free pre-gate in freshness::ensure_fresh for FreshnessScope::Project: read last_indexed_at + indexed paths read-only from the hub and call the new index::api::project_changed_since, which reuses walker::discover_files + DEFAULT_EXCLUDES and returns true on any modify/add (mtime newer than last_indexed_at minus a 2s skew margin) or delete (indexed path missing on disk). With no change it returns Checked without taking the lock or hashing anything; on a change it falls through to the existing lock + incremental reconcile, unchanged. No staleness window, no hub schema change, standalone backstop preserved. The 2s margin only ever makes the gate more eager, so it can never miss a change; backdated mtimes are reconciled by the periodic maintenance sweep. Discovery mirrors the indexer exactly, so the .gobby/plans/**/*.md allowlist (edits the daemon trigger never forwards) is covered. Tests: unit tests for project_changed_since (no-change/modify/add/delete and the 2s margin boundary) and postgres-gated freshness tests asserting Checked-without-lock on no change and SkippedBusy when a change is present while the lock is held. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 5acfa18 commit 511c235

4 files changed

Lines changed: 327 additions & 0 deletions

File tree

crates/gcode/src/freshness.rs

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::path::{Path, PathBuf};
2+
use std::time::SystemTime;
23

34
use crate::config::Context;
45
use crate::db;
@@ -25,6 +26,15 @@ pub fn ensure_fresh(ctx: &Context, scope: FreshnessScope) -> anyhow::Result<Fres
2526
return Ok(FreshnessStatus::Checked);
2627
}
2728

29+
// Lock-free pre-gate for whole-project reads: if nothing on disk is newer
30+
// than the recorded index and nothing was deleted, skip the advisory lock
31+
// and the full re-hash entirely (no "refresh already running" warning).
32+
// `FreshnessScope::Files` is already cheap (explicit-files path) and is left
33+
// untouched.
34+
if matches!(scope, FreshnessScope::Project) && !project_needs_refresh(ctx)? {
35+
return Ok(FreshnessStatus::Checked);
36+
}
37+
2838
let _guard = FreshnessGuard::enter();
2939
let result =
3040
index_lock::with_project_lock(ctx, IndexLockPolicy::brief_freshness_try(), || {
@@ -72,6 +82,41 @@ pub fn ensure_fresh(ctx: &Context, scope: FreshnessScope) -> anyhow::Result<Fres
7282
}
7383
}
7484

85+
/// Read-only pre-gate for whole-project freshness.
86+
///
87+
/// Returns `true` when the project must be reconciled under the advisory lock —
88+
/// because it has never been indexed or because the on-disk tree changed since
89+
/// `last_indexed_at` — and `false` when the recorded index is already current
90+
/// and the lock (plus the full re-hash) can be skipped. Reads only; needs the
91+
/// hub exactly like the existing refresh path, and propagates a hub error the
92+
/// same way (`--no-freshness` still bypasses it upstream).
93+
fn project_needs_refresh(ctx: &Context) -> anyhow::Result<bool> {
94+
let mut conn = db::connect_readonly(&ctx.database_url)?;
95+
96+
let last_indexed_at: Option<SystemTime> = match conn.query_opt(
97+
"SELECT last_indexed_at FROM code_indexed_projects WHERE id = $1",
98+
&[&ctx.project_id],
99+
)? {
100+
Some(row) => row.try_get::<_, Option<SystemTime>>(0)?,
101+
None => None,
102+
};
103+
104+
// Never indexed (or no recorded timestamp): do not gate; let the existing
105+
// refresh path build the first index.
106+
let Some(last_indexed_at) = last_indexed_at else {
107+
return Ok(true);
108+
};
109+
110+
let indexed_paths = db::list_indexed_file_paths(&mut conn, &ctx.project_id)?;
111+
drop(conn);
112+
113+
Ok(api::project_changed_since(
114+
&ctx.project_root,
115+
last_indexed_at,
116+
&indexed_paths,
117+
))
118+
}
119+
75120
pub fn ensure_symbol_fresh(ctx: &Context, id: &str) -> anyhow::Result<FreshnessStatus> {
76121
if std::env::var_os(INFLIGHT_ENV).is_some() {
77122
return Ok(FreshnessStatus::Checked);
@@ -198,6 +243,30 @@ mod tests {
198243
}
199244
}
200245

246+
fn postgres_context_with_root(project_id: &str, root: &Path) -> Option<Context> {
247+
let database_url = std::env::var("GCODE_POSTGRES_TEST_DATABASE_URL").ok()?;
248+
match db::connect_readwrite(&database_url) {
249+
Ok(_) => Some(Context {
250+
database_url,
251+
project_root: root.to_path_buf(),
252+
project_id: project_id.to_string(),
253+
quiet: true,
254+
falkordb: None,
255+
qdrant: None,
256+
embedding: None,
257+
code_vectors: crate::config::CodeVectorSettings::default(),
258+
daemon_url: None,
259+
index_scope: crate::config::ProjectIndexScope::Single,
260+
}),
261+
Err(error) => {
262+
eprintln!(
263+
"skipping freshness pre-gate test: PostgreSQL hub is unavailable: {error}"
264+
);
265+
None
266+
}
267+
}
268+
}
269+
201270
fn hold_project_lock(ctx: &Context) -> Client {
202271
let mut conn =
203272
db::connect_readwrite(&ctx.database_url).expect("connect test PostgreSQL hub");
@@ -207,6 +276,37 @@ mod tests {
207276
conn
208277
}
209278

279+
fn set_mtime(path: &Path, time: SystemTime) {
280+
std::fs::File::options()
281+
.write(true)
282+
.open(path)
283+
.expect("open file to set mtime")
284+
.set_modified(time)
285+
.expect("set mtime");
286+
}
287+
288+
fn invalidate_test_project(ctx: &Context) {
289+
let mut conn =
290+
db::connect_readwrite(&ctx.database_url).expect("connect test PostgreSQL hub");
291+
crate::index::indexer::invalidate(&mut conn, &ctx.project_id, None)
292+
.expect("invalidate test project index");
293+
}
294+
295+
fn full_index(ctx: &Context) {
296+
api::index_files(
297+
api::IndexRequest {
298+
project_root: ctx.project_root.clone(),
299+
path_filter: None,
300+
explicit_files: Vec::new(),
301+
full: true,
302+
require_cpp_semantics: false,
303+
sync_projections: false,
304+
},
305+
ctx,
306+
)
307+
.expect("full index of test project");
308+
}
309+
210310
#[test]
211311
#[serial_test::serial]
212312
fn no_freshness_env_short_circuits_project_refresh() {
@@ -232,6 +332,51 @@ mod tests {
232332
assert_eq!(status, FreshnessStatus::SkippedBusy);
233333
}
234334

335+
#[test]
336+
#[serial_test::serial]
337+
fn pre_gate_skips_lock_when_unchanged_and_trips_after_a_change() {
338+
let tmp = tempfile::tempdir().expect("tempdir");
339+
let root = tmp.path();
340+
std::fs::create_dir_all(root.join("src")).expect("create src");
341+
let lib = root.join("src/lib.rs");
342+
std::fs::write(&lib, b"fn main() {}\n").expect("write lib.rs");
343+
std::fs::write(root.join("README.md"), b"# Title\n").expect("write README");
344+
345+
// Age the files well past the skew margin so a clean index leaves them
346+
// unambiguously older than last_indexed_at, regardless of host/hub skew.
347+
let aged = SystemTime::now() - std::time::Duration::from_secs(3600);
348+
set_mtime(&lib, aged);
349+
set_mtime(&root.join("README.md"), aged);
350+
351+
let Some(ctx) = postgres_context_with_root("gcode-freshness-pregate", root) else {
352+
return;
353+
};
354+
355+
// Start clean, then index so code_indexed_projects.last_indexed_at = NOW().
356+
invalidate_test_project(&ctx);
357+
full_index(&ctx);
358+
359+
// Nothing changed: the pre-gate must skip the advisory lock entirely,
360+
// even while another connection holds it, and report Checked. Without
361+
// the gate this would force SkippedBusy.
362+
let holder = hold_project_lock(&ctx);
363+
let status = ensure_fresh(&ctx, FreshnessScope::Project).expect("freshness status");
364+
assert_eq!(status, FreshnessStatus::Checked);
365+
366+
// Touch a tracked file with a future mtime so the gate trips regardless
367+
// of skew; with the lock still held it reports SkippedBusy, proving it
368+
// took the lock path rather than skipping.
369+
set_mtime(
370+
&lib,
371+
SystemTime::now() + std::time::Duration::from_secs(3600),
372+
);
373+
let status = ensure_fresh(&ctx, FreshnessScope::Project).expect("freshness status");
374+
assert_eq!(status, FreshnessStatus::SkippedBusy);
375+
drop(holder);
376+
377+
invalidate_test_project(&ctx);
378+
}
379+
235380
#[test]
236381
#[serial_test::serial]
237382
fn symbol_slice_check_uses_stored_byte_range_hash() {

crates/gcode/src/index/api.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
33

44
pub use crate::index::indexer::{
55
IndexDegradation, IndexDurations, IndexOutcome, IndexRequest, UnsupportedFileType, index_files,
6+
project_changed_since,
67
};
78

89
use crate::models::{

crates/gcode/src/index/indexer.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
//! delegated through projection sync status and handled outside this module.
66
77
mod file;
8+
mod freshness_probe;
89
mod lifecycle;
910
mod overlay;
1011
mod pipeline;
1112
mod sink;
1213
mod types;
1314
mod util;
1415

16+
pub use freshness_probe::project_changed_since;
1517
pub use lifecycle::invalidate;
1618
pub use pipeline::index_files;
1719
pub use types::{
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//! Lock-free, hash-free freshness pre-gate.
2+
//!
3+
//! `project_changed_since` answers one question without taking the per-project
4+
//! advisory lock and without hashing any file: has anything under the project
5+
//! root changed since the recorded `last_indexed_at`? Read-time freshness calls
6+
//! this *before* the lock so the common no-change case is cheap and never prints
7+
//! "refresh already running". When it reports a change, the caller falls through
8+
//! to the existing lock + incremental reconcile, which is exactly as correct as
9+
//! before.
10+
11+
use std::path::Path;
12+
use std::time::{Duration, SystemTime};
13+
14+
use crate::index::walker;
15+
16+
use super::util::DEFAULT_EXCLUDES;
17+
18+
/// Clock-skew / mtime-granularity margin. Subtracted from `last_indexed_at`
19+
/// before comparing file mtimes, so the gate only ever errs toward refreshing
20+
/// and can never miss a real change. Absorbs host-vs-PostgreSQL (docker) clock
21+
/// skew and same-second mtime granularity; anything beyond it is reconciled by
22+
/// the periodic maintenance full re-hash sweep.
23+
const SKEW_MARGIN: Duration = Duration::from_secs(2);
24+
25+
/// Returns `true` if any discovered file is newer than `last_indexed_at` (a
26+
/// modify or add) or any previously indexed path no longer exists on disk (a
27+
/// delete or rename), and `false` only when the on-disk tree still matches the
28+
/// recorded index. A `false` result lets the caller skip the advisory lock and
29+
/// the full re-hash entirely.
30+
///
31+
/// Discovery mirrors the indexer (`walker::discover_files` with
32+
/// `DEFAULT_EXCLUDES`), so the `.gobby/plans/**/*.md` allowlist and every other
33+
/// exclusion stay in lockstep with what actually gets indexed — including the
34+
/// internal `.gobby/plans/*.md` edits the daemon trigger never forwards.
35+
/// Short-circuits on the first sign of change.
36+
pub fn project_changed_since(
37+
project_root: &Path,
38+
last_indexed_at: SystemTime,
39+
indexed_paths: &[String],
40+
) -> bool {
41+
let threshold = last_indexed_at
42+
.checked_sub(SKEW_MARGIN)
43+
.unwrap_or(last_indexed_at);
44+
45+
let excludes: Vec<String> = DEFAULT_EXCLUDES.iter().map(|s| s.to_string()).collect();
46+
let (candidates, content_only) = walker::discover_files(project_root, &excludes);
47+
48+
// Modify / add: a discovered file whose mtime is newer than the threshold.
49+
// A freshly added file also carries a recent mtime, so adds are caught here
50+
// without a fragile path-set diff. An unreadable mtime is treated as a
51+
// change, so we never skip a refresh for a file we cannot stat.
52+
for path in candidates.iter().chain(content_only.iter()) {
53+
match path.metadata().and_then(|meta| meta.modified()) {
54+
Ok(modified) if modified <= threshold => {}
55+
_ => return true,
56+
}
57+
}
58+
59+
// Delete / rename: a path recorded in the index that is gone from disk.
60+
indexed_paths
61+
.iter()
62+
.any(|rel| !project_root.join(rel).exists())
63+
}
64+
65+
#[cfg(test)]
66+
mod tests {
67+
use super::*;
68+
use std::fs::File;
69+
use std::path::PathBuf;
70+
71+
fn write_file(root: &Path, rel: &str, contents: &[u8]) -> PathBuf {
72+
let path = root.join(rel);
73+
if let Some(parent) = path.parent() {
74+
std::fs::create_dir_all(parent).expect("create parent");
75+
}
76+
std::fs::write(&path, contents).expect("write file");
77+
path
78+
}
79+
80+
fn set_mtime(path: &Path, time: SystemTime) {
81+
File::options()
82+
.write(true)
83+
.open(path)
84+
.expect("open file to set mtime")
85+
.set_modified(time)
86+
.expect("set mtime");
87+
}
88+
89+
/// A fixed, whole-second base instant well in the past, so the arithmetic
90+
/// never underflows and 1-second-granularity filesystems round-trip it.
91+
fn base_time() -> SystemTime {
92+
SystemTime::UNIX_EPOCH + Duration::from_secs(1_700_000_000)
93+
}
94+
95+
#[test]
96+
fn reports_no_change_when_everything_predates_last_index() {
97+
let tmp = tempfile::tempdir().expect("tempdir");
98+
let root = tmp.path();
99+
let lib = write_file(root, "src/lib.rs", b"fn main() {}\n");
100+
let readme = write_file(root, "README.md", b"# Title\n");
101+
102+
let base = base_time();
103+
set_mtime(&lib, base);
104+
set_mtime(&readme, base);
105+
106+
// last_indexed_at is well after every file's mtime.
107+
let last = base + Duration::from_secs(3600);
108+
let indexed = vec!["src/lib.rs".to_string(), "README.md".to_string()];
109+
110+
assert!(!project_changed_since(root, last, &indexed));
111+
}
112+
113+
#[test]
114+
fn reports_change_when_a_file_is_modified_after_last_index() {
115+
let tmp = tempfile::tempdir().expect("tempdir");
116+
let root = tmp.path();
117+
let lib = write_file(root, "src/lib.rs", b"fn main() {}\n");
118+
set_mtime(&lib, base_time() + Duration::from_secs(7200));
119+
120+
let last = base_time() + Duration::from_secs(3600);
121+
let indexed = vec!["src/lib.rs".to_string()];
122+
123+
assert!(project_changed_since(root, last, &indexed));
124+
}
125+
126+
#[test]
127+
fn reports_change_for_newly_added_file() {
128+
// A new (unindexed) file carries a recent mtime, so the modify/add scan
129+
// trips even though it is absent from indexed_paths.
130+
let tmp = tempfile::tempdir().expect("tempdir");
131+
let root = tmp.path();
132+
let added = write_file(root, "src/new.rs", b"fn added() {}\n");
133+
set_mtime(&added, base_time() + Duration::from_secs(7200));
134+
135+
let last = base_time() + Duration::from_secs(3600);
136+
let indexed: Vec<String> = Vec::new();
137+
138+
assert!(project_changed_since(root, last, &indexed));
139+
}
140+
141+
#[test]
142+
fn reports_change_when_indexed_file_is_deleted() {
143+
let tmp = tempfile::tempdir().expect("tempdir");
144+
let root = tmp.path();
145+
let lib = write_file(root, "src/lib.rs", b"fn main() {}\n");
146+
set_mtime(&lib, base_time());
147+
148+
let last = base_time() + Duration::from_secs(3600);
149+
// "src/gone.rs" is recorded as indexed but no longer exists on disk.
150+
let indexed = vec!["src/lib.rs".to_string(), "src/gone.rs".to_string()];
151+
152+
assert!(project_changed_since(root, last, &indexed));
153+
}
154+
155+
#[test]
156+
fn skew_margin_boundary_only_ever_makes_the_gate_more_eager() {
157+
let tmp = tempfile::tempdir().expect("tempdir");
158+
let root = tmp.path();
159+
let lib = write_file(root, "src/lib.rs", b"fn main() {}\n");
160+
let mtime = base_time();
161+
set_mtime(&lib, mtime);
162+
let indexed = vec!["src/lib.rs".to_string()];
163+
164+
// File is 1s older than last_indexed_at — inside the 2s margin, so the
165+
// gate refreshes (threshold = last - 2s = mtime - 1s < mtime).
166+
let within_margin = mtime + Duration::from_secs(1);
167+
assert!(project_changed_since(root, within_margin, &indexed));
168+
169+
// File sits exactly at the boundary (threshold == mtime, mtime <=
170+
// threshold), so it counts as unchanged.
171+
let at_margin = mtime + SKEW_MARGIN;
172+
assert!(!project_changed_since(root, at_margin, &indexed));
173+
174+
// File is 3s older than last_indexed_at — beyond the 2s margin, so the
175+
// gate skips (threshold = last - 2s = mtime + 1s >= mtime).
176+
let beyond_margin = mtime + Duration::from_secs(3);
177+
assert!(!project_changed_since(root, beyond_margin, &indexed));
178+
}
179+
}

0 commit comments

Comments
 (0)