diff --git a/Cargo.lock b/Cargo.lock
index 757dfb80..f1ba9935 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1103,7 +1103,7 @@ dependencies = [
[[package]]
name = "gather-step"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"anyhow",
"blake3",
@@ -1145,7 +1145,7 @@ dependencies = [
[[package]]
name = "gather-step-analysis"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"gather-step-core",
"gather-step-parser",
@@ -1160,7 +1160,7 @@ dependencies = [
[[package]]
name = "gather-step-bench"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"anyhow",
"chrono",
@@ -1190,7 +1190,7 @@ dependencies = [
[[package]]
name = "gather-step-core"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"bitcode",
"blake3",
@@ -1204,7 +1204,7 @@ dependencies = [
[[package]]
name = "gather-step-deploy"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"gather-step-core",
"pretty_assertions",
@@ -1216,7 +1216,7 @@ dependencies = [
[[package]]
name = "gather-step-git"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"blake3",
"gather-step-core",
@@ -1233,7 +1233,7 @@ dependencies = [
[[package]]
name = "gather-step-mcp"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"blake3",
"criterion",
@@ -1264,7 +1264,7 @@ dependencies = [
[[package]]
name = "gather-step-output"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"chrono",
"gather-step-analysis",
@@ -1276,7 +1276,7 @@ dependencies = [
[[package]]
name = "gather-step-parser"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"aho-corasick",
"blake3",
@@ -1318,7 +1318,7 @@ dependencies = [
[[package]]
name = "gather-step-storage"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"bitcode",
"blake3",
diff --git a/Cargo.toml b/Cargo.toml
index 2b5e0f21..a90acb56 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ members = [
]
[workspace.package]
-version = "4.2.1"
+version = "4.3.1"
authors = ["JJ Adonis"]
edition = "2024"
rust-version = "1.96"
@@ -24,15 +24,15 @@ homepage = "https://github.com/thedoublejay/gather-step"
description = "High-performance multi-repo codebase intelligence engine"
[workspace.dependencies]
-gather-step = { path = "crates/gather-step-cli", version = "4.2.1" }
-gather-step-analysis = { path = "crates/gather-step-analysis", version = "4.2.1" }
-gather-step-core = { path = "crates/gather-step-core", version = "4.2.1" }
-gather-step-deploy = { path = "crates/gather-step-deploy", version = "4.2.1" }
-gather-step-git = { path = "crates/gather-step-git", version = "4.2.1" }
-gather-step-mcp = { path = "crates/gather-step-mcp", version = "4.2.1" }
-gather-step-output = { path = "crates/gather-step-output", version = "4.2.1" }
-gather-step-parser = { path = "crates/gather-step-parser", version = "4.2.1" }
-gather-step-storage = { path = "crates/gather-step-storage", version = "4.2.1" }
+gather-step = { path = "crates/gather-step-cli", version = "4.3.1" }
+gather-step-analysis = { path = "crates/gather-step-analysis", version = "4.3.1" }
+gather-step-core = { path = "crates/gather-step-core", version = "4.3.1" }
+gather-step-deploy = { path = "crates/gather-step-deploy", version = "4.3.1" }
+gather-step-git = { path = "crates/gather-step-git", version = "4.3.1" }
+gather-step-mcp = { path = "crates/gather-step-mcp", version = "4.3.1" }
+gather-step-output = { path = "crates/gather-step-output", version = "4.3.1" }
+gather-step-parser = { path = "crates/gather-step-parser", version = "4.3.1" }
+gather-step-storage = { path = "crates/gather-step-storage", version = "4.3.1" }
tree-sitter = "=0.26.9"
tree-sitter-typescript = "0.23.2"
diff --git a/crates/gather-step-analysis/src/anchor.rs b/crates/gather-step-analysis/src/anchor.rs
index c68cb0e3..8c42c99b 100644
--- a/crates/gather-step-analysis/src/anchor.rs
+++ b/crates/gather-step-analysis/src/anchor.rs
@@ -148,7 +148,7 @@ fn score_candidate(
if boundary_bonus > 0.0 {
let downstream = outgoing
.iter()
- .filter(|edge| !matches!(edge.kind, EdgeKind::Defines | EdgeKind::Imports))
+ .filter(|edge| edge.kind.is_consumer_edge())
.count();
rationale.push(AnchorRationale::ControllerService {
downstream_nodes: downstream,
diff --git a/crates/gather-step-analysis/src/cycles.rs b/crates/gather-step-analysis/src/cycles.rs
new file mode 100644
index 00000000..9a6f89d4
--- /dev/null
+++ b/crates/gather-step-analysis/src/cycles.rs
@@ -0,0 +1,286 @@
+use gather_step_core::{EdgeKind, NodeKind};
+use gather_step_storage::{GraphStore, GraphStoreError};
+use rustc_hash::FxHashMap;
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum CycleError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Cycle {
+ pub nodes: Vec,
+ pub repos: Vec,
+ pub cross_repo: bool,
+}
+
+pub fn find_cycles(
+ store: &S,
+ edge_kinds: Option<&[EdgeKind]>,
+) -> Result, CycleError> {
+ let mut index_of: FxHashMap<[u8; 16], usize> = FxHashMap::default();
+ let mut labels: Vec = Vec::new();
+ let mut repos: Vec = Vec::new();
+ let mut node_ids = Vec::new();
+ for kind in NodeKind::all() {
+ for node in store.nodes_by_type(*kind)? {
+ let key = node.id.as_bytes();
+ if index_of.contains_key(&key) {
+ continue;
+ }
+ index_of.insert(key, labels.len());
+ labels.push(
+ node.qualified_name
+ .clone()
+ .unwrap_or_else(|| format!("{}::{}", node.repo, node.name)),
+ );
+ repos.push(node.repo.clone());
+ node_ids.push(node.id);
+ }
+ }
+
+ let count = node_ids.len();
+ let mut adjacency: Vec> = vec![Vec::new(); count];
+ let mut self_loop = vec![false; count];
+ for (source_index, node_id) in node_ids.iter().enumerate() {
+ for edge in store.get_outgoing(*node_id)? {
+ if let Some(kinds) = edge_kinds
+ && !kinds.contains(&edge.kind)
+ {
+ continue;
+ }
+ let Some(&target_index) = index_of.get(&edge.target.as_bytes()) else {
+ continue;
+ };
+ if target_index == source_index {
+ self_loop[source_index] = true;
+ }
+ adjacency[source_index].push(target_index);
+ }
+ }
+
+ let sccs = tarjan(&adjacency);
+
+ let mut cycles = Vec::new();
+ for scc in sccs {
+ let is_cycle = scc.len() > 1 || (scc.len() == 1 && self_loop[scc[0]]);
+ if !is_cycle {
+ continue;
+ }
+ let mut nodes: Vec = scc.iter().map(|&i| labels[i].clone()).collect();
+ nodes.sort();
+ let mut cycle_repos: Vec = scc.iter().map(|&i| repos[i].clone()).collect();
+ cycle_repos.sort();
+ cycle_repos.dedup();
+ let cross_repo = cycle_repos.len() > 1;
+ cycles.push(Cycle {
+ nodes,
+ repos: cycle_repos,
+ cross_repo,
+ });
+ }
+ cycles.sort_by(|left, right| left.nodes.cmp(&right.nodes));
+ Ok(cycles)
+}
+
+fn tarjan(adjacency: &[Vec]) -> Vec> {
+ let count = adjacency.len();
+ let mut indices = vec![usize::MAX; count];
+ let mut lowlink = vec![0_usize; count];
+ let mut on_stack = vec![false; count];
+ let mut stack: Vec = Vec::new();
+ let mut sccs: Vec> = Vec::new();
+ let mut next_index = 0_usize;
+
+ for root in 0..count {
+ if indices[root] != usize::MAX {
+ continue;
+ }
+ let mut call_stack: Vec<(usize, usize)> = vec![(root, 0)];
+ while let Some(&(node, child)) = call_stack.last() {
+ if child == 0 {
+ indices[node] = next_index;
+ lowlink[node] = next_index;
+ next_index += 1;
+ stack.push(node);
+ on_stack[node] = true;
+ }
+
+ if child < adjacency[node].len() {
+ let top = call_stack.len() - 1;
+ call_stack[top].1 += 1;
+ let target = adjacency[node][child];
+ if indices[target] == usize::MAX {
+ call_stack.push((target, 0));
+ } else if on_stack[target] {
+ lowlink[node] = lowlink[node].min(indices[target]);
+ }
+ } else {
+ if lowlink[node] == indices[node] {
+ let mut scc = Vec::new();
+ while let Some(member) = stack.pop() {
+ on_stack[member] = false;
+ scc.push(member);
+ if member == node {
+ break;
+ }
+ }
+ sccs.push(scc);
+ }
+ call_stack.pop();
+ if let Some(&(parent, _)) = call_stack.last() {
+ lowlink[parent] = lowlink[parent].min(lowlink[node]);
+ }
+ }
+ }
+ }
+ sccs
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{EdgeData, EdgeKind, EdgeMetadata, NodeData, NodeId, NodeKind, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::find_cycles;
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-cycles-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn func(repo: &str, name: &str) -> NodeData {
+ NodeData {
+ id: node_id(repo, "src/a.ts", NodeKind::Function, name),
+ kind: NodeKind::Function,
+ repo: repo.to_owned(),
+ file_path: "src/a.ts".to_owned(),
+ name: name.to_owned(),
+ qualified_name: Some(format!("{repo}::{name}")),
+ external_id: None,
+ signature: None,
+ visibility: None,
+ span: None,
+ is_virtual: false,
+ }
+ }
+
+ fn calls(source: NodeId, target: NodeId, owner: NodeId) -> EdgeData {
+ EdgeData {
+ source,
+ target,
+ kind: EdgeKind::Calls,
+ metadata: EdgeMetadata::default(),
+ owner_file: owner,
+ is_cross_file: false,
+ }
+ }
+
+ #[test]
+ fn detects_a_simple_cycle_and_ignores_acyclic_chains() {
+ let temp = TempDb::new("simple");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("web", "b");
+ let c = func("web", "c");
+ let d = func("web", "d");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone(), c.clone(), d.clone()],
+ &[
+ calls(a.id, b.id, file.id),
+ calls(b.id, c.id, file.id),
+ calls(c.id, a.id, file.id),
+ calls(c.id, d.id, file.id),
+ ],
+ )
+ .expect("write");
+
+ let cycles = find_cycles(&store, Some(&[EdgeKind::Calls])).expect("cycles");
+ assert_eq!(cycles.len(), 1, "expected one cycle, got {cycles:?}");
+ assert_eq!(cycles[0].nodes, vec!["web::a", "web::b", "web::c"]);
+ assert!(!cycles[0].cross_repo);
+ }
+
+ #[test]
+ fn acyclic_graph_has_no_cycles() {
+ let temp = TempDb::new("acyclic");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("web", "b");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone()],
+ &[calls(a.id, b.id, file.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_cycles(&store, Some(&[EdgeKind::Calls]))
+ .expect("cycles")
+ .is_empty()
+ );
+ }
+
+ #[test]
+ fn flags_cross_repo_cycle() {
+ let temp = TempDb::new("cross-repo");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("api", "b");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone()],
+ &[calls(a.id, b.id, file.id), calls(b.id, a.id, file.id)],
+ )
+ .expect("write");
+
+ let cycles = find_cycles(&store, Some(&[EdgeKind::Calls])).expect("cycles");
+ assert_eq!(cycles.len(), 1);
+ assert!(cycles[0].cross_repo);
+ assert_eq!(cycles[0].repos, vec!["api", "web"]);
+ }
+}
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index 3ac29fdb..07517af6 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -9,17 +9,21 @@ pub mod contract_drift;
pub mod conventions;
pub mod cross_repo;
pub mod crud_trace;
+pub mod cycles;
pub mod dead_code;
pub mod deployment_topology;
pub mod event_topology;
pub mod evidence;
pub mod impact;
+pub mod mock_leakage;
+pub mod mongo_query_safety;
pub mod overview;
pub mod pack_assembly;
pub mod projection_impact;
pub mod proofs;
pub mod query;
pub mod semantic_health;
+pub mod shared_component_usage;
pub mod shared_contract;
pub mod transport;
@@ -36,6 +40,7 @@ pub use cross_repo::{
pub use crud_trace::{
CrudTrace, CrudTraceEntry, CrudTraceError, CrudTraceRole, trace_crud_route, trace_crud_symbol,
};
+pub use cycles::{Cycle, CycleError, find_cycles};
pub use dead_code::{
ConfidenceBand, DeadCodeError, DeadCodeFinding, DeadCodeReport, DetectorBasis, find_dead_code,
find_dead_code_with_manifest,
@@ -54,6 +59,11 @@ pub use event_topology::{
pub use impact::{
BoundaryRole, EvidenceBand, ImpactError, ImpactMap, ImpactedFile, shared_contract_impact,
};
+pub use mock_leakage::{MockLeakage, MockLeakageError, find_mock_leakage, is_mock_path};
+pub use mongo_query_safety::{
+ MongoQueryFinding, RULE_ATLAS_INDEX_DRIFT, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH,
+ RULE_UNSAFE_COERCION, analyze_atlas_index_drift, analyze_mongo_value,
+};
pub use overview::{ModuleSummary, OverviewError, RepoOverview, build_overview};
pub use pack_assembly::{
CandidateKey, Pack, PackAssembler, PackItem, PackMode, QueryShape, SimplePackAssembler,
@@ -68,11 +78,14 @@ pub use proofs::{
MAX_PROOFS_PER_REPO, ProofCaller, ProofEngineError, ProofEngineOptions, ProofEngineOutput,
build_pack_proofs, derive_repo_sets, finalize_proofs, proof_strength,
};
-pub use query::{GraphQuery, QueryError, TraversalStep};
+pub use query::{GraphQuery, QueryError, TraversalOutcome, TraversalStep};
pub use semantic_health::{
SemanticHealthError, SemanticHealthReport, SemanticLinkHealth, semantic_health_for_repo,
semantic_health_for_workspace,
};
+pub use shared_component_usage::{
+ ReuseOpportunity, SharedComponentError, analyze_shared_component_reuse, is_design_system_path,
+};
pub use shared_contract::{
guard_class_name_for_anchor, looks_like_guard_entrypoint, peer_matches_guard_class_name,
shared_contract_candidate_ids,
diff --git a/crates/gather-step-analysis/src/mock_leakage.rs b/crates/gather-step-analysis/src/mock_leakage.rs
new file mode 100644
index 00000000..d2d41bdc
--- /dev/null
+++ b/crates/gather-step-analysis/src/mock_leakage.rs
@@ -0,0 +1,207 @@
+use gather_step_core::EdgeKind;
+use gather_step_storage::{GraphStore, GraphStoreError};
+use rustc_hash::FxHashMap;
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum MockLeakageError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+const MOCK_MARKERS: &[&str] = &[
+ "__mocks__",
+ ".mock.",
+ "/mocks/",
+ ".fixture.",
+ "/fixtures/",
+ ".stub.",
+];
+
+const TEST_MARKERS: &[&str] = &[".test.", ".spec.", "/__tests__/", "/tests/", "/test/"];
+
+#[must_use]
+pub fn is_mock_path(file_path: &str) -> bool {
+ MOCK_MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+#[must_use]
+pub fn is_test_path(file_path: &str) -> bool {
+ TEST_MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct MockLeakage {
+ pub repo: String,
+ pub importer_file: String,
+ pub mock_file: String,
+}
+
+pub fn find_mock_leakage(
+ store: &S,
+ repo: &str,
+) -> Result, MockLeakageError> {
+ let nodes = store.nodes_by_repo(repo)?;
+ let file_of: FxHashMap<[u8; 16], String> = nodes
+ .iter()
+ .map(|node| (node.id.as_bytes(), node.file_path.clone()))
+ .collect();
+
+ let mut leaks = Vec::new();
+ for node in &nodes {
+ if is_test_path(&node.file_path) || is_mock_path(&node.file_path) {
+ continue;
+ }
+ for edge in store.get_outgoing(node.id)? {
+ if edge.kind != EdgeKind::Imports {
+ continue;
+ }
+ let Some(target_file) = file_of.get(&edge.target.as_bytes()) else {
+ continue;
+ };
+ if is_mock_path(target_file) {
+ leaks.push(MockLeakage {
+ repo: repo.to_owned(),
+ importer_file: node.file_path.clone(),
+ mock_file: target_file.clone(),
+ });
+ }
+ }
+ }
+
+ leaks.sort_by(|left, right| {
+ left.importer_file
+ .cmp(&right.importer_file)
+ .then(left.mock_file.cmp(&right.mock_file))
+ });
+ leaks.dedup();
+ Ok(leaks)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{EdgeData, EdgeKind, EdgeMetadata, NodeData, NodeId, NodeKind, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::find_mock_leakage;
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-mock-leakage-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn module(repo: &str, file_path: &str) -> NodeData {
+ NodeData {
+ id: node_id(repo, file_path, NodeKind::File, file_path),
+ kind: NodeKind::File,
+ repo: repo.to_owned(),
+ file_path: file_path.to_owned(),
+ name: file_path.to_owned(),
+ qualified_name: Some(file_path.to_owned()),
+ external_id: None,
+ signature: None,
+ visibility: None,
+ span: None,
+ is_virtual: false,
+ }
+ }
+
+ fn imports(source: NodeId, target: NodeId, owner: NodeId) -> EdgeData {
+ EdgeData {
+ source,
+ target,
+ kind: EdgeKind::Imports,
+ metadata: EdgeMetadata::default(),
+ owner_file: owner,
+ is_cross_file: true,
+ }
+ }
+
+ #[test]
+ fn flags_prod_module_importing_a_mock() {
+ let temp = TempDb::new("leak");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let prod = module("web", "src/features/OrderList.tsx");
+ let mock = module("web", "src/features/__mocks__/orders.mock.ts");
+ store
+ .bulk_insert(
+ &[prod.clone(), mock.clone()],
+ &[imports(prod.id, mock.id, prod.id)],
+ )
+ .expect("write");
+
+ let leaks = find_mock_leakage(&store, "web").expect("analyze");
+ assert_eq!(leaks.len(), 1);
+ assert_eq!(leaks[0].importer_file, "src/features/OrderList.tsx");
+ assert_eq!(leaks[0].mock_file, "src/features/__mocks__/orders.mock.ts");
+ }
+
+ #[test]
+ fn test_file_importing_a_mock_is_not_flagged() {
+ let temp = TempDb::new("test-import");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let spec = module("web", "src/features/OrderList.test.tsx");
+ let mock = module("web", "src/features/__mocks__/orders.mock.ts");
+ store
+ .bulk_insert(
+ &[spec.clone(), mock.clone()],
+ &[imports(spec.id, mock.id, spec.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_mock_leakage(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+
+ #[test]
+ fn prod_importing_prod_is_not_flagged() {
+ let temp = TempDb::new("clean");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let prod = module("web", "src/features/OrderList.tsx");
+ let helper = module("web", "src/features/format.ts");
+ store
+ .bulk_insert(
+ &[prod.clone(), helper.clone()],
+ &[imports(prod.id, helper.id, prod.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_mock_leakage(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+}
diff --git a/crates/gather-step-analysis/src/mongo_query_safety.rs b/crates/gather-step-analysis/src/mongo_query_safety.rs
new file mode 100644
index 00000000..3bd741d9
--- /dev/null
+++ b/crates/gather-step-analysis/src/mongo_query_safety.rs
@@ -0,0 +1,297 @@
+//! Structural safety detectors for Mongo queries, aggregations, and Atlas
+//! search-index definitions. Pure over a [`serde_json::Value`] so they are
+//! testable without a live parser.
+
+use serde_json::Value;
+
+pub const RULE_INDEX_DEFEAT: &str = "GS-MONGO-INDEX-DEFEAT";
+pub const RULE_UNSAFE_COERCION: &str = "GS-MONGO-UNSAFE-COERCION";
+pub const RULE_NULL_PARENT_PATH: &str = "GS-MONGO-NULL-PARENT-PATH";
+pub const RULE_ATLAS_INDEX_DRIFT: &str = "GS-MONGO-ATLAS-INDEX-DRIFT";
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct MongoQueryFinding {
+ pub rule_id: &'static str,
+ pub confidence: f64,
+ pub message: String,
+ pub path: String,
+}
+
+#[must_use]
+pub fn analyze_mongo_value(value: &Value) -> Vec {
+ let mut findings = Vec::new();
+ walk(value, "$", &mut findings);
+ findings.sort_by(|left, right| {
+ left.rule_id
+ .cmp(right.rule_id)
+ .then_with(|| left.path.cmp(&right.path))
+ });
+ findings
+}
+
+#[must_use]
+pub fn analyze_atlas_index_drift(
+ index_def: &Value,
+ referenced_fields: &[&str],
+) -> Vec {
+ let mappings = index_def.get("mappings");
+ let dynamic = mappings
+ .and_then(|m| m.get("dynamic"))
+ .and_then(Value::as_bool)
+ .unwrap_or(true);
+ if dynamic {
+ return Vec::new();
+ }
+ let mapped: Vec<&str> = mappings
+ .and_then(|m| m.get("fields"))
+ .and_then(Value::as_object)
+ .map(|fields| fields.keys().map(String::as_str).collect())
+ .unwrap_or_default();
+
+ let mut findings: Vec = referenced_fields
+ .iter()
+ .filter(|field| !mapped.contains(*field))
+ .map(|field| MongoQueryFinding {
+ rule_id: RULE_ATLAS_INDEX_DRIFT,
+ confidence: 0.75,
+ message: format!(
+ "Field `{field}` is queried but absent from the `dynamic:false` Atlas index \
+ mapping; the search silently matches nothing on it. Add it to the mapping."
+ ),
+ path: (*field).to_owned(),
+ })
+ .collect();
+ findings.sort_by(|left, right| left.path.cmp(&right.path));
+ findings
+}
+
+fn walk(value: &Value, path: &str, findings: &mut Vec) {
+ match value {
+ Value::Object(map) => {
+ for (key, child) in map {
+ let child_path = format!("{path}.{key}");
+ match key.as_str() {
+ "$lookup" => detect_index_defeat(child, &child_path, findings),
+ "$toObjectId" => findings.push(MongoQueryFinding {
+ rule_id: RULE_UNSAFE_COERCION,
+ confidence: 0.8,
+ message: "Bare `$toObjectId` throws on a malformed id; use `$convert` \
+ with an `onError` fallback for untrusted input."
+ .to_owned(),
+ path: child_path.clone(),
+ }),
+ "$set" => detect_null_parent_path(child, &child_path, findings),
+ _ => {}
+ }
+ walk(child, &child_path, findings);
+ }
+ }
+ Value::Array(items) => {
+ for (index, item) in items.iter().enumerate() {
+ walk(item, &format!("{path}[{index}]"), findings);
+ }
+ }
+ _ => {}
+ }
+}
+
+fn detect_index_defeat(lookup: &Value, path: &str, findings: &mut Vec) {
+ if mentions_operator(lookup, "$toString") || mentions_operator(lookup, "$toObjectId") {
+ findings.push(MongoQueryFinding {
+ rule_id: RULE_INDEX_DEFEAT,
+ confidence: 0.7,
+ message: "`$lookup` coerces its join key (`$toString`/`$toObjectId`), defeating the \
+ index on the join field; align field types or pre-store the join key."
+ .to_owned(),
+ path: path.to_owned(),
+ });
+ }
+}
+
+fn detect_null_parent_path(set_doc: &Value, path: &str, findings: &mut Vec) {
+ let Value::Object(map) = set_doc else {
+ return;
+ };
+ for (field, expr) in map {
+ let Some((parent, _)) = field.rsplit_once('.') else {
+ continue;
+ };
+ if value_guards_parent(expr) || sibling_assigns_parent(map, parent) {
+ continue;
+ }
+ findings.push(MongoQueryFinding {
+ rule_id: RULE_NULL_PARENT_PATH,
+ confidence: 0.6,
+ message: format!(
+ "`$set` on dotted path `{field}` has no existence/`$type:object` guard on parent \
+ `{parent}`; a null or scalar parent will be clobbered or error."
+ ),
+ path: format!("{path}.{field}"),
+ });
+ }
+}
+
+fn value_guards_parent(expr: &Value) -> bool {
+ mentions_operator(expr, "$ifNull") || mentions_operator(expr, "$cond")
+}
+
+fn sibling_assigns_parent(map: &serde_json::Map, parent: &str) -> bool {
+ map.keys()
+ .any(|key| key == parent || parent.starts_with(&format!("{key}.")))
+}
+
+fn mentions_operator(value: &Value, op: &str) -> bool {
+ match value {
+ Value::Object(map) => map
+ .iter()
+ .any(|(key, child)| key == op || mentions_operator(child, op)),
+ Value::Array(items) => items.iter().any(|item| mentions_operator(item, op)),
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{
+ RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION, analyze_mongo_value,
+ };
+ use serde_json::json;
+
+ fn rule_ids(value: &serde_json::Value) -> Vec<&'static str> {
+ analyze_mongo_value(value)
+ .into_iter()
+ .map(|finding| finding.rule_id)
+ .collect()
+ }
+
+ #[test]
+ fn flags_lookup_that_coerces_join_key_but_not_index_aligned_lookup() {
+ let coercing = json!([{
+ "$lookup": {
+ "from": "users",
+ "let": { "uid": "$userId" },
+ "pipeline": [
+ { "$match": { "$expr": { "$eq": [{ "$toString": "$_id" }, "$$uid"] } } }
+ ],
+ "as": "user"
+ }
+ }]);
+ assert!(rule_ids(&coercing).contains(&RULE_INDEX_DEFEAT));
+
+ let aligned = json!([{
+ "$lookup": {
+ "from": "users",
+ "localField": "userId",
+ "foreignField": "_id",
+ "as": "user"
+ }
+ }]);
+ assert!(!rule_ids(&aligned).contains(&RULE_INDEX_DEFEAT));
+ }
+
+ #[test]
+ fn flags_bare_to_object_id_but_not_convert_with_on_error() {
+ let bare = json!({ "$match": { "_id": { "$toObjectId": "$$req.id" } } });
+ assert!(rule_ids(&bare).contains(&RULE_UNSAFE_COERCION));
+
+ let safe = json!({
+ "$match": {
+ "_id": { "$convert": { "input": "$$req.id", "to": "objectId", "onError": null } }
+ }
+ });
+ assert!(!rule_ids(&safe).contains(&RULE_UNSAFE_COERCION));
+ }
+
+ #[test]
+ fn flags_unguarded_dotted_set_but_not_guarded_sibling() {
+ let unguarded = json!({ "$set": { "meta.flags.active": true } });
+ assert!(rule_ids(&unguarded).contains(&RULE_NULL_PARENT_PATH));
+
+ let guarded_value =
+ json!({ "$set": { "meta.flags.active": { "$ifNull": ["$meta.flags.active", true] } } });
+ assert!(!rule_ids(&guarded_value).contains(&RULE_NULL_PARENT_PATH));
+
+ let guarded_sibling =
+ json!({ "$set": { "meta": { "flags": {} }, "meta.flags.active": true } });
+ assert!(!rule_ids(&guarded_sibling).contains(&RULE_NULL_PARENT_PATH));
+ }
+
+ #[test]
+ fn flags_unmapped_field_on_dynamic_false_index_but_not_mapped_field() {
+ use super::{RULE_ATLAS_INDEX_DRIFT, analyze_atlas_index_drift};
+
+ let index = json!({
+ "mappings": { "dynamic": false, "fields": { "title": {}, "body": {} } }
+ });
+ let findings = analyze_atlas_index_drift(&index, &["title", "entity"]);
+ let ids: Vec<_> = findings.iter().map(|f| f.rule_id).collect();
+ assert!(ids.contains(&RULE_ATLAS_INDEX_DRIFT));
+ assert!(findings.iter().any(|f| f.path == "entity"));
+ assert!(!findings.iter().any(|f| f.path == "title"));
+
+ let dynamic_index = json!({ "mappings": { "dynamic": true } });
+ assert!(analyze_atlas_index_drift(&dynamic_index, &["anything"]).is_empty());
+ }
+
+ #[test]
+ fn trap_detectors_cover_each_pattern_and_clean_siblings() {
+ use super::{
+ RULE_ATLAS_INDEX_DRIFT, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION,
+ analyze_atlas_index_drift,
+ };
+
+ let trap_a = json!([{ "$lookup": {
+ "from": "u", "let": { "k": "$userId" },
+ "pipeline": [{ "$match": { "$expr": { "$eq": [{ "$toString": "$_id" }, "$$k"] } } }],
+ "as": "u"
+ }}]);
+ let clean_a = json!([{ "$lookup": {
+ "from": "u", "localField": "userId", "foreignField": "_id", "as": "u"
+ }}]);
+ assert!(rule_ids(&trap_a).contains(&RULE_INDEX_DEFEAT));
+ assert!(!rule_ids(&clean_a).contains(&RULE_INDEX_DEFEAT));
+
+ let trap_b = json!({ "_id": { "$toObjectId": "$$req.id" } });
+ let clean_b = json!({ "_id": { "$convert": { "input": "$$req.id", "to": "objectId", "onError": null } } });
+ assert!(rule_ids(&trap_b).contains(&RULE_UNSAFE_COERCION));
+ assert!(!rule_ids(&clean_b).contains(&RULE_UNSAFE_COERCION));
+
+ let trap_c = json!({ "$set": { "meta.flags.active": true } });
+ let clean_c = json!({ "$set": { "meta": { "flags": {} }, "meta.flags.active": true } });
+ assert!(rule_ids(&trap_c).contains(&RULE_NULL_PARENT_PATH));
+ assert!(!rule_ids(&clean_c).contains(&RULE_NULL_PARENT_PATH));
+
+ let index = json!({ "mappings": { "dynamic": false, "fields": { "title": {} } } });
+ let trap_d = analyze_atlas_index_drift(&index, &["title", "entity"]);
+ assert!(
+ trap_d
+ .iter()
+ .any(|f| f.rule_id == RULE_ATLAS_INDEX_DRIFT && f.path == "entity")
+ );
+ assert!(!trap_d.iter().any(|f| f.path == "title"));
+ }
+
+ #[test]
+ fn clean_pipeline_yields_no_findings() {
+ let clean = json!([
+ { "$match": { "status": "active" } },
+ { "$project": { "name": 1, "email": 1 } }
+ ]);
+ assert!(analyze_mongo_value(&clean).is_empty());
+ }
+
+ #[test]
+ fn findings_are_deterministically_sorted() {
+ let value = json!({
+ "$set": { "a.b": true },
+ "stage": { "$toObjectId": "$x" }
+ });
+ let first = analyze_mongo_value(&value);
+ let second = analyze_mongo_value(&value);
+ assert_eq!(first, second);
+ let ids: Vec<_> = first.iter().map(|f| f.rule_id).collect();
+ let mut sorted = ids.clone();
+ sorted.sort_unstable();
+ assert_eq!(ids, sorted);
+ }
+}
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index 697d6ebb..d259ee2e 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -2,9 +2,11 @@ use std::collections::{BTreeMap, VecDeque};
use gather_step_core::{EdgeData, EdgeKind, NodeData, NodeId, NodeKind};
use gather_step_storage::{GraphStore, GraphStoreError};
-use rustc_hash::FxHashSet;
+use rustc_hash::{FxHashMap, FxHashSet};
use thiserror::Error;
+const DEFAULT_FANOUT_CAP: usize = 256;
+
#[derive(Debug, Error)]
pub enum QueryError {
#[error(transparent)]
@@ -16,6 +18,14 @@ pub struct TraversalStep {
pub node_id: NodeId,
pub edge_kinds: Vec,
pub depth: usize,
+ pub in_paths: Vec>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct TraversalOutcome {
+ pub steps: Vec,
+ pub depth_capped: bool,
+ pub truncated: bool,
}
pub struct GraphQuery<'a, S> {
@@ -40,11 +50,13 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
&self,
source: NodeId,
edge_kind: Option,
+ min_confidence: Option,
) -> Result, QueryError> {
let mut edges = self.store.get_outgoing(source)?;
if let Some(edge_kind) = edge_kind {
edges.retain(|edge| edge.kind == edge_kind);
}
+ edges.retain(|edge| edge.metadata.passes_confidence(min_confidence));
Ok(edges)
}
@@ -52,11 +64,13 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
&self,
target: NodeId,
edge_kind: Option,
+ min_confidence: Option,
) -> Result, QueryError> {
let mut edges = self.store.get_incoming(target)?;
if let Some(edge_kind) = edge_kind {
edges.retain(|edge| edge.kind == edge_kind);
}
+ edges.retain(|edge| edge.metadata.passes_confidence(min_confidence));
Ok(edges)
}
@@ -65,34 +79,88 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
start: NodeId,
edge_kinds: &[EdgeKind],
max_depth: usize,
+ min_confidence: Option,
) -> Result, QueryError> {
+ Ok(self
+ .traverse_with_provenance(start, edge_kinds, max_depth, min_confidence)?
+ .steps)
+ }
+
+ pub fn traverse_with_provenance(
+ &self,
+ start: NodeId,
+ edge_kinds: &[EdgeKind],
+ max_depth: usize,
+ min_confidence: Option,
+ ) -> Result {
let mut queue = VecDeque::from([(start, Vec::::new(), 0_usize)]);
- let mut seen = FxHashSet::from_iter([start.as_bytes()]);
- let mut steps = Vec::new();
+ let mut enqueued = FxHashSet::from_iter([start.as_bytes()]);
+ let mut order: Vec = Vec::new();
+ let mut primary: FxHashMap<[u8; 16], (Vec, usize)> = FxHashMap::default();
+ let mut in_paths: FxHashMap<[u8; 16], Vec>> = FxHashMap::default();
+ let mut depth_capped = false;
+ let mut truncated = false;
while let Some((node_id, path, depth)) = queue.pop_front() {
+ let outgoing: Vec = self
+ .store
+ .get_outgoing(node_id)?
+ .into_iter()
+ .filter(|edge| edge_kinds.is_empty() || edge_kinds.contains(&edge.kind))
+ .filter(|edge| edge.metadata.passes_confidence(min_confidence))
+ .collect();
+
if depth >= max_depth {
+ if !outgoing.is_empty() {
+ depth_capped = true;
+ }
continue;
}
- for edge in self.store.get_outgoing(node_id)? {
- if !edge_kinds.is_empty() && !edge_kinds.contains(&edge.kind) {
- continue;
+ for (index, edge) in outgoing.iter().enumerate() {
+ if index >= DEFAULT_FANOUT_CAP {
+ truncated = true;
+ break;
}
let mut next_path = path.clone();
next_path.push(edge.kind);
- if seen.insert(edge.target.as_bytes()) {
- steps.push(TraversalStep {
- node_id: edge.target,
- edge_kinds: next_path.clone(),
- depth: depth + 1,
- });
+ let key = edge.target.as_bytes();
+
+ let recorded = in_paths.entry(key).or_default();
+ if recorded.len() < DEFAULT_FANOUT_CAP {
+ recorded.push(next_path.clone());
+ } else {
+ truncated = true;
+ }
+
+ if enqueued.insert(key) {
+ order.push(edge.target);
+ primary.insert(key, (next_path.clone(), depth + 1));
queue.push_back((edge.target, next_path, depth + 1));
}
}
}
- Ok(steps)
+ let steps = order
+ .into_iter()
+ .map(|node_id| {
+ let key = node_id.as_bytes();
+ let (edges, depth) = primary.get(&key).cloned().unwrap_or_default();
+ let paths = in_paths.get(&key).cloned().unwrap_or_default();
+ TraversalStep {
+ node_id,
+ edge_kinds: edges,
+ depth,
+ in_paths: paths,
+ }
+ })
+ .collect();
+
+ Ok(TraversalOutcome {
+ steps,
+ depth_capped,
+ truncated,
+ })
}
pub fn count_by_kind(&self) -> Result, QueryError> {
@@ -113,6 +181,45 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
}
Ok(counts)
}
+
+ pub fn resolution_fingerprint(&self) -> Result, QueryError> {
+ let mut labels: FxHashMap<[u8; 16], String> = FxHashMap::default();
+ let mut node_ids: Vec = Vec::new();
+ for kind in NodeKind::all() {
+ for node in self.store.nodes_by_type(*kind)? {
+ let label = node
+ .qualified_name
+ .clone()
+ .unwrap_or_else(|| format!("{}::{}", node.repo, node.name));
+ labels.insert(node.id.as_bytes(), label);
+ node_ids.push(node.id);
+ }
+ }
+
+ let mut lines = Vec::new();
+ for node_id in node_ids {
+ for edge in self.store.get_outgoing(node_id)? {
+ let source = labels
+ .get(&edge.source.as_bytes())
+ .map_or("", String::as_str);
+ let target = labels
+ .get(&edge.target.as_bytes())
+ .map_or("", String::as_str);
+ let resolver = edge.metadata.resolver.as_deref().unwrap_or("none");
+ let confidence = edge
+ .metadata
+ .confidence
+ .map_or_else(|| "none".to_owned(), |value| value.to_string());
+ lines.push(format!(
+ "{source}\t{}\t{target}\t{resolver}\t{confidence}",
+ edge.kind
+ ));
+ }
+ }
+ lines.sort();
+ lines.dedup();
+ Ok(lines)
+ }
}
#[cfg(test)]
@@ -221,13 +328,13 @@ mod tests {
);
assert_eq!(
query
- .get_edges(a.id, Some(EdgeKind::Calls))
+ .get_edges(a.id, Some(EdgeKind::Calls), None)
.expect("edges should load")
.len(),
1
);
let traversed = query
- .traverse(a.id, &[EdgeKind::Calls], 2)
+ .traverse(a.id, &[EdgeKind::Calls], 2, None)
.expect("traversal should succeed");
assert_eq!(traversed.len(), 2);
assert!(
@@ -242,6 +349,172 @@ mod tests {
);
}
+ #[test]
+ fn min_confidence_filters_low_confidence_edges_but_keeps_structural() {
+ let temp_db = TempDb::new("query-confidence");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 0);
+ let trusted = node("service-a", "src/a.ts", NodeKind::Function, "trusted", 1);
+ let guessed = node("service-a", "src/a.ts", NodeKind::Function, "guessed", 2);
+
+ // a -> trusted has no confidence (a definite structural edge);
+ // a -> guessed is a low-confidence heuristic resolution.
+ let trusted_edge = edge(a.id, trusted.id, file.id);
+ let mut guessed_edge = edge(a.id, guessed.id, file.id);
+ guessed_edge.metadata.confidence = Some(300);
+
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), trusted.clone(), guessed.clone()],
+ &[trusted_edge, guessed_edge],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+
+ // No threshold: both edges traversed.
+ let all = query
+ .traverse(a.id, &[EdgeKind::Calls], 1, None)
+ .expect("traversal should succeed");
+ assert_eq!(all.len(), 2);
+
+ // Threshold above the heuristic edge: the low-confidence edge is
+ // dropped, but the structural (None) edge is kept.
+ let filtered = query
+ .traverse(a.id, &[EdgeKind::Calls], 1, Some(500))
+ .expect("traversal should succeed");
+ assert_eq!(filtered.len(), 1);
+ assert_eq!(filtered[0].node_id, trusted.id);
+ }
+
+ #[test]
+ fn traverse_with_provenance_flags_depth_capping() {
+ let temp_db = TempDb::new("query-depth-cap");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 0);
+ let b = node("service-a", "src/a.ts", NodeKind::Function, "b", 1);
+ let c = node("service-a", "src/a.ts", NodeKind::Function, "c", 2);
+ let d = node("service-a", "src/a.ts", NodeKind::Function, "d", 3);
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone(), c.clone(), d.clone()],
+ &[
+ edge(a.id, b.id, file.id),
+ edge(b.id, c.id, file.id),
+ edge(c.id, d.id, file.id),
+ ],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+
+ let capped = query
+ .traverse_with_provenance(a.id, &[EdgeKind::Calls], 2, None)
+ .expect("traversal should succeed");
+ assert_eq!(capped.steps.len(), 2);
+ assert!(
+ capped.depth_capped,
+ "deeper edge beyond max_depth not flagged"
+ );
+ assert!(!capped.truncated);
+
+ let full = query
+ .traverse_with_provenance(a.id, &[EdgeKind::Calls], 8, None)
+ .expect("traversal should succeed");
+ assert_eq!(full.steps.len(), 3);
+ assert!(!full.depth_capped);
+ }
+
+ #[test]
+ fn traverse_with_provenance_records_every_in_path() {
+ let temp_db = TempDb::new("query-provenance");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let root = node("service-a", "src/a.ts", NodeKind::Function, "root", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 1);
+ let b = node("service-a", "src/a.ts", NodeKind::Function, "b", 2);
+ let c = node("service-a", "src/a.ts", NodeKind::Function, "c", 3);
+ let sink = node("service-a", "src/a.ts", NodeKind::Function, "sink", 4);
+ store
+ .bulk_insert(
+ &[
+ file.clone(),
+ root.clone(),
+ a.clone(),
+ b.clone(),
+ c.clone(),
+ sink.clone(),
+ ],
+ &[
+ edge(root.id, a.id, file.id),
+ edge(root.id, b.id, file.id),
+ edge(root.id, c.id, file.id),
+ edge(a.id, sink.id, file.id),
+ edge(b.id, sink.id, file.id),
+ edge(c.id, sink.id, file.id),
+ ],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+ let outcome = query
+ .traverse_with_provenance(root.id, &[EdgeKind::Calls], 4, None)
+ .expect("traversal should succeed");
+
+ let sink_step = outcome
+ .steps
+ .iter()
+ .find(|step| step.node_id == sink.id)
+ .expect("sink should be reached");
+ assert_eq!(
+ sink_step.in_paths.len(),
+ 3,
+ "a node reachable three ways should report three caller paths"
+ );
+ }
+
+ #[test]
+ fn traverse_with_provenance_flags_fan_out_truncation() {
+ let temp_db = TempDb::new("query-fan-out");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let hub = node("service-a", "src/a.ts", NodeKind::Function, "hub", 0);
+
+ let fan_out = 300_u16;
+ let mut nodes = vec![file.clone(), hub.clone()];
+ let mut edges = Vec::new();
+ for ordinal in 1..=fan_out {
+ let leaf = node(
+ "service-a",
+ "src/a.ts",
+ NodeKind::Function,
+ &format!("leaf{ordinal}"),
+ ordinal,
+ );
+ edges.push(edge(hub.id, leaf.id, file.id));
+ nodes.push(leaf);
+ }
+ store
+ .bulk_insert(&nodes, &edges)
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+ let outcome = query
+ .traverse_with_provenance(hub.id, &[EdgeKind::Calls], 2, None)
+ .expect("traversal should succeed");
+ assert!(
+ outcome.truncated,
+ "fan-out above the cap must flag truncated"
+ );
+ assert!(
+ outcome.steps.len() <= 256,
+ "truncated traversal must not exceed the fan-out cap: {}",
+ outcome.steps.len()
+ );
+ }
+
#[test]
fn counts_nodes_and_edges_by_kind() {
let temp_db = TempDb::new("counts");
diff --git a/crates/gather-step-analysis/src/shared_component_usage.rs b/crates/gather-step-analysis/src/shared_component_usage.rs
new file mode 100644
index 00000000..e69b5599
--- /dev/null
+++ b/crates/gather-step-analysis/src/shared_component_usage.rs
@@ -0,0 +1,192 @@
+use std::collections::BTreeMap;
+
+use gather_step_core::{NodeData, NodeKind};
+use gather_step_storage::{GraphStore, GraphStoreError};
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum SharedComponentError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+const DESIGN_SYSTEM_MARKERS: &[&str] = &[
+ "shared/",
+ "design-system",
+ "packages/",
+ "/ui/components",
+ "@shared",
+ "common/",
+ "/lib/",
+ "libs/",
+ "internal/",
+ "/pkg/",
+];
+
+#[must_use]
+pub fn is_design_system_path(file_path: &str) -> bool {
+ DESIGN_SYSTEM_MARKERS
+ .iter()
+ .any(|marker| file_path.contains(marker))
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct ReuseOpportunity {
+ pub repo: String,
+ pub local_symbol: String,
+ pub local_file: String,
+ pub shared_symbol: String,
+ pub shared_file: String,
+}
+
+fn is_component_like(node: &NodeData) -> bool {
+ matches!(node.kind, NodeKind::Function | NodeKind::Class)
+}
+
+pub fn analyze_shared_component_reuse(
+ store: &S,
+ repo: &str,
+) -> Result, SharedComponentError> {
+ let nodes = store.nodes_by_repo(repo)?;
+
+ let mut shared: BTreeMap = BTreeMap::new();
+ for node in &nodes {
+ if is_component_like(node) && is_design_system_path(&node.file_path) {
+ shared
+ .entry(node.name.clone())
+ .or_insert_with(|| node.file_path.clone());
+ }
+ }
+
+ let mut opportunities = Vec::new();
+ for node in &nodes {
+ if !is_component_like(node) || is_design_system_path(&node.file_path) {
+ continue;
+ }
+ if let Some(shared_file) = shared.get(&node.name) {
+ opportunities.push(ReuseOpportunity {
+ repo: repo.to_owned(),
+ local_symbol: node.name.clone(),
+ local_file: node.file_path.clone(),
+ shared_symbol: node.name.clone(),
+ shared_file: shared_file.clone(),
+ });
+ }
+ }
+
+ opportunities.sort_by(|left, right| {
+ left.local_file
+ .cmp(&right.local_file)
+ .then(left.local_symbol.cmp(&right.local_symbol))
+ });
+ opportunities.dedup();
+ Ok(opportunities)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{NodeData, NodeKind, SourceSpan, Visibility, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::{analyze_shared_component_reuse, is_design_system_path};
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-shared-component-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn node(repo: &str, file_path: &str, name: &str, ordinal: u32) -> NodeData {
+ NodeData {
+ id: node_id(repo, file_path, NodeKind::Function, name),
+ kind: NodeKind::Function,
+ repo: repo.to_owned(),
+ file_path: file_path.to_owned(),
+ name: name.to_owned(),
+ qualified_name: Some(format!("{repo}::{name}")),
+ external_id: None,
+ signature: None,
+ visibility: Some(Visibility::Public),
+ span: Some(SourceSpan {
+ line_start: ordinal,
+ line_len: 0,
+ column_start: 0,
+ column_len: 1,
+ }),
+ is_virtual: false,
+ }
+ }
+
+ #[test]
+ fn design_system_path_markers() {
+ assert!(is_design_system_path("packages/ui/components/Button.tsx"));
+ assert!(is_design_system_path("src/shared/Card.tsx"));
+ assert!(!is_design_system_path("src/features/orders/Button.tsx"));
+ }
+
+ #[test]
+ fn flags_local_fork_of_a_shared_component() {
+ let temp = TempDb::new("fork");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let shared = node("web", "packages/ui/components/Button.tsx", "Button", 0);
+ let fork = node("web", "src/features/orders/Button.tsx", "Button", 1);
+ let unique = node("web", "src/features/orders/OrderList.tsx", "OrderList", 2);
+ store
+ .bulk_insert(&[shared.clone(), fork.clone(), unique.clone()], &[])
+ .expect("write");
+
+ let opportunities = analyze_shared_component_reuse(&store, "web").expect("analyze");
+ assert_eq!(opportunities.len(), 1);
+ assert_eq!(
+ opportunities[0].local_file,
+ "src/features/orders/Button.tsx"
+ );
+ assert_eq!(
+ opportunities[0].shared_file,
+ "packages/ui/components/Button.tsx"
+ );
+ }
+
+ #[test]
+ fn shared_only_or_no_duplicate_yields_nothing() {
+ let temp = TempDb::new("clean");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let shared = node("web", "packages/ui/components/Modal.tsx", "Modal", 0);
+ let local = node("web", "src/features/orders/OrderRow.tsx", "OrderRow", 1);
+ store.bulk_insert(&[shared, local], &[]).expect("write");
+
+ assert!(
+ analyze_shared_component_reuse(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+}
diff --git a/crates/gather-step-cli/src/commands/doctor.rs b/crates/gather-step-cli/src/commands/doctor.rs
index 38c2e651..ea85eaa6 100644
--- a/crates/gather-step-cli/src/commands/doctor.rs
+++ b/crates/gather-step-cli/src/commands/doctor.rs
@@ -1,7 +1,11 @@
use anyhow::{Context, Result, bail};
use clap::Args;
use comfy_table::{Cell, ContentArrangement, Table, presets::UTF8_BORDERS_ONLY};
-use gather_step_analysis::{SemanticHealthReport, semantic_health_for_repo};
+use gather_step_analysis::{
+ SemanticHealthReport, analyze_shared_component_reuse, find_cycles, find_mock_leakage,
+ semantic_health_for_repo,
+};
+use gather_step_core::EdgeKind;
use gather_step_core::NodeKind;
use gather_step_core::RegistryStore;
use gather_step_parser::resolve::{ResolutionInput, is_non_actionable_unresolved_call};
@@ -24,6 +28,8 @@ struct DoctorOutput {
issue_count: usize,
pack_metrics: PackDoctorOutput,
repos: Vec,
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
+ quality_advisories: Vec,
}
#[derive(Debug, Serialize)]
@@ -90,12 +96,16 @@ pub(crate) fn execute(
let issue_count = repos.iter().map(|repo| repo.issues.len()).sum();
let pack_metrics = pack_metrics(storage.metadata()).context("computing pack diagnostics")?;
+ let repo_names: Vec = repos.iter().map(|repo| repo.repo.clone()).collect();
+ let quality_advisories = collect_quality_advisories(storage, &repo_names)
+ .context("collecting code-quality advisories")?;
let payload = DoctorOutput {
event: "doctor_completed",
ok: issue_count == 0,
issue_count,
pack_metrics,
repos,
+ quality_advisories,
};
let mut lines = Vec::new();
@@ -136,6 +146,15 @@ pub(crate) fn execute(
payload.pack_metrics.unresolved_packs
));
}
+ if !payload.quality_advisories.is_empty() {
+ lines.push(format!(
+ "Code-quality advisories ({}):",
+ payload.quality_advisories.len()
+ ));
+ for advisory in &payload.quality_advisories {
+ lines.push(format!(" - {advisory}"));
+ }
+ }
let payload_json = json!(payload);
Ok(if issue_count == 0 {
RenderedCommand::success(payload_json, lines)
@@ -148,6 +167,75 @@ pub(crate) fn execute(
})
}
+const MAX_ADVISORIES_PER_CATEGORY: usize = 50;
+
+fn collect_quality_advisories(
+ storage: &StorageCoordinator,
+ repos: &[String],
+) -> Result> {
+ let graph = storage.graph();
+ let mut advisories = Vec::new();
+
+ let cycles = find_cycles(graph, Some(&[EdgeKind::Imports, EdgeKind::Calls]))
+ .context("detecting dependency cycles")?;
+ push_capped(
+ &mut advisories,
+ cycles.iter().map(|cycle| {
+ let scope = if cycle.cross_repo {
+ " (cross-repo)"
+ } else {
+ ""
+ };
+ format!("Dependency cycle{scope}: {}", cycle.nodes.join(" -> "))
+ }),
+ "dependency cycle",
+ );
+
+ for repo in repos {
+ let leaks = find_mock_leakage(graph, repo)
+ .with_context(|| format!("detecting mock leakage in `{repo}`"))?;
+ push_capped(
+ &mut advisories,
+ leaks.iter().map(|leak| {
+ format!(
+ "Mock import in production (`{repo}`): {} imports {}",
+ leak.importer_file, leak.mock_file
+ )
+ }),
+ "mock import in production",
+ );
+
+ let forks = analyze_shared_component_reuse(graph, repo)
+ .with_context(|| format!("auditing shared-component reuse in `{repo}`"))?;
+ push_capped(
+ &mut advisories,
+ forks.iter().map(|fork| {
+ format!(
+ "Reuse opportunity (`{repo}`): {} duplicates shared `{}` ({})",
+ fork.local_file, fork.shared_symbol, fork.shared_file
+ )
+ }),
+ "reuse opportunity",
+ );
+ }
+
+ Ok(advisories)
+}
+
+fn push_capped(advisories: &mut Vec, items: impl Iterator- , label: &str) {
+ let collected: Vec = items.collect();
+ let total = collected.len();
+ for item in collected.into_iter().take(MAX_ADVISORIES_PER_CATEGORY) {
+ advisories.push(item);
+ }
+ if total > MAX_ADVISORIES_PER_CATEGORY {
+ advisories.push(format!(
+ "... and {} more {label} finding(s) not shown",
+ total - MAX_ADVISORIES_PER_CATEGORY
+ ));
+ }
+}
+
fn inspect_repo(
repo: &str,
registered: &gather_step_core::RegisteredRepo,
diff --git a/crates/gather-step-cli/src/commands/status.rs b/crates/gather-step-cli/src/commands/status.rs
index 38d9ffe6..f05f24f0 100644
--- a/crates/gather-step-cli/src/commands/status.rs
+++ b/crates/gather-step-cli/src/commands/status.rs
@@ -6,8 +6,9 @@ use gather_step_analysis::{
GraphQuery, SemanticHealthReport, semantic_health_for_repo, semantic_health_for_workspace,
};
use gather_step_core::{DepthLevel, RegistryStore};
+use gather_step_git::{GitHistoryIndexer, GitRepoSource, IndexFreshness};
use gather_step_mcp::output::redact::relativize_to_workspace;
-use gather_step_storage::{ContextPackStats, GraphStore, StorageCoordinator};
+use gather_step_storage::{ContextPackStats, GraphStore, MetadataStore, StorageCoordinator};
use serde::Serialize;
use serde_json::{Value, json};
@@ -37,6 +38,7 @@ struct RepoStatusOutput {
path: String,
path_exists: bool,
depth_level: String,
+ freshness: String,
last_indexed_at: Option,
registry_file_count: u64,
registry_symbol_count: u64,
@@ -190,12 +192,18 @@ pub(crate) fn execute(
unresolved_inputs,
)
.with_context(|| format!("computing semantic health for `{repo}`"))?;
+ let indexed_sha = storage
+ .metadata()
+ .get_last_commit_sha(repo)
+ .with_context(|| format!("loading indexed commit SHA for `{repo}`"))?;
+ let freshness = repo_freshness(repo, ®istered.path, indexed_sha.as_deref());
Ok(RepoStatusOutput {
repo: repo.clone(),
path: relativize_to_workspace(®istered.path, workspace_path),
path_exists: registered.path.exists(),
depth_level: depth_label(registered.depth_level).to_owned(),
+ freshness,
last_indexed_at: registered.last_indexed_at.clone(),
registry_file_count: registered.file_count,
registry_symbol_count: registered.symbol_count,
@@ -268,6 +276,7 @@ pub(crate) fn execute(
repo_table.set_header(vec![
"Repo",
"Depth",
+ "Freshness",
"Indexed",
"Files",
"Symbols",
@@ -280,6 +289,7 @@ pub(crate) fn execute(
repo_table.add_row(vec![
Cell::new(&repo.repo),
Cell::new(&repo.depth_level),
+ Cell::new(&repo.freshness),
Cell::new(repo.last_indexed_at.as_deref().unwrap_or("never")),
Cell::new(format!(
"{}/{}",
@@ -330,6 +340,22 @@ fn format_semantic_summary(health: &SemanticHealthReport) -> String {
)
}
+fn repo_freshness(repo: &str, path: &std::path::Path, indexed_sha: Option<&str>) -> String {
+ let indexer = GitHistoryIndexer::new(GitRepoSource::from_path(path), repo);
+ match indexer.index_freshness(indexed_sha) {
+ Ok(freshness) => freshness_label(&freshness).to_owned(),
+ Err(_) => "unknown".to_owned(),
+ }
+}
+
+fn freshness_label(freshness: &IndexFreshness) -> &'static str {
+ match freshness {
+ IndexFreshness::Fresh { .. } => "fresh",
+ IndexFreshness::Stale { .. } => "stale",
+ IndexFreshness::NeverIndexed { .. } => "never_indexed",
+ }
+}
+
fn depth_label(depth: DepthLevel) -> &'static str {
match depth {
DepthLevel::Level1 => "level1",
@@ -470,5 +496,39 @@ mod tests {
Some("status_completed"),
"status payload should have event=status_completed"
);
+ for repo in repos {
+ let freshness = repo["freshness"]
+ .as_str()
+ .expect("each repo should carry a freshness verdict");
+ assert!(
+ matches!(freshness, "fresh" | "stale" | "never_indexed" | "unknown"),
+ "unexpected freshness verdict: {freshness}"
+ );
+ }
+ }
+
+ #[test]
+ fn freshness_label_maps_every_variant() {
+ use gather_step_git::IndexFreshness;
+
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::Fresh {
+ head_sha: "abc".to_owned()
+ }),
+ "fresh"
+ );
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::Stale {
+ indexed_sha: "old".to_owned(),
+ head_sha: "new".to_owned()
+ }),
+ "stale"
+ );
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::NeverIndexed {
+ head_sha: "abc".to_owned()
+ }),
+ "never_indexed"
+ );
}
}
diff --git a/crates/gather-step-cli/src/errors.rs b/crates/gather-step-cli/src/errors.rs
index c7643de9..631379b0 100644
--- a/crates/gather-step-cli/src/errors.rs
+++ b/crates/gather-step-cli/src/errors.rs
@@ -6,6 +6,36 @@ use gather_step_storage::{GraphStoreError, MetadataStoreError, SearchStoreError}
const SCHEMA_VERSION_MISMATCH_MESSAGE: &str = "Index schema version mismatch — built by a different gather-step release. Next step: run `gather-step index --auto-recover` to rebuild, or `gather-step clean && gather-step index`.";
+pub const GRAPH_LOCKED_EXIT_CODE: u8 = 75;
+
+#[must_use]
+pub fn graph_lock_contention(error: &Error) -> bool {
+ for cause in error.chain() {
+ if let Some(graph_error) = cause.downcast_ref::()
+ && matches!(
+ graph_error,
+ GraphStoreError::StorageHeld { .. } | GraphStoreError::StorageHeldByDaemon { .. }
+ )
+ {
+ return true;
+ }
+ }
+ let full = error_chain_text(error);
+ contains_ascii_case_insensitive(&full, "locked by gather-step pid")
+ || contains_ascii_case_insensitive(&full, "already locked by another gather-step process")
+ || contains_ascii_case_insensitive(&full, "database already open")
+}
+
+#[must_use]
+pub fn graph_locked_json_disclosure(error: &Error) -> String {
+ serde_json::json!({
+ "event": "command_failed",
+ "degraded": "graph_locked",
+ "message": format_operator_error(error),
+ })
+ .to_string()
+}
+
#[must_use]
pub fn format_operator_error(error: &Error) -> String {
let full = error_chain_text(error);
@@ -177,6 +207,45 @@ mod tests {
);
}
+ #[test]
+ fn graph_lock_contention_detects_typed_lock_errors() {
+ use super::{GRAPH_LOCKED_EXIT_CODE, graph_lock_contention, graph_locked_json_disclosure};
+
+ let held = gather_step_storage::GraphStoreError::StorageHeld {
+ path: PathBuf::from("/tmp/graph.redb"),
+ };
+ let err: anyhow::Error = anyhow::Error::new(held);
+ assert!(graph_lock_contention(&err), "StorageHeld must be detected");
+
+ let by_daemon = gather_step_storage::GraphStoreError::StorageHeldByDaemon {
+ path: PathBuf::from("/tmp/graph.redb"),
+ pid: 4242,
+ started_at_epoch_ms: 1,
+ workspace_root: "/ws".to_owned(),
+ };
+ let err: anyhow::Error = anyhow::Error::new(by_daemon);
+ assert!(
+ graph_lock_contention(&err),
+ "StorageHeldByDaemon must be detected"
+ );
+
+ let disclosure: serde_json::Value =
+ serde_json::from_str(&graph_locked_json_disclosure(&err)).expect("valid json");
+ assert_eq!(disclosure["degraded"], "graph_locked");
+ assert_eq!(disclosure["event"], "command_failed");
+
+ assert_ne!(GRAPH_LOCKED_EXIT_CODE, 0);
+ assert_ne!(GRAPH_LOCKED_EXIT_CODE, 1);
+ }
+
+ #[test]
+ fn graph_lock_contention_ignores_unrelated_errors() {
+ use super::graph_lock_contention;
+
+ let err: anyhow::Error = anyhow::Error::msg("read /tmp/foo: permission denied");
+ assert!(!graph_lock_contention(&err));
+ }
+
#[test]
fn unhandled_error_preserves_full_cause_chain() {
// Wrap an inner error with anyhow::Context so the chain has two links.
diff --git a/crates/gather-step-cli/src/main.rs b/crates/gather-step-cli/src/main.rs
index f15135cb..5fd3192f 100644
--- a/crates/gather-step-cli/src/main.rs
+++ b/crates/gather-step-cli/src/main.rs
@@ -9,7 +9,10 @@ use clap::Parser;
use dhat::Alloc;
use gather_step::app::{self, AppContext};
use gather_step::commands::{self, Cli};
-use gather_step::errors::format_operator_error;
+use gather_step::errors::{
+ GRAPH_LOCKED_EXIT_CODE, format_operator_error, graph_lock_contention,
+ graph_locked_json_disclosure,
+};
#[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc;
@@ -49,14 +52,27 @@ async fn run_main() -> Result {
commands::run(cli, app).await
}
-/// Print the operator-facing error to stderr and return exit code 1.
+/// Print the operator-facing error to stderr and return an exit code.
///
/// Returning `ExitCode` rather than calling `std::process::exit(1)` lets
/// tokio's runtime tear down cleanly and lets stdio buffers flush — important
/// for `pr-review` (which prints a structured report on stdout) and any
/// command that emits trailing tracing lines.
fn print_operator_error_and_code(error: &Error) -> ExitCode {
+ if graph_lock_contention(error) {
+ if wants_json_output() {
+ let mut stdout = std::io::stdout().lock();
+ let _ = writeln!(stdout, "{}", graph_locked_json_disclosure(error));
+ }
+ let mut stderr = std::io::stderr().lock();
+ let _ = writeln!(stderr, "{}", format_operator_error(error));
+ return ExitCode::from(GRAPH_LOCKED_EXIT_CODE);
+ }
let mut stderr = std::io::stderr().lock();
let _ = writeln!(stderr, "{}", format_operator_error(error));
ExitCode::from(1)
}
+
+fn wants_json_output() -> bool {
+ std::env::args().any(|arg| arg == "--json")
+}
diff --git a/crates/gather-step-cli/tests/cli_commands.rs b/crates/gather-step-cli/tests/cli_commands.rs
index f38a90a2..f11bf51e 100644
--- a/crates/gather-step-cli/tests/cli_commands.rs
+++ b/crates/gather-step-cli/tests/cli_commands.rs
@@ -348,6 +348,72 @@ fn write_qa_reference_v4_workspace(root: &Path) {
.expect("notifications fixture source");
}
+#[test]
+fn resolution_snapshot_matches_golden() {
+ use gather_step_analysis::GraphQuery;
+
+ let temp = TempDir::new("resolution-snapshot");
+ write_fixture_workspace(temp.path());
+ run_ok(temp.path(), &["init"]);
+ run_ok(temp.path(), &["--json", "index"]);
+
+ let graph =
+ GraphStoreDb::open(temp.path().join(".gather-step/storage/graph.redb")).expect("graph");
+ let actual = GraphQuery::new(&graph)
+ .resolution_fingerprint()
+ .expect("fingerprint")
+ .join("\n");
+
+ let golden_path =
+ Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/golden/resolution_snapshot.golden");
+ if env::var_os("GATHER_STEP_BLESS_SNAPSHOT").is_some() {
+ fs::create_dir_all(golden_path.parent().expect("golden parent")).expect("golden dir");
+ fs::write(&golden_path, &actual).expect("write golden");
+ }
+
+ let expected = fs::read_to_string(&golden_path).unwrap_or_else(|_| {
+ panic!(
+ "missing golden {}; regenerate with GATHER_STEP_BLESS_SNAPSHOT=1",
+ golden_path.display()
+ )
+ });
+ assert_eq!(
+ actual, expected,
+ "resolution snapshot drifted; if the change is intended, regenerate with GATHER_STEP_BLESS_SNAPSHOT=1"
+ );
+}
+
+#[test]
+fn read_command_during_graph_lock_succeeds_via_snapshot() {
+ let temp = TempDir::new("graph-lock");
+ write_fixture_workspace(temp.path());
+ run_ok(temp.path(), &["init"]);
+ run_ok(temp.path(), &["--json", "index"]);
+
+ let graph_path = temp
+ .path()
+ .join(".gather-step")
+ .join("storage")
+ .join("graph.redb");
+ let held = GraphStoreDb::open(&graph_path).expect("should hold the graph lock");
+
+ let output = run_ok(temp.path(), &["search", "OrderList", "--json"]);
+ let search_json = stdout_json(&output);
+ assert_eq!(search_json["event"], "search_completed");
+ assert!(
+ search_json["hits"]
+ .as_array()
+ .expect("hits array")
+ .iter()
+ .any(|item| item["symbol_name"] == "OrderList"),
+ "snapshot read should return the indexed hit while the store is held"
+ );
+
+ drop(held);
+ let recovered = run_ok(temp.path(), &["search", "OrderList", "--json"]);
+ assert_eq!(stdout_json(&recovered)["event"], "search_completed");
+}
+
#[test]
fn cli_commands_work_on_indexed_fixture_workspace() {
let temp = TempDir::new("cli-commands");
@@ -439,12 +505,17 @@ fn cli_commands_work_on_indexed_fixture_workspace() {
let status = run_ok(temp.path(), &["status", "--json"]);
let status_json = stdout_json(&status);
assert_eq!(status_json["event"], "status_completed");
- assert!(
- !status_json["repos"]
- .as_array()
- .expect("repos array")
- .is_empty()
- );
+ let status_repos = status_json["repos"].as_array().expect("repos array");
+ assert!(!status_repos.is_empty());
+ for repo in status_repos {
+ let freshness = repo["freshness"]
+ .as_str()
+ .expect("each repo should carry a freshness verdict");
+ assert!(
+ matches!(freshness, "fresh" | "stale" | "never_indexed" | "unknown"),
+ "unexpected freshness verdict in status output: {freshness}"
+ );
+ }
let search = run_ok(temp.path(), &["search", "OrderList", "--json"]);
let search_json = stdout_json(&search);
@@ -1164,7 +1235,7 @@ fn metadata_schema_user_version_mismatch_reports_recovery_hint() {
}
#[test]
-fn concurrent_graph_open_reports_stable_process_error() {
+fn concurrent_graph_open_reads_via_snapshot() {
let temp = TempDir::new("concurrent-open");
write_fixture_workspace(temp.path());
run_ok(temp.path(), &["init"]);
@@ -1172,10 +1243,8 @@ fn concurrent_graph_open_reports_stable_process_error() {
let _held_graph = GraphStoreDb::open(temp.path().join(".gather-step/storage/graph.redb"))
.expect("graph should open and hold the redb lock");
- let output = run_fail(temp.path(), &["status", "--json"]);
- let stderr = String::from_utf8_lossy(&output.stderr);
- assert!(stderr.contains("Another gather-step process is using this workspace"));
- assert!(stderr.contains("Stop `gather-step watch`"));
+ let output = run_ok(temp.path(), &["status", "--json"]);
+ assert_eq!(stdout_json(&output)["event"], "status_completed");
}
#[test]
diff --git a/crates/gather-step-cli/tests/golden/resolution_snapshot.golden b/crates/gather-step-cli/tests/golden/resolution_snapshot.golden
new file mode 100644
index 00000000..8f6c4ae9
--- /dev/null
+++ b/crates/gather-step-cli/tests/golden/resolution_snapshot.golden
@@ -0,0 +1,34 @@
+ServiceAController DependsOn __di__EventBusClient none none
+ServiceAController UsesDecorator ServiceAController::@Controller none none
+ServiceAController.constructor UsesDecorator ServiceAController.constructor::@Controller none none
+ServiceAController.handleOrderCreated Consumes __event__kafka__order.created none none
+ServiceAController.handleOrderCreated UsesDecorator ServiceAController.handleOrderCreated::@Controller none none
+ServiceAController.handleOrderCreated UsesDecorator ServiceAController.handleOrderCreated::@MessagePattern none none
+ServiceAController.handleOrderCreated UsesEventFrom __event__kafka__order.created none none
+ServiceAController.listOrders ProducesEventFor __event__kafka__order.created none none
+ServiceAController.listOrders Publishes __event__kafka__order.created none none
+ServiceAController.listOrders Serves __route__GET__/orders none none
+ServiceAController.listOrders UsesDecorator ServiceAController.listOrders::@Controller none none
+ServiceAController.listOrders UsesDecorator ServiceAController.listOrders::@Get none none
+backend_standard::package.json Defines backend_standard::__repo__ none none
+backend_standard::src/controller.ts Defines ServiceAController none none
+backend_standard::src/controller.ts Defines ServiceAController.constructor none none
+backend_standard::src/controller.ts Defines ServiceAController.handleOrderCreated none none
+backend_standard::src/controller.ts Defines ServiceAController.listOrders none none
+backend_standard::src/controller.ts Defines module::backend_standard::src/controller.ts none none
+backend_standard::src/controller.ts Defines src/controller.ts::Controller none none
+backend_standard::src/controller.ts Defines src/controller.ts::Get none none
+backend_standard::src/controller.ts Defines src/controller.ts::MessagePattern none none
+backend_standard::src/controller.ts Imports module-import::@nestjs/common none none
+backend_standard::src/controller.ts Imports module-import::@nestjs/microservices none none
+frontend_standard::package.json Defines frontend_standard::__repo__ none none
+frontend_standard::src/OrderList.test.tsx Defines module::frontend_standard::src/OrderList.test.tsx none none
+frontend_standard::src/OrderList.test.tsx Defines src/OrderList.test.tsx::OrderList none none
+frontend_standard::src/OrderList.test.tsx Imports module-import::./OrderList none none
+frontend_standard::src/OrderList.tsx Defines OrderList none none
+frontend_standard::src/OrderList.tsx Defines module::frontend_standard::src/OrderList.tsx none none
+module::backend_standard::src/controller.ts Exports ServiceAController none none
+module::backend_standard::src/controller.ts Exports ServiceAController.constructor none none
+module::backend_standard::src/controller.ts Exports ServiceAController.handleOrderCreated none none
+module::backend_standard::src/controller.ts Exports ServiceAController.listOrders none none
+module::frontend_standard::src/OrderList.tsx Exports OrderList none none
\ No newline at end of file
diff --git a/crates/gather-step-core/src/graph.rs b/crates/gather-step-core/src/graph.rs
index b040a17d..70c83744 100644
--- a/crates/gather-step-core/src/graph.rs
+++ b/crates/gather-step-core/src/graph.rs
@@ -194,6 +194,17 @@ impl EdgeMetadata {
self.resolver_strategy()
.map_or(0, crate::ResolverStrategy::strategy_weight)
}
+
+ /// Whether this edge passes a `min_confidence` traversal filter.
+ ///
+ /// `None` confidence means the edge is a definite structural fact (e.g. an
+ /// import-resolved call), not a heuristic guess, so it passes any threshold
+ /// rather than being filtered out. An explicit confidence is compared
+ /// against the threshold; a `None` threshold disables filtering entirely.
+ #[must_use]
+ pub fn passes_confidence(&self, min_confidence: Option) -> bool {
+ min_confidence.is_none_or(|min| self.confidence.is_none_or(|actual| actual >= min))
+ }
}
#[derive(
@@ -263,6 +274,25 @@ mod tests {
};
use crate::schema::{EdgeKind, NodeKind};
+ #[test]
+ fn passes_confidence_treats_none_as_trusted_and_filters_low() {
+ // No threshold disables filtering: everything passes.
+ let mut meta = EdgeMetadata::default();
+ assert!(meta.passes_confidence(None));
+
+ // None confidence is a definite structural edge (not a heuristic
+ // guess) and must pass any threshold rather than be filtered out.
+ assert!(meta.passes_confidence(Some(500)));
+
+ // An explicit low confidence is filtered out below the threshold.
+ meta.confidence = Some(300);
+ assert!(!meta.passes_confidence(Some(500)));
+
+ // At or above the threshold, it passes.
+ meta.confidence = Some(900);
+ assert!(meta.passes_confidence(Some(500)));
+ }
+
#[test]
fn node_id_is_deterministic_for_identical_inputs() {
let first = node_id("service-a", "src/foo.ts", NodeKind::Function, "execute");
diff --git a/crates/gather-step-core/src/schema.rs b/crates/gather-step-core/src/schema.rs
index 80e3caff..112fd8a7 100644
--- a/crates/gather-step-core/src/schema.rs
+++ b/crates/gather-step-core/src/schema.rs
@@ -227,6 +227,15 @@ impl EdgeKind {
self as u8
}
+ /// Whether this edge represents a *consumer*/usage of the target rather
+ /// than a structural `Defines` (file→symbol) or `Imports` edge. Used to
+ /// count real consumers for reuse ranking and resolution scoring, so a
+ /// "consumer count" reflects callers/users, not raw inbound-edge volume.
+ #[must_use]
+ pub const fn is_consumer_edge(self) -> bool {
+ !matches!(self, Self::Defines | Self::Imports)
+ }
+
/// Whether this edge kind represents a semantic bridge link
/// connecting a real symbol to a virtual bridge node (`Route`, `Topic`,
/// `SharedSymbol`, `PayloadContract`, …) — i.e. an edge whose correctness
diff --git a/crates/gather-step-git/src/history.rs b/crates/gather-step-git/src/history.rs
index 30f27163..afa06c81 100644
--- a/crates/gather-step-git/src/history.rs
+++ b/crates/gather-step-git/src/history.rs
@@ -163,6 +163,44 @@ pub enum HistorySyncOutcome {
NoChange { repo: String, head_sha: String },
}
+/// Query-time freshness of an indexed repo relative to the working tree's
+/// current HEAD (A13). Distinct from [`HistorySyncOutcome`], which describes an
+/// indexing run; this answers "can I trust what the index says right now?" so
+/// a trace/search against a stale index can be flagged instead of silently
+/// answering about code that no longer matches the tree.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum IndexFreshness {
+ /// The indexed commit matches the current HEAD — the index is current.
+ Fresh { head_sha: String },
+ /// The index reflects an older commit than the working tree's HEAD.
+ Stale {
+ indexed_sha: String,
+ head_sha: String,
+ },
+ /// No commit was recorded for this repo — it has never been indexed.
+ NeverIndexed { head_sha: String },
+}
+
+/// Classify index freshness by comparing the recorded indexed commit against
+/// the repository's current HEAD. Pure so it is unit-testable without git or a
+/// metadata store.
+#[must_use]
+pub fn classify_freshness(indexed_sha: Option<&str>, head_sha: &str) -> IndexFreshness {
+ match indexed_sha {
+ None => IndexFreshness::NeverIndexed {
+ head_sha: head_sha.to_owned(),
+ },
+ Some(sha) if sha == head_sha => IndexFreshness::Fresh {
+ head_sha: head_sha.to_owned(),
+ },
+ Some(sha) => IndexFreshness::Stale {
+ indexed_sha: sha.to_owned(),
+ head_sha: head_sha.to_owned(),
+ },
+ }
+}
+
impl HistorySyncOutcome {
#[must_use]
pub fn repo(&self) -> &str {
@@ -350,6 +388,17 @@ impl GitHistoryIndexer {
Ok(repo.head_id().map_err(Box::new)?.detach().to_string())
}
+ /// Classify how fresh an indexed repo is relative to its current HEAD (A13).
+ /// `indexed_sha` is the commit recorded for the repo (e.g. from
+ /// `MetadataStore::get_last_commit_sha`); `None` means never indexed.
+ pub fn index_freshness(
+ &self,
+ indexed_sha: Option<&str>,
+ ) -> Result {
+ let head = self.head_sha()?;
+ Ok(classify_freshness(indexed_sha, &head))
+ }
+
/// Walks the repository, persists the new commits + per-file deltas via
/// `store`, advances `repo_sync_state.last_commit_sha`, and returns the
/// outcome describing which sync branch ran.
@@ -1074,9 +1123,35 @@ mod tests {
use super::{
CommitFact, CommitFileChangeKind, CommitFileDelta, GitHistoryIndexer, GitRepoSource,
- HistorySyncOutcome, normalize_author_email_bytes,
+ HistorySyncOutcome, IndexFreshness, classify_freshness, normalize_author_email_bytes,
};
+ #[test]
+ fn classify_freshness_distinguishes_fresh_stale_and_never_indexed() {
+ // Indexed commit matches HEAD => fresh.
+ assert_eq!(
+ classify_freshness(Some("abc123"), "abc123"),
+ IndexFreshness::Fresh {
+ head_sha: "abc123".to_owned()
+ }
+ );
+ // Indexed commit older than HEAD => stale, carrying both SHAs.
+ assert_eq!(
+ classify_freshness(Some("old111"), "new222"),
+ IndexFreshness::Stale {
+ indexed_sha: "old111".to_owned(),
+ head_sha: "new222".to_owned(),
+ }
+ );
+ // No recorded commit => never indexed.
+ assert_eq!(
+ classify_freshness(None, "abc123"),
+ IndexFreshness::NeverIndexed {
+ head_sha: "abc123".to_owned()
+ }
+ );
+ }
+
/// Minimal hand-rolled `TempDir` that deletes itself on drop. Avoids
/// adding a `tempfile` dev-dependency.
struct TempDir {
diff --git a/crates/gather-step-git/src/lib.rs b/crates/gather-step-git/src/lib.rs
index 4c6868b0..8ce1f07b 100644
--- a/crates/gather-step-git/src/lib.rs
+++ b/crates/gather-step-git/src/lib.rs
@@ -24,7 +24,8 @@ pub use classify::{
};
pub use history::{
CommitFact, CommitFileChangeKind, CommitFileDelta, GitHistoryError, GitHistoryIndexer,
- GitHistorySyncError, GitIndexerOptions, GitRepoSource, HistorySyncOutcome,
+ GitHistorySyncError, GitIndexerOptions, GitRepoSource, HistorySyncOutcome, IndexFreshness,
+ classify_freshness,
};
pub use intelligence::{
RepoIntelligenceError, RepoIntelligenceOptions, RepoIntelligenceReport,
diff --git a/crates/gather-step-mcp/src/catalog.rs b/crates/gather-step-mcp/src/catalog.rs
index 1ae4c72a..75fd5fac 100644
--- a/crates/gather-step-mcp/src/catalog.rs
+++ b/crates/gather-step-mcp/src/catalog.rs
@@ -92,7 +92,10 @@ pub const MCP_TOOLS: &[(&str, &str)] = &[
"planning_pack",
"Context pack for architecture and planning tasks",
),
- ("plan_change", "Alias for planning-oriented context"),
+ (
+ "plan_change",
+ "Typed plan-change product (twelve planning sections)",
+ ),
("debug_pack", "Context pack for debugging production issues"),
("fix_pack", "Context pack scoped for a bug fix"),
("fix_surface", "Get a narrower fix-oriented surface"),
diff --git a/crates/gather-step-mcp/src/server.rs b/crates/gather-step-mcp/src/server.rs
index c93093cd..aaae37ed 100644
--- a/crates/gather-step-mcp/src/server.rs
+++ b/crates/gather-step-mcp/src/server.rs
@@ -50,11 +50,11 @@ use crate::{
},
orientation::{GraphSchemaResponse, ListReposResponse, get_graph_schema, list_repos},
packs::{
- ContextPackRequest, ContextPackResponse, ModePackRequest,
+ ContextPackRequest, ContextPackResponse, ModePackRequest, PlanChangeResponse,
change_impact_pack_tool as run_change_impact_pack,
context_pack_tool as run_context_pack, debug_pack_tool as run_debug_pack,
fix_pack_tool as run_fix_pack, planning_pack_tool as run_planning_pack,
- review_pack_tool as run_review_pack,
+ review_pack_tool as run_review_pack, run_plan_change,
},
pr_review::{
PrReviewInput, PrReviewResponse, PrReviewSetInput, PrReviewSetResponse, run_pr_review,
@@ -784,17 +784,17 @@ impl GatherStepMcpServer {
#[tool(
name = "plan_change",
- description = "Return a bounded planning pack for a target symbol.",
+ description = "Return a typed plan-change product (twelve planning sections) for a target symbol.",
annotations(read_only_hint = true)
)]
pub async fn plan_change_tool(
&self,
Parameters(request): Parameters,
- ) -> Result, String> {
+ ) -> Result, String> {
let args = serde_json::to_value(&request).unwrap_or_default();
let ctx = Arc::clone(&self.ctx);
self.traced_call("plan_change", &args, move || {
- run_planning_pack(&ctx, request)
+ run_plan_change(&ctx, request)
.map(Json)
.map_err(|error| error.to_string())
})
diff --git a/crates/gather-step-mcp/src/tools/composite.rs b/crates/gather-step-mcp/src/tools/composite.rs
index 9717e710..94aee1be 100644
--- a/crates/gather-step-mcp/src/tools/composite.rs
+++ b/crates/gather-step-mcp/src/tools/composite.rs
@@ -23,7 +23,7 @@ use crate::{
orientation::{RepoSummary, list_repos},
packs::{
ContextPackRequest, ModePackRequest, change_impact_pack_tool, context_pack_tool,
- debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool,
+ debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool, run_plan_change,
},
repo_intelligence::{
DeadCodeRequest, RepoScopedRequest, WhoOwnsRequest, get_conventions_tool,
@@ -513,11 +513,12 @@ fn execute_batch_op(ctx: &McpContext, op: BatchQueryOperation) -> BatchQueryResu
context_pack_tool(ctx, args).and_then(to_value)
})
}
- "planning_pack" | "plan_change" => {
- parse_and_run::(op.arguments, |args| {
- planning_pack_tool(ctx, args).and_then(to_value)
- })
- }
+ "planning_pack" => parse_and_run::(op.arguments, |args| {
+ planning_pack_tool(ctx, args).and_then(to_value)
+ }),
+ "plan_change" => parse_and_run::(op.arguments, |args| {
+ run_plan_change(ctx, args).and_then(to_value)
+ }),
"debug_pack" => parse_and_run::(op.arguments, |args| {
debug_pack_tool(ctx, args).and_then(to_value)
}),
diff --git a/crates/gather-step-mcp/src/tools/context_pack.rs b/crates/gather-step-mcp/src/tools/context_pack.rs
index 34e35cf9..bb3bae48 100644
--- a/crates/gather-step-mcp/src/tools/context_pack.rs
+++ b/crates/gather-step-mcp/src/tools/context_pack.rs
@@ -1,5 +1,6 @@
pub use crate::tools::packs::{
ContextPackData, ContextPackMeta, ContextPackRequest, ContextPackResponse, ModePackRequest,
- PackBridge, PackItem, context_pack_tool, debug_pack_tool, fix_pack_tool, planning_pack_tool,
- review_pack_tool,
+ PackBridge, PackItem, PlanChangeContract, PlanChangeResponse, context_pack_tool,
+ debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool, run_plan_change,
+ validate_plan_change_contract,
};
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index 52c4d843..d949aee8 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -207,6 +207,247 @@ pub struct ContextPackResponse {
pub meta: Option,
}
+const PLAN_CHANGE_SCHEMA_VERSION: u8 = 3;
+
+const PLAN_CHANGE_SECTIONS: [&str; 12] = [
+ "reuse_candidates",
+ "sibling_clone_targets",
+ "standards_to_preserve",
+ "integration_checks",
+ "cross_repo_reachability",
+ "display_ownership_checks",
+ "write_path_or_state_machine_risks",
+ "required_braingent_records",
+ "open_unknowns",
+ "pass_two_gap_dimensions",
+ "v1_completeness_checklist",
+ "verification_plan",
+];
+
+const PASS_TWO_GAP_DIMENSIONS: [&str; 8] = [
+ "State machine: does this add/alter a status or lifecycle state? Enumerate transitions and conflict handling.",
+ "Token/URL security: are tokens, signed URLs, or IDs exposed or accepted? Validate scope, expiry, and tampering.",
+ "Dependent ticket: is there upstream/downstream work this depends on or unblocks? Link it.",
+ "Atomicity: do multi-document or multi-step writes need a transaction or idempotency guard?",
+ "Index-per-field: does any new query/filter field need an index? Confirm coverage.",
+ "Event contract: grep producers and consumers of any touched event/topic for set-difference drift.",
+ "Audit category: does this mutating path need an audit-log entry with the correct category?",
+ "Notification: should this change emit or suppress a user/system notification?",
+];
+
+const V1_COMPLETENESS_CHECKLIST: [&str; 19] = [
+ "Scale test: does it hold at production data volume? → verify: run against a realistic-size fixture.",
+ "Failure-path lifecycle: are partial-failure and retry paths handled? → verify: exercise the failure branch.",
+ "Consumer reachability: does the output actually reach every consumer? → verify: trace producer→consumer.",
+ "Downstream coupling: which downstream systems depend on this shape? → verify: enumerate consumers.",
+ "Delete/invalidate: are deletes and cache/index invalidations handled? → verify: delete-path test.",
+ "Single source of truth: is the value derived once, not duplicated? → verify: grep for parallel computations.",
+ "Non-functional: latency/memory/throughput within budget? → verify: measure against baseline.",
+ "Exclusion scope: what is explicitly out of scope? → verify: state exclusions in the PR.",
+ "Cross-commit regression: could a later commit re-break this? → verify: add a regression test.",
+ "Deferred as follow-ups: are deferrals recorded as tickets? → verify: link follow-up issues.",
+ "Prior-release correctness/security gate: did a prior release defer hardening here? → verify: re-check the deferred item.",
+ "Partial-migration tail: zero residual call-sites workspace-wide? → verify: grep shows no residual callers.",
+ "Release-profile divergence: verify flags on the installed profile. → verify: check [profile.dist].",
+ "Teardown under-spec: disposable artifacts specify creation/reuse-key/safe-delete/in-flight-writer guard. → verify: all four present.",
+ "Schema-version forward-compat: a version-mismatch refusal/rebuild path exists. → verify: mismatch is refused, not misread.",
+ "Derived-field blast radius: enumerate source, projection writer, all readers, filters, search/index, backfill. → verify: all six listed.",
+ "Cross-service ownership: should the owner publish a snapshot/resolver before adopting a join? → verify: question answered.",
+ "Lossy rollback: enumerate unrecoverable rows/fields. → verify: rollback notes list them.",
+ "Implied-surface sweep: events/audit/notification/rate-limit/idempotency even when the spec is silent. → verify: sweep performed.",
+];
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanChangeContract {
+ pub schema_version: u8,
+ pub sections: Vec,
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
+ pub exclusion_ledger: Vec,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanChangeResponse {
+ pub target: String,
+ pub found: bool,
+ pub reuse_candidates: Vec,
+ pub sibling_clone_targets: Vec,
+ pub standards_to_preserve: Vec,
+ pub integration_checks: Vec,
+ pub cross_repo_reachability: Vec,
+ pub display_ownership_checks: Vec,
+ pub write_path_or_state_machine_risks: Vec,
+ pub required_braingent_records: Vec,
+ pub open_unknowns: Vec,
+ pub pass_two_gap_dimensions: Vec,
+ pub v1_completeness_checklist: Vec,
+ pub verification_plan: Vec,
+ pub contract: PlanChangeContract,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub meta: Option,
+}
+
+fn build_plan_change(
+ target: &str,
+ found: bool,
+ items: &[PackItem],
+ unresolved_gaps: &[String],
+ planning_proofs: &[serde_json::Value],
+ next_steps: &[String],
+ change_impact: &ChangeImpactSummary,
+ meta: Option,
+) -> PlanChangeResponse {
+ let mut reuse_candidates = Vec::new();
+ let mut sibling_clone_targets = Vec::new();
+ for item in items {
+ if item.category == "target" {
+ continue;
+ }
+ if is_shared_module_path(&item.file_path) {
+ reuse_candidates.push(item.clone());
+ } else {
+ sibling_clone_targets.push(item.clone());
+ }
+ }
+
+ let verification_plan = next_steps
+ .iter()
+ .map(|step| format!("Run `{step}` and confirm the result before changing code."))
+ .collect();
+
+ let mut integration_checks = Vec::new();
+ for repo in &change_impact.confirmed_downstream_repos {
+ integration_checks.push(format!(
+ "Integration: verify confirmed downstream consumer `{repo}` still works after this change."
+ ));
+ }
+ for repo in &change_impact.probable_downstream_repos {
+ integration_checks.push(format!(
+ "Integration: check probable downstream `{repo}` (partial evidence) for impact."
+ ));
+ }
+
+ let mut display_ownership_checks = Vec::new();
+ for caller in &change_impact.cross_repo_callers {
+ display_ownership_checks.push(format!(
+ "Display ownership: cross-service ref `{}::{}` — confirm display fields are sourced \
+ from the owner service (snapshot/API), not a direct cross-service DB lookup, and \
+ that access control stays in the owner.",
+ caller.repo, caller.symbol_name
+ ));
+ }
+
+ let mut write_path_or_state_machine_risks = Vec::new();
+ for caller in &change_impact.cross_repo_callers {
+ write_path_or_state_machine_risks.push(format!(
+ "Cross-repo caller `{}::{}` depends on this behavior — preserve its contract.",
+ caller.repo, caller.symbol_name
+ ));
+ }
+ for unresolved in &change_impact.unresolved_possible {
+ write_path_or_state_machine_risks.push(format!(
+ "Unresolved possible impact: `{unresolved}` — confirm before changing."
+ ));
+ }
+ if change_impact.truncated_repos.is_some() {
+ write_path_or_state_machine_risks.push(
+ "Downstream fan-out was capped — affected repos exist beyond the listed set."
+ .to_owned(),
+ );
+ }
+
+ let mut exclusion_ledger = Vec::new();
+ if let Some(truncated) = &change_impact.truncated_repos {
+ exclusion_ledger.push(format!(
+ "Downstream fan-out capped: {} repo(s) omitted ({}).",
+ truncated.count,
+ truncated.reason_codes.join(", ")
+ ));
+ }
+ if let Some(meta) = &meta {
+ for warning in &meta.warnings {
+ exclusion_ledger.push(format!("Planning warning: {warning}"));
+ }
+ }
+ exclusion_ledger
+ .push("standards_to_preserve: not yet computed (no standards engine).".to_owned());
+ exclusion_ledger
+ .push("required_braingent_records: not yet computed (no record ingestion).".to_owned());
+ let contract = PlanChangeContract {
+ schema_version: PLAN_CHANGE_SCHEMA_VERSION,
+ sections: PLAN_CHANGE_SECTIONS
+ .iter()
+ .map(|s| (*s).to_owned())
+ .collect(),
+ exclusion_ledger,
+ };
+
+ PlanChangeResponse {
+ target: target.to_owned(),
+ found,
+ reuse_candidates,
+ sibling_clone_targets,
+ standards_to_preserve: Vec::new(),
+ integration_checks,
+ cross_repo_reachability: planning_proofs.to_vec(),
+ display_ownership_checks,
+ write_path_or_state_machine_risks,
+ required_braingent_records: Vec::new(),
+ open_unknowns: unresolved_gaps.to_vec(),
+ pass_two_gap_dimensions: PASS_TWO_GAP_DIMENSIONS
+ .iter()
+ .map(|item| (*item).to_owned())
+ .collect(),
+ v1_completeness_checklist: V1_COMPLETENESS_CHECKLIST
+ .iter()
+ .map(|item| (*item).to_owned())
+ .collect(),
+ verification_plan,
+ contract,
+ meta,
+ }
+}
+
+pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
+ let mut violations = Vec::new();
+ if plan.contract.schema_version != PLAN_CHANGE_SCHEMA_VERSION {
+ violations.push(format!(
+ "schema_version {} != expected {PLAN_CHANGE_SCHEMA_VERSION}",
+ plan.contract.schema_version
+ ));
+ }
+ if !plan
+ .contract
+ .sections
+ .iter()
+ .map(String::as_str)
+ .eq(PLAN_CHANGE_SECTIONS)
+ {
+ violations.push(format!(
+ "section manifest {:?} != canonical {PLAN_CHANGE_SECTIONS:?}",
+ plan.contract.sections
+ ));
+ }
+ violations
+}
+
+pub fn run_plan_change(
+ ctx: &McpContext,
+ request: ModePackRequest,
+) -> Result {
+ let pack = planning_pack_tool(ctx, request)?;
+ let data = &pack.data;
+ Ok(build_plan_change(
+ &data.target,
+ data.found,
+ &data.items,
+ &data.unresolved_gaps,
+ &data.planning_proofs,
+ &data.next_steps,
+ &data.change_impact,
+ pack.meta.clone(),
+ ))
+}
+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub struct ContextPackData {
pub mode: String,
@@ -1084,22 +1325,39 @@ fn assemble_context_pack_for_symbol(
.map(|item| item.repo.clone())
.collect::>();
items.extend(contract_impact_items);
- items.sort_by(|left, right| {
- right
- .score
- .cmp(&left.score)
- .then(left.repo.cmp(&right.repo))
- .then(left.file_path.cmp(&right.file_path))
- .then(left.line_start.cmp(&right.line_start))
- .then(left.symbol_name.cmp(&right.symbol_name))
- .then(left.symbol_id.cmp(&right.symbol_id))
- });
+ sort_pack_items(&mut items);
items.dedup_by(|left, right| {
left.symbol_id == right.symbol_id && left.category == right.category
});
if let Some(repo) = options.repo_filter {
items.retain(|item| item.repo == repo);
}
+
+ if mode == PackMode::Planning {
+ // KNOWN CEILING: the reuse boost re-ranks the top `limit*5` base-ranked
+ // candidates; a canonical symbol ranked below that window for a very
+ // high-fan-out target is not rescued. The window keeps the boost (and
+ // the bounded re-sort below) cheap.
+ let window = options.limit.saturating_mul(5).min(items.len());
+ let graph = ctx.graph();
+ // Precompute distinct consumer counts in one pre-pass so the boost loop
+ // does map lookups instead of per-item graph I/O, deduped by symbol_id.
+ let mut consumer_counts: std::collections::BTreeMap =
+ std::collections::BTreeMap::new();
+ for item in &items[..window] {
+ consumer_counts
+ .entry(item.symbol_id.clone())
+ .or_insert_with(|| consumer_count(graph, &item.symbol_id));
+ }
+ apply_reuse_boost_with(&mut items[..window], |symbol_id| {
+ consumer_counts.get(symbol_id).copied().unwrap_or(0)
+ });
+ // The boost only raised window items' scores, and the window holds the
+ // top base-ranked candidates, so every boosted item still outranks the
+ // untouched tail — re-sorting just the window keeps global order.
+ sort_pack_items(&mut items[..window]);
+ }
+
items.truncate(options.limit);
let evidence_populated = items.len() <= 20;
@@ -2497,12 +2755,82 @@ fn populate_evidence_chain(
/// Items with a confirmed evidence chain (graph-proven path from anchor) are
/// boosted by 50 score points so they appear before lexical-only hits at
/// equivalent depth. The list is re-sorted by the updated scores.
-fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
+/// Whether a file path lives in a shared / design-system module — a strong
+/// signal that a symbol there is the canonical reusable thing rather than a
+/// local one-off. Markers are matched as-is (paths are conventionally
+/// lowercase), avoiding an allocation.
+fn is_shared_module_path(file_path: &str) -> bool {
+ // A path heuristic spanning common JS, Rust, Go, and Python shared-module
+ // layouts. NOTE: this is a heuristic; the pipeline-wide canonical signal
+ // (pack_assembly's `Canonical`) is the longer-term source of truth, pending
+ // threading it onto `PackItem`.
+ const MARKERS: &[&str] = &[
+ "shared/",
+ "design-system",
+ "packages/",
+ "/ui/components",
+ "@shared",
+ "common/",
+ "/lib/",
+ "libs/",
+ "internal/",
+ "/pkg/",
+ ];
+ MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+/// S3 graph-derived reuse boost. A symbol in a shared/design-system path or
+/// consumed by many modules is more likely the canonical reusable surface, so
+/// it should rank above local candidates. Consumer influence is capped so a
+/// single hub node cannot dominate the ranking.
+fn reuse_evidence_boost(file_path: &str, consumer_count: usize) -> u16 {
+ let mut boost = 0_u16;
+ if is_shared_module_path(file_path) {
+ boost = boost.saturating_add(40);
+ }
+ // Each consumer adds weight, capped at 30 so a single hub node cannot
+ // dominate the ranking; 3 points per consumer => up to +90.
+ let consumers = u16::try_from(consumer_count.min(30)).unwrap_or(u16::MAX);
+ boost.saturating_add(consumers.saturating_mul(3))
+}
+
+/// Distinct reuse consumers of a symbol: the source nodes of inbound edges,
+/// excluding structural `Defines`/`Imports` edges that do not represent a
+/// caller/user. Mirrors the edge-kind filter used by the resolution scorer so
+/// the count reflects real consumers, not raw inbound-edge volume.
+fn consumer_count(graph: &S, symbol_id: &str) -> usize {
+ let Ok(node_id) = decode_node_id(symbol_id) else {
+ return 0;
+ };
+ let Ok(incoming) = graph.get_incoming(node_id) else {
+ return 0;
+ };
+ incoming
+ .iter()
+ .filter(|edge| edge.kind.is_consumer_edge())
+ .map(|edge| edge.source)
+ .collect::>()
+ .len()
+}
+
+/// Apply the graph-derived reuse boost to planning-pack items via a
+/// consumer-count lookup. Generic over the lookup so the boost is unit-testable
+/// without a graph store. Must run BEFORE truncation so a canonical reusable
+/// symbol can be promoted into the pack, not merely reordered within an
+/// already-cut set.
+fn apply_reuse_boost_with(items: &mut [PackItem], consumer_count: impl Fn(&str) -> usize) {
for item in items.iter_mut() {
- if item.evidence_chain.is_some() {
- item.score = item.score.saturating_add(50);
+ if item.category == "target" {
+ continue;
}
+ let boost = reuse_evidence_boost(&item.file_path, consumer_count(&item.symbol_id));
+ item.score = item.score.saturating_add(boost);
}
+}
+
+/// Canonical ordering for planning-pack items: descending score, then stable
+/// lexical tie-breaks for deterministic output.
+fn sort_pack_items(items: &mut [PackItem]) {
items.sort_by(|left, right| {
right
.score
@@ -2515,6 +2843,15 @@ fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
});
}
+fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
+ for item in items.iter_mut() {
+ if item.evidence_chain.is_some() {
+ item.score = item.score.saturating_add(50);
+ }
+ }
+ sort_pack_items(items);
+}
+
fn mode_pack_tool(
ctx: &McpContext,
request: ModePackRequest,
@@ -3525,12 +3862,12 @@ fn pack_resolution_edge_score(ctx: &McpContext, symbol_id: &str) -> Result 0);
+
+ // More consumers => larger boost (the canonical reusable thing).
+ assert!(reuse_evidence_boost("src/x.ts", 10) > reuse_evidence_boost("src/x.ts", 1));
+
+ // A shared + widely-consumed symbol outranks a local one-off.
+ assert!(
+ reuse_evidence_boost("packages/ui/Button.tsx", 8)
+ > reuse_evidence_boost("src/features/orders/local-button.tsx", 0)
+ );
+ }
+
+ #[test]
+ fn reuse_boost_promotes_canonical_symbol_past_truncation() {
+ use super::{apply_reuse_boost_with, sort_pack_items};
+
+ // The local one-off has the higher base score; the shared, widely
+ // consumed component has a lower base score and would be cut by a
+ // base-score truncate to 1 — unless reuse ranking runs before the cut.
+ let mut items = vec![
+ make_pack_item(100, "src/features/orders/local.tsx", false),
+ make_pack_item(80, "packages/ui/components/Button.tsx", false),
+ ];
+
+ apply_reuse_boost_with(&mut items, |symbol_id| {
+ if symbol_id.contains("Button") { 12 } else { 0 }
+ });
+ sort_pack_items(&mut items);
+ items.truncate(1);
+
+ assert_eq!(items.len(), 1);
+ assert_eq!(items[0].file_path, "packages/ui/components/Button.tsx");
+ }
+
+ #[test]
+ fn build_plan_change_projects_pack_into_typed_sections() {
+ use super::build_plan_change;
+
+ let mut target_item = make_pack_item(200, "src/orders/create-order.ts", false);
+ target_item.category = "target".to_owned();
+ let items = vec![
+ make_pack_item(120, "packages/ui/components/Button.tsx", false),
+ make_pack_item(90, "src/features/orders/local-helper.ts", false),
+ target_item,
+ ];
+ let gaps = vec!["Who emits status FINALIZED?".to_owned()];
+ let proofs = vec![serde_json::json!({"repo": "alert", "edge": "ConsumesApiFrom"})];
+ let next_steps = vec!["crud_trace".to_owned()];
+ let change_impact = super::ChangeImpactSummary {
+ confirmed_downstream_repos: vec!["alert".to_owned()],
+ cross_repo_callers: vec![super::CrossRepoCaller {
+ file_path: "src/consumer.ts".to_owned(),
+ line_start: None,
+ repo: "report".to_owned(),
+ symbol_id: "id_consumer".to_owned(),
+ symbol_kind: "function".to_owned(),
+ symbol_name: "useOrderTotals".to_owned(),
+ evidence: None,
+ }],
+ ..Default::default()
+ };
+
+ let plan = build_plan_change(
+ "createOrder",
+ true,
+ &items,
+ &gaps,
+ &proofs,
+ &next_steps,
+ &change_impact,
+ None,
+ );
+
+ assert!(plan.found);
+ // Shared component => reuse candidate; local helper => sibling target;
+ // the target item is excluded from both.
+ assert_eq!(plan.reuse_candidates.len(), 1);
+ assert_eq!(
+ plan.reuse_candidates[0].file_path,
+ "packages/ui/components/Button.tsx"
+ );
+ assert_eq!(plan.sibling_clone_targets.len(), 1);
+ assert_eq!(
+ plan.sibling_clone_targets[0].file_path,
+ "src/features/orders/local-helper.ts"
+ );
+ assert_eq!(plan.open_unknowns, gaps);
+ assert_eq!(plan.cross_repo_reachability.len(), 1);
+ assert_eq!(plan.verification_plan.len(), 1);
+ assert_eq!(plan.integration_checks.len(), 1);
+ assert!(plan.integration_checks[0].contains("alert"));
+ assert_eq!(plan.write_path_or_state_machine_risks.len(), 1);
+ assert!(plan.write_path_or_state_machine_risks[0].contains("report::useOrderTotals"));
+ // Contract: sections awaiting later work still exist (empty).
+ assert!(plan.standards_to_preserve.is_empty());
+ assert!(plan.required_braingent_records.is_empty());
+ }
+
+ #[test]
+ fn plan_change_includes_pass_two_and_v1_checklists() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let plan = build_plan_change(
+ "anyTarget",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+
+ assert_eq!(
+ plan.pass_two_gap_dimensions.len(),
+ 8,
+ "all 8 pass-2 dimensions must be present: {:?}",
+ plan.pass_two_gap_dimensions
+ );
+
+ assert!(!plan.v1_completeness_checklist.is_empty());
+ assert!(
+ plan.v1_completeness_checklist
+ .iter()
+ .all(|item| item.contains("→ verify:")),
+ "every v1-completeness item must carry a verify step"
+ );
+ assert!(
+ plan.v1_completeness_checklist
+ .iter()
+ .any(|item| item.contains("Derived-field blast radius")),
+ "deviation checklist item missing"
+ );
+
+ for section in ["pass_two_gap_dimensions", "v1_completeness_checklist"] {
+ assert!(
+ plan.contract.sections.iter().any(|s| s == section),
+ "missing section in manifest: {section}"
+ );
+ }
+ }
+
+ #[test]
+ fn plan_change_surfaces_display_ownership_for_cross_service_refs() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let change_impact = ChangeImpactSummary {
+ cross_repo_callers: vec![super::CrossRepoCaller {
+ file_path: "src/action_panel.ts".to_owned(),
+ line_start: None,
+ repo: "reporting".to_owned(),
+ symbol_id: "id_hub".to_owned(),
+ symbol_kind: "function".to_owned(),
+ symbol_name: "renderActionPanel".to_owned(),
+ evidence: None,
+ }],
+ ..Default::default()
+ };
+ let plan = build_plan_change(
+ "getOrderDisplayName",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &change_impact,
+ None,
+ );
+
+ assert!(
+ plan.display_ownership_checks
+ .iter()
+ .any(|check| check.contains("reporting")),
+ "display ownership not surfaced for cross-service ref: {:?}",
+ plan.display_ownership_checks
+ );
+ assert!(
+ plan.contract
+ .sections
+ .iter()
+ .any(|s| s == "display_ownership_checks")
+ );
+ }
+
+ #[test]
+ fn plan_change_omits_display_ownership_without_cross_service_refs() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let plan = build_plan_change(
+ "localOnly",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+ assert!(
+ plan.display_ownership_checks.is_empty(),
+ "no cross-service refs should mean no display-ownership prompts"
+ );
+ }
+
+ #[test]
+ fn plan_change_contract_gate_validates_deterministic_metadata() {
+ use super::{
+ ChangeImpactSummary, PLAN_CHANGE_SCHEMA_VERSION, PLAN_CHANGE_SECTIONS,
+ build_plan_change, validate_plan_change_contract,
+ };
+
+ let mut plan = build_plan_change(
+ "createOrder",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+
+ // A freshly built product satisfies the contract: current schema
+ // version and the exact canonical section manifest, deterministically.
+ assert_eq!(plan.contract.schema_version, PLAN_CHANGE_SCHEMA_VERSION);
+ assert!(
+ plan.contract
+ .sections
+ .iter()
+ .map(String::as_str)
+ .eq(PLAN_CHANGE_SECTIONS)
+ );
+ assert!(validate_plan_change_contract(&plan).is_empty());
+
+ // The gate fails on a stale schema version.
+ plan.contract.schema_version = 99;
+ assert!(!validate_plan_change_contract(&plan).is_empty());
+ plan.contract.schema_version = PLAN_CHANGE_SCHEMA_VERSION;
+
+ // The gate fails on a mangled section manifest (missing/renamed section).
+ plan.contract.sections.pop();
+ assert!(!validate_plan_change_contract(&plan).is_empty());
+ }
+
+ #[test]
+ fn plan_change_exclusion_ledger_records_capped_fan_out() {
+ use super::{ChangeImpactSummary, TruncatedRepos, build_plan_change};
+
+ let change_impact = ChangeImpactSummary {
+ truncated_repos: Some(TruncatedRepos {
+ count: 4,
+ names: vec!["a".to_owned(), "b".to_owned()],
+ reason_codes: vec!["fan_out_cap".to_owned()],
+ }),
+ ..Default::default()
+ };
+ let plan = build_plan_change(
+ "createOrder",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &change_impact,
+ None,
+ );
+
+ assert!(
+ plan.contract
+ .exclusion_ledger
+ .iter()
+ .any(|entry| entry.contains("capped") && entry.contains('4')),
+ "ledger should record the capped fan-out: {:?}",
+ plan.contract.exclusion_ledger
+ );
+
+ // Sections that are not yet computed are recorded, so an empty value is
+ // never silently read as "nothing applies".
+ assert!(
+ plan.contract
+ .exclusion_ledger
+ .iter()
+ .any(|entry| entry.contains("standards_to_preserve")),
+ "ledger should record not-yet-computed sections: {:?}",
+ plan.contract.exclusion_ledger
+ );
+ }
+
#[test]
fn items_without_evidence_are_not_boosted() {
let mut items = vec![make_pack_item(80, "a.rs", false)];
diff --git a/crates/gather-step-mcp/tests/batch_plan_change.rs b/crates/gather-step-mcp/tests/batch_plan_change.rs
new file mode 100644
index 00000000..4c13a4ee
--- /dev/null
+++ b/crates/gather-step-mcp/tests/batch_plan_change.rs
@@ -0,0 +1,62 @@
+//! Regression: `batch_query` must route `plan_change` to the typed
+//! twelve-section product (`PlanChangeResponse`), not the legacy planning-pack
+//! `ContextPackResponse`. The direct MCP route and the batch route must agree.
+
+use std::sync::Arc;
+
+use gather_step_mcp::{
+ config::{McpContext, McpServerConfig},
+ server::GatherStepMcpServer,
+ tools::composite::{BatchQueryOperation, BatchQueryRequest},
+};
+use gather_step_storage::WorkspaceStores;
+use rmcp::handler::server::wrapper::Parameters;
+
+#[tokio::test]
+async fn batch_query_plan_change_returns_typed_product() {
+ let tmp = tempfile::tempdir().expect("tempdir");
+ let root = tmp.path();
+ let stores = Arc::new(WorkspaceStores::open(root).expect("workspace stores should open"));
+ let config = McpServerConfig::new(root.join("registry.json"), root.join("graph.redb"));
+ let ctx = McpContext::from_workspace_stores(config, stores);
+ let server = GatherStepMcpServer::new(ctx);
+
+ let request = BatchQueryRequest {
+ ops: vec![BatchQueryOperation {
+ tool: "plan_change".to_owned(),
+ arguments: serde_json::json!({ "target": "anySymbol" }),
+ }],
+ };
+
+ let response = server
+ .batch_query_tool(Parameters(request))
+ .await
+ .expect("batch_query should succeed");
+
+ // These keys exist only on the typed PlanChangeResponse, never on the
+ // legacy ContextPackResponse — so their presence proves the batch route
+ // now matches the direct route.
+ let body = serde_json::to_string(&response.0).expect("serialize batch response");
+ assert!(
+ body.contains("reuse_candidates"),
+ "batch plan_change must return the typed product (missing reuse_candidates): {body}"
+ );
+ assert!(
+ body.contains("verification_plan") && body.contains("\"sections\""),
+ "batch plan_change must include the contract section manifest: {body}"
+ );
+ for section in [
+ "display_ownership_checks",
+ "pass_two_gap_dimensions",
+ "v1_completeness_checklist",
+ ] {
+ assert!(
+ body.contains(section),
+ "batch plan_change missing section `{section}`: {body}"
+ );
+ }
+ assert!(
+ body.contains("\"schema_version\":3"),
+ "batch plan_change must carry the current contract schema version: {body}"
+ );
+}
diff --git a/crates/gather-step-storage/src/graph_store.rs b/crates/gather-step-storage/src/graph_store.rs
index e0a002d4..1085cdb2 100644
--- a/crates/gather-step-storage/src/graph_store.rs
+++ b/crates/gather-step-storage/src/graph_store.rs
@@ -1,7 +1,8 @@
use std::{
- fs,
+ env, fs,
io::ErrorKind,
path::{Path, PathBuf},
+ sync::atomic::{AtomicU64, Ordering},
};
use gather_step_core::{
@@ -551,6 +552,7 @@ pub trait GraphStore {
pub struct GraphStoreDb {
db: Database,
path: PathBuf,
+ snapshot_cleanup: Option,
/// Active-`BulkModeGuard` reference count. When > 0, write
/// transactions use `Durability::None` to skip fsync. The counter
/// (rather than a flat `AtomicBool`) is what makes
@@ -623,6 +625,14 @@ impl GraphStoreError {
/// once the working set fits.
const DEFAULT_GRAPH_CACHE_BYTES: usize = 256 * 1024 * 1024;
+impl Drop for GraphStoreDb {
+ fn drop(&mut self) {
+ if let Some(snapshot) = self.snapshot_cleanup.take() {
+ let _ = fs::remove_file(snapshot);
+ }
+ }
+}
+
impl GraphStoreDb {
fn is_missing_table_error(error: &impl core::fmt::Display) -> bool {
error.to_string().contains("does not exist")
@@ -660,6 +670,7 @@ impl GraphStoreDb {
let store = Self {
db,
path,
+ snapshot_cleanup: None,
bulk_mode: std::sync::atomic::AtomicUsize::new(0),
};
if is_new {
@@ -670,6 +681,59 @@ impl GraphStoreDb {
Ok(store)
}
+ /// Open the graph store for reading without contending on the writer lock.
+ ///
+ /// The fast path is a normal [`Self::open`]. When the store is held by
+ /// another process (an in-progress index or watch) it falls back to a
+ /// point-in-time copy of the database file and opens that, so a read during
+ /// a write returns the last committed state instead of blocking. The copy is
+ /// removed when the returned store is dropped.
+ pub fn open_read_only(path: impl AsRef) -> Result {
+ let path = path.as_ref().to_path_buf();
+ match Self::open(&path) {
+ Err(
+ GraphStoreError::StorageHeld { .. } | GraphStoreError::StorageHeldByDaemon { .. },
+ ) => Self::open_snapshot(&path),
+ other => other,
+ }
+ }
+
+ fn open_snapshot(path: &Path) -> Result {
+ let snapshot_path = Self::snapshot_path(path);
+ fs::copy(path, &snapshot_path)?;
+ let mut builder = Database::builder();
+ builder.set_cache_size(DEFAULT_GRAPH_CACHE_BYTES);
+ let db = builder.open(&snapshot_path).map_err(|error| {
+ let _ = fs::remove_file(&snapshot_path);
+ Self::map_open_error(path, error)
+ })?;
+ let store = Self {
+ db,
+ path: path.to_path_buf(),
+ snapshot_cleanup: Some(snapshot_path),
+ bulk_mode: std::sync::atomic::AtomicUsize::new(0),
+ };
+ store.validate_schema_version()?;
+ Ok(store)
+ }
+
+ fn snapshot_path(path: &Path) -> PathBuf {
+ static COUNTER: AtomicU64 = AtomicU64::new(0);
+ let id = COUNTER.fetch_add(1, Ordering::Relaxed);
+ let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("graph");
+ env::temp_dir().join(format!(
+ "gather-step-snapshot-{stem}-{}-{id}.redb",
+ std::process::id()
+ ))
+ }
+
+ /// Whether this store is backed by a read-only snapshot copy rather than the
+ /// live database file.
+ #[must_use]
+ pub fn is_snapshot(&self) -> bool {
+ self.snapshot_cleanup.is_some()
+ }
+
fn write_schema_version(&self) -> Result<(), GraphStoreError> {
let write_txn = self.begin_write_txn()?;
{
@@ -3922,6 +3986,34 @@ mod tests {
));
}
+ #[test]
+ fn open_read_only_falls_back_to_snapshot_when_held() {
+ let (_workspace, _storage, graph_path) = temp_workspace_graph_path("held-snapshot");
+ let function = node("service-a", "src/a.ts", NodeKind::Function, "execute", 0);
+
+ let holder = GraphStoreDb::open(&graph_path).expect("first open should succeed");
+ holder
+ .insert_node(&function)
+ .expect("committed write before the snapshot");
+
+ let reader =
+ GraphStoreDb::open_read_only(&graph_path).expect("read-only open should snapshot");
+ assert!(reader.is_snapshot(), "held read must use a snapshot copy");
+ let loaded = reader
+ .get_node(function.id)
+ .expect("snapshot read should succeed")
+ .expect("committed node should be visible in the snapshot");
+ assert_eq!(loaded.name, "execute");
+ drop(reader);
+ drop(holder);
+
+ let direct = GraphStoreDb::open_read_only(&graph_path).expect("uncontended read-only open");
+ assert!(
+ !direct.is_snapshot(),
+ "an uncontended read must use the live file, not a snapshot"
+ );
+ }
+
#[test]
fn open_stamps_fresh_schema_version_zero() {
let graph_path = temp_db_path("fresh-graph-schema");
diff --git a/crates/gather-step-storage/src/search_store.rs b/crates/gather-step-storage/src/search_store.rs
index 372dd968..8b8dece3 100644
--- a/crates/gather-step-storage/src/search_store.rs
+++ b/crates/gather-step-storage/src/search_store.rs
@@ -686,6 +686,77 @@ impl TantivySearchStore {
file_path_stored,
})
}
+
+ /// Disjunction fallback for multi-**word** capability queries: each
+ /// whitespace-separated word becomes a `Should` clause and a hit must match
+ /// at least a majority of them (`ceil(n/2)`).
+ ///
+ /// Gated on the *original* query containing two or more words. A single
+ /// identifier such as `createOrderUseCase` is one word — even though it
+ /// camelCase-splits into several tokens — and must not fuzzily match
+ /// siblings that merely share tokens (e.g. `updateOrderUseCase`). Only a
+ /// genuine phrase like `email notification delivery` should relax to OR.
+ fn execute_search_disjunctive(
+ &self,
+ query_text: &str,
+ limit: usize,
+ filters: SearchFilters<'_>,
+ ) -> Result, SearchStoreError> {
+ let words: Vec<&str> = query_text.split_whitespace().collect();
+ if words.len() < 2 {
+ return Ok(Vec::new());
+ }
+
+ let reader = self.reader.lock();
+ let searcher = reader.searcher();
+ // Fuzzy on: this is the last-resort recall pass.
+ let parser = self.build_query_parser(true);
+ let clauses: Vec<(Occur, Box)> = words
+ .iter()
+ .map(|word| {
+ // Expand each word into (word OR synonyms) so capability phrases
+ // bridge to the symbol vocabulary (e.g. "login" -> authenticate).
+ let mut sub: Vec<(Occur, Box)> = Vec::new();
+ let (word_query, _) = parser.parse_query_lenient(word);
+ sub.push((Occur::Should, word_query));
+ for synonym in synonym_terms(word) {
+ let (synonym_query, _) = parser.parse_query_lenient(synonym);
+ sub.push((Occur::Should, synonym_query));
+ }
+ let clause: Box = if sub.len() == 1 {
+ sub.pop().expect("clause exists").1
+ } else {
+ Box::new(BooleanQuery::from(sub))
+ };
+ (Occur::Should, clause)
+ })
+ .collect();
+ // INTENTIONAL RECALL BEHAVIOR: ceil(n/2) is 1 for a 2-word query, so
+ // the last-resort fallback can match a symbol covering a single token
+ // (possibly via fuzzy/synonym). This trades exact-phrase precision for
+ // recall and only fires after both conjunctive passes returned empty;
+ // callers must not treat a non-empty fallback result as an exact match.
+ let floor = words.len().div_ceil(2).max(1);
+ let query: Box =
+ Box::new(BooleanQuery::with_minimum_required_clauses(clauses, floor));
+ let query = self.apply_filters(query, filters);
+
+ let fetch_limit = limit.max(1).saturating_mul(5).min(MAX_RESULT_WINDOW);
+ let collector = TopDocs::with_limit(fetch_limit).order_by_score();
+ let docs: Vec<(f32, DocAddress)> = searcher.search(&query, &collector)?;
+
+ let mut hits = docs
+ .into_iter()
+ .map(|(score, address)| self.decode_hit(&searcher, address, score, false))
+ .collect::, _>>()?;
+ rerank_hits(&mut hits, query_text);
+
+ Ok(hits
+ .into_iter()
+ .take(limit.max(1))
+ .map(|scored| scored.hit)
+ .collect())
+ }
}
impl SearchStore for TantivySearchStore {
@@ -718,7 +789,17 @@ impl SearchStore for TantivySearchStore {
return Ok(exact_hits);
}
- self.execute_search(trimmed, limit, false, true, filters)
+ let fuzzy_hits = self.execute_search(trimmed, limit, false, true, filters)?;
+ if !fuzzy_hits.is_empty() {
+ return Ok(fuzzy_hits);
+ }
+
+ // Both conjunctive passes require every query term to match, so a
+ // multi-word capability query ("email notification delivery") returns
+ // nothing when a symbol covers only some terms. Fall back to a
+ // disjunction with a majority floor so reuse-discovery queries still
+ // surface partial matches instead of an empty result.
+ self.execute_search_disjunctive(trimmed, limit, filters)
}
fn delete_by_files(&self, files: &[(&str, &str)]) -> Result<(), SearchStoreError> {
@@ -1071,6 +1152,62 @@ fn infra_repo_penalty(symbol_exact_boost: f32, is_exported: bool, repo: &str) ->
if is_infra { 0.85 } else { 1.0 }
}
+/// Curated concept→vocabulary synonym map (S4).
+///
+/// Returns extra terms to OR into a query word's clause in the disjunction
+/// fallback so capability phrases bridge to the identifiers code actually uses
+/// (e.g. "login" → authenticate). Intentionally small and high-signal; the
+/// original word is always searched too, so an empty list is a safe no-op.
+fn synonym_terms(word: &str) -> &'static [&'static str] {
+ // Symmetric concept groups: any member expands to the whole group, so
+ // recall does not depend on which side of a synonym pair the user typed
+ // (e.g. "login" and "authenticate" reach each other). The word's own clause
+ // is also added by the caller; a duplicate OR clause is harmless.
+ const GROUPS: &[&[&str]] = &[
+ &["login", "signin", "auth", "authenticate", "authentication"],
+ &["logout", "signout", "deauthenticate"],
+ &[
+ "paginate",
+ "pagination",
+ "paging",
+ "cursor",
+ "offset",
+ "page",
+ ],
+ &["delete", "remove", "destroy"],
+ &["create", "add", "insert"],
+ &["update", "edit", "modify"],
+ &["list", "fetch", "get", "find"],
+ ];
+ for group in GROUPS {
+ if group.iter().any(|member| word.eq_ignore_ascii_case(member)) {
+ return group;
+ }
+ }
+ &[]
+}
+
+/// Term-coverage boost (S2).
+///
+/// Rewards hits whose symbol-name tokens cover a larger fraction of the query's
+/// tokens, so a higher-coverage match outranks a lower-coverage one at equal
+/// base score. Scales linearly up to +25% at full coverage. Callers apply this
+/// only for multi-word queries; for a single identifier it is meaningless.
+fn query_term_coverage_boost(query_tokens: &[String], symbol_name: &str) -> f32 {
+ if query_tokens.is_empty() {
+ return 1.0;
+ }
+ let symbol_tokens = tokenize_camel_case(symbol_name);
+ let covered = query_tokens
+ .iter()
+ .filter(|qt| symbol_tokens.iter().any(|st| st == *qt))
+ .count();
+ // Token counts are tiny; u16 conversion avoids the cast-precision-loss lint.
+ let coverage = f32::from(u16::try_from(covered).unwrap_or(u16::MAX))
+ / f32::from(u16::try_from(query_tokens.len()).unwrap_or(u16::MAX));
+ 1.0 + coverage * 0.25
+}
+
fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let newest_timestamp = hits
.iter()
@@ -1086,6 +1223,10 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let query_is_pascal = query.chars().next().is_some_and(|c| c.is_ascii_uppercase());
// Tokenize the query once for the file-path token match boost.
let query_tokens = tokenize_camel_case(query);
+ // Coverage boost only applies to genuine multi-word capability queries,
+ // mirroring the disjunction fallback — a single identifier must not be
+ // re-ranked by partial token overlap.
+ let multiword_query = query.split_whitespace().count() >= 2;
for scored in hits.iter_mut() {
let export_boost = if scored.hit.is_exported { 1.10 } else { 1.0 };
@@ -1110,6 +1251,10 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
// Raised from 1.4× to break ties between multiple same-name hits where
// secondary factors (BM25, recency) were pulling unrelated-repo symbols
// above the user's likely target.
+ // NOTE: for a multi-word query (which contains spaces) this exact-match
+ // boost is always neutral — symbol names have no spaces, so neither
+ // comparison can hit. Term-coverage is the ordering signal for
+ // multi-word queries; this boost only differentiates single-token ones.
let symbol_name_matches = scored.hit.symbol_name.eq_ignore_ascii_case(query);
let symbol_token_matches = !symbol_name_matches
&& first_code_token(&scored.hit.symbol_name).eq_ignore_ascii_case(query);
@@ -1154,6 +1299,13 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let infra_penalty =
infra_repo_penalty(symbol_exact_boost, scored.hit.is_exported, &scored.hit.repo);
+ // term-coverage boost (S2) — higher query-term coverage ranks higher.
+ let coverage_boost = if multiword_query {
+ query_term_coverage_boost(&query_tokens, &scored.hit.symbol_name)
+ } else {
+ 1.0
+ };
+
scored.hit.adjusted_score = scored.base_score
* export_boost
* exact_boost
@@ -1163,7 +1315,8 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
* pascal_type_boost
* path_boost
* hook_boost
- * infra_penalty;
+ * infra_penalty
+ * coverage_boost;
}
hits.sort_by(|left, right| {
@@ -1172,6 +1325,11 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
.adjusted_score
.partial_cmp(&left.hit.adjusted_score)
.unwrap_or(Ordering::Equal)
+ // Deterministic tie-breaks so equal-score hits have a stable order
+ // regardless of the underlying Tantivy doc-id ordering.
+ .then_with(|| left.hit.symbol_name.cmp(&right.hit.symbol_name))
+ .then_with(|| left.file_path_stored.cmp(&right.file_path_stored))
+ .then_with(|| left.hit.node_id.cmp(&right.hit.node_id))
});
}
@@ -1550,6 +1708,170 @@ mod tests {
assert!(results[0].exact_match);
}
+ #[test]
+ fn partial_multiword_query_recovers_via_disjunction_fallback() {
+ let store =
+ TantivySearchStore::open(temp_search_dir("or-fallback")).expect("store should open");
+ let mut symbol = node("dispatchEmailNotification", "src/notifications/dispatch.ts");
+ // Keep `content` empty so the match can only come from the symbol-name
+ // tokens (dispatch / email / notification), making the assertion precise.
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // Capability-style query: two of three terms ("email", "notification")
+ // match the symbol's camelCase tokens; "delivery" appears nowhere. The
+ // conjunctive (AND) default returns nothing, so a disjunction fallback
+ // with a majority floor must surface the symbol for reuse discovery.
+ let results = store
+ .search("email notification delivery", 10)
+ .expect("search should succeed");
+
+ assert_eq!(
+ results.len(),
+ 1,
+ "partial multi-word query should match via OR fallback"
+ );
+ assert_eq!(results[0].symbol_name, "dispatchEmailNotification");
+ }
+
+ #[test]
+ fn single_identifier_miss_does_not_fall_back_to_token_sharing_sibling() {
+ let store =
+ TantivySearchStore::open(temp_search_dir("or-precision")).expect("store should open");
+ let mut existing = node("updateOrderUseCase", "src/b.ts");
+ existing.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&existing, 1))
+ .expect("document should index");
+
+ // No `createOrderUseCase` exists. Because the query is a single
+ // identifier (one word), the disjunction fallback must NOT fire and
+ // resurrect the token-sharing sibling `updateOrderUseCase` on the
+ // shared Order/Use/Case tokens. Adversarial guard for the OR fallback.
+ let results = store
+ .search("createOrderUseCase", 10)
+ .expect("search should succeed");
+
+ assert!(
+ results.is_empty(),
+ "single-identifier miss must not OR-match a token-sharing sibling"
+ );
+ }
+
+ #[test]
+ fn rerank_prefers_higher_query_term_coverage_on_equal_base_score() {
+ use super::{ScoredSearchHit, SearchHit, rerank_hits};
+
+ fn scored(symbol: &str, base: f32) -> ScoredSearchHit {
+ ScoredSearchHit {
+ hit: SearchHit {
+ node_id: node_id("service-a", "src/x.ts", NodeKind::Function, symbol),
+ repo: "service-a".to_owned(),
+ file_path: String::new(),
+ symbol_name: symbol.to_owned(),
+ node_kind: NodeKind::Function,
+ adjusted_score: 0.0,
+ exact_match: false,
+ is_exported: true,
+ lang: "typescript".to_owned(),
+ },
+ base_score: base,
+ last_modified: 1,
+ file_path_stored: String::new(),
+ }
+ }
+
+ // Equal base score and identical flags. Against "email notification
+ // delivery", `dispatchEmailNotification` covers two query terms while
+ // `deliveryService` covers one. The higher-coverage hit must rank first
+ // even though it is listed second in the input.
+ let mut hits = vec![
+ scored("deliveryService", 1.0),
+ scored("dispatchEmailNotification", 1.0),
+ ];
+ rerank_hits(&mut hits, "email notification delivery");
+
+ assert_eq!(hits[0].hit.symbol_name, "dispatchEmailNotification");
+ }
+
+ #[test]
+ fn rerank_breaks_score_ties_deterministically_by_symbol_name() {
+ use super::{ScoredSearchHit, SearchHit, rerank_hits};
+
+ fn scored(symbol: &str) -> ScoredSearchHit {
+ ScoredSearchHit {
+ hit: SearchHit {
+ node_id: node_id("service-a", "src/x.ts", NodeKind::Function, symbol),
+ repo: "service-a".to_owned(),
+ file_path: String::new(),
+ symbol_name: symbol.to_owned(),
+ node_kind: NodeKind::Function,
+ adjusted_score: 0.0,
+ exact_match: false,
+ is_exported: true,
+ lang: "typescript".to_owned(),
+ },
+ base_score: 1.0,
+ last_modified: 1,
+ file_path_stored: String::new(),
+ }
+ }
+
+ // Single-word query (no coverage boost): both hits resolve to the same
+ // adjusted score, so the symbol-name tie-break must impose a stable
+ // order regardless of input order.
+ let mut hits = vec![scored("zebra"), scored("alpha")];
+ rerank_hits(&mut hits, "unrelated");
+
+ assert_eq!(hits[0].hit.symbol_name, "alpha");
+ assert_eq!(hits[1].hit.symbol_name, "zebra");
+ }
+
+ #[test]
+ fn disjunction_fallback_bridges_query_synonyms_to_symbol_vocabulary() {
+ let store = TantivySearchStore::open(temp_search_dir("synonym-fallback"))
+ .expect("store should open");
+ let mut symbol = node("authenticateUser", "src/auth/authenticate-user.ts");
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // "login" is a concept synonym of the symbol's "authenticate" token;
+ // "workflow" matches nothing. Without synonym expansion the disjunction
+ // fallback finds no matching term and returns empty; the login →
+ // authenticate bridge must surface the symbol for capability search.
+ let results = store
+ .search("login workflow", 10)
+ .expect("search should succeed");
+
+ assert_eq!(results.len(), 1);
+ assert_eq!(results[0].symbol_name, "authenticateUser");
+ }
+
+ #[test]
+ fn disjunction_fallback_synonyms_are_symmetric() {
+ let store = TantivySearchStore::open(temp_search_dir("synonym-symmetric"))
+ .expect("store should open");
+ let mut symbol = node("loginUser", "src/auth/login-user.ts");
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // Reverse of the previous test: the symbol uses "login" and the query
+ // uses "authenticate". Symmetric synonym groups must bridge either
+ // direction, so recall does not depend on which term the user typed.
+ let results = store
+ .search("authenticate workflow", 10)
+ .expect("search should succeed");
+
+ assert_eq!(results.len(), 1);
+ assert_eq!(results[0].symbol_name, "loginUser");
+ }
+
#[test]
fn search_filters_apply_before_collecting_hits() {
let store = TantivySearchStore::open(temp_search_dir("filtered-search"))
diff --git a/crates/gather-step-storage/src/stores.rs b/crates/gather-step-storage/src/stores.rs
index 4865cc2e..ac7e90a4 100644
--- a/crates/gather-step-storage/src/stores.rs
+++ b/crates/gather-step-storage/src/stores.rs
@@ -52,7 +52,7 @@ impl WorkspaceStores {
pub fn open_read_only_search(root: impl AsRef) -> Result {
let root = root.as_ref().to_path_buf();
- let graph = Arc::new(GraphStoreDb::open(root.join("graph.redb"))?);
+ let graph = Arc::new(GraphStoreDb::open_read_only(root.join("graph.redb"))?);
let search = Arc::new(TantivySearchStore::open_read_only(root.join("search"))?);
let metadata = Arc::new(MetadataStoreDb::open(root.join("metadata.sqlite"))?);
Ok(Self {
diff --git a/website/package.json b/website/package.json
index 9b136814..0b3f3e14 100644
--- a/website/package.json
+++ b/website/package.json
@@ -1,7 +1,7 @@
{
"name": "gather-step-website",
"type": "module",
- "version": "4.2.1",
+ "version": "4.3.1",
"private": true,
"packageManager": "bun@1.3.12",
"scripts": {
diff --git a/website/src/components/landing/Benchmark.astro b/website/src/components/landing/Benchmark.astro
index c84ba87d..78ff8c1e 100644
--- a/website/src/components/landing/Benchmark.astro
+++ b/website/src/components/landing/Benchmark.astro
@@ -69,7 +69,7 @@ const speedup = (totalManual / totalGatherStep).toFixed(1);
on the same machine; medians of three runs.
- v4.2.1 · 2026-06-02
+ v4.3.1 · 2026-06-02
@@ -141,7 +141,7 @@ const speedup = (totalManual / totalGatherStep).toFixed(1);
-
Planning oracle (v4.2.1)
+
Planning oracle (v4.3.1)
Full 25-scenario suite, every metric at the HIGH ceiling, latency well under release-gate thresholds (p50 ≤ 50 ms, p95 ≤ 300 ms, p99 ≤ 1000 ms).
diff --git a/website/src/components/landing/Hero.astro b/website/src/components/landing/Hero.astro
index ed4622e2..7135b601 100644
--- a/website/src/components/landing/Hero.astro
+++ b/website/src/components/landing/Hero.astro
@@ -6,7 +6,7 @@ const stats = [
},
{
label: 'Spec · Release',
- value: 'v4.2.1 · dependency refresh',
+ value: 'v4.3.1 · planning & reuse quality',
},
{
label: 'Spec · Deployment',
diff --git a/website/src/components/landing/Topology.astro b/website/src/components/landing/Topology.astro
index e158e2e2..2bc84d23 100644
--- a/website/src/components/landing/Topology.astro
+++ b/website/src/components/landing/Topology.astro
@@ -72,6 +72,6 @@ const consumers = [
CONSUMERS · N=4
-
Approved · v4.2.1
+
Approved · v4.3.1
diff --git a/website/src/content/docs/changelog.md b/website/src/content/docs/changelog.md
index 0c39768f..d6a5f5cf 100644
--- a/website/src/content/docs/changelog.md
+++ b/website/src/content/docs/changelog.md
@@ -5,6 +5,37 @@ description: "User-visible changes to gather-step, listed by release. Updated ma
This changelog lists significant user-visible changes. The latest release is shown in full at the top; earlier releases are collapsed under [Earlier releases](#earlier-releases) at the bottom of the page.
+## v4.3.1 (2026-06-02)
+
+Release status: **prepared**.
+
+Planning- and reuse-quality release. Fixes retrieval recall so reuse search stops returning empty, ranks reuse candidates using the graph, ships a typed `plan_change` product with a stable section contract, and adds the first batch of v4.3.1 gap-closure work (lock-contention disclosure, a display-ownership planning dimension, and mongo/Atlas structural detectors). Partial against the full v4.3.0/v4.3.1 backlog — see the PR for the done/deferred ledger.
+
+### Added
+
+- **`gather-step doctor` code-quality advisories** — non-gating findings over the indexed graph: dependency cycles (incl. cross-repo, via Tarjan SCC), mock/fixture imports leaking into production modules, and local forks of shared/design-system components that should be reused.
+- **Graph-ranked reuse evidence** in planning packs: reuse candidates are ranked by sibling-consumer count, shared/design-system membership, and cross-repo proof strength before truncation, so a blessed shared component ranks above a bespoke fork (S3).
+- **Typed `plan_change` product** with a fixed, contract-checked section set (E1). Sections are always present (possibly empty), with an exclusion ledger recording what was dropped so a capped result is never read as exhaustive.
+- **Display-ownership planning dimension** (`display_ownership_checks`): every cross-service reference surfaces the question of whether display fields come from the owner service (snapshot/API) rather than a direct cross-service DB lookup (DSO1).
+- **Mongo/Atlas structural safety detectors** with stable rule IDs and confidence: `$lookup` join-key coercion that defeats an index (`GS-MONGO-INDEX-DEFEAT`), bare `$toObjectId` on untrusted input (`GS-MONGO-UNSAFE-COERCION`), unguarded dotted-path `$set` (`GS-MONGO-NULL-PARENT-PATH`), and `dynamic:false` Atlas index↔doc-field drift (`GS-MONGO-ATLAS-INDEX-DRIFT`) (MQS1–MQS3, AIX1).
+- **Query-time index freshness** (`fresh`/`stale`/`never_indexed`) is now classified against the working tree's HEAD and surfaced per repo in `gather-step status` (A13).
+- **Multi-path traversal provenance**: graph traversals now report every distinct path into a node plus `depth_capped`/`truncated` signals when a walk is cut short by depth or fan-out bounds (B8/B9).
+
+### Changed
+
+- **Multi-word search recall**: a conjunctive query that returns nothing now falls back to a disjunction with a min-should-match floor, so a capability query sharing most of its terms still finds the target symbol (S1). Hits are re-ranked by query-term coverage (S2) and expanded through a curated synonym map (S4).
+- **Unified `min_confidence` edge filter** across trace/impact/pack traversal, with `None`-confidence edges treated as trusted (A14).
+- The `plan_change` contract gate is now **evidentiary** — it asserts schema version, the exact section manifest, and the exclusion ledger, not just section presence (G7).
+
+### Fixed
+
+- `batch_query` now routes `plan_change` requests to the typed product instead of the raw planning pack.
+- Read commands no longer block or fail when the graph store is held by an in-progress index or watch: they route to the holding daemon when one is serving, otherwise read a point-in-time snapshot of the graph. When neither is possible the command exits with a distinct, documented code and (under `--json`) a `degraded: graph_locked` disclosure, so a blocked read can never be mistaken for an empty-but-successful result.
+
+### Release-wide
+
+- Bumped the app, Cargo workspace, internal crate dependency versions, landing-page release stamps, and website package metadata to `4.3.1`.
+
## v4.2.1 (2026-06-02)
Release status: **prepared**.