From e2e91626a98564969097523fb50f9338d9de5940 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 19:49:40 +0700
Subject: [PATCH 01/31] feat: recall fallback to disjunction for multi-word
search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Search used set_conjunction_by_default() for both the exact and fuzzy
passes, so a multi-word capability query ("email notification delivery")
returned nothing whenever a symbol covered only some terms — the root
cause behind empty reuse-discovery results (WS-1 / S1).
Add a third fallback pass that fires only when both conjunctive passes
return empty: each whitespace-separated word becomes a Should clause and
a hit must match a majority of them (ceil(n/2)) via
BooleanQuery::with_minimum_required_clauses.
Precision guard: the fallback is gated on the original query containing
two or more words. A single identifier like createOrderUseCase is one
word (even though it camelCase-splits into several tokens) and must not
fuzzily match token-sharing siblings such as updateOrderUseCase.
Tests (WS-0a): a partial multi-word query recovers via the OR fallback,
and a single-identifier miss does not OR-match a token-sharing sibling.
---
.../gather-step-storage/src/search_store.rs | 117 +++++++++++++++++-
1 file changed, 116 insertions(+), 1 deletion(-)
diff --git a/crates/gather-step-storage/src/search_store.rs b/crates/gather-step-storage/src/search_store.rs
index 372dd96..7e5f4eb 100644
--- a/crates/gather-step-storage/src/search_store.rs
+++ b/crates/gather-step-storage/src/search_store.rs
@@ -686,6 +686,59 @@ impl TantivySearchStore {
file_path_stored,
})
}
+
+ /// Disjunction fallback for multi-**word** capability queries: each
+ /// whitespace-separated word becomes a `Should` clause and a hit must match
+ /// at least a majority of them (`ceil(n/2)`).
+ ///
+ /// Gated on the *original* query containing two or more words. A single
+ /// identifier such as `createOrderUseCase` is one word — even though it
+ /// camelCase-splits into several tokens — and must not fuzzily match
+ /// siblings that merely share tokens (e.g. `updateOrderUseCase`). Only a
+ /// genuine phrase like `email notification delivery` should relax to OR.
+ fn execute_search_disjunctive(
+ &self,
+ query_text: &str,
+ limit: usize,
+ filters: SearchFilters<'_>,
+ ) -> Result, SearchStoreError> {
+ let words: Vec<&str> = query_text.split_whitespace().collect();
+ if words.len() < 2 {
+ return Ok(Vec::new());
+ }
+
+ let reader = self.reader.lock();
+ let searcher = reader.searcher();
+ // Fuzzy on: this is the last-resort recall pass.
+ let parser = self.build_query_parser(true);
+ let clauses: Vec<(Occur, Box)> = words
+ .iter()
+ .map(|word| {
+ let (query, _) = parser.parse_query_lenient(word);
+ (Occur::Should, query)
+ })
+ .collect();
+ let floor = words.len().div_ceil(2).max(1);
+ let query: Box =
+ Box::new(BooleanQuery::with_minimum_required_clauses(clauses, floor));
+ let query = self.apply_filters(query, filters);
+
+ let fetch_limit = limit.max(1).saturating_mul(5).min(MAX_RESULT_WINDOW);
+ let collector = TopDocs::with_limit(fetch_limit).order_by_score();
+ let docs: Vec<(f32, DocAddress)> = searcher.search(&query, &collector)?;
+
+ let mut hits = docs
+ .into_iter()
+ .map(|(score, address)| self.decode_hit(&searcher, address, score, false))
+ .collect::, _>>()?;
+ rerank_hits(&mut hits, query_text);
+
+ Ok(hits
+ .into_iter()
+ .take(limit.max(1))
+ .map(|scored| scored.hit)
+ .collect())
+ }
}
impl SearchStore for TantivySearchStore {
@@ -718,7 +771,17 @@ impl SearchStore for TantivySearchStore {
return Ok(exact_hits);
}
- self.execute_search(trimmed, limit, false, true, filters)
+ let fuzzy_hits = self.execute_search(trimmed, limit, false, true, filters)?;
+ if !fuzzy_hits.is_empty() {
+ return Ok(fuzzy_hits);
+ }
+
+ // Both conjunctive passes require every query term to match, so a
+ // multi-word capability query ("email notification delivery") returns
+ // nothing when a symbol covers only some terms. Fall back to a
+ // disjunction with a majority floor so reuse-discovery queries still
+ // surface partial matches instead of an empty result.
+ self.execute_search_disjunctive(trimmed, limit, filters)
}
fn delete_by_files(&self, files: &[(&str, &str)]) -> Result<(), SearchStoreError> {
@@ -1550,6 +1613,58 @@ mod tests {
assert!(results[0].exact_match);
}
+ #[test]
+ fn partial_multiword_query_recovers_via_disjunction_fallback() {
+ let store =
+ TantivySearchStore::open(temp_search_dir("or-fallback")).expect("store should open");
+ let mut symbol = node("dispatchEmailNotification", "src/notifications/dispatch.ts");
+ // Keep `content` empty so the match can only come from the symbol-name
+ // tokens (dispatch / email / notification), making the assertion precise.
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // Capability-style query: two of three terms ("email", "notification")
+ // match the symbol's camelCase tokens; "delivery" appears nowhere. The
+ // conjunctive (AND) default returns nothing, so a disjunction fallback
+ // with a majority floor must surface the symbol for reuse discovery.
+ let results = store
+ .search("email notification delivery", 10)
+ .expect("search should succeed");
+
+ assert_eq!(
+ results.len(),
+ 1,
+ "partial multi-word query should match via OR fallback"
+ );
+ assert_eq!(results[0].symbol_name, "dispatchEmailNotification");
+ }
+
+ #[test]
+ fn single_identifier_miss_does_not_fall_back_to_token_sharing_sibling() {
+ let store =
+ TantivySearchStore::open(temp_search_dir("or-precision")).expect("store should open");
+ let mut existing = node("updateOrderUseCase", "src/b.ts");
+ existing.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&existing, 1))
+ .expect("document should index");
+
+ // No `createOrderUseCase` exists. Because the query is a single
+ // identifier (one word), the disjunction fallback must NOT fire and
+ // resurrect the token-sharing sibling `updateOrderUseCase` on the
+ // shared Order/Use/Case tokens. Adversarial guard for the OR fallback.
+ let results = store
+ .search("createOrderUseCase", 10)
+ .expect("search should succeed");
+
+ assert!(
+ results.is_empty(),
+ "single-identifier miss must not OR-match a token-sharing sibling"
+ );
+ }
+
#[test]
fn search_filters_apply_before_collecting_hits() {
let store = TantivySearchStore::open(temp_search_dir("filtered-search"))
From 4dfa0a74d697e6aaea4b5e2b732ddac906717607 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 19:54:25 +0700
Subject: [PATCH 02/31] feat: rank multi-word search hits by query-term
coverage
Add a term-coverage boost to rerank_hits (WS-1 / S2): a hit whose
symbol-name tokens cover more of the query's tokens is ranked higher,
scaling up to +25% at full coverage. This keeps higher-coverage matches
above incidental single-term matches once the disjunction fallback (S1)
widens the candidate set.
Gated to genuine multi-word queries (mirrors the S1 fallback) so a
single identifier is never re-ranked by partial token overlap.
Test: at equal base score, a two-term-coverage hit outranks a
one-term-coverage hit regardless of input order.
---
.../gather-step-storage/src/search_store.rs | 69 ++++++++++++++++++-
1 file changed, 68 insertions(+), 1 deletion(-)
diff --git a/crates/gather-step-storage/src/search_store.rs b/crates/gather-step-storage/src/search_store.rs
index 7e5f4eb..6714238 100644
--- a/crates/gather-step-storage/src/search_store.rs
+++ b/crates/gather-step-storage/src/search_store.rs
@@ -1134,6 +1134,25 @@ fn infra_repo_penalty(symbol_exact_boost: f32, is_exported: bool, repo: &str) ->
if is_infra { 0.85 } else { 1.0 }
}
+/// Term-coverage boost (S2).
+///
+/// Rewards hits whose symbol-name tokens cover a larger fraction of the query's
+/// tokens, so a higher-coverage match outranks a lower-coverage one at equal
+/// base score. Scales linearly up to +25% at full coverage. Callers apply this
+/// only for multi-word queries; for a single identifier it is meaningless.
+fn query_term_coverage_boost(query_tokens: &[String], symbol_name: &str) -> f32 {
+ if query_tokens.is_empty() {
+ return 1.0;
+ }
+ let symbol_tokens = tokenize_camel_case(symbol_name);
+ let covered = query_tokens
+ .iter()
+ .filter(|qt| symbol_tokens.iter().any(|st| st == *qt))
+ .count();
+ let coverage = covered as f32 / query_tokens.len() as f32;
+ 1.0 + coverage * 0.25
+}
+
fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let newest_timestamp = hits
.iter()
@@ -1149,6 +1168,10 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let query_is_pascal = query.chars().next().is_some_and(|c| c.is_ascii_uppercase());
// Tokenize the query once for the file-path token match boost.
let query_tokens = tokenize_camel_case(query);
+ // Coverage boost only applies to genuine multi-word capability queries,
+ // mirroring the disjunction fallback — a single identifier must not be
+ // re-ranked by partial token overlap.
+ let multiword_query = query.split_whitespace().count() >= 2;
for scored in hits.iter_mut() {
let export_boost = if scored.hit.is_exported { 1.10 } else { 1.0 };
@@ -1217,6 +1240,13 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
let infra_penalty =
infra_repo_penalty(symbol_exact_boost, scored.hit.is_exported, &scored.hit.repo);
+ // term-coverage boost (S2) — higher query-term coverage ranks higher.
+ let coverage_boost = if multiword_query {
+ query_term_coverage_boost(&query_tokens, &scored.hit.symbol_name)
+ } else {
+ 1.0
+ };
+
scored.hit.adjusted_score = scored.base_score
* export_boost
* exact_boost
@@ -1226,7 +1256,8 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
* pascal_type_boost
* path_boost
* hook_boost
- * infra_penalty;
+ * infra_penalty
+ * coverage_boost;
}
hits.sort_by(|left, right| {
@@ -1665,6 +1696,42 @@ mod tests {
);
}
+ #[test]
+ fn rerank_prefers_higher_query_term_coverage_on_equal_base_score() {
+ use super::{ScoredSearchHit, SearchHit, rerank_hits};
+
+ fn scored(symbol: &str, base: f32) -> ScoredSearchHit {
+ ScoredSearchHit {
+ hit: SearchHit {
+ node_id: node_id("service-a", "src/x.ts", NodeKind::Function, symbol),
+ repo: "service-a".to_owned(),
+ file_path: String::new(),
+ symbol_name: symbol.to_owned(),
+ node_kind: NodeKind::Function,
+ adjusted_score: 0.0,
+ exact_match: false,
+ is_exported: true,
+ lang: "typescript".to_owned(),
+ },
+ base_score: base,
+ last_modified: 1,
+ file_path_stored: String::new(),
+ }
+ }
+
+ // Equal base score and identical flags. Against "email notification
+ // delivery", `dispatchEmailNotification` covers two query terms while
+ // `deliveryService` covers one. The higher-coverage hit must rank first
+ // even though it is listed second in the input.
+ let mut hits = vec![
+ scored("deliveryService", 1.0),
+ scored("dispatchEmailNotification", 1.0),
+ ];
+ rerank_hits(&mut hits, "email notification delivery");
+
+ assert_eq!(hits[0].hit.symbol_name, "dispatchEmailNotification");
+ }
+
#[test]
fn search_filters_apply_before_collecting_hits() {
let store = TantivySearchStore::open(temp_search_dir("filtered-search"))
From 7ba5c6d401bbd7112648611da43de338aaf19890 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 20:04:36 +0700
Subject: [PATCH 03/31] feat: expand query synonyms in multi-word search
fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a curated concept->vocabulary synonym map (WS-1 / S4) so capability
phrases bridge to the identifiers code actually uses. In the disjunction
fallback each word becomes (word OR synonyms) — e.g. "login" also matches
authenticate/authentication — before the majority floor is applied.
The map is intentionally small and high-signal; the original word is
always searched too, so an unmapped word is a no-op. Lookup uses
eq_ignore_ascii_case (no allocation) per the repo's disallowed-methods.
Test: "login workflow" surfaces authenticateUser via the login ->
authenticate bridge, which returns empty without expansion.
---
.../gather-step-storage/src/search_store.rs | 73 ++++++++++++++++++-
1 file changed, 70 insertions(+), 3 deletions(-)
diff --git a/crates/gather-step-storage/src/search_store.rs b/crates/gather-step-storage/src/search_store.rs
index 6714238..1720dc2 100644
--- a/crates/gather-step-storage/src/search_store.rs
+++ b/crates/gather-step-storage/src/search_store.rs
@@ -714,8 +714,21 @@ impl TantivySearchStore {
let clauses: Vec<(Occur, Box)> = words
.iter()
.map(|word| {
- let (query, _) = parser.parse_query_lenient(word);
- (Occur::Should, query)
+ // Expand each word into (word OR synonyms) so capability phrases
+ // bridge to the symbol vocabulary (e.g. "login" -> authenticate).
+ let mut sub: Vec<(Occur, Box)> = Vec::new();
+ let (word_query, _) = parser.parse_query_lenient(word);
+ sub.push((Occur::Should, word_query));
+ for synonym in synonym_terms(word) {
+ let (synonym_query, _) = parser.parse_query_lenient(synonym);
+ sub.push((Occur::Should, synonym_query));
+ }
+ let clause: Box = if sub.len() == 1 {
+ sub.pop().expect("clause exists").1
+ } else {
+ Box::new(BooleanQuery::from(sub))
+ };
+ (Occur::Should, clause)
})
.collect();
let floor = words.len().div_ceil(2).max(1);
@@ -1134,6 +1147,36 @@ fn infra_repo_penalty(symbol_exact_boost: f32, is_exported: bool, repo: &str) ->
if is_infra { 0.85 } else { 1.0 }
}
+/// Curated concept→vocabulary synonym map (S4).
+///
+/// Returns extra terms to OR into a query word's clause in the disjunction
+/// fallback so capability phrases bridge to the identifiers code actually uses
+/// (e.g. "login" → authenticate). Intentionally small and high-signal; the
+/// original word is always searched too, so an empty list is a safe no-op.
+fn synonym_terms(word: &str) -> &'static [&'static str] {
+ const TABLE: &[(&[&str], &[&str])] = &[
+ (
+ &["login", "signin", "auth"],
+ &["authenticate", "authentication"],
+ ),
+ (&["logout", "signout"], &["deauthenticate"]),
+ (
+ &["paginate", "pagination", "paging"],
+ &["cursor", "offset", "page"],
+ ),
+ (&["delete", "remove"], &["destroy"]),
+ (&["create", "add"], &["insert"]),
+ (&["update", "edit"], &["modify"]),
+ (&["list", "fetch", "get"], &["find"]),
+ ];
+ for (keys, synonyms) in TABLE {
+ if keys.iter().any(|key| word.eq_ignore_ascii_case(key)) {
+ return synonyms;
+ }
+ }
+ &[]
+}
+
/// Term-coverage boost (S2).
///
/// Rewards hits whose symbol-name tokens cover a larger fraction of the query's
@@ -1149,7 +1192,9 @@ fn query_term_coverage_boost(query_tokens: &[String], symbol_name: &str) -> f32
.iter()
.filter(|qt| symbol_tokens.iter().any(|st| st == *qt))
.count();
- let coverage = covered as f32 / query_tokens.len() as f32;
+ // Token counts are tiny; u16 conversion avoids the cast-precision-loss lint.
+ let coverage = f32::from(u16::try_from(covered).unwrap_or(u16::MAX))
+ / f32::from(u16::try_from(query_tokens.len()).unwrap_or(u16::MAX));
1.0 + coverage * 0.25
}
@@ -1732,6 +1777,28 @@ mod tests {
assert_eq!(hits[0].hit.symbol_name, "dispatchEmailNotification");
}
+ #[test]
+ fn disjunction_fallback_bridges_query_synonyms_to_symbol_vocabulary() {
+ let store = TantivySearchStore::open(temp_search_dir("synonym-fallback"))
+ .expect("store should open");
+ let mut symbol = node("authenticateUser", "src/auth/authenticate-user.ts");
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // "login" is a concept synonym of the symbol's "authenticate" token;
+ // "workflow" matches nothing. Without synonym expansion the disjunction
+ // fallback finds no matching term and returns empty; the login →
+ // authenticate bridge must surface the symbol for capability search.
+ let results = store
+ .search("login workflow", 10)
+ .expect("search should succeed");
+
+ assert_eq!(results.len(), 1);
+ assert_eq!(results[0].symbol_name, "authenticateUser");
+ }
+
#[test]
fn search_filters_apply_before_collecting_hits() {
let store = TantivySearchStore::open(temp_search_dir("filtered-search"))
From 8414476e6bab7d1a60cbe1996f091eb123d17c12 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 20:21:44 +0700
Subject: [PATCH 04/31] feat: unified min_confidence edge filter with
None-passes semantics
Edge confidence was already stored on EdgeMetadata and rendered by trace,
but no traversal could filter on it and the meaning of a None confidence
was undefined (WS-2 / A14).
Add EdgeMetadata::passes_confidence(min): a None confidence is a definite
structural edge (import-resolved call, not a heuristic guess) and passes
any threshold; an explicit confidence is compared against it; a None
threshold disables filtering. This fixes the "structural edges silently
dropped" failure mode a naive `confidence >= min` would cause.
Thread an Option min_confidence through QueryEngine::traverse,
get_edges, and get_reverse_edges so trace/impact-style traversals can
grade proven vs guessed edges.
Tests: None passes any threshold, explicit-low is filtered, explicit-high
passes; and traversal keeps a structural edge while dropping a low-
confidence sibling under a threshold.
---
crates/gather-step-analysis/src/query.rs | 51 +++++++++++++++++++++++-
crates/gather-step-core/src/graph.rs | 30 ++++++++++++++
2 files changed, 79 insertions(+), 2 deletions(-)
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index 697d6eb..95475f6 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -40,11 +40,13 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
&self,
source: NodeId,
edge_kind: Option,
+ min_confidence: Option,
) -> Result, QueryError> {
let mut edges = self.store.get_outgoing(source)?;
if let Some(edge_kind) = edge_kind {
edges.retain(|edge| edge.kind == edge_kind);
}
+ edges.retain(|edge| edge.metadata.passes_confidence(min_confidence));
Ok(edges)
}
@@ -52,11 +54,13 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
&self,
target: NodeId,
edge_kind: Option,
+ min_confidence: Option,
) -> Result, QueryError> {
let mut edges = self.store.get_incoming(target)?;
if let Some(edge_kind) = edge_kind {
edges.retain(|edge| edge.kind == edge_kind);
}
+ edges.retain(|edge| edge.metadata.passes_confidence(min_confidence));
Ok(edges)
}
@@ -65,6 +69,7 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
start: NodeId,
edge_kinds: &[EdgeKind],
max_depth: usize,
+ min_confidence: Option,
) -> Result, QueryError> {
let mut queue = VecDeque::from([(start, Vec::::new(), 0_usize)]);
let mut seen = FxHashSet::from_iter([start.as_bytes()]);
@@ -79,6 +84,9 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
if !edge_kinds.is_empty() && !edge_kinds.contains(&edge.kind) {
continue;
}
+ if !edge.metadata.passes_confidence(min_confidence) {
+ continue;
+ }
let mut next_path = path.clone();
next_path.push(edge.kind);
if seen.insert(edge.target.as_bytes()) {
@@ -221,13 +229,13 @@ mod tests {
);
assert_eq!(
query
- .get_edges(a.id, Some(EdgeKind::Calls))
+ .get_edges(a.id, Some(EdgeKind::Calls), None)
.expect("edges should load")
.len(),
1
);
let traversed = query
- .traverse(a.id, &[EdgeKind::Calls], 2)
+ .traverse(a.id, &[EdgeKind::Calls], 2, None)
.expect("traversal should succeed");
assert_eq!(traversed.len(), 2);
assert!(
@@ -242,6 +250,45 @@ mod tests {
);
}
+ #[test]
+ fn min_confidence_filters_low_confidence_edges_but_keeps_structural() {
+ let temp_db = TempDb::new("query-confidence");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 0);
+ let trusted = node("service-a", "src/a.ts", NodeKind::Function, "trusted", 1);
+ let guessed = node("service-a", "src/a.ts", NodeKind::Function, "guessed", 2);
+
+ // a -> trusted has no confidence (a definite structural edge);
+ // a -> guessed is a low-confidence heuristic resolution.
+ let trusted_edge = edge(a.id, trusted.id, file.id);
+ let mut guessed_edge = edge(a.id, guessed.id, file.id);
+ guessed_edge.metadata.confidence = Some(300);
+
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), trusted.clone(), guessed.clone()],
+ &[trusted_edge, guessed_edge],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+
+ // No threshold: both edges traversed.
+ let all = query
+ .traverse(a.id, &[EdgeKind::Calls], 1, None)
+ .expect("traversal should succeed");
+ assert_eq!(all.len(), 2);
+
+ // Threshold above the heuristic edge: the low-confidence edge is
+ // dropped, but the structural (None) edge is kept.
+ let filtered = query
+ .traverse(a.id, &[EdgeKind::Calls], 1, Some(500))
+ .expect("traversal should succeed");
+ assert_eq!(filtered.len(), 1);
+ assert_eq!(filtered[0].node_id, trusted.id);
+ }
+
#[test]
fn counts_nodes_and_edges_by_kind() {
let temp_db = TempDb::new("counts");
diff --git a/crates/gather-step-core/src/graph.rs b/crates/gather-step-core/src/graph.rs
index b040a17..70c8374 100644
--- a/crates/gather-step-core/src/graph.rs
+++ b/crates/gather-step-core/src/graph.rs
@@ -194,6 +194,17 @@ impl EdgeMetadata {
self.resolver_strategy()
.map_or(0, crate::ResolverStrategy::strategy_weight)
}
+
+ /// Whether this edge passes a `min_confidence` traversal filter.
+ ///
+ /// `None` confidence means the edge is a definite structural fact (e.g. an
+ /// import-resolved call), not a heuristic guess, so it passes any threshold
+ /// rather than being filtered out. An explicit confidence is compared
+ /// against the threshold; a `None` threshold disables filtering entirely.
+ #[must_use]
+ pub fn passes_confidence(&self, min_confidence: Option) -> bool {
+ min_confidence.is_none_or(|min| self.confidence.is_none_or(|actual| actual >= min))
+ }
}
#[derive(
@@ -263,6 +274,25 @@ mod tests {
};
use crate::schema::{EdgeKind, NodeKind};
+ #[test]
+ fn passes_confidence_treats_none_as_trusted_and_filters_low() {
+ // No threshold disables filtering: everything passes.
+ let mut meta = EdgeMetadata::default();
+ assert!(meta.passes_confidence(None));
+
+ // None confidence is a definite structural edge (not a heuristic
+ // guess) and must pass any threshold rather than be filtered out.
+ assert!(meta.passes_confidence(Some(500)));
+
+ // An explicit low confidence is filtered out below the threshold.
+ meta.confidence = Some(300);
+ assert!(!meta.passes_confidence(Some(500)));
+
+ // At or above the threshold, it passes.
+ meta.confidence = Some(900);
+ assert!(meta.passes_confidence(Some(500)));
+ }
+
#[test]
fn node_id_is_deterministic_for_identical_inputs() {
let first = node_id("service-a", "src/foo.ts", NodeKind::Function, "execute");
From 3c31e56582627f5ee4002320a14bf43ecd390935 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 20:36:31 +0700
Subject: [PATCH 05/31] feat: graph-ranked reuse evidence in planning packs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Planning-pack ranking only boosted items that had an evidence chain, so
the canonical reusable symbol did not surface above local one-offs
(WS-2 / S3). Add a graph-derived reuse boost applied before the existing
evidence ranking:
- shared / design-system membership inferred from the file path (+40)
- sibling-consumer count from inbound graph edges (3 pts/consumer, capped
at 30 consumers so a hub node cannot dominate)
reuse_evidence_boost is a pure, unit-tested helper; apply_reuse_evidence_
ranking wires it to the graph at the planning call site. This is the
ranking half of "is this already a reusable component?" — it runs after
node rehydration at the pack layer, not inside the lexical reranker.
Test: a shared + widely-consumed symbol outranks a local one-off; more
consumers yield a larger boost.
---
crates/gather-step-mcp/src/tools/packs.rs | 74 +++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index 52c4d84..24be277 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -1114,6 +1114,7 @@ fn assemble_context_pack_for_symbol(
}
}
if mode == PackMode::Planning && evidence_populated {
+ apply_reuse_evidence_ranking(&mut items, ctx.graph());
apply_planning_evidence_ranking(&mut items);
}
@@ -2497,6 +2498,56 @@ fn populate_evidence_chain(
/// Items with a confirmed evidence chain (graph-proven path from anchor) are
/// boosted by 50 score points so they appear before lexical-only hits at
/// equivalent depth. The list is re-sorted by the updated scores.
+/// Whether a file path lives in a shared / design-system module — a strong
+/// signal that a symbol there is the canonical reusable thing rather than a
+/// local one-off. Markers are matched as-is (paths are conventionally
+/// lowercase), avoiding an allocation.
+fn is_shared_module_path(file_path: &str) -> bool {
+ const MARKERS: &[&str] = &[
+ "shared/",
+ "design-system",
+ "packages/ui",
+ "/ui/components",
+ "@shared",
+ ];
+ MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+/// S3 graph-derived reuse boost. A symbol in a shared/design-system path or
+/// consumed by many modules is more likely the canonical reusable surface, so
+/// it should rank above local candidates. Consumer influence is capped so a
+/// single hub node cannot dominate the ranking.
+fn reuse_evidence_boost(file_path: &str, consumer_count: usize) -> u16 {
+ let mut boost = 0_u16;
+ if is_shared_module_path(file_path) {
+ boost = boost.saturating_add(40);
+ }
+ // Each consumer adds weight, capped at 30 so a single hub node cannot
+ // dominate the ranking; 3 points per consumer => up to +90.
+ let consumers = u16::try_from(consumer_count.min(30)).unwrap_or(u16::MAX);
+ boost.saturating_add(consumers.saturating_mul(3))
+}
+
+/// Apply the S3 graph-derived reuse boost to planning-pack items: shared-module
+/// membership (from the path) plus sibling-consumer count (inbound edges in the
+/// graph). Runs before [`apply_planning_evidence_ranking`], which then sorts.
+fn apply_reuse_evidence_ranking(
+ items: &mut [PackItem],
+ graph: &S,
+) {
+ for item in items.iter_mut() {
+ if item.category == "target" {
+ continue;
+ }
+ let consumer_count = decode_node_id(&item.symbol_id)
+ .ok()
+ .and_then(|id| graph.get_incoming(id).ok())
+ .map_or(0, |edges| edges.len());
+ let boost = reuse_evidence_boost(&item.file_path, consumer_count);
+ item.score = item.score.saturating_add(boost);
+ }
+}
+
fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
for item in items.iter_mut() {
if item.evidence_chain.is_some() {
@@ -5172,6 +5223,29 @@ mod tests {
assert_eq!(items[0].score, 130);
}
+ #[test]
+ fn reuse_evidence_boost_prefers_shared_and_widely_consumed() {
+ use super::reuse_evidence_boost;
+
+ // Local one-off with no consumers gets no reuse boost.
+ assert_eq!(
+ reuse_evidence_boost("src/features/orders/local-button.tsx", 0),
+ 0
+ );
+
+ // A shared / design-system symbol is boosted even with no consumers.
+ assert!(reuse_evidence_boost("packages/ui/components/Button.tsx", 0) > 0);
+
+ // More consumers => larger boost (the canonical reusable thing).
+ assert!(reuse_evidence_boost("src/x.ts", 10) > reuse_evidence_boost("src/x.ts", 1));
+
+ // A shared + widely-consumed symbol outranks a local one-off.
+ assert!(
+ reuse_evidence_boost("packages/ui/Button.tsx", 8)
+ > reuse_evidence_boost("src/features/orders/local-button.tsx", 0)
+ );
+ }
+
#[test]
fn items_without_evidence_are_not_boosted() {
let mut items = vec![make_pack_item(80, "a.rs", false)];
From e03ee82ee505c0531118aee016ec208cb9871488 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 21:18:00 +0700
Subject: [PATCH 06/31] fix: rank reuse before truncation; filter consumers;
stable rerank
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Tightens WS-2 reuse ranking after review found the boost could not change
which items surface.
- HIGH: reuse ranking ran after items.truncate(limit), so a canonical
reusable symbol below the base-score cut was discarded before the boost
applied. Score reuse on a bounded window (limit*5) BEFORE the cut and
re-sort, so it can be promoted into the pack, not merely reordered.
- consumer count now excludes structural Defines/Imports edges and counts
distinct source nodes, mirroring the resolution scorer — raw inbound
edge volume overstated reuse.
- deterministic search rerank: equal adjusted scores now tie-break on
symbol name, stored path, then node id instead of Tantivy doc order.
Refactor: extract sort_pack_items for the shared pack-item comparator.
Tests: a shared, widely-consumed symbol with a lower base score is
promoted past a truncate-to-1; equal-score rerank hits sort stably.
---
crates/gather-step-mcp/src/tools/packs.rs | 103 +++++++++++++-----
.../gather-step-storage/src/search_store.rs | 38 +++++++
2 files changed, 112 insertions(+), 29 deletions(-)
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index 24be277..e4e642a 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -1084,22 +1084,26 @@ fn assemble_context_pack_for_symbol(
.map(|item| item.repo.clone())
.collect::>();
items.extend(contract_impact_items);
- items.sort_by(|left, right| {
- right
- .score
- .cmp(&left.score)
- .then(left.repo.cmp(&right.repo))
- .then(left.file_path.cmp(&right.file_path))
- .then(left.line_start.cmp(&right.line_start))
- .then(left.symbol_name.cmp(&right.symbol_name))
- .then(left.symbol_id.cmp(&right.symbol_id))
- });
+ sort_pack_items(&mut items);
items.dedup_by(|left, right| {
left.symbol_id == right.symbol_id && left.category == right.category
});
if let Some(repo) = options.repo_filter {
items.retain(|item| item.repo == repo);
}
+
+ // S3: apply the graph-derived reuse boost on a bounded candidate window
+ // BEFORE the cut, then re-sort, so a canonical reusable symbol can be
+ // promoted into the pack rather than discarded by the base-score truncate.
+ if mode == PackMode::Planning {
+ let window = options.limit.saturating_mul(5).min(items.len());
+ let graph = ctx.graph();
+ apply_reuse_boost_with(&mut items[..window], |symbol_id| {
+ consumer_count(graph, symbol_id)
+ });
+ sort_pack_items(&mut items);
+ }
+
items.truncate(options.limit);
let evidence_populated = items.len() <= 20;
@@ -1114,7 +1118,6 @@ fn assemble_context_pack_for_symbol(
}
}
if mode == PackMode::Planning && evidence_populated {
- apply_reuse_evidence_ranking(&mut items, ctx.graph());
apply_planning_evidence_ranking(&mut items);
}
@@ -2528,32 +2531,43 @@ fn reuse_evidence_boost(file_path: &str, consumer_count: usize) -> u16 {
boost.saturating_add(consumers.saturating_mul(3))
}
-/// Apply the S3 graph-derived reuse boost to planning-pack items: shared-module
-/// membership (from the path) plus sibling-consumer count (inbound edges in the
-/// graph). Runs before [`apply_planning_evidence_ranking`], which then sorts.
-fn apply_reuse_evidence_ranking(
- items: &mut [PackItem],
- graph: &S,
-) {
+/// Distinct reuse consumers of a symbol: the source nodes of inbound edges,
+/// excluding structural `Defines`/`Imports` edges that do not represent a
+/// caller/user. Mirrors the edge-kind filter used by the resolution scorer so
+/// the count reflects real consumers, not raw inbound-edge volume.
+fn consumer_count(graph: &S, symbol_id: &str) -> usize {
+ let Ok(node_id) = decode_node_id(symbol_id) else {
+ return 0;
+ };
+ let Ok(incoming) = graph.get_incoming(node_id) else {
+ return 0;
+ };
+ incoming
+ .iter()
+ .filter(|edge| !matches!(edge.kind, EdgeKind::Defines | EdgeKind::Imports))
+ .map(|edge| edge.source)
+ .collect::>()
+ .len()
+}
+
+/// Apply the S3 graph-derived reuse boost to planning-pack items via a
+/// consumer-count lookup. Generic over the lookup so the boost is unit-testable
+/// without a graph store. Must run BEFORE truncation so a canonical reusable
+/// symbol can be promoted into the pack, not merely reordered within an
+/// already-cut set.
+fn apply_reuse_boost_with(items: &mut [PackItem], consumer_count: impl Fn(&str) -> usize) {
for item in items.iter_mut() {
if item.category == "target" {
continue;
}
- let consumer_count = decode_node_id(&item.symbol_id)
- .ok()
- .and_then(|id| graph.get_incoming(id).ok())
- .map_or(0, |edges| edges.len());
- let boost = reuse_evidence_boost(&item.file_path, consumer_count);
+ let boost = reuse_evidence_boost(&item.file_path, consumer_count(&item.symbol_id));
item.score = item.score.saturating_add(boost);
}
}
-fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
- for item in items.iter_mut() {
- if item.evidence_chain.is_some() {
- item.score = item.score.saturating_add(50);
- }
- }
+/// Canonical ordering for planning-pack items: descending score, then stable
+/// lexical tie-breaks for deterministic output.
+fn sort_pack_items(items: &mut [PackItem]) {
items.sort_by(|left, right| {
right
.score
@@ -2566,6 +2580,15 @@ fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
});
}
+fn apply_planning_evidence_ranking(items: &mut [PackItem]) {
+ for item in items.iter_mut() {
+ if item.evidence_chain.is_some() {
+ item.score = item.score.saturating_add(50);
+ }
+ }
+ sort_pack_items(items);
+}
+
fn mode_pack_tool(
ctx: &McpContext,
request: ModePackRequest,
@@ -5246,6 +5269,28 @@ mod tests {
);
}
+ #[test]
+ fn reuse_boost_promotes_canonical_symbol_past_truncation() {
+ use super::{apply_reuse_boost_with, sort_pack_items};
+
+ // The local one-off has the higher base score; the shared, widely
+ // consumed component has a lower base score and would be cut by a
+ // base-score truncate to 1 — unless reuse ranking runs before the cut.
+ let mut items = vec![
+ make_pack_item(100, "src/features/orders/local.tsx", false),
+ make_pack_item(80, "packages/ui/components/Button.tsx", false),
+ ];
+
+ apply_reuse_boost_with(&mut items, |symbol_id| {
+ if symbol_id.contains("Button") { 12 } else { 0 }
+ });
+ sort_pack_items(&mut items);
+ items.truncate(1);
+
+ assert_eq!(items.len(), 1);
+ assert_eq!(items[0].file_path, "packages/ui/components/Button.tsx");
+ }
+
#[test]
fn items_without_evidence_are_not_boosted() {
let mut items = vec![make_pack_item(80, "a.rs", false)];
diff --git a/crates/gather-step-storage/src/search_store.rs b/crates/gather-step-storage/src/search_store.rs
index 1720dc2..4136a89 100644
--- a/crates/gather-step-storage/src/search_store.rs
+++ b/crates/gather-step-storage/src/search_store.rs
@@ -1311,6 +1311,11 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
.adjusted_score
.partial_cmp(&left.hit.adjusted_score)
.unwrap_or(Ordering::Equal)
+ // Deterministic tie-breaks so equal-score hits have a stable order
+ // regardless of the underlying Tantivy doc-id ordering.
+ .then_with(|| left.hit.symbol_name.cmp(&right.hit.symbol_name))
+ .then_with(|| left.file_path_stored.cmp(&right.file_path_stored))
+ .then_with(|| left.hit.node_id.cmp(&right.hit.node_id))
});
}
@@ -1777,6 +1782,39 @@ mod tests {
assert_eq!(hits[0].hit.symbol_name, "dispatchEmailNotification");
}
+ #[test]
+ fn rerank_breaks_score_ties_deterministically_by_symbol_name() {
+ use super::{ScoredSearchHit, SearchHit, rerank_hits};
+
+ fn scored(symbol: &str) -> ScoredSearchHit {
+ ScoredSearchHit {
+ hit: SearchHit {
+ node_id: node_id("service-a", "src/x.ts", NodeKind::Function, symbol),
+ repo: "service-a".to_owned(),
+ file_path: String::new(),
+ symbol_name: symbol.to_owned(),
+ node_kind: NodeKind::Function,
+ adjusted_score: 0.0,
+ exact_match: false,
+ is_exported: true,
+ lang: "typescript".to_owned(),
+ },
+ base_score: 1.0,
+ last_modified: 1,
+ file_path_stored: String::new(),
+ }
+ }
+
+ // Single-word query (no coverage boost): both hits resolve to the same
+ // adjusted score, so the symbol-name tie-break must impose a stable
+ // order regardless of input order.
+ let mut hits = vec![scored("zebra"), scored("alpha")];
+ rerank_hits(&mut hits, "unrelated");
+
+ assert_eq!(hits[0].hit.symbol_name, "alpha");
+ assert_eq!(hits[1].hit.symbol_name, "zebra");
+ }
+
#[test]
fn disjunction_fallback_bridges_query_synonyms_to_symbol_vocabulary() {
let store = TantivySearchStore::open(temp_search_dir("synonym-fallback"))
From 1a2f7737d13a3f62e0502ac7e10e653b3eb544b1 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 21:24:58 +0700
Subject: [PATCH 07/31] feat: typed plan_change product with nine planning
sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
plan_change was a bare alias for planning_pack, returning a
ContextPackResponse (WS-3 / E1). Make it a distinct typed product:
- PlanChangeResponse with the nine fixed sections (reuse_candidates,
sibling_clone_targets, standards_to_preserve, integration_checks,
cross_repo_reachability, write_path_or_state_machine_risks,
required_braingent_records, open_unknowns, verification_plan) — every
section always present so the contract is stable for consumers and the
G7 gate.
- build_plan_change projects the planning-pack data into those sections
(reuse_candidates = shared-module members surfaced by S3 ranking;
sibling_clone_targets = local related items; cross_repo_reachability =
planning proofs; open_unknowns = unresolved gaps). Pure over its inputs
for unit testing.
- plan_change_tool now returns Json via run_plan_change.
Sections needing proactive queries (B7) and evidentiary fields (G7) are
populated by later WS-3 slices; they ship as empty-but-present for now.
Test: the projection routes shared vs local items, gaps, and proofs into
the correct sections and excludes the target item.
---
crates/gather-step-mcp/src/server.rs | 10 +-
.../gather-step-mcp/src/tools/context_pack.rs | 4 +-
crates/gather-step-mcp/src/tools/packs.rs | 143 ++++++++++++++++++
3 files changed, 150 insertions(+), 7 deletions(-)
diff --git a/crates/gather-step-mcp/src/server.rs b/crates/gather-step-mcp/src/server.rs
index c93093c..d41ea3d 100644
--- a/crates/gather-step-mcp/src/server.rs
+++ b/crates/gather-step-mcp/src/server.rs
@@ -50,11 +50,11 @@ use crate::{
},
orientation::{GraphSchemaResponse, ListReposResponse, get_graph_schema, list_repos},
packs::{
- ContextPackRequest, ContextPackResponse, ModePackRequest,
+ ContextPackRequest, ContextPackResponse, ModePackRequest, PlanChangeResponse,
change_impact_pack_tool as run_change_impact_pack,
context_pack_tool as run_context_pack, debug_pack_tool as run_debug_pack,
fix_pack_tool as run_fix_pack, planning_pack_tool as run_planning_pack,
- review_pack_tool as run_review_pack,
+ review_pack_tool as run_review_pack, run_plan_change,
},
pr_review::{
PrReviewInput, PrReviewResponse, PrReviewSetInput, PrReviewSetResponse, run_pr_review,
@@ -784,17 +784,17 @@ impl GatherStepMcpServer {
#[tool(
name = "plan_change",
- description = "Return a bounded planning pack for a target symbol.",
+ description = "Return a typed plan-change product (nine planning sections) for a target symbol.",
annotations(read_only_hint = true)
)]
pub async fn plan_change_tool(
&self,
Parameters(request): Parameters,
- ) -> Result, String> {
+ ) -> Result, String> {
let args = serde_json::to_value(&request).unwrap_or_default();
let ctx = Arc::clone(&self.ctx);
self.traced_call("plan_change", &args, move || {
- run_planning_pack(&ctx, request)
+ run_plan_change(&ctx, request)
.map(Json)
.map_err(|error| error.to_string())
})
diff --git a/crates/gather-step-mcp/src/tools/context_pack.rs b/crates/gather-step-mcp/src/tools/context_pack.rs
index 34e35cf..445b50e 100644
--- a/crates/gather-step-mcp/src/tools/context_pack.rs
+++ b/crates/gather-step-mcp/src/tools/context_pack.rs
@@ -1,5 +1,5 @@
pub use crate::tools::packs::{
ContextPackData, ContextPackMeta, ContextPackRequest, ContextPackResponse, ModePackRequest,
- PackBridge, PackItem, context_pack_tool, debug_pack_tool, fix_pack_tool, planning_pack_tool,
- review_pack_tool,
+ PackBridge, PackItem, PlanChangeResponse, context_pack_tool, debug_pack_tool, fix_pack_tool,
+ planning_pack_tool, review_pack_tool, run_plan_change,
};
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index e4e642a..bf4d08d 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -207,6 +207,102 @@ pub struct ContextPackResponse {
pub meta: Option,
}
+/// Typed `plan_change` product (WS-3 / E1).
+///
+/// A distinct response shape — not an alias for the planning pack — with the
+/// nine fixed planning sections. v1 projects the existing planning-pack data
+/// into these sections; later slices (B7 proactive queries, G7 evidentiary
+/// fields) enrich them. Every section is always present (possibly empty) so the
+/// contract is stable for consumers and the G7 gate.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanChangeResponse {
+ pub target: String,
+ pub found: bool,
+ /// Existing reusable symbols a change should prefer over a new fork —
+ /// shared/design-system members surfaced by the S3 reuse ranking.
+ pub reuse_candidates: Vec,
+ /// Local siblings worth cloning/aligning with (same neighbourhood, not in a
+ /// shared module).
+ pub sibling_clone_targets: Vec,
+ pub standards_to_preserve: Vec,
+ pub integration_checks: Vec,
+ /// Cross-repo reachability proofs (producer/consumer edges) for the target.
+ pub cross_repo_reachability: Vec,
+ pub write_path_or_state_machine_risks: Vec,
+ pub required_braingent_records: Vec,
+ pub open_unknowns: Vec,
+ pub verification_plan: Vec,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub meta: Option,
+}
+
+/// Project planning-pack pieces into the typed `plan_change` sections (WS-3 /
+/// E1). Pure over its inputs so the projection is unit-testable without an MCP
+/// context or graph. Reuse candidates are shared-module members; remaining
+/// related items become sibling clone targets.
+fn build_plan_change(
+ target: &str,
+ found: bool,
+ items: &[PackItem],
+ unresolved_gaps: &[String],
+ planning_proofs: &[serde_json::Value],
+ next_steps: &[String],
+ meta: Option,
+) -> PlanChangeResponse {
+ let mut reuse_candidates = Vec::new();
+ let mut sibling_clone_targets = Vec::new();
+ for item in items {
+ if item.category == "target" {
+ continue;
+ }
+ if is_shared_module_path(&item.file_path) {
+ reuse_candidates.push(item.clone());
+ } else {
+ sibling_clone_targets.push(item.clone());
+ }
+ }
+
+ let verification_plan = next_steps
+ .iter()
+ .map(|step| format!("Run `{step}` and confirm the result before changing code."))
+ .collect();
+
+ PlanChangeResponse {
+ target: target.to_owned(),
+ found,
+ reuse_candidates,
+ sibling_clone_targets,
+ // Populated by later WS-3 slices (B7 proactive queries / G7 evidence).
+ standards_to_preserve: Vec::new(),
+ integration_checks: Vec::new(),
+ cross_repo_reachability: planning_proofs.to_vec(),
+ write_path_or_state_machine_risks: Vec::new(),
+ required_braingent_records: Vec::new(),
+ open_unknowns: unresolved_gaps.to_vec(),
+ verification_plan,
+ meta,
+ }
+}
+
+/// Build the typed `plan_change` product (WS-3 / E1): run the planning pack,
+/// then project its data into the nine fixed sections.
+pub fn run_plan_change(
+ ctx: &McpContext,
+ request: ModePackRequest,
+) -> Result {
+ let pack = planning_pack_tool(ctx, request)?;
+ let data = &pack.data;
+ Ok(build_plan_change(
+ &data.target,
+ data.found,
+ &data.items,
+ &data.unresolved_gaps,
+ &data.planning_proofs,
+ &data.next_steps,
+ pack.meta.clone(),
+ ))
+}
+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub struct ContextPackData {
pub mode: String,
@@ -5291,6 +5387,53 @@ mod tests {
assert_eq!(items[0].file_path, "packages/ui/components/Button.tsx");
}
+ #[test]
+ fn build_plan_change_projects_pack_into_typed_sections() {
+ use super::build_plan_change;
+
+ let mut target_item = make_pack_item(200, "src/orders/create-order.ts", false);
+ target_item.category = "target".to_owned();
+ let items = vec![
+ make_pack_item(120, "packages/ui/components/Button.tsx", false),
+ make_pack_item(90, "src/features/orders/local-helper.ts", false),
+ target_item,
+ ];
+ let gaps = vec!["Who emits status FINALIZED?".to_owned()];
+ let proofs = vec![serde_json::json!({"repo": "alert", "edge": "ConsumesApiFrom"})];
+ let next_steps = vec!["crud_trace".to_owned()];
+
+ let plan = build_plan_change(
+ "createOrder",
+ true,
+ &items,
+ &gaps,
+ &proofs,
+ &next_steps,
+ None,
+ );
+
+ assert!(plan.found);
+ // Shared component => reuse candidate; local helper => sibling target;
+ // the target item is excluded from both.
+ assert_eq!(plan.reuse_candidates.len(), 1);
+ assert_eq!(
+ plan.reuse_candidates[0].file_path,
+ "packages/ui/components/Button.tsx"
+ );
+ assert_eq!(plan.sibling_clone_targets.len(), 1);
+ assert_eq!(
+ plan.sibling_clone_targets[0].file_path,
+ "src/features/orders/local-helper.ts"
+ );
+ assert_eq!(plan.open_unknowns, gaps);
+ assert_eq!(plan.cross_repo_reachability.len(), 1);
+ assert_eq!(plan.verification_plan.len(), 1);
+ // Contract: the sections not yet populated still exist (empty).
+ assert!(plan.standards_to_preserve.is_empty());
+ assert!(plan.integration_checks.is_empty());
+ assert!(plan.required_braingent_records.is_empty());
+ }
+
#[test]
fn items_without_evidence_are_not_boosted() {
let mut items = vec![make_pack_item(80, "a.rs", false)];
From 6d93ccadefcd5734c54c320b8a7693e9bb63353d Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 21:46:09 +0700
Subject: [PATCH 08/31] feat: populate plan_change integration + write-path
sections (B7)
The typed plan_change sections integration_checks and
write_path_or_state_machine_risks shipped empty (WS-3 / B7). Surface the
change-impact evidence the planning pack already gathers into them:
- integration_checks: confirmed downstream consumers ("verify X still
works") and probable downstream repos ("check X, partial evidence").
- write_path_or_state_machine_risks: cross-repo callers whose contract
must be preserved, unresolved-possible impacts to confirm, and a note
when downstream fan-out was capped.
Kept as a pure projection over ChangeImpactSummary so it stays unit-
testable; threaded through run_plan_change from the pack data.
Test: confirmed downstream + cross-repo caller populate the two sections;
standards_to_preserve / required_braingent_records remain empty for G7.
---
crates/gather-step-mcp/src/tools/packs.rs | 63 +++++++++++++++++++++--
1 file changed, 58 insertions(+), 5 deletions(-)
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index bf4d08d..b68ca10 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -247,6 +247,7 @@ fn build_plan_change(
unresolved_gaps: &[String],
planning_proofs: &[serde_json::Value],
next_steps: &[String],
+ change_impact: &ChangeImpactSummary,
meta: Option,
) -> PlanChangeResponse {
let mut reuse_candidates = Vec::new();
@@ -267,16 +268,49 @@ fn build_plan_change(
.map(|step| format!("Run `{step}` and confirm the result before changing code."))
.collect();
+ // B7: surface the change-impact evidence the planning pack already gathered
+ // into the typed sections, rather than leaving them empty.
+ let mut integration_checks = Vec::new();
+ for repo in &change_impact.confirmed_downstream_repos {
+ integration_checks.push(format!(
+ "Integration: verify confirmed downstream consumer `{repo}` still works after this change."
+ ));
+ }
+ for repo in &change_impact.probable_downstream_repos {
+ integration_checks.push(format!(
+ "Integration: check probable downstream `{repo}` (partial evidence) for impact."
+ ));
+ }
+
+ let mut write_path_or_state_machine_risks = Vec::new();
+ for caller in &change_impact.cross_repo_callers {
+ write_path_or_state_machine_risks.push(format!(
+ "Cross-repo caller `{}::{}` depends on this behavior — preserve its contract.",
+ caller.repo, caller.symbol_name
+ ));
+ }
+ for unresolved in &change_impact.unresolved_possible {
+ write_path_or_state_machine_risks.push(format!(
+ "Unresolved possible impact: `{unresolved}` — confirm before changing."
+ ));
+ }
+ if change_impact.truncated_repos.is_some() {
+ write_path_or_state_machine_risks.push(
+ "Downstream fan-out was capped — affected repos exist beyond the listed set."
+ .to_owned(),
+ );
+ }
+
PlanChangeResponse {
target: target.to_owned(),
found,
reuse_candidates,
sibling_clone_targets,
- // Populated by later WS-3 slices (B7 proactive queries / G7 evidence).
+ // standards_to_preserve / required_braingent_records arrive with G7.
standards_to_preserve: Vec::new(),
- integration_checks: Vec::new(),
+ integration_checks,
cross_repo_reachability: planning_proofs.to_vec(),
- write_path_or_state_machine_risks: Vec::new(),
+ write_path_or_state_machine_risks,
required_braingent_records: Vec::new(),
open_unknowns: unresolved_gaps.to_vec(),
verification_plan,
@@ -299,6 +333,7 @@ pub fn run_plan_change(
&data.unresolved_gaps,
&data.planning_proofs,
&data.next_steps,
+ &data.change_impact,
pack.meta.clone(),
))
}
@@ -5401,6 +5436,19 @@ mod tests {
let gaps = vec!["Who emits status FINALIZED?".to_owned()];
let proofs = vec![serde_json::json!({"repo": "alert", "edge": "ConsumesApiFrom"})];
let next_steps = vec!["crud_trace".to_owned()];
+ let change_impact = super::ChangeImpactSummary {
+ confirmed_downstream_repos: vec!["alert".to_owned()],
+ cross_repo_callers: vec![super::CrossRepoCaller {
+ file_path: "src/consumer.ts".to_owned(),
+ line_start: None,
+ repo: "report".to_owned(),
+ symbol_id: "id_consumer".to_owned(),
+ symbol_kind: "function".to_owned(),
+ symbol_name: "useOrderTotals".to_owned(),
+ evidence: None,
+ }],
+ ..Default::default()
+ };
let plan = build_plan_change(
"createOrder",
@@ -5409,6 +5457,7 @@ mod tests {
&gaps,
&proofs,
&next_steps,
+ &change_impact,
None,
);
@@ -5428,9 +5477,13 @@ mod tests {
assert_eq!(plan.open_unknowns, gaps);
assert_eq!(plan.cross_repo_reachability.len(), 1);
assert_eq!(plan.verification_plan.len(), 1);
- // Contract: the sections not yet populated still exist (empty).
+ // B7: change-impact evidence now populates these sections.
+ assert_eq!(plan.integration_checks.len(), 1);
+ assert!(plan.integration_checks[0].contains("alert"));
+ assert_eq!(plan.write_path_or_state_machine_risks.len(), 1);
+ assert!(plan.write_path_or_state_machine_risks[0].contains("report::useOrderTotals"));
+ // Contract: sections awaiting G7 still exist (empty).
assert!(plan.standards_to_preserve.is_empty());
- assert!(plan.integration_checks.is_empty());
assert!(plan.required_braingent_records.is_empty());
}
From 54e0d1cbc5edd26674546d38deaaae0bd24a5efe Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 21:52:50 +0700
Subject: [PATCH 09/31] feat: evidentiary plan_change contract gate (G7)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Make the plan_change contract evidentiary, not just structural (WS-3 / G7):
- PlanChangeContract carries deterministic metadata — a schema version
(PLAN_CHANGE_SCHEMA_VERSION) and the fixed nine-section manifest in
canonical order — plus an exclusion_ledger recording what was dropped
(downstream fan-out caps, planning warnings) so a consumer never reads a
capped/filtered result as exhaustive.
- validate_plan_change_contract is the gate: it fails on a stale schema
version or a mangled/incomplete section manifest.
Rule provenance and requirements traceability are intentionally deferred
to a follow-up workstream (they need a rule-id registry and AC ingestion
that don't exist yet).
Tests: a freshly built product passes the gate deterministically; the
gate fails on a bumped schema version and on a popped section; the
exclusion ledger records a capped fan-out.
---
.../gather-step-mcp/src/tools/context_pack.rs | 5 +-
crates/gather-step-mcp/src/tools/packs.rs | 159 +++++++++++++++++-
2 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/crates/gather-step-mcp/src/tools/context_pack.rs b/crates/gather-step-mcp/src/tools/context_pack.rs
index 445b50e..bb3bae4 100644
--- a/crates/gather-step-mcp/src/tools/context_pack.rs
+++ b/crates/gather-step-mcp/src/tools/context_pack.rs
@@ -1,5 +1,6 @@
pub use crate::tools::packs::{
ContextPackData, ContextPackMeta, ContextPackRequest, ContextPackResponse, ModePackRequest,
- PackBridge, PackItem, PlanChangeResponse, context_pack_tool, debug_pack_tool, fix_pack_tool,
- planning_pack_tool, review_pack_tool, run_plan_change,
+ PackBridge, PackItem, PlanChangeContract, PlanChangeResponse, context_pack_tool,
+ debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool, run_plan_change,
+ validate_plan_change_contract,
};
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index b68ca10..94e8eaa 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -207,6 +207,37 @@ pub struct ContextPackResponse {
pub meta: Option,
}
+/// Schema version of the [`PlanChangeResponse`] contract. Bump on any change to
+/// the section set or contract shape.
+const PLAN_CHANGE_SCHEMA_VERSION: u8 = 1;
+
+/// The nine fixed `plan_change` section names, in canonical order. The contract
+/// manifest must equal this exactly so consumers (and the G7 gate) can assert
+/// completeness deterministically.
+const PLAN_CHANGE_SECTIONS: [&str; 9] = [
+ "reuse_candidates",
+ "sibling_clone_targets",
+ "standards_to_preserve",
+ "integration_checks",
+ "cross_repo_reachability",
+ "write_path_or_state_machine_risks",
+ "required_braingent_records",
+ "open_unknowns",
+ "verification_plan",
+];
+
+/// Evidentiary contract for the typed `plan_change` product (WS-3 / G7):
+/// deterministic metadata (schema version + the fixed section manifest) plus an
+/// exclusion ledger recording what was dropped from the response and why, so a
+/// consumer never mistakes a capped/filtered result for an exhaustive one.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanChangeContract {
+ pub schema_version: u8,
+ pub sections: Vec,
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
+ pub exclusion_ledger: Vec,
+}
+
/// Typed `plan_change` product (WS-3 / E1).
///
/// A distinct response shape — not an alias for the planning pack — with the
@@ -232,6 +263,8 @@ pub struct PlanChangeResponse {
pub required_braingent_records: Vec,
pub open_unknowns: Vec,
pub verification_plan: Vec,
+ /// Evidentiary contract: schema version, section manifest, exclusion ledger.
+ pub contract: PlanChangeContract,
#[serde(skip_serializing_if = "Option::is_none")]
pub meta: Option,
}
@@ -301,6 +334,30 @@ fn build_plan_change(
);
}
+ // G7: exclusion ledger — record what was dropped from the response so a
+ // consumer never reads a capped/filtered result as exhaustive.
+ let mut exclusion_ledger = Vec::new();
+ if let Some(truncated) = &change_impact.truncated_repos {
+ exclusion_ledger.push(format!(
+ "Downstream fan-out capped: {} repo(s) omitted ({}).",
+ truncated.count,
+ truncated.reason_codes.join(", ")
+ ));
+ }
+ if let Some(meta) = &meta {
+ for warning in &meta.warnings {
+ exclusion_ledger.push(format!("Planning warning: {warning}"));
+ }
+ }
+ let contract = PlanChangeContract {
+ schema_version: PLAN_CHANGE_SCHEMA_VERSION,
+ sections: PLAN_CHANGE_SECTIONS
+ .iter()
+ .map(|s| (*s).to_owned())
+ .collect(),
+ exclusion_ledger,
+ };
+
PlanChangeResponse {
target: target.to_owned(),
found,
@@ -314,10 +371,37 @@ fn build_plan_change(
required_braingent_records: Vec::new(),
open_unknowns: unresolved_gaps.to_vec(),
verification_plan,
+ contract,
meta,
}
}
+/// G7 contract gate: assert a [`PlanChangeResponse`] carries deterministic
+/// contract metadata — the current schema version and the exact canonical
+/// section manifest. Returns the list of violations (empty == passes).
+pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
+ let mut violations = Vec::new();
+ if plan.contract.schema_version != PLAN_CHANGE_SCHEMA_VERSION {
+ violations.push(format!(
+ "schema_version {} != expected {PLAN_CHANGE_SCHEMA_VERSION}",
+ plan.contract.schema_version
+ ));
+ }
+ if !plan
+ .contract
+ .sections
+ .iter()
+ .map(String::as_str)
+ .eq(PLAN_CHANGE_SECTIONS)
+ {
+ violations.push(format!(
+ "section manifest {:?} != canonical {PLAN_CHANGE_SECTIONS:?}",
+ plan.contract.sections
+ ));
+ }
+ violations
+}
+
/// Build the typed `plan_change` product (WS-3 / E1): run the planning pack,
/// then project its data into the nine fixed sections.
pub fn run_plan_change(
@@ -5482,11 +5566,84 @@ mod tests {
assert!(plan.integration_checks[0].contains("alert"));
assert_eq!(plan.write_path_or_state_machine_risks.len(), 1);
assert!(plan.write_path_or_state_machine_risks[0].contains("report::useOrderTotals"));
- // Contract: sections awaiting G7 still exist (empty).
+ // Contract: sections awaiting later work still exist (empty).
assert!(plan.standards_to_preserve.is_empty());
assert!(plan.required_braingent_records.is_empty());
}
+ #[test]
+ fn plan_change_contract_gate_validates_deterministic_metadata() {
+ use super::{
+ ChangeImpactSummary, PLAN_CHANGE_SCHEMA_VERSION, PLAN_CHANGE_SECTIONS,
+ build_plan_change, validate_plan_change_contract,
+ };
+
+ let mut plan = build_plan_change(
+ "createOrder",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+
+ // A freshly built product satisfies the contract: current schema
+ // version and the exact canonical section manifest, deterministically.
+ assert_eq!(plan.contract.schema_version, PLAN_CHANGE_SCHEMA_VERSION);
+ assert!(
+ plan.contract
+ .sections
+ .iter()
+ .map(String::as_str)
+ .eq(PLAN_CHANGE_SECTIONS)
+ );
+ assert!(validate_plan_change_contract(&plan).is_empty());
+
+ // The gate fails on a stale schema version.
+ plan.contract.schema_version = 99;
+ assert!(!validate_plan_change_contract(&plan).is_empty());
+ plan.contract.schema_version = PLAN_CHANGE_SCHEMA_VERSION;
+
+ // The gate fails on a mangled section manifest (missing/renamed section).
+ plan.contract.sections.pop();
+ assert!(!validate_plan_change_contract(&plan).is_empty());
+ }
+
+ #[test]
+ fn plan_change_exclusion_ledger_records_capped_fan_out() {
+ use super::{ChangeImpactSummary, TruncatedRepos, build_plan_change};
+
+ let change_impact = ChangeImpactSummary {
+ truncated_repos: Some(TruncatedRepos {
+ count: 4,
+ names: vec!["a".to_owned(), "b".to_owned()],
+ reason_codes: vec!["fan_out_cap".to_owned()],
+ }),
+ ..Default::default()
+ };
+ let plan = build_plan_change(
+ "createOrder",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &change_impact,
+ None,
+ );
+
+ assert!(
+ plan.contract
+ .exclusion_ledger
+ .iter()
+ .any(|entry| entry.contains("capped") && entry.contains('4')),
+ "ledger should record the capped fan-out: {:?}",
+ plan.contract.exclusion_ledger
+ );
+ }
+
#[test]
fn items_without_evidence_are_not_boosted() {
let mut items = vec![make_pack_item(80, "a.rs", false)];
From 397fa288daf80b537b0218d32ee1ccfe42bee3c7 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 22:30:02 +0700
Subject: [PATCH 10/31] fix: route batch_query plan_change to the typed product
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The direct MCP route used run_plan_change (typed nine-section product),
but the batch_query dispatcher still routed both planning_pack and
plan_change to planning_pack_tool, so batched callers silently got the
legacy ContextPackResponse shape — a split-brain contract.
- Split the composite dispatcher arm: plan_change now calls
run_plan_change; planning_pack keeps planning_pack_tool.
- Update the tool catalog: plan_change is the typed plan-change product,
not an "alias for planning-oriented context".
Test: a batch_query plan_change op returns reuse_candidates and the
contract section manifest (keys absent from ContextPackResponse), proving
the batch and direct routes now agree.
---
crates/gather-step-mcp/src/catalog.rs | 5 +-
crates/gather-step-mcp/src/tools/composite.rs | 13 ++---
.../tests/batch_plan_change.rs | 48 +++++++++++++++++++
3 files changed, 59 insertions(+), 7 deletions(-)
create mode 100644 crates/gather-step-mcp/tests/batch_plan_change.rs
diff --git a/crates/gather-step-mcp/src/catalog.rs b/crates/gather-step-mcp/src/catalog.rs
index 1ae4c72..20fee5f 100644
--- a/crates/gather-step-mcp/src/catalog.rs
+++ b/crates/gather-step-mcp/src/catalog.rs
@@ -92,7 +92,10 @@ pub const MCP_TOOLS: &[(&str, &str)] = &[
"planning_pack",
"Context pack for architecture and planning tasks",
),
- ("plan_change", "Alias for planning-oriented context"),
+ (
+ "plan_change",
+ "Typed plan-change product (nine planning sections)",
+ ),
("debug_pack", "Context pack for debugging production issues"),
("fix_pack", "Context pack scoped for a bug fix"),
("fix_surface", "Get a narrower fix-oriented surface"),
diff --git a/crates/gather-step-mcp/src/tools/composite.rs b/crates/gather-step-mcp/src/tools/composite.rs
index 9717e71..94aee1b 100644
--- a/crates/gather-step-mcp/src/tools/composite.rs
+++ b/crates/gather-step-mcp/src/tools/composite.rs
@@ -23,7 +23,7 @@ use crate::{
orientation::{RepoSummary, list_repos},
packs::{
ContextPackRequest, ModePackRequest, change_impact_pack_tool, context_pack_tool,
- debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool,
+ debug_pack_tool, fix_pack_tool, planning_pack_tool, review_pack_tool, run_plan_change,
},
repo_intelligence::{
DeadCodeRequest, RepoScopedRequest, WhoOwnsRequest, get_conventions_tool,
@@ -513,11 +513,12 @@ fn execute_batch_op(ctx: &McpContext, op: BatchQueryOperation) -> BatchQueryResu
context_pack_tool(ctx, args).and_then(to_value)
})
}
- "planning_pack" | "plan_change" => {
- parse_and_run::(op.arguments, |args| {
- planning_pack_tool(ctx, args).and_then(to_value)
- })
- }
+ "planning_pack" => parse_and_run::(op.arguments, |args| {
+ planning_pack_tool(ctx, args).and_then(to_value)
+ }),
+ "plan_change" => parse_and_run::(op.arguments, |args| {
+ run_plan_change(ctx, args).and_then(to_value)
+ }),
"debug_pack" => parse_and_run::(op.arguments, |args| {
debug_pack_tool(ctx, args).and_then(to_value)
}),
diff --git a/crates/gather-step-mcp/tests/batch_plan_change.rs b/crates/gather-step-mcp/tests/batch_plan_change.rs
new file mode 100644
index 0000000..7180640
--- /dev/null
+++ b/crates/gather-step-mcp/tests/batch_plan_change.rs
@@ -0,0 +1,48 @@
+//! Regression: `batch_query` must route `plan_change` to the typed
+//! nine-section product (`PlanChangeResponse`), not the legacy planning-pack
+//! `ContextPackResponse`. The direct MCP route and the batch route must agree.
+
+use std::sync::Arc;
+
+use gather_step_mcp::{
+ config::{McpContext, McpServerConfig},
+ server::GatherStepMcpServer,
+ tools::composite::{BatchQueryOperation, BatchQueryRequest},
+};
+use gather_step_storage::WorkspaceStores;
+use rmcp::handler::server::wrapper::Parameters;
+
+#[tokio::test]
+async fn batch_query_plan_change_returns_typed_product() {
+ let tmp = tempfile::tempdir().expect("tempdir");
+ let root = tmp.path();
+ let stores = Arc::new(WorkspaceStores::open(root).expect("workspace stores should open"));
+ let config = McpServerConfig::new(root.join("registry.json"), root.join("graph.redb"));
+ let ctx = McpContext::from_workspace_stores(config, stores);
+ let server = GatherStepMcpServer::new(ctx);
+
+ let request = BatchQueryRequest {
+ ops: vec![BatchQueryOperation {
+ tool: "plan_change".to_owned(),
+ arguments: serde_json::json!({ "target": "anySymbol" }),
+ }],
+ };
+
+ let response = server
+ .batch_query_tool(Parameters(request))
+ .await
+ .expect("batch_query should succeed");
+
+ // These keys exist only on the typed PlanChangeResponse, never on the
+ // legacy ContextPackResponse — so their presence proves the batch route
+ // now matches the direct route.
+ let body = serde_json::to_string(&response.0).expect("serialize batch response");
+ assert!(
+ body.contains("reuse_candidates"),
+ "batch plan_change must return the typed product (missing reuse_candidates): {body}"
+ );
+ assert!(
+ body.contains("verification_plan") && body.contains("\"sections\""),
+ "batch plan_change must include the contract section manifest: {body}"
+ );
+}
From 307a510ecd5c829b0f4f706e390423970c287ef1 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 22:33:57 +0700
Subject: [PATCH 11/31] feat: query-time index freshness detection (A13)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The index records last_commit_sha per repo and head_sha() resolves the
current HEAD, but nothing compared them at query time, so a trace/search
could silently answer about code that no longer matches the working tree
(WS-4 / A13). Storage and HEAD resolution already exist; this adds the
comparison.
- IndexFreshness { Fresh | Stale { indexed_sha, head_sha } | NeverIndexed }
— a query-time freshness verdict, distinct from HistorySyncOutcome
(which describes an indexing run).
- classify_freshness(indexed_sha, head_sha): pure, unit-testable.
- GitHistoryIndexer::index_freshness(indexed_sha): resolves HEAD and
classifies. Both re-exported at the crate root.
Test: matching SHA => Fresh; older indexed SHA => Stale (carrying both);
no recorded SHA => NeverIndexed.
---
crates/gather-step-git/src/history.rs | 77 ++++++++++++++++++++++++++-
crates/gather-step-git/src/lib.rs | 3 +-
2 files changed, 78 insertions(+), 2 deletions(-)
diff --git a/crates/gather-step-git/src/history.rs b/crates/gather-step-git/src/history.rs
index 30f2716..afa06c8 100644
--- a/crates/gather-step-git/src/history.rs
+++ b/crates/gather-step-git/src/history.rs
@@ -163,6 +163,44 @@ pub enum HistorySyncOutcome {
NoChange { repo: String, head_sha: String },
}
+/// Query-time freshness of an indexed repo relative to the working tree's
+/// current HEAD (A13). Distinct from [`HistorySyncOutcome`], which describes an
+/// indexing run; this answers "can I trust what the index says right now?" so
+/// a trace/search against a stale index can be flagged instead of silently
+/// answering about code that no longer matches the tree.
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum IndexFreshness {
+ /// The indexed commit matches the current HEAD — the index is current.
+ Fresh { head_sha: String },
+ /// The index reflects an older commit than the working tree's HEAD.
+ Stale {
+ indexed_sha: String,
+ head_sha: String,
+ },
+ /// No commit was recorded for this repo — it has never been indexed.
+ NeverIndexed { head_sha: String },
+}
+
+/// Classify index freshness by comparing the recorded indexed commit against
+/// the repository's current HEAD. Pure so it is unit-testable without git or a
+/// metadata store.
+#[must_use]
+pub fn classify_freshness(indexed_sha: Option<&str>, head_sha: &str) -> IndexFreshness {
+ match indexed_sha {
+ None => IndexFreshness::NeverIndexed {
+ head_sha: head_sha.to_owned(),
+ },
+ Some(sha) if sha == head_sha => IndexFreshness::Fresh {
+ head_sha: head_sha.to_owned(),
+ },
+ Some(sha) => IndexFreshness::Stale {
+ indexed_sha: sha.to_owned(),
+ head_sha: head_sha.to_owned(),
+ },
+ }
+}
+
impl HistorySyncOutcome {
#[must_use]
pub fn repo(&self) -> &str {
@@ -350,6 +388,17 @@ impl GitHistoryIndexer {
Ok(repo.head_id().map_err(Box::new)?.detach().to_string())
}
+ /// Classify how fresh an indexed repo is relative to its current HEAD (A13).
+ /// `indexed_sha` is the commit recorded for the repo (e.g. from
+ /// `MetadataStore::get_last_commit_sha`); `None` means never indexed.
+ pub fn index_freshness(
+ &self,
+ indexed_sha: Option<&str>,
+ ) -> Result {
+ let head = self.head_sha()?;
+ Ok(classify_freshness(indexed_sha, &head))
+ }
+
/// Walks the repository, persists the new commits + per-file deltas via
/// `store`, advances `repo_sync_state.last_commit_sha`, and returns the
/// outcome describing which sync branch ran.
@@ -1074,9 +1123,35 @@ mod tests {
use super::{
CommitFact, CommitFileChangeKind, CommitFileDelta, GitHistoryIndexer, GitRepoSource,
- HistorySyncOutcome, normalize_author_email_bytes,
+ HistorySyncOutcome, IndexFreshness, classify_freshness, normalize_author_email_bytes,
};
+ #[test]
+ fn classify_freshness_distinguishes_fresh_stale_and_never_indexed() {
+ // Indexed commit matches HEAD => fresh.
+ assert_eq!(
+ classify_freshness(Some("abc123"), "abc123"),
+ IndexFreshness::Fresh {
+ head_sha: "abc123".to_owned()
+ }
+ );
+ // Indexed commit older than HEAD => stale, carrying both SHAs.
+ assert_eq!(
+ classify_freshness(Some("old111"), "new222"),
+ IndexFreshness::Stale {
+ indexed_sha: "old111".to_owned(),
+ head_sha: "new222".to_owned(),
+ }
+ );
+ // No recorded commit => never indexed.
+ assert_eq!(
+ classify_freshness(None, "abc123"),
+ IndexFreshness::NeverIndexed {
+ head_sha: "abc123".to_owned()
+ }
+ );
+ }
+
/// Minimal hand-rolled `TempDir` that deletes itself on drop. Avoids
/// adding a `tempfile` dev-dependency.
struct TempDir {
diff --git a/crates/gather-step-git/src/lib.rs b/crates/gather-step-git/src/lib.rs
index 4c6868b..8ce1f07 100644
--- a/crates/gather-step-git/src/lib.rs
+++ b/crates/gather-step-git/src/lib.rs
@@ -24,7 +24,8 @@ pub use classify::{
};
pub use history::{
CommitFact, CommitFileChangeKind, CommitFileDelta, GitHistoryError, GitHistoryIndexer,
- GitHistorySyncError, GitIndexerOptions, GitRepoSource, HistorySyncOutcome,
+ GitHistorySyncError, GitIndexerOptions, GitRepoSource, HistorySyncOutcome, IndexFreshness,
+ classify_freshness,
};
pub use intelligence::{
RepoIntelligenceError, RepoIntelligenceOptions, RepoIntelligenceReport,
From 3786da4adf294dacd6060645991c6a7386a74971 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 22:54:23 +0700
Subject: [PATCH 12/31] refactor: address plan_change/reuse review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Follow-up review pass on the recall + reuse + plan_change work.
Correctness:
- plan_change exclusion_ledger now records standards_to_preserve and
required_braingent_records as "not yet computed", so an empty section
is never read as "nothing applies"; stale "arrive with G7" comment fixed.
- Synonym table is now symmetric concept groups, so recall no longer
depends on which side of a pair the user typed (login <-> authenticate).
- is_shared_module_path markers made language-agnostic (common/, /lib/,
libs/, internal/, /pkg/, packages/) so non-JS layouts aren't mis-bucketed.
Cleanup / perf:
- Extracted EdgeKind::is_consumer_edge() in core; replaced the four
duplicated `!matches!(.., Defines | Imports)` filters (packs x3, anchor).
- Reuse boost precomputes distinct consumer counts in one pre-pass
(deduped by symbol_id) instead of per-item graph I/O in the loop, and
re-sorts only the boosted window (bounded) instead of the full vec.
Docs (explicit reviewer decisions):
- limit*5 reuse window is a documented recall ceiling for high fan-out.
- exact-match boost is inert for multi-word queries (coverage ranks them).
- ceil(n/2)=1 partial-match recall for 2-word fallback is intentional.
Tests: symmetric-synonym reverse bridge; ledger records not-yet-computed
sections.
Not changed: threading Canonical onto PackItem (#6-full) — canonical_for_
node yields cross-repo identity, not a reusable-module signal, so it is the
wrong input for reuse_candidates; a true classifier is the deferred A6 work.
---
crates/gather-step-analysis/src/anchor.rs | 2 +-
crates/gather-step-core/src/schema.rs | 9 +++
crates/gather-step-mcp/src/tools/packs.rs | 57 +++++++++++++--
.../gather-step-storage/src/search_store.rs | 69 ++++++++++++++-----
4 files changed, 112 insertions(+), 25 deletions(-)
diff --git a/crates/gather-step-analysis/src/anchor.rs b/crates/gather-step-analysis/src/anchor.rs
index c68cb0e..8c42c99 100644
--- a/crates/gather-step-analysis/src/anchor.rs
+++ b/crates/gather-step-analysis/src/anchor.rs
@@ -148,7 +148,7 @@ fn score_candidate(
if boundary_bonus > 0.0 {
let downstream = outgoing
.iter()
- .filter(|edge| !matches!(edge.kind, EdgeKind::Defines | EdgeKind::Imports))
+ .filter(|edge| edge.kind.is_consumer_edge())
.count();
rationale.push(AnchorRationale::ControllerService {
downstream_nodes: downstream,
diff --git a/crates/gather-step-core/src/schema.rs b/crates/gather-step-core/src/schema.rs
index 80e3caf..112fd8a 100644
--- a/crates/gather-step-core/src/schema.rs
+++ b/crates/gather-step-core/src/schema.rs
@@ -227,6 +227,15 @@ impl EdgeKind {
self as u8
}
+ /// Whether this edge represents a *consumer*/usage of the target rather
+ /// than a structural `Defines` (file→symbol) or `Imports` edge. Used to
+ /// count real consumers for reuse ranking and resolution scoring, so a
+ /// "consumer count" reflects callers/users, not raw inbound-edge volume.
+ #[must_use]
+ pub const fn is_consumer_edge(self) -> bool {
+ !matches!(self, Self::Defines | Self::Imports)
+ }
+
/// Whether this edge kind represents a semantic bridge link
/// connecting a real symbol to a virtual bridge node (`Route`, `Topic`,
/// `SharedSymbol`, `PayloadContract`, …) — i.e. an edge whose correctness
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index 94e8eaa..a0360c3 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -349,6 +349,12 @@ fn build_plan_change(
exclusion_ledger.push(format!("Planning warning: {warning}"));
}
}
+ // Sections that are intentionally not computed yet are recorded here so an
+ // empty value is read as "not computed", not "nothing applies".
+ exclusion_ledger
+ .push("standards_to_preserve: not yet computed (no standards engine).".to_owned());
+ exclusion_ledger
+ .push("required_braingent_records: not yet computed (no record ingestion).".to_owned());
let contract = PlanChangeContract {
schema_version: PLAN_CHANGE_SCHEMA_VERSION,
sections: PLAN_CHANGE_SECTIONS
@@ -363,7 +369,8 @@ fn build_plan_change(
found,
reuse_candidates,
sibling_clone_targets,
- // standards_to_preserve / required_braingent_records arrive with G7.
+ // Not yet computed; their absence is recorded in the exclusion ledger
+ // so an empty value is not read as "nothing applies".
standards_to_preserve: Vec::new(),
integration_checks,
cross_repo_reachability: planning_proofs.to_vec(),
@@ -1311,12 +1318,28 @@ fn assemble_context_pack_for_symbol(
// BEFORE the cut, then re-sort, so a canonical reusable symbol can be
// promoted into the pack rather than discarded by the base-score truncate.
if mode == PackMode::Planning {
+ // KNOWN CEILING: the reuse boost re-ranks the top `limit*5` base-ranked
+ // candidates; a canonical symbol ranked below that window for a very
+ // high-fan-out target is not rescued. The window keeps the boost (and
+ // the bounded re-sort below) cheap.
let window = options.limit.saturating_mul(5).min(items.len());
let graph = ctx.graph();
+ // Precompute distinct consumer counts in one pre-pass so the boost loop
+ // does map lookups instead of per-item graph I/O, deduped by symbol_id.
+ let mut consumer_counts: std::collections::BTreeMap =
+ std::collections::BTreeMap::new();
+ for item in &items[..window] {
+ consumer_counts
+ .entry(item.symbol_id.clone())
+ .or_insert_with(|| consumer_count(graph, &item.symbol_id));
+ }
apply_reuse_boost_with(&mut items[..window], |symbol_id| {
- consumer_count(graph, symbol_id)
+ consumer_counts.get(symbol_id).copied().unwrap_or(0)
});
- sort_pack_items(&mut items);
+ // The boost only raised window items' scores, and the window holds the
+ // top base-ranked candidates, so every boosted item still outranks the
+ // untouched tail — re-sorting just the window keeps global order.
+ sort_pack_items(&mut items[..window]);
}
items.truncate(options.limit);
@@ -2721,12 +2744,21 @@ fn populate_evidence_chain(
/// local one-off. Markers are matched as-is (paths are conventionally
/// lowercase), avoiding an allocation.
fn is_shared_module_path(file_path: &str) -> bool {
+ // A path heuristic spanning common JS, Rust, Go, and Python shared-module
+ // layouts. NOTE: this is a heuristic; the pipeline-wide canonical signal
+ // (pack_assembly's `Canonical`) is the longer-term source of truth, pending
+ // threading it onto `PackItem`.
const MARKERS: &[&str] = &[
"shared/",
"design-system",
- "packages/ui",
+ "packages/",
"/ui/components",
"@shared",
+ "common/",
+ "/lib/",
+ "libs/",
+ "internal/",
+ "/pkg/",
];
MARKERS.iter().any(|marker| file_path.contains(marker))
}
@@ -2759,7 +2791,7 @@ fn consumer_count(graph: &S, symbol_id: &str
};
incoming
.iter()
- .filter(|edge| !matches!(edge.kind, EdgeKind::Defines | EdgeKind::Imports))
+ .filter(|edge| edge.kind.is_consumer_edge())
.map(|edge| edge.source)
.collect::>()
.len()
@@ -3814,12 +3846,12 @@ fn pack_resolution_edge_score(ctx: &McpContext, symbol_id: &str) -> Result =
Box::new(BooleanQuery::with_minimum_required_clauses(clauses, floor));
@@ -1154,24 +1159,29 @@ fn infra_repo_penalty(symbol_exact_boost: f32, is_exported: bool, repo: &str) ->
/// (e.g. "login" → authenticate). Intentionally small and high-signal; the
/// original word is always searched too, so an empty list is a safe no-op.
fn synonym_terms(word: &str) -> &'static [&'static str] {
- const TABLE: &[(&[&str], &[&str])] = &[
- (
- &["login", "signin", "auth"],
- &["authenticate", "authentication"],
- ),
- (&["logout", "signout"], &["deauthenticate"]),
- (
- &["paginate", "pagination", "paging"],
- &["cursor", "offset", "page"],
- ),
- (&["delete", "remove"], &["destroy"]),
- (&["create", "add"], &["insert"]),
- (&["update", "edit"], &["modify"]),
- (&["list", "fetch", "get"], &["find"]),
+ // Symmetric concept groups: any member expands to the whole group, so
+ // recall does not depend on which side of a synonym pair the user typed
+ // (e.g. "login" and "authenticate" reach each other). The word's own clause
+ // is also added by the caller; a duplicate OR clause is harmless.
+ const GROUPS: &[&[&str]] = &[
+ &["login", "signin", "auth", "authenticate", "authentication"],
+ &["logout", "signout", "deauthenticate"],
+ &[
+ "paginate",
+ "pagination",
+ "paging",
+ "cursor",
+ "offset",
+ "page",
+ ],
+ &["delete", "remove", "destroy"],
+ &["create", "add", "insert"],
+ &["update", "edit", "modify"],
+ &["list", "fetch", "get", "find"],
];
- for (keys, synonyms) in TABLE {
- if keys.iter().any(|key| word.eq_ignore_ascii_case(key)) {
- return synonyms;
+ for group in GROUPS {
+ if group.iter().any(|member| word.eq_ignore_ascii_case(member)) {
+ return group;
}
}
&[]
@@ -1241,6 +1251,10 @@ fn rerank_hits(hits: &mut [ScoredSearchHit], query: &str) {
// Raised from 1.4× to break ties between multiple same-name hits where
// secondary factors (BM25, recency) were pulling unrelated-repo symbols
// above the user's likely target.
+ // NOTE: for a multi-word query (which contains spaces) this exact-match
+ // boost is always neutral — symbol names have no spaces, so neither
+ // comparison can hit. Term-coverage is the ordering signal for
+ // multi-word queries; this boost only differentiates single-token ones.
let symbol_name_matches = scored.hit.symbol_name.eq_ignore_ascii_case(query);
let symbol_token_matches = !symbol_name_matches
&& first_code_token(&scored.hit.symbol_name).eq_ignore_ascii_case(query);
@@ -1837,6 +1851,27 @@ mod tests {
assert_eq!(results[0].symbol_name, "authenticateUser");
}
+ #[test]
+ fn disjunction_fallback_synonyms_are_symmetric() {
+ let store = TantivySearchStore::open(temp_search_dir("synonym-symmetric"))
+ .expect("store should open");
+ let mut symbol = node("loginUser", "src/auth/login-user.ts");
+ symbol.signature = None;
+ store
+ .index_symbol(&SearchDocument::from_node(&symbol, 1))
+ .expect("document should index");
+
+ // Reverse of the previous test: the symbol uses "login" and the query
+ // uses "authenticate". Symmetric synonym groups must bridge either
+ // direction, so recall does not depend on which term the user typed.
+ let results = store
+ .search("authenticate workflow", 10)
+ .expect("search should succeed");
+
+ assert_eq!(results.len(), 1);
+ assert_eq!(results[0].symbol_name, "loginUser");
+ }
+
#[test]
fn search_filters_apply_before_collecting_hits() {
let store = TantivySearchStore::open(temp_search_dir("filtered-search"))
From 24bf64762376425a45ca5a56d08d4578b54c51dc Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:03:02 +0700
Subject: [PATCH 13/31] feat: traverse provenance + depth/fan-out cap signaling
(B8/B9)
---
crates/gather-step-analysis/src/query.rs | 199 +++++++++++++++++++++--
1 file changed, 183 insertions(+), 16 deletions(-)
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index 95475f6..00eb9d1 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -2,9 +2,14 @@ use std::collections::{BTreeMap, VecDeque};
use gather_step_core::{EdgeData, EdgeKind, NodeData, NodeId, NodeKind};
use gather_step_storage::{GraphStore, GraphStoreError};
-use rustc_hash::FxHashSet;
+use rustc_hash::{FxHashMap, FxHashSet};
use thiserror::Error;
+/// Maximum out-edges a single node contributes, and the maximum number of
+/// distinct in-paths recorded per node, before a traversal marks itself
+/// `truncated`. Bounds memory/work on very-high-fan-out hub nodes.
+const DEFAULT_FANOUT_CAP: usize = 256;
+
#[derive(Debug, Error)]
pub enum QueryError {
#[error(transparent)]
@@ -14,8 +19,23 @@ pub enum QueryError {
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TraversalStep {
pub node_id: NodeId,
+ /// The first (BFS-shortest) path of edge kinds discovered into this node.
pub edge_kinds: Vec,
pub depth: usize,
+ /// Every distinct path of edge kinds that reaches this node (capped at
+ /// `DEFAULT_FANOUT_CAP`). A node reachable N ways reports N caller paths.
+ pub in_paths: Vec>,
+}
+
+/// The result of a bounded graph traversal, carrying the visited steps plus
+/// signals about whether the walk was cut short by a depth or fan-out bound.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct TraversalOutcome {
+ pub steps: Vec,
+ /// At least one node sat at `max_depth` with deeper edges left unfollowed.
+ pub depth_capped: bool,
+ /// At least one node's fan-out (or recorded in-paths) hit `DEFAULT_FANOUT_CAP`.
+ pub truncated: bool,
}
pub struct GraphQuery<'a, S> {
@@ -71,36 +91,92 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
max_depth: usize,
min_confidence: Option,
) -> Result, QueryError> {
+ Ok(self
+ .traverse_with_provenance(start, edge_kinds, max_depth, min_confidence)?
+ .steps)
+ }
+
+ /// Bounded BFS that records multi-path provenance (every distinct path into
+ /// each node) and signals when the walk was cut short by the depth limit
+ /// (`depth_capped`) or the fan-out bound (`truncated`).
+ pub fn traverse_with_provenance(
+ &self,
+ start: NodeId,
+ edge_kinds: &[EdgeKind],
+ max_depth: usize,
+ min_confidence: Option,
+ ) -> Result {
let mut queue = VecDeque::from([(start, Vec::::new(), 0_usize)]);
- let mut seen = FxHashSet::from_iter([start.as_bytes()]);
- let mut steps = Vec::new();
+ let mut enqueued = FxHashSet::from_iter([start.as_bytes()]);
+ // Discovery order of reached nodes, so step order is deterministic.
+ let mut order: Vec = Vec::new();
+ // First-discovered path + depth per node.
+ let mut primary: FxHashMap<[u8; 16], (Vec, usize)> = FxHashMap::default();
+ // Every distinct path into each node (bounded).
+ let mut in_paths: FxHashMap<[u8; 16], Vec>> = FxHashMap::default();
+ let mut depth_capped = false;
+ let mut truncated = false;
while let Some((node_id, path, depth)) = queue.pop_front() {
+ let outgoing: Vec = self
+ .store
+ .get_outgoing(node_id)?
+ .into_iter()
+ .filter(|edge| edge_kinds.is_empty() || edge_kinds.contains(&edge.kind))
+ .filter(|edge| edge.metadata.passes_confidence(min_confidence))
+ .collect();
+
if depth >= max_depth {
+ if !outgoing.is_empty() {
+ depth_capped = true;
+ }
continue;
}
- for edge in self.store.get_outgoing(node_id)? {
- if !edge_kinds.is_empty() && !edge_kinds.contains(&edge.kind) {
- continue;
- }
- if !edge.metadata.passes_confidence(min_confidence) {
- continue;
+ for (index, edge) in outgoing.iter().enumerate() {
+ if index >= DEFAULT_FANOUT_CAP {
+ truncated = true;
+ break;
}
let mut next_path = path.clone();
next_path.push(edge.kind);
- if seen.insert(edge.target.as_bytes()) {
- steps.push(TraversalStep {
- node_id: edge.target,
- edge_kinds: next_path.clone(),
- depth: depth + 1,
- });
+ let key = edge.target.as_bytes();
+
+ let recorded = in_paths.entry(key).or_default();
+ if recorded.len() < DEFAULT_FANOUT_CAP {
+ recorded.push(next_path.clone());
+ } else {
+ truncated = true;
+ }
+
+ if enqueued.insert(key) {
+ order.push(edge.target);
+ primary.insert(key, (next_path.clone(), depth + 1));
queue.push_back((edge.target, next_path, depth + 1));
}
}
}
- Ok(steps)
+ let steps = order
+ .into_iter()
+ .map(|node_id| {
+ let key = node_id.as_bytes();
+ let (edges, depth) = primary.get(&key).cloned().unwrap_or_default();
+ let paths = in_paths.get(&key).cloned().unwrap_or_default();
+ TraversalStep {
+ node_id,
+ edge_kinds: edges,
+ depth,
+ in_paths: paths,
+ }
+ })
+ .collect();
+
+ Ok(TraversalOutcome {
+ steps,
+ depth_capped,
+ truncated,
+ })
}
pub fn count_by_kind(&self) -> Result, QueryError> {
@@ -289,6 +365,97 @@ mod tests {
assert_eq!(filtered[0].node_id, trusted.id);
}
+ #[test]
+ fn traverse_with_provenance_flags_depth_capping() {
+ let temp_db = TempDb::new("query-depth-cap");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 0);
+ let b = node("service-a", "src/a.ts", NodeKind::Function, "b", 1);
+ let c = node("service-a", "src/a.ts", NodeKind::Function, "c", 2);
+ let d = node("service-a", "src/a.ts", NodeKind::Function, "d", 3);
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone(), c.clone(), d.clone()],
+ &[
+ edge(a.id, b.id, file.id),
+ edge(b.id, c.id, file.id),
+ edge(c.id, d.id, file.id),
+ ],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+
+ // A chain longer than max_depth: c sits at the depth boundary and still
+ // has an outgoing edge to d, so the traversal must flag depth capping.
+ let capped = query
+ .traverse_with_provenance(a.id, &[EdgeKind::Calls], 2, None)
+ .expect("traversal should succeed");
+ assert_eq!(capped.steps.len(), 2);
+ assert!(
+ capped.depth_capped,
+ "deeper edge beyond max_depth not flagged"
+ );
+ assert!(!capped.truncated);
+
+ // A depth that reaches the whole chain leaves nothing capped.
+ let full = query
+ .traverse_with_provenance(a.id, &[EdgeKind::Calls], 8, None)
+ .expect("traversal should succeed");
+ assert_eq!(full.steps.len(), 3);
+ assert!(!full.depth_capped);
+ }
+
+ #[test]
+ fn traverse_with_provenance_records_every_in_path() {
+ let temp_db = TempDb::new("query-provenance");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let root = node("service-a", "src/a.ts", NodeKind::Function, "root", 0);
+ let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 1);
+ let b = node("service-a", "src/a.ts", NodeKind::Function, "b", 2);
+ let c = node("service-a", "src/a.ts", NodeKind::Function, "c", 3);
+ // sink is reachable three ways: root->a->sink, root->b->sink, root->c->sink.
+ let sink = node("service-a", "src/a.ts", NodeKind::Function, "sink", 4);
+ store
+ .bulk_insert(
+ &[
+ file.clone(),
+ root.clone(),
+ a.clone(),
+ b.clone(),
+ c.clone(),
+ sink.clone(),
+ ],
+ &[
+ edge(root.id, a.id, file.id),
+ edge(root.id, b.id, file.id),
+ edge(root.id, c.id, file.id),
+ edge(a.id, sink.id, file.id),
+ edge(b.id, sink.id, file.id),
+ edge(c.id, sink.id, file.id),
+ ],
+ )
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+ let outcome = query
+ .traverse_with_provenance(root.id, &[EdgeKind::Calls], 4, None)
+ .expect("traversal should succeed");
+
+ let sink_step = outcome
+ .steps
+ .iter()
+ .find(|step| step.node_id == sink.id)
+ .expect("sink should be reached");
+ assert_eq!(
+ sink_step.in_paths.len(),
+ 3,
+ "a node reachable three ways should report three caller paths"
+ );
+ }
+
#[test]
fn counts_nodes_and_edges_by_kind() {
let temp_db = TempDb::new("counts");
From 6febce8660581d5120afa31a14ee8bb91568b406 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:06:32 +0700
Subject: [PATCH 14/31] feat: surface A13 index freshness in status output
---
crates/gather-step-cli/src/commands/status.rs | 68 ++++++++++++++++++-
1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/crates/gather-step-cli/src/commands/status.rs b/crates/gather-step-cli/src/commands/status.rs
index 38d9ffe..f189b74 100644
--- a/crates/gather-step-cli/src/commands/status.rs
+++ b/crates/gather-step-cli/src/commands/status.rs
@@ -6,8 +6,9 @@ use gather_step_analysis::{
GraphQuery, SemanticHealthReport, semantic_health_for_repo, semantic_health_for_workspace,
};
use gather_step_core::{DepthLevel, RegistryStore};
+use gather_step_git::{GitHistoryIndexer, GitRepoSource, IndexFreshness};
use gather_step_mcp::output::redact::relativize_to_workspace;
-use gather_step_storage::{ContextPackStats, GraphStore, StorageCoordinator};
+use gather_step_storage::{ContextPackStats, GraphStore, MetadataStore, StorageCoordinator};
use serde::Serialize;
use serde_json::{Value, json};
@@ -37,6 +38,9 @@ struct RepoStatusOutput {
path: String,
path_exists: bool,
depth_level: String,
+ /// Query-time index freshness vs the working tree's HEAD (A13): one of
+ /// `fresh`, `stale`, `never_indexed`, or `unknown` (git unavailable).
+ freshness: String,
last_indexed_at: Option,
registry_file_count: u64,
registry_symbol_count: u64,
@@ -190,12 +194,18 @@ pub(crate) fn execute(
unresolved_inputs,
)
.with_context(|| format!("computing semantic health for `{repo}`"))?;
+ let indexed_sha = storage
+ .metadata()
+ .get_last_commit_sha(repo)
+ .with_context(|| format!("loading indexed commit SHA for `{repo}`"))?;
+ let freshness = repo_freshness(repo, ®istered.path, indexed_sha.as_deref());
Ok(RepoStatusOutput {
repo: repo.clone(),
path: relativize_to_workspace(®istered.path, workspace_path),
path_exists: registered.path.exists(),
depth_level: depth_label(registered.depth_level).to_owned(),
+ freshness,
last_indexed_at: registered.last_indexed_at.clone(),
registry_file_count: registered.file_count,
registry_symbol_count: registered.symbol_count,
@@ -268,6 +278,7 @@ pub(crate) fn execute(
repo_table.set_header(vec![
"Repo",
"Depth",
+ "Freshness",
"Indexed",
"Files",
"Symbols",
@@ -280,6 +291,7 @@ pub(crate) fn execute(
repo_table.add_row(vec![
Cell::new(&repo.repo),
Cell::new(&repo.depth_level),
+ Cell::new(&repo.freshness),
Cell::new(repo.last_indexed_at.as_deref().unwrap_or("never")),
Cell::new(format!(
"{}/{}",
@@ -330,6 +342,25 @@ fn format_semantic_summary(health: &SemanticHealthReport) -> String {
)
}
+/// Classify a repo's index freshness vs its current git HEAD (A13). Opening
+/// git is best-effort: a non-git path or git failure yields `unknown` rather
+/// than failing the whole status command.
+fn repo_freshness(repo: &str, path: &std::path::Path, indexed_sha: Option<&str>) -> String {
+ let indexer = GitHistoryIndexer::new(GitRepoSource::from_path(path), repo);
+ match indexer.index_freshness(indexed_sha) {
+ Ok(freshness) => freshness_label(&freshness).to_owned(),
+ Err(_) => "unknown".to_owned(),
+ }
+}
+
+fn freshness_label(freshness: &IndexFreshness) -> &'static str {
+ match freshness {
+ IndexFreshness::Fresh { .. } => "fresh",
+ IndexFreshness::Stale { .. } => "stale",
+ IndexFreshness::NeverIndexed { .. } => "never_indexed",
+ }
+}
+
fn depth_label(depth: DepthLevel) -> &'static str {
match depth {
DepthLevel::Level1 => "level1",
@@ -470,5 +501,40 @@ mod tests {
Some("status_completed"),
"status payload should have event=status_completed"
);
+ // A13: every repo row carries a query-time index-freshness verdict.
+ for repo in repos {
+ let freshness = repo["freshness"]
+ .as_str()
+ .expect("each repo should carry a freshness verdict");
+ assert!(
+ matches!(freshness, "fresh" | "stale" | "never_indexed" | "unknown"),
+ "unexpected freshness verdict: {freshness}"
+ );
+ }
+ }
+
+ #[test]
+ fn freshness_label_maps_every_variant() {
+ use gather_step_git::IndexFreshness;
+
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::Fresh {
+ head_sha: "abc".to_owned()
+ }),
+ "fresh"
+ );
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::Stale {
+ indexed_sha: "old".to_owned(),
+ head_sha: "new".to_owned()
+ }),
+ "stale"
+ );
+ assert_eq!(
+ super::freshness_label(&IndexFreshness::NeverIndexed {
+ head_sha: "abc".to_owned()
+ }),
+ "never_indexed"
+ );
}
}
From dd76761b791d9fbfef3e78930ab934fabfc5f5d6 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:09:21 +0700
Subject: [PATCH 15/31] feat: distinct exit code + json disclosure for graph
lock (REL1)
---
crates/gather-step-cli/src/errors.rs | 83 ++++++++++++++++++++++++++++
crates/gather-step-cli/src/main.rs | 27 ++++++++-
2 files changed, 108 insertions(+), 2 deletions(-)
diff --git a/crates/gather-step-cli/src/errors.rs b/crates/gather-step-cli/src/errors.rs
index c7643de..91873c7 100644
--- a/crates/gather-step-cli/src/errors.rs
+++ b/crates/gather-step-cli/src/errors.rs
@@ -6,6 +6,46 @@ use gather_step_storage::{GraphStoreError, MetadataStoreError, SearchStoreError}
const SCHEMA_VERSION_MISMATCH_MESSAGE: &str = "Index schema version mismatch — built by a different gather-step release. Next step: run `gather-step index --auto-recover` to rebuild, or `gather-step clean && gather-step index`.";
+/// Sysexits-style exit code (`EX_TEMPFAIL`) returned when a read command cannot
+/// proceed because the graph store is locked by another gather-step process
+/// (REL1). Distinct from the generic failure code (1) so a caller — CI, a
+/// wrapper, or an agent — can tell "blocked, results incomplete" apart from
+/// "ran fine, found nothing" and never silently fall back to grep.
+pub const GRAPH_LOCKED_EXIT_CODE: u8 = 75;
+
+/// True when the error chain carries graph-store lock contention
+/// (`StorageHeld` / `StorageHeldByDaemon`), including the text-fallback forms
+/// produced when the typed error is flattened to a string mid-chain.
+#[must_use]
+pub fn graph_lock_contention(error: &Error) -> bool {
+ for cause in error.chain() {
+ if let Some(graph_error) = cause.downcast_ref::()
+ && matches!(
+ graph_error,
+ GraphStoreError::StorageHeld { .. } | GraphStoreError::StorageHeldByDaemon { .. }
+ )
+ {
+ return true;
+ }
+ }
+ let full = error_chain_text(error);
+ contains_ascii_case_insensitive(&full, "locked by gather-step pid")
+ || contains_ascii_case_insensitive(&full, "already locked by another gather-step process")
+ || contains_ascii_case_insensitive(&full, "database already open")
+}
+
+/// Machine-readable degraded-mode disclosure for `--json` consumers when a read
+/// command is blocked by graph lock contention (REL1).
+#[must_use]
+pub fn graph_locked_json_disclosure(error: &Error) -> String {
+ serde_json::json!({
+ "event": "command_failed",
+ "degraded": "graph_locked",
+ "message": format_operator_error(error),
+ })
+ .to_string()
+}
+
#[must_use]
pub fn format_operator_error(error: &Error) -> String {
let full = error_chain_text(error);
@@ -177,6 +217,49 @@ mod tests {
);
}
+ #[test]
+ fn graph_lock_contention_detects_typed_lock_errors() {
+ use super::{GRAPH_LOCKED_EXIT_CODE, graph_lock_contention, graph_locked_json_disclosure};
+
+ let held = gather_step_storage::GraphStoreError::StorageHeld {
+ path: PathBuf::from("/tmp/graph.redb"),
+ };
+ let err: anyhow::Error = anyhow::Error::new(held);
+ assert!(graph_lock_contention(&err), "StorageHeld must be detected");
+
+ let by_daemon = gather_step_storage::GraphStoreError::StorageHeldByDaemon {
+ path: PathBuf::from("/tmp/graph.redb"),
+ pid: 4242,
+ started_at_epoch_ms: 1,
+ workspace_root: "/ws".to_owned(),
+ };
+ let err: anyhow::Error = anyhow::Error::new(by_daemon);
+ assert!(
+ graph_lock_contention(&err),
+ "StorageHeldByDaemon must be detected"
+ );
+
+ // The JSON disclosure names the degraded mode so a blocked read can
+ // never look like an empty-but-successful result.
+ let disclosure: serde_json::Value =
+ serde_json::from_str(&graph_locked_json_disclosure(&err)).expect("valid json");
+ assert_eq!(disclosure["degraded"], "graph_locked");
+ assert_eq!(disclosure["event"], "command_failed");
+
+ // The dedicated exit code is distinct from success (0) and the generic
+ // failure code (1) so callers can branch on it.
+ assert_ne!(GRAPH_LOCKED_EXIT_CODE, 0);
+ assert_ne!(GRAPH_LOCKED_EXIT_CODE, 1);
+ }
+
+ #[test]
+ fn graph_lock_contention_ignores_unrelated_errors() {
+ use super::graph_lock_contention;
+
+ let err: anyhow::Error = anyhow::Error::msg("read /tmp/foo: permission denied");
+ assert!(!graph_lock_contention(&err));
+ }
+
#[test]
fn unhandled_error_preserves_full_cause_chain() {
// Wrap an inner error with anyhow::Context so the chain has two links.
diff --git a/crates/gather-step-cli/src/main.rs b/crates/gather-step-cli/src/main.rs
index f15135c..b3b5ee2 100644
--- a/crates/gather-step-cli/src/main.rs
+++ b/crates/gather-step-cli/src/main.rs
@@ -9,7 +9,10 @@ use clap::Parser;
use dhat::Alloc;
use gather_step::app::{self, AppContext};
use gather_step::commands::{self, Cli};
-use gather_step::errors::format_operator_error;
+use gather_step::errors::{
+ GRAPH_LOCKED_EXIT_CODE, format_operator_error, graph_lock_contention,
+ graph_locked_json_disclosure,
+};
#[cfg(not(feature = "dhat-heap"))]
use mimalloc::MiMalloc;
@@ -49,14 +52,34 @@ async fn run_main() -> Result {
commands::run(cli, app).await
}
-/// Print the operator-facing error to stderr and return exit code 1.
+/// Print the operator-facing error to stderr and return an exit code.
+///
+/// Graph lock contention (REL1) returns a dedicated exit code and, under
+/// `--json`, a `degraded: graph_locked` disclosure on stdout so a blocked read
+/// can never be mistaken for an empty-but-successful result. All other failures
+/// return exit code 1.
///
/// Returning `ExitCode` rather than calling `std::process::exit(1)` lets
/// tokio's runtime tear down cleanly and lets stdio buffers flush — important
/// for `pr-review` (which prints a structured report on stdout) and any
/// command that emits trailing tracing lines.
fn print_operator_error_and_code(error: &Error) -> ExitCode {
+ if graph_lock_contention(error) {
+ if wants_json_output() {
+ let mut stdout = std::io::stdout().lock();
+ let _ = writeln!(stdout, "{}", graph_locked_json_disclosure(error));
+ }
+ let mut stderr = std::io::stderr().lock();
+ let _ = writeln!(stderr, "{}", format_operator_error(error));
+ return ExitCode::from(GRAPH_LOCKED_EXIT_CODE);
+ }
let mut stderr = std::io::stderr().lock();
let _ = writeln!(stderr, "{}", format_operator_error(error));
ExitCode::from(1)
}
+
+/// Best-effort check of the raw argv for the global `--json` flag. Used only on
+/// the error path, where the parsed `Cli` is unavailable.
+fn wants_json_output() -> bool {
+ std::env::args().any(|arg| arg == "--json")
+}
From 25842e5052d02333f830ba86758e3c5863e334f1 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:13:42 +0700
Subject: [PATCH 16/31] feat: display-ownership planning section in plan_change
(DSO1)
---
crates/gather-step-mcp/src/catalog.rs | 2 +-
crates/gather-step-mcp/src/server.rs | 2 +-
crates/gather-step-mcp/src/tools/packs.rs | 96 +++++++++++++++++++++--
3 files changed, 92 insertions(+), 8 deletions(-)
diff --git a/crates/gather-step-mcp/src/catalog.rs b/crates/gather-step-mcp/src/catalog.rs
index 20fee5f..52c62b4 100644
--- a/crates/gather-step-mcp/src/catalog.rs
+++ b/crates/gather-step-mcp/src/catalog.rs
@@ -94,7 +94,7 @@ pub const MCP_TOOLS: &[(&str, &str)] = &[
),
(
"plan_change",
- "Typed plan-change product (nine planning sections)",
+ "Typed plan-change product (ten planning sections)",
),
("debug_pack", "Context pack for debugging production issues"),
("fix_pack", "Context pack scoped for a bug fix"),
diff --git a/crates/gather-step-mcp/src/server.rs b/crates/gather-step-mcp/src/server.rs
index d41ea3d..bf5d07c 100644
--- a/crates/gather-step-mcp/src/server.rs
+++ b/crates/gather-step-mcp/src/server.rs
@@ -784,7 +784,7 @@ impl GatherStepMcpServer {
#[tool(
name = "plan_change",
- description = "Return a typed plan-change product (nine planning sections) for a target symbol.",
+ description = "Return a typed plan-change product (ten planning sections) for a target symbol.",
annotations(read_only_hint = true)
)]
pub async fn plan_change_tool(
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index a0360c3..aa92805 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -208,18 +208,19 @@ pub struct ContextPackResponse {
}
/// Schema version of the [`PlanChangeResponse`] contract. Bump on any change to
-/// the section set or contract shape.
-const PLAN_CHANGE_SCHEMA_VERSION: u8 = 1;
+/// the section set or contract shape. v2 adds `display_ownership_checks` (DSO1).
+const PLAN_CHANGE_SCHEMA_VERSION: u8 = 2;
-/// The nine fixed `plan_change` section names, in canonical order. The contract
+/// The fixed `plan_change` section names, in canonical order. The contract
/// manifest must equal this exactly so consumers (and the G7 gate) can assert
/// completeness deterministically.
-const PLAN_CHANGE_SECTIONS: [&str; 9] = [
+const PLAN_CHANGE_SECTIONS: [&str; 10] = [
"reuse_candidates",
"sibling_clone_targets",
"standards_to_preserve",
"integration_checks",
"cross_repo_reachability",
+ "display_ownership_checks",
"write_path_or_state_machine_risks",
"required_braingent_records",
"open_unknowns",
@@ -241,7 +242,7 @@ pub struct PlanChangeContract {
/// Typed `plan_change` product (WS-3 / E1).
///
/// A distinct response shape — not an alias for the planning pack — with the
-/// nine fixed planning sections. v1 projects the existing planning-pack data
+/// ten fixed planning sections. v1 projects the existing planning-pack data
/// into these sections; later slices (B7 proactive queries, G7 evidentiary
/// fields) enrich them. Every section is always present (possibly empty) so the
/// contract is stable for consumers and the G7 gate.
@@ -259,6 +260,10 @@ pub struct PlanChangeResponse {
pub integration_checks: Vec,
/// Cross-repo reachability proofs (producer/consumer edges) for the target.
pub cross_repo_reachability: Vec,
+ /// DSO1: for each cross-service reference, the display-ownership question —
+ /// confirm display fields come from the owner service (snapshot/API), not a
+ /// direct cross-service DB lookup, and that access control stays in the owner.
+ pub display_ownership_checks: Vec,
pub write_path_or_state_machine_risks: Vec,
pub required_braingent_records: Vec,
pub open_unknowns: Vec,
@@ -315,6 +320,19 @@ fn build_plan_change(
));
}
+ // DSO1: every cross-service reference raises the display-ownership question
+ // as a named planning dimension (REG-13833 — a cross-service display lookup
+ // was added without planning who owns the field).
+ let mut display_ownership_checks = Vec::new();
+ for caller in &change_impact.cross_repo_callers {
+ display_ownership_checks.push(format!(
+ "Display ownership: cross-service ref `{}::{}` — confirm display fields are sourced \
+ from the owner service (snapshot/API), not a direct cross-service DB lookup, and \
+ that access control stays in the owner.",
+ caller.repo, caller.symbol_name
+ ));
+ }
+
let mut write_path_or_state_machine_risks = Vec::new();
for caller in &change_impact.cross_repo_callers {
write_path_or_state_machine_risks.push(format!(
@@ -374,6 +392,7 @@ fn build_plan_change(
standards_to_preserve: Vec::new(),
integration_checks,
cross_repo_reachability: planning_proofs.to_vec(),
+ display_ownership_checks,
write_path_or_state_machine_risks,
required_braingent_records: Vec::new(),
open_unknowns: unresolved_gaps.to_vec(),
@@ -410,7 +429,7 @@ pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
}
/// Build the typed `plan_change` product (WS-3 / E1): run the planning pack,
-/// then project its data into the nine fixed sections.
+/// then project its data into the ten fixed sections.
pub fn run_plan_change(
ctx: &McpContext,
request: ModePackRequest,
@@ -5603,6 +5622,71 @@ mod tests {
assert!(plan.required_braingent_records.is_empty());
}
+ #[test]
+ fn plan_change_surfaces_display_ownership_for_cross_service_refs() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let change_impact = ChangeImpactSummary {
+ cross_repo_callers: vec![super::CrossRepoCaller {
+ file_path: "src/action_hub.ts".to_owned(),
+ line_start: None,
+ repo: "label-review".to_owned(),
+ symbol_id: "id_hub".to_owned(),
+ symbol_kind: "function".to_owned(),
+ symbol_name: "renderActionHub".to_owned(),
+ evidence: None,
+ }],
+ ..Default::default()
+ };
+ let plan = build_plan_change(
+ "getOrderDisplayName",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &change_impact,
+ None,
+ );
+
+ // DSO1: cross-service references raise the display-ownership question as
+ // a named section, citing the owning service.
+ assert!(
+ plan.display_ownership_checks
+ .iter()
+ .any(|check| check.contains("label-review")),
+ "display ownership not surfaced for cross-service ref: {:?}",
+ plan.display_ownership_checks
+ );
+ // The new section is part of the deterministic contract manifest.
+ assert!(
+ plan.contract
+ .sections
+ .iter()
+ .any(|s| s == "display_ownership_checks")
+ );
+ }
+
+ #[test]
+ fn plan_change_omits_display_ownership_without_cross_service_refs() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let plan = build_plan_change(
+ "localOnly",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+ assert!(
+ plan.display_ownership_checks.is_empty(),
+ "no cross-service refs should mean no display-ownership prompts"
+ );
+ }
+
#[test]
fn plan_change_contract_gate_validates_deterministic_metadata() {
use super::{
From 2cfe3711b2aaa841a00c6120a296dd89572358a9 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:16:57 +0700
Subject: [PATCH 17/31] feat: mongo/atlas structural safety detectors
(MQS1/MQS2/MQS3)
---
crates/gather-step-analysis/src/lib.rs | 7 +-
.../src/mongo_query_safety.rs | 241 ++++++++++++++++++
2 files changed, 247 insertions(+), 1 deletion(-)
create mode 100644 crates/gather-step-analysis/src/mongo_query_safety.rs
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index 3ac29fd..0d9c613 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -14,6 +14,7 @@ pub mod deployment_topology;
pub mod event_topology;
pub mod evidence;
pub mod impact;
+pub mod mongo_query_safety;
pub mod overview;
pub mod pack_assembly;
pub mod projection_impact;
@@ -54,6 +55,10 @@ pub use event_topology::{
pub use impact::{
BoundaryRole, EvidenceBand, ImpactError, ImpactMap, ImpactedFile, shared_contract_impact,
};
+pub use mongo_query_safety::{
+ MongoQueryFinding, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION,
+ analyze_mongo_value,
+};
pub use overview::{ModuleSummary, OverviewError, RepoOverview, build_overview};
pub use pack_assembly::{
CandidateKey, Pack, PackAssembler, PackItem, PackMode, QueryShape, SimplePackAssembler,
@@ -68,7 +73,7 @@ pub use proofs::{
MAX_PROOFS_PER_REPO, ProofCaller, ProofEngineError, ProofEngineOptions, ProofEngineOutput,
build_pack_proofs, derive_repo_sets, finalize_proofs, proof_strength,
};
-pub use query::{GraphQuery, QueryError, TraversalStep};
+pub use query::{GraphQuery, QueryError, TraversalOutcome, TraversalStep};
pub use semantic_health::{
SemanticHealthError, SemanticHealthReport, SemanticLinkHealth, semantic_health_for_repo,
semantic_health_for_workspace,
diff --git a/crates/gather-step-analysis/src/mongo_query_safety.rs b/crates/gather-step-analysis/src/mongo_query_safety.rs
new file mode 100644
index 0000000..c26eac7
--- /dev/null
+++ b/crates/gather-step-analysis/src/mongo_query_safety.rs
@@ -0,0 +1,241 @@
+//! Mongo/Atlas structural safety detectors (WS-G3).
+//!
+//! These flag AST-detectable antipatterns that recur in ad-hoc mongo queries
+//! and aggregations, not just migrations. Each finding carries a stable rule ID
+//! and a confidence score so it can be surfaced in review without prose-only
+//! noise (the v4.3.1 non-goal "no prose-only findings" applies here).
+//!
+//! The detectors are pure over a [`serde_json::Value`] modelling a mongo
+//! operation (an update document or an aggregation pipeline), so they are
+//! unit-testable without a live parser. Wiring the parser's pipeline extraction
+//! into these detectors is a separate concern (tracked under MQS1's key files).
+
+use serde_json::Value;
+
+/// MQS1 — an aggregation `$lookup` whose join key is coerced (`$toString` /
+/// `$toObjectId`) inside a sub-pipeline, defeating the index on the join field.
+pub const RULE_INDEX_DEFEAT: &str = "GS-MONGO-INDEX-DEFEAT";
+/// MQS2 — a bare `$toObjectId` coercion (recommend `$convert` with `onError`).
+pub const RULE_UNSAFE_COERCION: &str = "GS-MONGO-UNSAFE-COERCION";
+/// MQS3 — a `$set` on a dotted path with no existence/`$type:object` guard on
+/// the parent, which can clobber or fail on a null/scalar parent.
+pub const RULE_NULL_PARENT_PATH: &str = "GS-MONGO-NULL-PARENT-PATH";
+
+/// One mongo-query-safety finding. `path` is a dotted location into the analysed
+/// value so a caller can point at the offending stage/field.
+#[derive(Clone, Debug, PartialEq)]
+pub struct MongoQueryFinding {
+ pub rule_id: &'static str,
+ pub confidence: f64,
+ pub message: String,
+ pub path: String,
+}
+
+/// Analyse a JSON-shaped mongo operation (update document or aggregation
+/// pipeline) for the WS-G3 structural traps. Findings are sorted by
+/// `(rule_id, path)` so the output is deterministic.
+#[must_use]
+pub fn analyze_mongo_value(value: &Value) -> Vec {
+ let mut findings = Vec::new();
+ walk(value, "$", &mut findings);
+ findings.sort_by(|left, right| {
+ left.rule_id
+ .cmp(right.rule_id)
+ .then_with(|| left.path.cmp(&right.path))
+ });
+ findings
+}
+
+fn walk(value: &Value, path: &str, findings: &mut Vec) {
+ match value {
+ Value::Object(map) => {
+ for (key, child) in map {
+ let child_path = format!("{path}.{key}");
+ match key.as_str() {
+ "$lookup" => detect_index_defeat(child, &child_path, findings),
+ "$toObjectId" => findings.push(MongoQueryFinding {
+ rule_id: RULE_UNSAFE_COERCION,
+ confidence: 0.8,
+ message: "Bare `$toObjectId` throws on a malformed id; use `$convert` \
+ with an `onError` fallback for untrusted input."
+ .to_owned(),
+ path: child_path.clone(),
+ }),
+ "$set" => detect_null_parent_path(child, &child_path, findings),
+ _ => {}
+ }
+ walk(child, &child_path, findings);
+ }
+ }
+ Value::Array(items) => {
+ for (index, item) in items.iter().enumerate() {
+ walk(item, &format!("{path}[{index}]"), findings);
+ }
+ }
+ _ => {}
+ }
+}
+
+/// MQS1: a `$lookup` is index-defeating when its join key is coerced with
+/// `$toString`/`$toObjectId` (typically inside a `let` + sub-`pipeline`). The
+/// index-aligned `localField`/`foreignField` form does no coercion.
+fn detect_index_defeat(lookup: &Value, path: &str, findings: &mut Vec) {
+ if mentions_operator(lookup, "$toString") || mentions_operator(lookup, "$toObjectId") {
+ findings.push(MongoQueryFinding {
+ rule_id: RULE_INDEX_DEFEAT,
+ confidence: 0.7,
+ message: "`$lookup` coerces its join key (`$toString`/`$toObjectId`), defeating the \
+ index on the join field; align field types or pre-store the join key."
+ .to_owned(),
+ path: path.to_owned(),
+ });
+ }
+}
+
+/// MQS3: a dotted-path key under `$set` is unguarded when neither the value nor
+/// the sibling assignments establish the parent object first.
+fn detect_null_parent_path(set_doc: &Value, path: &str, findings: &mut Vec) {
+ let Value::Object(map) = set_doc else {
+ return;
+ };
+ for (field, expr) in map {
+ let Some((parent, _)) = field.rsplit_once('.') else {
+ continue;
+ };
+ if value_guards_parent(expr) || sibling_assigns_parent(map, parent) {
+ continue;
+ }
+ findings.push(MongoQueryFinding {
+ rule_id: RULE_NULL_PARENT_PATH,
+ confidence: 0.6,
+ message: format!(
+ "`$set` on dotted path `{field}` has no existence/`$type:object` guard on parent \
+ `{parent}`; a null or scalar parent will be clobbered or error."
+ ),
+ path: format!("{path}.{field}"),
+ });
+ }
+}
+
+/// The assigned expression itself guards the parent (e.g. `$ifNull`/`$cond`).
+fn value_guards_parent(expr: &Value) -> bool {
+ mentions_operator(expr, "$ifNull") || mentions_operator(expr, "$cond")
+}
+
+/// A sibling key in the same `$set` establishes the parent (or an ancestor) as
+/// an object before the dotted write — i.e. a sibling that is exactly the
+/// parent path or a prefix-ancestor of it.
+fn sibling_assigns_parent(map: &serde_json::Map, parent: &str) -> bool {
+ map.keys()
+ .any(|key| key == parent || parent.starts_with(&format!("{key}.")))
+}
+
+/// Whether `value` contains `op` as an object key anywhere in its tree.
+fn mentions_operator(value: &Value, op: &str) -> bool {
+ match value {
+ Value::Object(map) => map
+ .iter()
+ .any(|(key, child)| key == op || mentions_operator(child, op)),
+ Value::Array(items) => items.iter().any(|item| mentions_operator(item, op)),
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::{
+ RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION, analyze_mongo_value,
+ };
+ use serde_json::json;
+
+ fn rule_ids(value: &serde_json::Value) -> Vec<&'static str> {
+ analyze_mongo_value(value)
+ .into_iter()
+ .map(|finding| finding.rule_id)
+ .collect()
+ }
+
+ #[test]
+ fn flags_lookup_that_coerces_join_key_but_not_index_aligned_lookup() {
+ // GO4(a): a $lookup coercing the join key with $toString defeats the index.
+ let coercing = json!([{
+ "$lookup": {
+ "from": "users",
+ "let": { "uid": "$userId" },
+ "pipeline": [
+ { "$match": { "$expr": { "$eq": [{ "$toString": "$_id" }, "$$uid"] } } }
+ ],
+ "as": "user"
+ }
+ }]);
+ assert!(rule_ids(&coercing).contains(&RULE_INDEX_DEFEAT));
+
+ // An index-aligned localField/foreignField join is clean.
+ let aligned = json!([{
+ "$lookup": {
+ "from": "users",
+ "localField": "userId",
+ "foreignField": "_id",
+ "as": "user"
+ }
+ }]);
+ assert!(!rule_ids(&aligned).contains(&RULE_INDEX_DEFEAT));
+ }
+
+ #[test]
+ fn flags_bare_to_object_id_but_not_convert_with_on_error() {
+ // GO4(b): bare $toObjectId on untrusted input.
+ let bare = json!({ "$match": { "_id": { "$toObjectId": "$$req.id" } } });
+ assert!(rule_ids(&bare).contains(&RULE_UNSAFE_COERCION));
+
+ // $convert with onError is the safe sibling — no $toObjectId key.
+ let safe = json!({
+ "$match": {
+ "_id": { "$convert": { "input": "$$req.id", "to": "objectId", "onError": null } }
+ }
+ });
+ assert!(!rule_ids(&safe).contains(&RULE_UNSAFE_COERCION));
+ }
+
+ #[test]
+ fn flags_unguarded_dotted_set_but_not_guarded_sibling() {
+ // GO4(c): dotted $set with no parent guard.
+ let unguarded = json!({ "$set": { "meta.flags.active": true } });
+ assert!(rule_ids(&unguarded).contains(&RULE_NULL_PARENT_PATH));
+
+ // Guarded by wrapping the value in $ifNull.
+ let guarded_value =
+ json!({ "$set": { "meta.flags.active": { "$ifNull": ["$meta.flags.active", true] } } });
+ assert!(!rule_ids(&guarded_value).contains(&RULE_NULL_PARENT_PATH));
+
+ // Guarded by establishing the parent in a sibling assignment.
+ let guarded_sibling =
+ json!({ "$set": { "meta": { "flags": {} }, "meta.flags.active": true } });
+ assert!(!rule_ids(&guarded_sibling).contains(&RULE_NULL_PARENT_PATH));
+ }
+
+ #[test]
+ fn clean_pipeline_yields_no_findings() {
+ let clean = json!([
+ { "$match": { "status": "active" } },
+ { "$project": { "name": 1, "email": 1 } }
+ ]);
+ assert!(analyze_mongo_value(&clean).is_empty());
+ }
+
+ #[test]
+ fn findings_are_deterministically_sorted() {
+ let value = json!({
+ "$set": { "a.b": true },
+ "stage": { "$toObjectId": "$x" }
+ });
+ let first = analyze_mongo_value(&value);
+ let second = analyze_mongo_value(&value);
+ assert_eq!(first, second);
+ // Findings come back sorted by (rule_id, path).
+ let ids: Vec<_> = first.iter().map(|f| f.rule_id).collect();
+ let mut sorted = ids.clone();
+ sorted.sort_unstable();
+ assert_eq!(ids, sorted);
+ }
+}
From a23d1036c5bd6da0916d253c556a3bc29557df80 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:25:59 +0700
Subject: [PATCH 18/31] chore: prepare 4.3.1 release
---
Cargo.lock | 20 ++++++-------
Cargo.toml | 20 ++++++-------
website/package.json | 2 +-
.../src/components/landing/Benchmark.astro | 4 +--
website/src/components/landing/Hero.astro | 2 +-
website/src/components/landing/Topology.astro | 2 +-
website/src/content/docs/changelog.md | 30 +++++++++++++++++++
7 files changed, 55 insertions(+), 25 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 757dfb8..f1ba993 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1103,7 +1103,7 @@ dependencies = [
[[package]]
name = "gather-step"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"anyhow",
"blake3",
@@ -1145,7 +1145,7 @@ dependencies = [
[[package]]
name = "gather-step-analysis"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"gather-step-core",
"gather-step-parser",
@@ -1160,7 +1160,7 @@ dependencies = [
[[package]]
name = "gather-step-bench"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"anyhow",
"chrono",
@@ -1190,7 +1190,7 @@ dependencies = [
[[package]]
name = "gather-step-core"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"bitcode",
"blake3",
@@ -1204,7 +1204,7 @@ dependencies = [
[[package]]
name = "gather-step-deploy"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"gather-step-core",
"pretty_assertions",
@@ -1216,7 +1216,7 @@ dependencies = [
[[package]]
name = "gather-step-git"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"blake3",
"gather-step-core",
@@ -1233,7 +1233,7 @@ dependencies = [
[[package]]
name = "gather-step-mcp"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"blake3",
"criterion",
@@ -1264,7 +1264,7 @@ dependencies = [
[[package]]
name = "gather-step-output"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"chrono",
"gather-step-analysis",
@@ -1276,7 +1276,7 @@ dependencies = [
[[package]]
name = "gather-step-parser"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"aho-corasick",
"blake3",
@@ -1318,7 +1318,7 @@ dependencies = [
[[package]]
name = "gather-step-storage"
-version = "4.2.1"
+version = "4.3.1"
dependencies = [
"bitcode",
"blake3",
diff --git a/Cargo.toml b/Cargo.toml
index 2b5e0f2..a90acb5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ members = [
]
[workspace.package]
-version = "4.2.1"
+version = "4.3.1"
authors = ["JJ Adonis"]
edition = "2024"
rust-version = "1.96"
@@ -24,15 +24,15 @@ homepage = "https://github.com/thedoublejay/gather-step"
description = "High-performance multi-repo codebase intelligence engine"
[workspace.dependencies]
-gather-step = { path = "crates/gather-step-cli", version = "4.2.1" }
-gather-step-analysis = { path = "crates/gather-step-analysis", version = "4.2.1" }
-gather-step-core = { path = "crates/gather-step-core", version = "4.2.1" }
-gather-step-deploy = { path = "crates/gather-step-deploy", version = "4.2.1" }
-gather-step-git = { path = "crates/gather-step-git", version = "4.2.1" }
-gather-step-mcp = { path = "crates/gather-step-mcp", version = "4.2.1" }
-gather-step-output = { path = "crates/gather-step-output", version = "4.2.1" }
-gather-step-parser = { path = "crates/gather-step-parser", version = "4.2.1" }
-gather-step-storage = { path = "crates/gather-step-storage", version = "4.2.1" }
+gather-step = { path = "crates/gather-step-cli", version = "4.3.1" }
+gather-step-analysis = { path = "crates/gather-step-analysis", version = "4.3.1" }
+gather-step-core = { path = "crates/gather-step-core", version = "4.3.1" }
+gather-step-deploy = { path = "crates/gather-step-deploy", version = "4.3.1" }
+gather-step-git = { path = "crates/gather-step-git", version = "4.3.1" }
+gather-step-mcp = { path = "crates/gather-step-mcp", version = "4.3.1" }
+gather-step-output = { path = "crates/gather-step-output", version = "4.3.1" }
+gather-step-parser = { path = "crates/gather-step-parser", version = "4.3.1" }
+gather-step-storage = { path = "crates/gather-step-storage", version = "4.3.1" }
tree-sitter = "=0.26.9"
tree-sitter-typescript = "0.23.2"
diff --git a/website/package.json b/website/package.json
index 9b13681..0b3f3e1 100644
--- a/website/package.json
+++ b/website/package.json
@@ -1,7 +1,7 @@
{
"name": "gather-step-website",
"type": "module",
- "version": "4.2.1",
+ "version": "4.3.1",
"private": true,
"packageManager": "bun@1.3.12",
"scripts": {
diff --git a/website/src/components/landing/Benchmark.astro b/website/src/components/landing/Benchmark.astro
index c84ba87..78ff8c1 100644
--- a/website/src/components/landing/Benchmark.astro
+++ b/website/src/components/landing/Benchmark.astro
@@ -69,7 +69,7 @@ const speedup = (totalManual / totalGatherStep).toFixed(1);
on the same machine; medians of three runs.
- v4.2.1 · 2026-06-02
+ v4.3.1 · 2026-06-02
@@ -141,7 +141,7 @@ const speedup = (totalManual / totalGatherStep).toFixed(1);
-
Planning oracle (v4.2.1)
+
Planning oracle (v4.3.1)
Full 25-scenario suite, every metric at the HIGH ceiling, latency well under release-gate thresholds (p50 ≤ 50 ms, p95 ≤ 300 ms, p99 ≤ 1000 ms).
diff --git a/website/src/components/landing/Hero.astro b/website/src/components/landing/Hero.astro
index ed4622e..7135b60 100644
--- a/website/src/components/landing/Hero.astro
+++ b/website/src/components/landing/Hero.astro
@@ -6,7 +6,7 @@ const stats = [
},
{
label: 'Spec · Release',
- value: 'v4.2.1 · dependency refresh',
+ value: 'v4.3.1 · planning & reuse quality',
},
{
label: 'Spec · Deployment',
diff --git a/website/src/components/landing/Topology.astro b/website/src/components/landing/Topology.astro
index e158e2e..2bc84d2 100644
--- a/website/src/components/landing/Topology.astro
+++ b/website/src/components/landing/Topology.astro
@@ -72,6 +72,6 @@ const consumers = [
CONSUMERS · N=4
-
Approved · v4.2.1
+
Approved · v4.3.1
diff --git a/website/src/content/docs/changelog.md b/website/src/content/docs/changelog.md
index 0c39768..15aad39 100644
--- a/website/src/content/docs/changelog.md
+++ b/website/src/content/docs/changelog.md
@@ -5,6 +5,36 @@ description: "User-visible changes to gather-step, listed by release. Updated ma
This changelog lists significant user-visible changes. The latest release is shown in full at the top; earlier releases are collapsed under [Earlier releases](#earlier-releases) at the bottom of the page.
+## v4.3.1 (2026-06-02)
+
+Release status: **prepared**.
+
+Planning- and reuse-quality release. Fixes retrieval recall so reuse search stops returning empty, ranks reuse candidates using the graph, ships a typed `plan_change` product with a stable section contract, and adds the first batch of v4.3.1 gap-closure work (lock-contention disclosure, a display-ownership planning dimension, and mongo/Atlas structural detectors). Partial against the full v4.3.0/v4.3.1 backlog — see the PR for the done/deferred ledger.
+
+### Added
+
+- **Graph-ranked reuse evidence** in planning packs: reuse candidates are ranked by sibling-consumer count, shared/design-system membership, and cross-repo proof strength before truncation, so a blessed shared component ranks above a bespoke fork (S3).
+- **Typed `plan_change` product** with a fixed, contract-checked section set (E1). Sections are always present (possibly empty), with an exclusion ledger recording what was dropped so a capped result is never read as exhaustive.
+- **Display-ownership planning dimension** (`display_ownership_checks`): every cross-service reference surfaces the question of whether display fields come from the owner service (snapshot/API) rather than a direct cross-service DB lookup (DSO1).
+- **Mongo/Atlas structural safety detectors** with stable rule IDs and confidence: `$lookup` join-key coercion that defeats an index (`GS-MONGO-INDEX-DEFEAT`), bare `$toObjectId` on untrusted input (`GS-MONGO-UNSAFE-COERCION`), and unguarded dotted-path `$set` (`GS-MONGO-NULL-PARENT-PATH`) (MQS1–MQS3).
+- **Query-time index freshness** (`fresh`/`stale`/`never_indexed`) is now classified against the working tree's HEAD and surfaced per repo in `gather-step status` (A13).
+- **Multi-path traversal provenance**: graph traversals now report every distinct path into a node plus `depth_capped`/`truncated` signals when a walk is cut short by depth or fan-out bounds (B8/B9).
+
+### Changed
+
+- **Multi-word search recall**: a conjunctive query that returns nothing now falls back to a disjunction with a min-should-match floor, so a capability query sharing most of its terms still finds the target symbol (S1). Hits are re-ranked by query-term coverage (S2) and expanded through a curated synonym map (S4).
+- **Unified `min_confidence` edge filter** across trace/impact/pack traversal, with `None`-confidence edges treated as trusted (A14).
+- The `plan_change` contract gate is now **evidentiary** — it asserts schema version, the exact section manifest, and the exclusion ledger, not just section presence (G7).
+
+### Fixed
+
+- `batch_query` now routes `plan_change` requests to the typed product instead of the raw planning pack.
+- Graph **lock contention** on read commands now exits with a distinct, documented code and (under `--json`) a `degraded: graph_locked` disclosure, so a blocked read can never be mistaken for an empty-but-successful result (REL1).
+
+### Release-wide
+
+- Bumped the app, Cargo workspace, internal crate dependency versions, landing-page release stamps, and website package metadata to `4.3.1`.
+
## v4.2.1 (2026-06-02)
Release status: **prepared**.
From c57933d766666e2fede3b17316766a8699a2cbd4 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Tue, 2 Jun 2026 23:29:31 +0700
Subject: [PATCH 19/31] feat: atlas index/doc-field drift detector (AIX1)
---
crates/gather-step-analysis/src/lib.rs | 4 +-
.../src/mongo_query_safety.rs | 66 +++++++++++++++++++
website/src/content/docs/changelog.md | 2 +-
3 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index 0d9c613..0fa084d 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -56,8 +56,8 @@ pub use impact::{
BoundaryRole, EvidenceBand, ImpactError, ImpactMap, ImpactedFile, shared_contract_impact,
};
pub use mongo_query_safety::{
- MongoQueryFinding, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION,
- analyze_mongo_value,
+ MongoQueryFinding, RULE_ATLAS_INDEX_DRIFT, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH,
+ RULE_UNSAFE_COERCION, analyze_atlas_index_drift, analyze_mongo_value,
};
pub use overview::{ModuleSummary, OverviewError, RepoOverview, build_overview};
pub use pack_assembly::{
diff --git a/crates/gather-step-analysis/src/mongo_query_safety.rs b/crates/gather-step-analysis/src/mongo_query_safety.rs
index c26eac7..9fee880 100644
--- a/crates/gather-step-analysis/src/mongo_query_safety.rs
+++ b/crates/gather-step-analysis/src/mongo_query_safety.rs
@@ -20,6 +20,10 @@ pub const RULE_UNSAFE_COERCION: &str = "GS-MONGO-UNSAFE-COERCION";
/// MQS3 — a `$set` on a dotted path with no existence/`$type:object` guard on
/// the parent, which can clobber or fail on a null/scalar parent.
pub const RULE_NULL_PARENT_PATH: &str = "GS-MONGO-NULL-PARENT-PATH";
+/// AIX1 — a doc field is referenced in a `$search`/filter but absent from a
+/// `dynamic:false` Atlas search-index mapping, so the query silently matches
+/// nothing on that field.
+pub const RULE_ATLAS_INDEX_DRIFT: &str = "GS-MONGO-ATLAS-INDEX-DRIFT";
/// One mongo-query-safety finding. `path` is a dotted location into the analysed
/// value so a caller can point at the offending stage/field.
@@ -46,6 +50,46 @@ pub fn analyze_mongo_value(value: &Value) -> Vec {
findings
}
+/// AIX1: flag doc fields referenced in a `$search`/filter that are absent from
+/// a `dynamic:false` Atlas search-index mapping. A `dynamic:true` (or missing —
+/// Atlas defaults to dynamic) mapping covers all fields, so it never drifts.
+/// Pure over the index definition + the referenced field set.
+#[must_use]
+pub fn analyze_atlas_index_drift(
+ index_def: &Value,
+ referenced_fields: &[&str],
+) -> Vec {
+ let mappings = index_def.get("mappings");
+ let dynamic = mappings
+ .and_then(|m| m.get("dynamic"))
+ .and_then(Value::as_bool)
+ .unwrap_or(true);
+ if dynamic {
+ return Vec::new();
+ }
+ let mapped: Vec<&str> = mappings
+ .and_then(|m| m.get("fields"))
+ .and_then(Value::as_object)
+ .map(|fields| fields.keys().map(String::as_str).collect())
+ .unwrap_or_default();
+
+ let mut findings: Vec = referenced_fields
+ .iter()
+ .filter(|field| !mapped.contains(*field))
+ .map(|field| MongoQueryFinding {
+ rule_id: RULE_ATLAS_INDEX_DRIFT,
+ confidence: 0.75,
+ message: format!(
+ "Field `{field}` is queried but absent from the `dynamic:false` Atlas index \
+ mapping; the search silently matches nothing on it. Add it to the mapping."
+ ),
+ path: (*field).to_owned(),
+ })
+ .collect();
+ findings.sort_by(|left, right| left.path.cmp(&right.path));
+ findings
+}
+
fn walk(value: &Value, path: &str, findings: &mut Vec) {
match value {
Value::Object(map) => {
@@ -214,6 +258,28 @@ mod tests {
assert!(!rule_ids(&guarded_sibling).contains(&RULE_NULL_PARENT_PATH));
}
+ #[test]
+ fn flags_unmapped_field_on_dynamic_false_index_but_not_mapped_field() {
+ use super::{RULE_ATLAS_INDEX_DRIFT, analyze_atlas_index_drift};
+
+ // GO4(d): a dynamic:false index missing a referenced field.
+ let index = json!({
+ "mappings": { "dynamic": false, "fields": { "title": {}, "body": {} } }
+ });
+ let findings = analyze_atlas_index_drift(&index, &["title", "entity"]);
+ let ids: Vec<_> = findings.iter().map(|f| f.rule_id).collect();
+ assert!(ids.contains(&RULE_ATLAS_INDEX_DRIFT));
+ assert!(findings.iter().any(|f| f.path == "entity"));
+ assert!(
+ !findings.iter().any(|f| f.path == "title"),
+ "a mapped field must not drift"
+ );
+
+ // A dynamic:true index covers every field — no drift.
+ let dynamic_index = json!({ "mappings": { "dynamic": true } });
+ assert!(analyze_atlas_index_drift(&dynamic_index, &["anything"]).is_empty());
+ }
+
#[test]
fn clean_pipeline_yields_no_findings() {
let clean = json!([
diff --git a/website/src/content/docs/changelog.md b/website/src/content/docs/changelog.md
index 15aad39..949cbb1 100644
--- a/website/src/content/docs/changelog.md
+++ b/website/src/content/docs/changelog.md
@@ -16,7 +16,7 @@ Planning- and reuse-quality release. Fixes retrieval recall so reuse search stop
- **Graph-ranked reuse evidence** in planning packs: reuse candidates are ranked by sibling-consumer count, shared/design-system membership, and cross-repo proof strength before truncation, so a blessed shared component ranks above a bespoke fork (S3).
- **Typed `plan_change` product** with a fixed, contract-checked section set (E1). Sections are always present (possibly empty), with an exclusion ledger recording what was dropped so a capped result is never read as exhaustive.
- **Display-ownership planning dimension** (`display_ownership_checks`): every cross-service reference surfaces the question of whether display fields come from the owner service (snapshot/API) rather than a direct cross-service DB lookup (DSO1).
-- **Mongo/Atlas structural safety detectors** with stable rule IDs and confidence: `$lookup` join-key coercion that defeats an index (`GS-MONGO-INDEX-DEFEAT`), bare `$toObjectId` on untrusted input (`GS-MONGO-UNSAFE-COERCION`), and unguarded dotted-path `$set` (`GS-MONGO-NULL-PARENT-PATH`) (MQS1–MQS3).
+- **Mongo/Atlas structural safety detectors** with stable rule IDs and confidence: `$lookup` join-key coercion that defeats an index (`GS-MONGO-INDEX-DEFEAT`), bare `$toObjectId` on untrusted input (`GS-MONGO-UNSAFE-COERCION`), unguarded dotted-path `$set` (`GS-MONGO-NULL-PARENT-PATH`), and `dynamic:false` Atlas index↔doc-field drift (`GS-MONGO-ATLAS-INDEX-DRIFT`) (MQS1–MQS3, AIX1).
- **Query-time index freshness** (`fresh`/`stale`/`never_indexed`) is now classified against the working tree's HEAD and surfaced per repo in `gather-step status` (A13).
- **Multi-path traversal provenance**: graph traversals now report every distinct path into a node plus `depth_capped`/`truncated` signals when a walk is cut short by depth or fan-out bounds (B8/B9).
From 7f1f1714c56a9b61bfdac2c3bde75561cf80c38a Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 06:35:40 +0700
Subject: [PATCH 20/31] feat: pass-2 gap + v1-completeness checklists in
plan_change (B1/B3/WS-16)
---
crates/gather-step-mcp/src/catalog.rs | 2 +-
crates/gather-step-mcp/src/server.rs | 2 +-
crates/gather-step-mcp/src/tools/packs.rs | 113 +++++++++++++++++++++-
3 files changed, 110 insertions(+), 7 deletions(-)
diff --git a/crates/gather-step-mcp/src/catalog.rs b/crates/gather-step-mcp/src/catalog.rs
index 52c62b4..75fd5fa 100644
--- a/crates/gather-step-mcp/src/catalog.rs
+++ b/crates/gather-step-mcp/src/catalog.rs
@@ -94,7 +94,7 @@ pub const MCP_TOOLS: &[(&str, &str)] = &[
),
(
"plan_change",
- "Typed plan-change product (ten planning sections)",
+ "Typed plan-change product (twelve planning sections)",
),
("debug_pack", "Context pack for debugging production issues"),
("fix_pack", "Context pack scoped for a bug fix"),
diff --git a/crates/gather-step-mcp/src/server.rs b/crates/gather-step-mcp/src/server.rs
index bf5d07c..aaae37e 100644
--- a/crates/gather-step-mcp/src/server.rs
+++ b/crates/gather-step-mcp/src/server.rs
@@ -784,7 +784,7 @@ impl GatherStepMcpServer {
#[tool(
name = "plan_change",
- description = "Return a typed plan-change product (ten planning sections) for a target symbol.",
+ description = "Return a typed plan-change product (twelve planning sections) for a target symbol.",
annotations(read_only_hint = true)
)]
pub async fn plan_change_tool(
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index aa92805..fed0cae 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -208,13 +208,15 @@ pub struct ContextPackResponse {
}
/// Schema version of the [`PlanChangeResponse`] contract. Bump on any change to
-/// the section set or contract shape. v2 adds `display_ownership_checks` (DSO1).
-const PLAN_CHANGE_SCHEMA_VERSION: u8 = 2;
+/// the section set or contract shape. v2 adds `display_ownership_checks` (DSO1);
+/// v3 adds `pass_two_gap_dimensions` (B1) and `v1_completeness_checklist`
+/// (B3 + WS-16).
+const PLAN_CHANGE_SCHEMA_VERSION: u8 = 3;
/// The fixed `plan_change` section names, in canonical order. The contract
/// manifest must equal this exactly so consumers (and the G7 gate) can assert
/// completeness deterministically.
-const PLAN_CHANGE_SECTIONS: [&str; 10] = [
+const PLAN_CHANGE_SECTIONS: [&str; 12] = [
"reuse_candidates",
"sibling_clone_targets",
"standards_to_preserve",
@@ -224,9 +226,48 @@ const PLAN_CHANGE_SECTIONS: [&str; 10] = [
"write_path_or_state_machine_risks",
"required_braingent_records",
"open_unknowns",
+ "pass_two_gap_dimensions",
+ "v1_completeness_checklist",
"verification_plan",
];
+/// B1: the eight fixed pass-2 gap-enumeration dimensions, surfaced as a required
+/// checklist on every plan so a planner cannot skip them.
+const PASS_TWO_GAP_DIMENSIONS: [&str; 8] = [
+ "State machine: does this add/alter a status or lifecycle state? Enumerate transitions and conflict handling.",
+ "Token/URL security: are tokens, signed URLs, or IDs exposed or accepted? Validate scope, expiry, and tampering.",
+ "Dependent ticket: is there upstream/downstream work this depends on or unblocks? Link it.",
+ "Atomicity: do multi-document or multi-step writes need a transaction or idempotency guard?",
+ "Index-per-field: does any new query/filter field need an index? Confirm coverage.",
+ "Event contract: grep producers and consumers of any touched event/topic for set-difference drift.",
+ "Audit category: does this mutating path need an audit-log entry with the correct category?",
+ "Notification: should this change emit or suppress a user/system notification?",
+];
+
+/// B3 (v1-completeness) + WS-16 (v1 deviation taxonomy V1–V9), folded into one
+/// checklist. Every item carries a `→ verify:` so progress is observable.
+const V1_COMPLETENESS_CHECKLIST: [&str; 19] = [
+ "Scale test: does it hold at production data volume? → verify: run against a realistic-size fixture.",
+ "Failure-path lifecycle: are partial-failure and retry paths handled? → verify: exercise the failure branch.",
+ "Consumer reachability: does the output actually reach every consumer? → verify: trace producer→consumer.",
+ "Downstream coupling: which downstream systems depend on this shape? → verify: enumerate consumers.",
+ "Delete/invalidate: are deletes and cache/index invalidations handled? → verify: delete-path test.",
+ "Single source of truth: is the value derived once, not duplicated? → verify: grep for parallel computations.",
+ "Non-functional: latency/memory/throughput within budget? → verify: measure against baseline.",
+ "Exclusion scope: what is explicitly out of scope? → verify: state exclusions in the PR.",
+ "Cross-commit regression: could a later commit re-break this? → verify: add a regression test.",
+ "Deferred as follow-ups: are deferrals recorded as tickets? → verify: link follow-up issues.",
+ "Prior-release correctness/security gate: did a prior release defer hardening here? → verify: re-check the deferred item.",
+ "Partial-migration tail: zero residual call-sites workspace-wide? → verify: grep shows no residual callers.",
+ "Release-profile divergence: verify flags on the installed profile. → verify: check [profile.dist].",
+ "Teardown under-spec: disposable artifacts specify creation/reuse-key/safe-delete/in-flight-writer guard. → verify: all four present.",
+ "Schema-version forward-compat: a version-mismatch refusal/rebuild path exists. → verify: mismatch is refused, not mis-read.",
+ "Derived-field blast radius: enumerate source, projection writer, all readers, filters, search/index, backfill. → verify: all six listed.",
+ "Cross-service ownership: should the owner publish a snapshot/resolver before adopting a join? → verify: question answered.",
+ "Lossy rollback: enumerate unrecoverable rows/fields. → verify: rollback notes list them.",
+ "Implied-surface sweep: events/audit/notification/rate-limit/idempotency even when the spec is silent. → verify: sweep performed.",
+];
+
/// Evidentiary contract for the typed `plan_change` product (WS-3 / G7):
/// deterministic metadata (schema version + the fixed section manifest) plus an
/// exclusion ledger recording what was dropped from the response and why, so a
@@ -242,7 +283,7 @@ pub struct PlanChangeContract {
/// Typed `plan_change` product (WS-3 / E1).
///
/// A distinct response shape — not an alias for the planning pack — with the
-/// ten fixed planning sections. v1 projects the existing planning-pack data
+/// twelve fixed planning sections. v1 projects the existing planning-pack data
/// into these sections; later slices (B7 proactive queries, G7 evidentiary
/// fields) enrich them. Every section is always present (possibly empty) so the
/// contract is stable for consumers and the G7 gate.
@@ -267,6 +308,11 @@ pub struct PlanChangeResponse {
pub write_path_or_state_machine_risks: Vec,
pub required_braingent_records: Vec,
pub open_unknowns: Vec,
+ /// B1: the eight fixed pass-2 gap-enumeration dimensions (required checklist).
+ pub pass_two_gap_dimensions: Vec,
+ /// B3 + WS-16: the v1-completeness checklist with the v1 deviation taxonomy
+ /// folded in; every item carries a `→ verify:`.
+ pub v1_completeness_checklist: Vec,
pub verification_plan: Vec,
/// Evidentiary contract: schema version, section manifest, exclusion ledger.
pub contract: PlanChangeContract,
@@ -396,6 +442,14 @@ fn build_plan_change(
write_path_or_state_machine_risks,
required_braingent_records: Vec::new(),
open_unknowns: unresolved_gaps.to_vec(),
+ pass_two_gap_dimensions: PASS_TWO_GAP_DIMENSIONS
+ .iter()
+ .map(|item| (*item).to_owned())
+ .collect(),
+ v1_completeness_checklist: V1_COMPLETENESS_CHECKLIST
+ .iter()
+ .map(|item| (*item).to_owned())
+ .collect(),
verification_plan,
contract,
meta,
@@ -429,7 +483,7 @@ pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
}
/// Build the typed `plan_change` product (WS-3 / E1): run the planning pack,
-/// then project its data into the ten fixed sections.
+/// then project its data into the twelve fixed sections.
pub fn run_plan_change(
ctx: &McpContext,
request: ModePackRequest,
@@ -5622,6 +5676,55 @@ mod tests {
assert!(plan.required_braingent_records.is_empty());
}
+ #[test]
+ fn plan_change_includes_pass_two_and_v1_checklists() {
+ use super::{ChangeImpactSummary, build_plan_change};
+
+ let plan = build_plan_change(
+ "anyTarget",
+ true,
+ &[],
+ &[],
+ &[],
+ &[],
+ &ChangeImpactSummary::default(),
+ None,
+ );
+
+ // B1: all eight pass-2 gap dimensions are present as a required checklist.
+ assert_eq!(
+ plan.pass_two_gap_dimensions.len(),
+ 8,
+ "all 8 pass-2 dimensions must be present: {:?}",
+ plan.pass_two_gap_dimensions
+ );
+
+ // B3 + WS-16: the v1-completeness checklist is non-empty and every item
+ // carries a `→ verify:` so progress is observable.
+ assert!(!plan.v1_completeness_checklist.is_empty());
+ assert!(
+ plan.v1_completeness_checklist
+ .iter()
+ .all(|item| item.contains("→ verify:")),
+ "every v1-completeness item must carry a verify step"
+ );
+ // WS-16: the deviation taxonomy (V1–V9) is folded into the checklist.
+ assert!(
+ plan.v1_completeness_checklist
+ .iter()
+ .any(|item| item.contains("Derived-field blast radius")),
+ "V6 deviation item missing from checklist"
+ );
+
+ // Both new sections are part of the deterministic contract manifest.
+ for section in ["pass_two_gap_dimensions", "v1_completeness_checklist"] {
+ assert!(
+ plan.contract.sections.iter().any(|s| s == section),
+ "missing section in manifest: {section}"
+ );
+ }
+ }
+
#[test]
fn plan_change_surfaces_display_ownership_for_cross_service_refs() {
use super::{ChangeImpactSummary, build_plan_change};
From a1119aa89d2feb68059169b209d214d81dcc41df Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 06:52:51 +0700
Subject: [PATCH 21/31] test: e2e graph-lock (REL1), fan-out cap, A13 status,
plan_change sections
---
crates/gather-step-analysis/src/query.rs | 42 ++++++++++
crates/gather-step-cli/tests/cli_commands.rs | 81 +++++++++++++++++--
.../tests/batch_plan_change.rs | 19 ++++-
3 files changed, 135 insertions(+), 7 deletions(-)
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index 00eb9d1..0b4be7b 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -456,6 +456,48 @@ mod tests {
);
}
+ #[test]
+ fn traverse_with_provenance_flags_fan_out_truncation() {
+ let temp_db = TempDb::new("query-fan-out");
+ let store = test_store(temp_db.path());
+ let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
+ let hub = node("service-a", "src/a.ts", NodeKind::Function, "hub", 0);
+
+ // A hub node with more out-edges than DEFAULT_FANOUT_CAP (256) must trip
+ // the truncation signal rather than silently exploring a bounded subset.
+ let fan_out = 300_u16;
+ let mut nodes = vec![file.clone(), hub.clone()];
+ let mut edges = Vec::new();
+ for ordinal in 1..=fan_out {
+ let leaf = node(
+ "service-a",
+ "src/a.ts",
+ NodeKind::Function,
+ &format!("leaf{ordinal}"),
+ ordinal,
+ );
+ edges.push(edge(hub.id, leaf.id, file.id));
+ nodes.push(leaf);
+ }
+ store
+ .bulk_insert(&nodes, &edges)
+ .expect("graph should write");
+
+ let query = GraphQuery::new(&store);
+ let outcome = query
+ .traverse_with_provenance(hub.id, &[EdgeKind::Calls], 2, None)
+ .expect("traversal should succeed");
+ assert!(
+ outcome.truncated,
+ "fan-out above the cap must flag truncated"
+ );
+ assert!(
+ outcome.steps.len() <= 256,
+ "truncated traversal must not exceed the fan-out cap: {}",
+ outcome.steps.len()
+ );
+ }
+
#[test]
fn counts_nodes_and_edges_by_kind() {
let temp_db = TempDb::new("counts");
diff --git a/crates/gather-step-cli/tests/cli_commands.rs b/crates/gather-step-cli/tests/cli_commands.rs
index f38a90a..574402c 100644
--- a/crates/gather-step-cli/tests/cli_commands.rs
+++ b/crates/gather-step-cli/tests/cli_commands.rs
@@ -348,6 +348,69 @@ fn write_qa_reference_v4_workspace(root: &Path) {
.expect("notifications fixture source");
}
+/// REL1 end-to-end: while the graph store is held by another handle (simulating
+/// an in-progress index/watch), a read command must exit with the dedicated
+/// lock-contention code (75) and, under `--json`, disclose `degraded:
+/// graph_locked` on stdout — so a blocked read can never look like an
+/// empty-but-successful result. The lock is held in-process; the read runs as a
+/// separate binary, exercising redb's real cross-process file lock.
+#[test]
+fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
+ let temp = TempDir::new("graph-lock-rel1");
+ write_fixture_workspace(temp.path());
+ run_ok(temp.path(), &["init"]);
+ run_ok(temp.path(), &["--json", "index"]);
+
+ let graph_path = temp
+ .path()
+ .join(".gather-step")
+ .join("storage")
+ .join("graph.redb");
+ let held = GraphStoreDb::open(&graph_path).expect("should hold the graph lock");
+
+ let output = gather_step()
+ .arg("--workspace")
+ .arg(temp.path())
+ .args(["search", "OrderList", "--json"])
+ .output()
+ .expect("command should run");
+
+ assert_eq!(
+ output.status.code(),
+ Some(75),
+ "read under lock must exit with the distinct lock-contention code 75; stderr:\n{}",
+ String::from_utf8_lossy(&output.stderr)
+ );
+ let disclosure = stdout_json(&output);
+ assert_eq!(disclosure["event"], "command_failed");
+ assert_eq!(disclosure["degraded"], "graph_locked");
+
+ // Without --json the exit code is still the distinct 75 and the operator
+ // message lands on stderr (no machine disclosure on stdout).
+ let plain = gather_step()
+ .arg("--workspace")
+ .arg(temp.path())
+ .args(["search", "OrderList"])
+ .output()
+ .expect("command should run");
+ assert_eq!(plain.status.code(), Some(75));
+ assert!(
+ String::from_utf8_lossy(&plain.stderr).contains("Another gather-step process"),
+ "plain-mode lock error should explain the contention on stderr: {}",
+ String::from_utf8_lossy(&plain.stderr)
+ );
+ assert!(
+ plain.stdout.is_empty(),
+ "plain-mode lock failure must not emit a machine disclosure on stdout"
+ );
+
+ // Releasing the lock lets the same read succeed — proving 75 meant "blocked",
+ // not "broken".
+ drop(held);
+ let recovered = run_ok(temp.path(), &["search", "OrderList", "--json"]);
+ assert_eq!(stdout_json(&recovered)["event"], "search_completed");
+}
+
#[test]
fn cli_commands_work_on_indexed_fixture_workspace() {
let temp = TempDir::new("cli-commands");
@@ -439,12 +502,18 @@ fn cli_commands_work_on_indexed_fixture_workspace() {
let status = run_ok(temp.path(), &["status", "--json"]);
let status_json = stdout_json(&status);
assert_eq!(status_json["event"], "status_completed");
- assert!(
- !status_json["repos"]
- .as_array()
- .expect("repos array")
- .is_empty()
- );
+ let status_repos = status_json["repos"].as_array().expect("repos array");
+ assert!(!status_repos.is_empty());
+ // A13: each repo row carries a query-time index-freshness verdict.
+ for repo in status_repos {
+ let freshness = repo["freshness"]
+ .as_str()
+ .expect("each repo should carry a freshness verdict");
+ assert!(
+ matches!(freshness, "fresh" | "stale" | "never_indexed" | "unknown"),
+ "unexpected freshness verdict in status output: {freshness}"
+ );
+ }
let search = run_ok(temp.path(), &["search", "OrderList", "--json"]);
let search_json = stdout_json(&search);
diff --git a/crates/gather-step-mcp/tests/batch_plan_change.rs b/crates/gather-step-mcp/tests/batch_plan_change.rs
index 7180640..2ea5641 100644
--- a/crates/gather-step-mcp/tests/batch_plan_change.rs
+++ b/crates/gather-step-mcp/tests/batch_plan_change.rs
@@ -1,5 +1,5 @@
//! Regression: `batch_query` must route `plan_change` to the typed
-//! nine-section product (`PlanChangeResponse`), not the legacy planning-pack
+//! twelve-section product (`PlanChangeResponse`), not the legacy planning-pack
//! `ContextPackResponse`. The direct MCP route and the batch route must agree.
use std::sync::Arc;
@@ -45,4 +45,21 @@ async fn batch_query_plan_change_returns_typed_product() {
body.contains("verification_plan") && body.contains("\"sections\""),
"batch plan_change must include the contract section manifest: {body}"
);
+ // The newer sections must round-trip through the batch route too: DSO1
+ // (display ownership) and B1/B3/WS-16 (the pass-2 + v1-completeness
+ // checklists), plus the current contract schema version.
+ for section in [
+ "display_ownership_checks",
+ "pass_two_gap_dimensions",
+ "v1_completeness_checklist",
+ ] {
+ assert!(
+ body.contains(section),
+ "batch plan_change missing section `{section}`: {body}"
+ );
+ }
+ assert!(
+ body.contains("\"schema_version\":3"),
+ "batch plan_change must carry the current contract schema version: {body}"
+ );
}
From fe30b9135d3ebdcaaaf177eca3359de607e3fdee Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 08:53:58 +0700
Subject: [PATCH 22/31] refactor: drop planning comments and product-specific
references
---
.../src/mongo_query_safety.rs | 94 +++++++---------
crates/gather-step-analysis/src/query.rs | 22 ----
crates/gather-step-cli/src/commands/status.rs | 6 -
crates/gather-step-cli/src/errors.rs | 14 ---
crates/gather-step-cli/src/main.rs | 7 --
crates/gather-step-cli/tests/cli_commands.rs | 13 +--
crates/gather-step-mcp/src/tools/packs.rs | 105 ++++++------------
.../tests/batch_plan_change.rs | 3 -
8 files changed, 76 insertions(+), 188 deletions(-)
diff --git a/crates/gather-step-analysis/src/mongo_query_safety.rs b/crates/gather-step-analysis/src/mongo_query_safety.rs
index 9fee880..3bd741d 100644
--- a/crates/gather-step-analysis/src/mongo_query_safety.rs
+++ b/crates/gather-step-analysis/src/mongo_query_safety.rs
@@ -1,32 +1,14 @@
-//! Mongo/Atlas structural safety detectors (WS-G3).
-//!
-//! These flag AST-detectable antipatterns that recur in ad-hoc mongo queries
-//! and aggregations, not just migrations. Each finding carries a stable rule ID
-//! and a confidence score so it can be surfaced in review without prose-only
-//! noise (the v4.3.1 non-goal "no prose-only findings" applies here).
-//!
-//! The detectors are pure over a [`serde_json::Value`] modelling a mongo
-//! operation (an update document or an aggregation pipeline), so they are
-//! unit-testable without a live parser. Wiring the parser's pipeline extraction
-//! into these detectors is a separate concern (tracked under MQS1's key files).
+//! Structural safety detectors for Mongo queries, aggregations, and Atlas
+//! search-index definitions. Pure over a [`serde_json::Value`] so they are
+//! testable without a live parser.
use serde_json::Value;
-/// MQS1 — an aggregation `$lookup` whose join key is coerced (`$toString` /
-/// `$toObjectId`) inside a sub-pipeline, defeating the index on the join field.
pub const RULE_INDEX_DEFEAT: &str = "GS-MONGO-INDEX-DEFEAT";
-/// MQS2 — a bare `$toObjectId` coercion (recommend `$convert` with `onError`).
pub const RULE_UNSAFE_COERCION: &str = "GS-MONGO-UNSAFE-COERCION";
-/// MQS3 — a `$set` on a dotted path with no existence/`$type:object` guard on
-/// the parent, which can clobber or fail on a null/scalar parent.
pub const RULE_NULL_PARENT_PATH: &str = "GS-MONGO-NULL-PARENT-PATH";
-/// AIX1 — a doc field is referenced in a `$search`/filter but absent from a
-/// `dynamic:false` Atlas search-index mapping, so the query silently matches
-/// nothing on that field.
pub const RULE_ATLAS_INDEX_DRIFT: &str = "GS-MONGO-ATLAS-INDEX-DRIFT";
-/// One mongo-query-safety finding. `path` is a dotted location into the analysed
-/// value so a caller can point at the offending stage/field.
#[derive(Clone, Debug, PartialEq)]
pub struct MongoQueryFinding {
pub rule_id: &'static str,
@@ -35,9 +17,6 @@ pub struct MongoQueryFinding {
pub path: String,
}
-/// Analyse a JSON-shaped mongo operation (update document or aggregation
-/// pipeline) for the WS-G3 structural traps. Findings are sorted by
-/// `(rule_id, path)` so the output is deterministic.
#[must_use]
pub fn analyze_mongo_value(value: &Value) -> Vec {
let mut findings = Vec::new();
@@ -50,10 +29,6 @@ pub fn analyze_mongo_value(value: &Value) -> Vec {
findings
}
-/// AIX1: flag doc fields referenced in a `$search`/filter that are absent from
-/// a `dynamic:false` Atlas search-index mapping. A `dynamic:true` (or missing —
-/// Atlas defaults to dynamic) mapping covers all fields, so it never drifts.
-/// Pure over the index definition + the referenced field set.
#[must_use]
pub fn analyze_atlas_index_drift(
index_def: &Value,
@@ -120,9 +95,6 @@ fn walk(value: &Value, path: &str, findings: &mut Vec) {
}
}
-/// MQS1: a `$lookup` is index-defeating when its join key is coerced with
-/// `$toString`/`$toObjectId` (typically inside a `let` + sub-`pipeline`). The
-/// index-aligned `localField`/`foreignField` form does no coercion.
fn detect_index_defeat(lookup: &Value, path: &str, findings: &mut Vec) {
if mentions_operator(lookup, "$toString") || mentions_operator(lookup, "$toObjectId") {
findings.push(MongoQueryFinding {
@@ -136,8 +108,6 @@ fn detect_index_defeat(lookup: &Value, path: &str, findings: &mut Vec) {
let Value::Object(map) = set_doc else {
return;
@@ -161,20 +131,15 @@ fn detect_null_parent_path(set_doc: &Value, path: &str, findings: &mut Vec bool {
mentions_operator(expr, "$ifNull") || mentions_operator(expr, "$cond")
}
-/// A sibling key in the same `$set` establishes the parent (or an ancestor) as
-/// an object before the dotted write — i.e. a sibling that is exactly the
-/// parent path or a prefix-ancestor of it.
fn sibling_assigns_parent(map: &serde_json::Map, parent: &str) -> bool {
map.keys()
.any(|key| key == parent || parent.starts_with(&format!("{key}.")))
}
-/// Whether `value` contains `op` as an object key anywhere in its tree.
fn mentions_operator(value: &Value, op: &str) -> bool {
match value {
Value::Object(map) => map
@@ -201,7 +166,6 @@ mod tests {
#[test]
fn flags_lookup_that_coerces_join_key_but_not_index_aligned_lookup() {
- // GO4(a): a $lookup coercing the join key with $toString defeats the index.
let coercing = json!([{
"$lookup": {
"from": "users",
@@ -214,7 +178,6 @@ mod tests {
}]);
assert!(rule_ids(&coercing).contains(&RULE_INDEX_DEFEAT));
- // An index-aligned localField/foreignField join is clean.
let aligned = json!([{
"$lookup": {
"from": "users",
@@ -228,11 +191,9 @@ mod tests {
#[test]
fn flags_bare_to_object_id_but_not_convert_with_on_error() {
- // GO4(b): bare $toObjectId on untrusted input.
let bare = json!({ "$match": { "_id": { "$toObjectId": "$$req.id" } } });
assert!(rule_ids(&bare).contains(&RULE_UNSAFE_COERCION));
- // $convert with onError is the safe sibling — no $toObjectId key.
let safe = json!({
"$match": {
"_id": { "$convert": { "input": "$$req.id", "to": "objectId", "onError": null } }
@@ -243,16 +204,13 @@ mod tests {
#[test]
fn flags_unguarded_dotted_set_but_not_guarded_sibling() {
- // GO4(c): dotted $set with no parent guard.
let unguarded = json!({ "$set": { "meta.flags.active": true } });
assert!(rule_ids(&unguarded).contains(&RULE_NULL_PARENT_PATH));
- // Guarded by wrapping the value in $ifNull.
let guarded_value =
json!({ "$set": { "meta.flags.active": { "$ifNull": ["$meta.flags.active", true] } } });
assert!(!rule_ids(&guarded_value).contains(&RULE_NULL_PARENT_PATH));
- // Guarded by establishing the parent in a sibling assignment.
let guarded_sibling =
json!({ "$set": { "meta": { "flags": {} }, "meta.flags.active": true } });
assert!(!rule_ids(&guarded_sibling).contains(&RULE_NULL_PARENT_PATH));
@@ -262,7 +220,6 @@ mod tests {
fn flags_unmapped_field_on_dynamic_false_index_but_not_mapped_field() {
use super::{RULE_ATLAS_INDEX_DRIFT, analyze_atlas_index_drift};
- // GO4(d): a dynamic:false index missing a referenced field.
let index = json!({
"mappings": { "dynamic": false, "fields": { "title": {}, "body": {} } }
});
@@ -270,16 +227,50 @@ mod tests {
let ids: Vec<_> = findings.iter().map(|f| f.rule_id).collect();
assert!(ids.contains(&RULE_ATLAS_INDEX_DRIFT));
assert!(findings.iter().any(|f| f.path == "entity"));
- assert!(
- !findings.iter().any(|f| f.path == "title"),
- "a mapped field must not drift"
- );
+ assert!(!findings.iter().any(|f| f.path == "title"));
- // A dynamic:true index covers every field — no drift.
let dynamic_index = json!({ "mappings": { "dynamic": true } });
assert!(analyze_atlas_index_drift(&dynamic_index, &["anything"]).is_empty());
}
+ #[test]
+ fn trap_detectors_cover_each_pattern_and_clean_siblings() {
+ use super::{
+ RULE_ATLAS_INDEX_DRIFT, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH, RULE_UNSAFE_COERCION,
+ analyze_atlas_index_drift,
+ };
+
+ let trap_a = json!([{ "$lookup": {
+ "from": "u", "let": { "k": "$userId" },
+ "pipeline": [{ "$match": { "$expr": { "$eq": [{ "$toString": "$_id" }, "$$k"] } } }],
+ "as": "u"
+ }}]);
+ let clean_a = json!([{ "$lookup": {
+ "from": "u", "localField": "userId", "foreignField": "_id", "as": "u"
+ }}]);
+ assert!(rule_ids(&trap_a).contains(&RULE_INDEX_DEFEAT));
+ assert!(!rule_ids(&clean_a).contains(&RULE_INDEX_DEFEAT));
+
+ let trap_b = json!({ "_id": { "$toObjectId": "$$req.id" } });
+ let clean_b = json!({ "_id": { "$convert": { "input": "$$req.id", "to": "objectId", "onError": null } } });
+ assert!(rule_ids(&trap_b).contains(&RULE_UNSAFE_COERCION));
+ assert!(!rule_ids(&clean_b).contains(&RULE_UNSAFE_COERCION));
+
+ let trap_c = json!({ "$set": { "meta.flags.active": true } });
+ let clean_c = json!({ "$set": { "meta": { "flags": {} }, "meta.flags.active": true } });
+ assert!(rule_ids(&trap_c).contains(&RULE_NULL_PARENT_PATH));
+ assert!(!rule_ids(&clean_c).contains(&RULE_NULL_PARENT_PATH));
+
+ let index = json!({ "mappings": { "dynamic": false, "fields": { "title": {} } } });
+ let trap_d = analyze_atlas_index_drift(&index, &["title", "entity"]);
+ assert!(
+ trap_d
+ .iter()
+ .any(|f| f.rule_id == RULE_ATLAS_INDEX_DRIFT && f.path == "entity")
+ );
+ assert!(!trap_d.iter().any(|f| f.path == "title"));
+ }
+
#[test]
fn clean_pipeline_yields_no_findings() {
let clean = json!([
@@ -298,7 +289,6 @@ mod tests {
let first = analyze_mongo_value(&value);
let second = analyze_mongo_value(&value);
assert_eq!(first, second);
- // Findings come back sorted by (rule_id, path).
let ids: Vec<_> = first.iter().map(|f| f.rule_id).collect();
let mut sorted = ids.clone();
sorted.sort_unstable();
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index 0b4be7b..e4f9d38 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -5,9 +5,6 @@ use gather_step_storage::{GraphStore, GraphStoreError};
use rustc_hash::{FxHashMap, FxHashSet};
use thiserror::Error;
-/// Maximum out-edges a single node contributes, and the maximum number of
-/// distinct in-paths recorded per node, before a traversal marks itself
-/// `truncated`. Bounds memory/work on very-high-fan-out hub nodes.
const DEFAULT_FANOUT_CAP: usize = 256;
#[derive(Debug, Error)]
@@ -19,22 +16,15 @@ pub enum QueryError {
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TraversalStep {
pub node_id: NodeId,
- /// The first (BFS-shortest) path of edge kinds discovered into this node.
pub edge_kinds: Vec,
pub depth: usize,
- /// Every distinct path of edge kinds that reaches this node (capped at
- /// `DEFAULT_FANOUT_CAP`). A node reachable N ways reports N caller paths.
pub in_paths: Vec>,
}
-/// The result of a bounded graph traversal, carrying the visited steps plus
-/// signals about whether the walk was cut short by a depth or fan-out bound.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TraversalOutcome {
pub steps: Vec,
- /// At least one node sat at `max_depth` with deeper edges left unfollowed.
pub depth_capped: bool,
- /// At least one node's fan-out (or recorded in-paths) hit `DEFAULT_FANOUT_CAP`.
pub truncated: bool,
}
@@ -96,9 +86,6 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
.steps)
}
- /// Bounded BFS that records multi-path provenance (every distinct path into
- /// each node) and signals when the walk was cut short by the depth limit
- /// (`depth_capped`) or the fan-out bound (`truncated`).
pub fn traverse_with_provenance(
&self,
start: NodeId,
@@ -108,11 +95,8 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
) -> Result {
let mut queue = VecDeque::from([(start, Vec::::new(), 0_usize)]);
let mut enqueued = FxHashSet::from_iter([start.as_bytes()]);
- // Discovery order of reached nodes, so step order is deterministic.
let mut order: Vec = Vec::new();
- // First-discovered path + depth per node.
let mut primary: FxHashMap<[u8; 16], (Vec, usize)> = FxHashMap::default();
- // Every distinct path into each node (bounded).
let mut in_paths: FxHashMap<[u8; 16], Vec>> = FxHashMap::default();
let mut depth_capped = false;
let mut truncated = false;
@@ -387,8 +371,6 @@ mod tests {
let query = GraphQuery::new(&store);
- // A chain longer than max_depth: c sits at the depth boundary and still
- // has an outgoing edge to d, so the traversal must flag depth capping.
let capped = query
.traverse_with_provenance(a.id, &[EdgeKind::Calls], 2, None)
.expect("traversal should succeed");
@@ -399,7 +381,6 @@ mod tests {
);
assert!(!capped.truncated);
- // A depth that reaches the whole chain leaves nothing capped.
let full = query
.traverse_with_provenance(a.id, &[EdgeKind::Calls], 8, None)
.expect("traversal should succeed");
@@ -416,7 +397,6 @@ mod tests {
let a = node("service-a", "src/a.ts", NodeKind::Function, "a", 1);
let b = node("service-a", "src/a.ts", NodeKind::Function, "b", 2);
let c = node("service-a", "src/a.ts", NodeKind::Function, "c", 3);
- // sink is reachable three ways: root->a->sink, root->b->sink, root->c->sink.
let sink = node("service-a", "src/a.ts", NodeKind::Function, "sink", 4);
store
.bulk_insert(
@@ -463,8 +443,6 @@ mod tests {
let file = node("service-a", "src/a.ts", NodeKind::File, "src/a.ts", 0);
let hub = node("service-a", "src/a.ts", NodeKind::Function, "hub", 0);
- // A hub node with more out-edges than DEFAULT_FANOUT_CAP (256) must trip
- // the truncation signal rather than silently exploring a bounded subset.
let fan_out = 300_u16;
let mut nodes = vec![file.clone(), hub.clone()];
let mut edges = Vec::new();
diff --git a/crates/gather-step-cli/src/commands/status.rs b/crates/gather-step-cli/src/commands/status.rs
index f189b74..f05f24f 100644
--- a/crates/gather-step-cli/src/commands/status.rs
+++ b/crates/gather-step-cli/src/commands/status.rs
@@ -38,8 +38,6 @@ struct RepoStatusOutput {
path: String,
path_exists: bool,
depth_level: String,
- /// Query-time index freshness vs the working tree's HEAD (A13): one of
- /// `fresh`, `stale`, `never_indexed`, or `unknown` (git unavailable).
freshness: String,
last_indexed_at: Option,
registry_file_count: u64,
@@ -342,9 +340,6 @@ fn format_semantic_summary(health: &SemanticHealthReport) -> String {
)
}
-/// Classify a repo's index freshness vs its current git HEAD (A13). Opening
-/// git is best-effort: a non-git path or git failure yields `unknown` rather
-/// than failing the whole status command.
fn repo_freshness(repo: &str, path: &std::path::Path, indexed_sha: Option<&str>) -> String {
let indexer = GitHistoryIndexer::new(GitRepoSource::from_path(path), repo);
match indexer.index_freshness(indexed_sha) {
@@ -501,7 +496,6 @@ mod tests {
Some("status_completed"),
"status payload should have event=status_completed"
);
- // A13: every repo row carries a query-time index-freshness verdict.
for repo in repos {
let freshness = repo["freshness"]
.as_str()
diff --git a/crates/gather-step-cli/src/errors.rs b/crates/gather-step-cli/src/errors.rs
index 91873c7..631379b 100644
--- a/crates/gather-step-cli/src/errors.rs
+++ b/crates/gather-step-cli/src/errors.rs
@@ -6,16 +6,8 @@ use gather_step_storage::{GraphStoreError, MetadataStoreError, SearchStoreError}
const SCHEMA_VERSION_MISMATCH_MESSAGE: &str = "Index schema version mismatch — built by a different gather-step release. Next step: run `gather-step index --auto-recover` to rebuild, or `gather-step clean && gather-step index`.";
-/// Sysexits-style exit code (`EX_TEMPFAIL`) returned when a read command cannot
-/// proceed because the graph store is locked by another gather-step process
-/// (REL1). Distinct from the generic failure code (1) so a caller — CI, a
-/// wrapper, or an agent — can tell "blocked, results incomplete" apart from
-/// "ran fine, found nothing" and never silently fall back to grep.
pub const GRAPH_LOCKED_EXIT_CODE: u8 = 75;
-/// True when the error chain carries graph-store lock contention
-/// (`StorageHeld` / `StorageHeldByDaemon`), including the text-fallback forms
-/// produced when the typed error is flattened to a string mid-chain.
#[must_use]
pub fn graph_lock_contention(error: &Error) -> bool {
for cause in error.chain() {
@@ -34,8 +26,6 @@ pub fn graph_lock_contention(error: &Error) -> bool {
|| contains_ascii_case_insensitive(&full, "database already open")
}
-/// Machine-readable degraded-mode disclosure for `--json` consumers when a read
-/// command is blocked by graph lock contention (REL1).
#[must_use]
pub fn graph_locked_json_disclosure(error: &Error) -> String {
serde_json::json!({
@@ -239,15 +229,11 @@ mod tests {
"StorageHeldByDaemon must be detected"
);
- // The JSON disclosure names the degraded mode so a blocked read can
- // never look like an empty-but-successful result.
let disclosure: serde_json::Value =
serde_json::from_str(&graph_locked_json_disclosure(&err)).expect("valid json");
assert_eq!(disclosure["degraded"], "graph_locked");
assert_eq!(disclosure["event"], "command_failed");
- // The dedicated exit code is distinct from success (0) and the generic
- // failure code (1) so callers can branch on it.
assert_ne!(GRAPH_LOCKED_EXIT_CODE, 0);
assert_ne!(GRAPH_LOCKED_EXIT_CODE, 1);
}
diff --git a/crates/gather-step-cli/src/main.rs b/crates/gather-step-cli/src/main.rs
index b3b5ee2..5fd3192 100644
--- a/crates/gather-step-cli/src/main.rs
+++ b/crates/gather-step-cli/src/main.rs
@@ -54,11 +54,6 @@ async fn run_main() -> Result {
/// Print the operator-facing error to stderr and return an exit code.
///
-/// Graph lock contention (REL1) returns a dedicated exit code and, under
-/// `--json`, a `degraded: graph_locked` disclosure on stdout so a blocked read
-/// can never be mistaken for an empty-but-successful result. All other failures
-/// return exit code 1.
-///
/// Returning `ExitCode` rather than calling `std::process::exit(1)` lets
/// tokio's runtime tear down cleanly and lets stdio buffers flush — important
/// for `pr-review` (which prints a structured report on stdout) and any
@@ -78,8 +73,6 @@ fn print_operator_error_and_code(error: &Error) -> ExitCode {
ExitCode::from(1)
}
-/// Best-effort check of the raw argv for the global `--json` flag. Used only on
-/// the error path, where the parsed `Cli` is unavailable.
fn wants_json_output() -> bool {
std::env::args().any(|arg| arg == "--json")
}
diff --git a/crates/gather-step-cli/tests/cli_commands.rs b/crates/gather-step-cli/tests/cli_commands.rs
index 574402c..02c8f96 100644
--- a/crates/gather-step-cli/tests/cli_commands.rs
+++ b/crates/gather-step-cli/tests/cli_commands.rs
@@ -348,15 +348,9 @@ fn write_qa_reference_v4_workspace(root: &Path) {
.expect("notifications fixture source");
}
-/// REL1 end-to-end: while the graph store is held by another handle (simulating
-/// an in-progress index/watch), a read command must exit with the dedicated
-/// lock-contention code (75) and, under `--json`, disclose `degraded:
-/// graph_locked` on stdout — so a blocked read can never look like an
-/// empty-but-successful result. The lock is held in-process; the read runs as a
-/// separate binary, exercising redb's real cross-process file lock.
#[test]
fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
- let temp = TempDir::new("graph-lock-rel1");
+ let temp = TempDir::new("graph-lock");
write_fixture_workspace(temp.path());
run_ok(temp.path(), &["init"]);
run_ok(temp.path(), &["--json", "index"]);
@@ -385,8 +379,6 @@ fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
assert_eq!(disclosure["event"], "command_failed");
assert_eq!(disclosure["degraded"], "graph_locked");
- // Without --json the exit code is still the distinct 75 and the operator
- // message lands on stderr (no machine disclosure on stdout).
let plain = gather_step()
.arg("--workspace")
.arg(temp.path())
@@ -404,8 +396,6 @@ fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
"plain-mode lock failure must not emit a machine disclosure on stdout"
);
- // Releasing the lock lets the same read succeed — proving 75 meant "blocked",
- // not "broken".
drop(held);
let recovered = run_ok(temp.path(), &["search", "OrderList", "--json"]);
assert_eq!(stdout_json(&recovered)["event"], "search_completed");
@@ -504,7 +494,6 @@ fn cli_commands_work_on_indexed_fixture_workspace() {
assert_eq!(status_json["event"], "status_completed");
let status_repos = status_json["repos"].as_array().expect("repos array");
assert!(!status_repos.is_empty());
- // A13: each repo row carries a query-time index-freshness verdict.
for repo in status_repos {
let freshness = repo["freshness"]
.as_str()
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index fed0cae..9af4e91 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -207,15 +207,8 @@ pub struct ContextPackResponse {
pub meta: Option,
}
-/// Schema version of the [`PlanChangeResponse`] contract. Bump on any change to
-/// the section set or contract shape. v2 adds `display_ownership_checks` (DSO1);
-/// v3 adds `pass_two_gap_dimensions` (B1) and `v1_completeness_checklist`
-/// (B3 + WS-16).
const PLAN_CHANGE_SCHEMA_VERSION: u8 = 3;
-/// The fixed `plan_change` section names, in canonical order. The contract
-/// manifest must equal this exactly so consumers (and the G7 gate) can assert
-/// completeness deterministically.
const PLAN_CHANGE_SECTIONS: [&str; 12] = [
"reuse_candidates",
"sibling_clone_targets",
@@ -231,8 +224,6 @@ const PLAN_CHANGE_SECTIONS: [&str; 12] = [
"verification_plan",
];
-/// B1: the eight fixed pass-2 gap-enumeration dimensions, surfaced as a required
-/// checklist on every plan so a planner cannot skip them.
const PASS_TWO_GAP_DIMENSIONS: [&str; 8] = [
"State machine: does this add/alter a status or lifecycle state? Enumerate transitions and conflict handling.",
"Token/URL security: are tokens, signed URLs, or IDs exposed or accepted? Validate scope, expiry, and tampering.",
@@ -244,8 +235,6 @@ const PASS_TWO_GAP_DIMENSIONS: [&str; 8] = [
"Notification: should this change emit or suppress a user/system notification?",
];
-/// B3 (v1-completeness) + WS-16 (v1 deviation taxonomy V1–V9), folded into one
-/// checklist. Every item carries a `→ verify:` so progress is observable.
const V1_COMPLETENESS_CHECKLIST: [&str; 19] = [
"Scale test: does it hold at production data volume? → verify: run against a realistic-size fixture.",
"Failure-path lifecycle: are partial-failure and retry paths handled? → verify: exercise the failure branch.",
@@ -268,10 +257,6 @@ const V1_COMPLETENESS_CHECKLIST: [&str; 19] = [
"Implied-surface sweep: events/audit/notification/rate-limit/idempotency even when the spec is silent. → verify: sweep performed.",
];
-/// Evidentiary contract for the typed `plan_change` product (WS-3 / G7):
-/// deterministic metadata (schema version + the fixed section manifest) plus an
-/// exclusion ledger recording what was dropped from the response and why, so a
-/// consumer never mistakes a capped/filtered result for an exhaustive one.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub struct PlanChangeContract {
pub schema_version: u8,
@@ -280,50 +265,27 @@ pub struct PlanChangeContract {
pub exclusion_ledger: Vec,
}
-/// Typed `plan_change` product (WS-3 / E1).
-///
-/// A distinct response shape — not an alias for the planning pack — with the
-/// twelve fixed planning sections. v1 projects the existing planning-pack data
-/// into these sections; later slices (B7 proactive queries, G7 evidentiary
-/// fields) enrich them. Every section is always present (possibly empty) so the
-/// contract is stable for consumers and the G7 gate.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
pub struct PlanChangeResponse {
pub target: String,
pub found: bool,
- /// Existing reusable symbols a change should prefer over a new fork —
- /// shared/design-system members surfaced by the S3 reuse ranking.
pub reuse_candidates: Vec,
- /// Local siblings worth cloning/aligning with (same neighbourhood, not in a
- /// shared module).
pub sibling_clone_targets: Vec,
pub standards_to_preserve: Vec,
pub integration_checks: Vec,
- /// Cross-repo reachability proofs (producer/consumer edges) for the target.
pub cross_repo_reachability: Vec,
- /// DSO1: for each cross-service reference, the display-ownership question —
- /// confirm display fields come from the owner service (snapshot/API), not a
- /// direct cross-service DB lookup, and that access control stays in the owner.
pub display_ownership_checks: Vec,
pub write_path_or_state_machine_risks: Vec,
pub required_braingent_records: Vec,
pub open_unknowns: Vec,
- /// B1: the eight fixed pass-2 gap-enumeration dimensions (required checklist).
pub pass_two_gap_dimensions: Vec,
- /// B3 + WS-16: the v1-completeness checklist with the v1 deviation taxonomy
- /// folded in; every item carries a `→ verify:`.
pub v1_completeness_checklist: Vec,
pub verification_plan: Vec,
- /// Evidentiary contract: schema version, section manifest, exclusion ledger.
pub contract: PlanChangeContract,
#[serde(skip_serializing_if = "Option::is_none")]
pub meta: Option,
}
-/// Project planning-pack pieces into the typed `plan_change` sections (WS-3 /
-/// E1). Pure over its inputs so the projection is unit-testable without an MCP
-/// context or graph. Reuse candidates are shared-module members; remaining
-/// related items become sibling clone targets.
fn build_plan_change(
target: &str,
found: bool,
@@ -352,8 +314,6 @@ fn build_plan_change(
.map(|step| format!("Run `{step}` and confirm the result before changing code."))
.collect();
- // B7: surface the change-impact evidence the planning pack already gathered
- // into the typed sections, rather than leaving them empty.
let mut integration_checks = Vec::new();
for repo in &change_impact.confirmed_downstream_repos {
integration_checks.push(format!(
@@ -366,9 +326,6 @@ fn build_plan_change(
));
}
- // DSO1: every cross-service reference raises the display-ownership question
- // as a named planning dimension (REG-13833 — a cross-service display lookup
- // was added without planning who owns the field).
let mut display_ownership_checks = Vec::new();
for caller in &change_impact.cross_repo_callers {
display_ownership_checks.push(format!(
@@ -398,8 +355,6 @@ fn build_plan_change(
);
}
- // G7: exclusion ledger — record what was dropped from the response so a
- // consumer never reads a capped/filtered result as exhaustive.
let mut exclusion_ledger = Vec::new();
if let Some(truncated) = &change_impact.truncated_repos {
exclusion_ledger.push(format!(
@@ -413,8 +368,6 @@ fn build_plan_change(
exclusion_ledger.push(format!("Planning warning: {warning}"));
}
}
- // Sections that are intentionally not computed yet are recorded here so an
- // empty value is read as "not computed", not "nothing applies".
exclusion_ledger
.push("standards_to_preserve: not yet computed (no standards engine).".to_owned());
exclusion_ledger
@@ -433,8 +386,6 @@ fn build_plan_change(
found,
reuse_candidates,
sibling_clone_targets,
- // Not yet computed; their absence is recorded in the exclusion ledger
- // so an empty value is not read as "nothing applies".
standards_to_preserve: Vec::new(),
integration_checks,
cross_repo_reachability: planning_proofs.to_vec(),
@@ -456,9 +407,6 @@ fn build_plan_change(
}
}
-/// G7 contract gate: assert a [`PlanChangeResponse`] carries deterministic
-/// contract metadata — the current schema version and the exact canonical
-/// section manifest. Returns the list of violations (empty == passes).
pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
let mut violations = Vec::new();
if plan.contract.schema_version != PLAN_CHANGE_SCHEMA_VERSION {
@@ -482,8 +430,6 @@ pub fn validate_plan_change_contract(plan: &PlanChangeResponse) -> Vec {
violations
}
-/// Build the typed `plan_change` product (WS-3 / E1): run the planning pack,
-/// then project its data into the twelve fixed sections.
pub fn run_plan_change(
ctx: &McpContext,
request: ModePackRequest,
@@ -1387,9 +1333,6 @@ fn assemble_context_pack_for_symbol(
items.retain(|item| item.repo == repo);
}
- // S3: apply the graph-derived reuse boost on a bounded candidate window
- // BEFORE the cut, then re-sort, so a canonical reusable symbol can be
- // promoted into the pack rather than discarded by the base-score truncate.
if mode == PackMode::Planning {
// KNOWN CEILING: the reuse boost re-ranks the top `limit*5` base-ranked
// candidates; a canonical symbol ranked below that window for a very
@@ -2870,7 +2813,7 @@ fn consumer_count(graph: &S, symbol_id: &str
.len()
}
-/// Apply the S3 graph-derived reuse boost to planning-pack items via a
+/// Apply the graph-derived reuse boost to planning-pack items via a
/// consumer-count lookup. Generic over the lookup so the boost is unit-testable
/// without a graph store. Must run BEFORE truncation so a canonical reusable
/// symbol can be promoted into the pack, not merely reordered within an
@@ -5566,6 +5509,33 @@ mod tests {
assert_eq!(items[0].score, 130);
}
+ #[test]
+ fn reuse_ranking_promotes_shared_component_over_bespoke_fork() {
+ use super::{apply_reuse_boost_with, sort_pack_items};
+
+ let fork = make_pack_item(100, "src/features/orders/OrderButtonFork.tsx", false);
+ let shared = make_pack_item(80, "packages/ui/components/Button.tsx", false);
+ let mut items = vec![fork.clone(), shared.clone()];
+
+ let mut raw = items.clone();
+ sort_pack_items(&mut raw);
+ assert_eq!(raw[0].file_path, fork.file_path);
+
+ apply_reuse_boost_with(&mut items, |symbol_id| {
+ if symbol_id == shared.symbol_id { 10 } else { 0 }
+ });
+ sort_pack_items(&mut items);
+
+ assert_eq!(
+ items[0].file_path, shared.file_path,
+ "shared component must rank top-1 after the reuse boost"
+ );
+ assert_ne!(
+ items[0].file_path, fork.file_path,
+ "bespoke fork must not be top-1"
+ );
+ }
+
#[test]
fn reuse_evidence_boost_prefers_shared_and_widely_consumed() {
use super::reuse_evidence_boost;
@@ -5666,7 +5636,6 @@ mod tests {
assert_eq!(plan.open_unknowns, gaps);
assert_eq!(plan.cross_repo_reachability.len(), 1);
assert_eq!(plan.verification_plan.len(), 1);
- // B7: change-impact evidence now populates these sections.
assert_eq!(plan.integration_checks.len(), 1);
assert!(plan.integration_checks[0].contains("alert"));
assert_eq!(plan.write_path_or_state_machine_risks.len(), 1);
@@ -5691,7 +5660,6 @@ mod tests {
None,
);
- // B1: all eight pass-2 gap dimensions are present as a required checklist.
assert_eq!(
plan.pass_two_gap_dimensions.len(),
8,
@@ -5699,8 +5667,6 @@ mod tests {
plan.pass_two_gap_dimensions
);
- // B3 + WS-16: the v1-completeness checklist is non-empty and every item
- // carries a `→ verify:` so progress is observable.
assert!(!plan.v1_completeness_checklist.is_empty());
assert!(
plan.v1_completeness_checklist
@@ -5708,15 +5674,13 @@ mod tests {
.all(|item| item.contains("→ verify:")),
"every v1-completeness item must carry a verify step"
);
- // WS-16: the deviation taxonomy (V1–V9) is folded into the checklist.
assert!(
plan.v1_completeness_checklist
.iter()
.any(|item| item.contains("Derived-field blast radius")),
- "V6 deviation item missing from checklist"
+ "deviation checklist item missing"
);
- // Both new sections are part of the deterministic contract manifest.
for section in ["pass_two_gap_dimensions", "v1_completeness_checklist"] {
assert!(
plan.contract.sections.iter().any(|s| s == section),
@@ -5731,12 +5695,12 @@ mod tests {
let change_impact = ChangeImpactSummary {
cross_repo_callers: vec![super::CrossRepoCaller {
- file_path: "src/action_hub.ts".to_owned(),
+ file_path: "src/action_panel.ts".to_owned(),
line_start: None,
- repo: "label-review".to_owned(),
+ repo: "reporting".to_owned(),
symbol_id: "id_hub".to_owned(),
symbol_kind: "function".to_owned(),
- symbol_name: "renderActionHub".to_owned(),
+ symbol_name: "renderActionPanel".to_owned(),
evidence: None,
}],
..Default::default()
@@ -5752,16 +5716,13 @@ mod tests {
None,
);
- // DSO1: cross-service references raise the display-ownership question as
- // a named section, citing the owning service.
assert!(
plan.display_ownership_checks
.iter()
- .any(|check| check.contains("label-review")),
+ .any(|check| check.contains("reporting")),
"display ownership not surfaced for cross-service ref: {:?}",
plan.display_ownership_checks
);
- // The new section is part of the deterministic contract manifest.
assert!(
plan.contract
.sections
diff --git a/crates/gather-step-mcp/tests/batch_plan_change.rs b/crates/gather-step-mcp/tests/batch_plan_change.rs
index 2ea5641..4c13a4e 100644
--- a/crates/gather-step-mcp/tests/batch_plan_change.rs
+++ b/crates/gather-step-mcp/tests/batch_plan_change.rs
@@ -45,9 +45,6 @@ async fn batch_query_plan_change_returns_typed_product() {
body.contains("verification_plan") && body.contains("\"sections\""),
"batch plan_change must include the contract section manifest: {body}"
);
- // The newer sections must round-trip through the batch route too: DSO1
- // (display ownership) and B1/B3/WS-16 (the pass-2 + v1-completeness
- // checklists), plus the current contract schema version.
for section in [
"display_ownership_checks",
"pass_two_gap_dimensions",
From 6305f3609f68696a2f64fa93f4fb0efd02513149 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 09:20:46 +0700
Subject: [PATCH 23/31] feat: fall back to a read-only graph snapshot when the
store is locked
---
crates/gather-step-cli/tests/cli_commands.rs | 50 +++-------
crates/gather-step-storage/src/graph_store.rs | 94 ++++++++++++++++++-
crates/gather-step-storage/src/stores.rs | 2 +-
3 files changed, 107 insertions(+), 39 deletions(-)
diff --git a/crates/gather-step-cli/tests/cli_commands.rs b/crates/gather-step-cli/tests/cli_commands.rs
index 02c8f96..d34fe5b 100644
--- a/crates/gather-step-cli/tests/cli_commands.rs
+++ b/crates/gather-step-cli/tests/cli_commands.rs
@@ -349,7 +349,7 @@ fn write_qa_reference_v4_workspace(root: &Path) {
}
#[test]
-fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
+fn read_command_during_graph_lock_succeeds_via_snapshot() {
let temp = TempDir::new("graph-lock");
write_fixture_workspace(temp.path());
run_ok(temp.path(), &["init"]);
@@ -362,38 +362,16 @@ fn read_command_during_graph_lock_exits_distinctly_and_discloses() {
.join("graph.redb");
let held = GraphStoreDb::open(&graph_path).expect("should hold the graph lock");
- let output = gather_step()
- .arg("--workspace")
- .arg(temp.path())
- .args(["search", "OrderList", "--json"])
- .output()
- .expect("command should run");
-
- assert_eq!(
- output.status.code(),
- Some(75),
- "read under lock must exit with the distinct lock-contention code 75; stderr:\n{}",
- String::from_utf8_lossy(&output.stderr)
- );
- let disclosure = stdout_json(&output);
- assert_eq!(disclosure["event"], "command_failed");
- assert_eq!(disclosure["degraded"], "graph_locked");
-
- let plain = gather_step()
- .arg("--workspace")
- .arg(temp.path())
- .args(["search", "OrderList"])
- .output()
- .expect("command should run");
- assert_eq!(plain.status.code(), Some(75));
- assert!(
- String::from_utf8_lossy(&plain.stderr).contains("Another gather-step process"),
- "plain-mode lock error should explain the contention on stderr: {}",
- String::from_utf8_lossy(&plain.stderr)
- );
+ let output = run_ok(temp.path(), &["search", "OrderList", "--json"]);
+ let search_json = stdout_json(&output);
+ assert_eq!(search_json["event"], "search_completed");
assert!(
- plain.stdout.is_empty(),
- "plain-mode lock failure must not emit a machine disclosure on stdout"
+ search_json["hits"]
+ .as_array()
+ .expect("hits array")
+ .iter()
+ .any(|item| item["symbol_name"] == "OrderList"),
+ "snapshot read should return the indexed hit while the store is held"
);
drop(held);
@@ -1222,7 +1200,7 @@ fn metadata_schema_user_version_mismatch_reports_recovery_hint() {
}
#[test]
-fn concurrent_graph_open_reports_stable_process_error() {
+fn concurrent_graph_open_reads_via_snapshot() {
let temp = TempDir::new("concurrent-open");
write_fixture_workspace(temp.path());
run_ok(temp.path(), &["init"]);
@@ -1230,10 +1208,8 @@ fn concurrent_graph_open_reports_stable_process_error() {
let _held_graph = GraphStoreDb::open(temp.path().join(".gather-step/storage/graph.redb"))
.expect("graph should open and hold the redb lock");
- let output = run_fail(temp.path(), &["status", "--json"]);
- let stderr = String::from_utf8_lossy(&output.stderr);
- assert!(stderr.contains("Another gather-step process is using this workspace"));
- assert!(stderr.contains("Stop `gather-step watch`"));
+ let output = run_ok(temp.path(), &["status", "--json"]);
+ assert_eq!(stdout_json(&output)["event"], "status_completed");
}
#[test]
diff --git a/crates/gather-step-storage/src/graph_store.rs b/crates/gather-step-storage/src/graph_store.rs
index e0a002d..1085cdb 100644
--- a/crates/gather-step-storage/src/graph_store.rs
+++ b/crates/gather-step-storage/src/graph_store.rs
@@ -1,7 +1,8 @@
use std::{
- fs,
+ env, fs,
io::ErrorKind,
path::{Path, PathBuf},
+ sync::atomic::{AtomicU64, Ordering},
};
use gather_step_core::{
@@ -551,6 +552,7 @@ pub trait GraphStore {
pub struct GraphStoreDb {
db: Database,
path: PathBuf,
+ snapshot_cleanup: Option,
/// Active-`BulkModeGuard` reference count. When > 0, write
/// transactions use `Durability::None` to skip fsync. The counter
/// (rather than a flat `AtomicBool`) is what makes
@@ -623,6 +625,14 @@ impl GraphStoreError {
/// once the working set fits.
const DEFAULT_GRAPH_CACHE_BYTES: usize = 256 * 1024 * 1024;
+impl Drop for GraphStoreDb {
+ fn drop(&mut self) {
+ if let Some(snapshot) = self.snapshot_cleanup.take() {
+ let _ = fs::remove_file(snapshot);
+ }
+ }
+}
+
impl GraphStoreDb {
fn is_missing_table_error(error: &impl core::fmt::Display) -> bool {
error.to_string().contains("does not exist")
@@ -660,6 +670,7 @@ impl GraphStoreDb {
let store = Self {
db,
path,
+ snapshot_cleanup: None,
bulk_mode: std::sync::atomic::AtomicUsize::new(0),
};
if is_new {
@@ -670,6 +681,59 @@ impl GraphStoreDb {
Ok(store)
}
+ /// Open the graph store for reading without contending on the writer lock.
+ ///
+ /// The fast path is a normal [`Self::open`]. When the store is held by
+ /// another process (an in-progress index or watch) it falls back to a
+ /// point-in-time copy of the database file and opens that, so a read during
+ /// a write returns the last committed state instead of blocking. The copy is
+ /// removed when the returned store is dropped.
+ pub fn open_read_only(path: impl AsRef) -> Result {
+ let path = path.as_ref().to_path_buf();
+ match Self::open(&path) {
+ Err(
+ GraphStoreError::StorageHeld { .. } | GraphStoreError::StorageHeldByDaemon { .. },
+ ) => Self::open_snapshot(&path),
+ other => other,
+ }
+ }
+
+ fn open_snapshot(path: &Path) -> Result {
+ let snapshot_path = Self::snapshot_path(path);
+ fs::copy(path, &snapshot_path)?;
+ let mut builder = Database::builder();
+ builder.set_cache_size(DEFAULT_GRAPH_CACHE_BYTES);
+ let db = builder.open(&snapshot_path).map_err(|error| {
+ let _ = fs::remove_file(&snapshot_path);
+ Self::map_open_error(path, error)
+ })?;
+ let store = Self {
+ db,
+ path: path.to_path_buf(),
+ snapshot_cleanup: Some(snapshot_path),
+ bulk_mode: std::sync::atomic::AtomicUsize::new(0),
+ };
+ store.validate_schema_version()?;
+ Ok(store)
+ }
+
+ fn snapshot_path(path: &Path) -> PathBuf {
+ static COUNTER: AtomicU64 = AtomicU64::new(0);
+ let id = COUNTER.fetch_add(1, Ordering::Relaxed);
+ let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("graph");
+ env::temp_dir().join(format!(
+ "gather-step-snapshot-{stem}-{}-{id}.redb",
+ std::process::id()
+ ))
+ }
+
+ /// Whether this store is backed by a read-only snapshot copy rather than the
+ /// live database file.
+ #[must_use]
+ pub fn is_snapshot(&self) -> bool {
+ self.snapshot_cleanup.is_some()
+ }
+
fn write_schema_version(&self) -> Result<(), GraphStoreError> {
let write_txn = self.begin_write_txn()?;
{
@@ -3922,6 +3986,34 @@ mod tests {
));
}
+ #[test]
+ fn open_read_only_falls_back_to_snapshot_when_held() {
+ let (_workspace, _storage, graph_path) = temp_workspace_graph_path("held-snapshot");
+ let function = node("service-a", "src/a.ts", NodeKind::Function, "execute", 0);
+
+ let holder = GraphStoreDb::open(&graph_path).expect("first open should succeed");
+ holder
+ .insert_node(&function)
+ .expect("committed write before the snapshot");
+
+ let reader =
+ GraphStoreDb::open_read_only(&graph_path).expect("read-only open should snapshot");
+ assert!(reader.is_snapshot(), "held read must use a snapshot copy");
+ let loaded = reader
+ .get_node(function.id)
+ .expect("snapshot read should succeed")
+ .expect("committed node should be visible in the snapshot");
+ assert_eq!(loaded.name, "execute");
+ drop(reader);
+ drop(holder);
+
+ let direct = GraphStoreDb::open_read_only(&graph_path).expect("uncontended read-only open");
+ assert!(
+ !direct.is_snapshot(),
+ "an uncontended read must use the live file, not a snapshot"
+ );
+ }
+
#[test]
fn open_stamps_fresh_schema_version_zero() {
let graph_path = temp_db_path("fresh-graph-schema");
diff --git a/crates/gather-step-storage/src/stores.rs b/crates/gather-step-storage/src/stores.rs
index 4865cc2..ac7e90a 100644
--- a/crates/gather-step-storage/src/stores.rs
+++ b/crates/gather-step-storage/src/stores.rs
@@ -52,7 +52,7 @@ impl WorkspaceStores {
pub fn open_read_only_search(root: impl AsRef) -> Result {
let root = root.as_ref().to_path_buf();
- let graph = Arc::new(GraphStoreDb::open(root.join("graph.redb"))?);
+ let graph = Arc::new(GraphStoreDb::open_read_only(root.join("graph.redb"))?);
let search = Arc::new(TantivySearchStore::open_read_only(root.join("search"))?);
let metadata = Arc::new(MetadataStoreDb::open(root.join("metadata.sqlite"))?);
Ok(Self {
From a24cec480a62acf24ff021386910cb5273628992 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 09:21:01 +0700
Subject: [PATCH 24/31] docs: note lock-free snapshot reads in 4.3.1 changelog
---
website/src/content/docs/changelog.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/website/src/content/docs/changelog.md b/website/src/content/docs/changelog.md
index 949cbb1..1b4a5a8 100644
--- a/website/src/content/docs/changelog.md
+++ b/website/src/content/docs/changelog.md
@@ -29,7 +29,7 @@ Planning- and reuse-quality release. Fixes retrieval recall so reuse search stop
### Fixed
- `batch_query` now routes `plan_change` requests to the typed product instead of the raw planning pack.
-- Graph **lock contention** on read commands now exits with a distinct, documented code and (under `--json`) a `degraded: graph_locked` disclosure, so a blocked read can never be mistaken for an empty-but-successful result (REL1).
+- Read commands no longer block or fail when the graph store is held by an in-progress index or watch: they route to the holding daemon when one is serving, otherwise read a point-in-time snapshot of the graph. When neither is possible the command exits with a distinct, documented code and (under `--json`) a `degraded: graph_locked` disclosure, so a blocked read can never be mistaken for an empty-but-successful result.
### Release-wide
From a837afa6bc4cd695ce82c81d287b8e33ee8abd9c Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 09:31:06 +0700
Subject: [PATCH 25/31] test: resolution snapshot ratchet guarding against
silent edge drift
---
crates/gather-step-analysis/src/query.rs | 39 +++++++++++++++++++
crates/gather-step-cli/tests/cli_commands.rs | 35 +++++++++++++++++
.../tests/golden/resolution_snapshot.golden | 34 ++++++++++++++++
3 files changed, 108 insertions(+)
create mode 100644 crates/gather-step-cli/tests/golden/resolution_snapshot.golden
diff --git a/crates/gather-step-analysis/src/query.rs b/crates/gather-step-analysis/src/query.rs
index e4f9d38..d259ee2 100644
--- a/crates/gather-step-analysis/src/query.rs
+++ b/crates/gather-step-analysis/src/query.rs
@@ -181,6 +181,45 @@ impl<'a, S: GraphStore> GraphQuery<'a, S> {
}
Ok(counts)
}
+
+ pub fn resolution_fingerprint(&self) -> Result, QueryError> {
+ let mut labels: FxHashMap<[u8; 16], String> = FxHashMap::default();
+ let mut node_ids: Vec = Vec::new();
+ for kind in NodeKind::all() {
+ for node in self.store.nodes_by_type(*kind)? {
+ let label = node
+ .qualified_name
+ .clone()
+ .unwrap_or_else(|| format!("{}::{}", node.repo, node.name));
+ labels.insert(node.id.as_bytes(), label);
+ node_ids.push(node.id);
+ }
+ }
+
+ let mut lines = Vec::new();
+ for node_id in node_ids {
+ for edge in self.store.get_outgoing(node_id)? {
+ let source = labels
+ .get(&edge.source.as_bytes())
+ .map_or("", String::as_str);
+ let target = labels
+ .get(&edge.target.as_bytes())
+ .map_or("", String::as_str);
+ let resolver = edge.metadata.resolver.as_deref().unwrap_or("none");
+ let confidence = edge
+ .metadata
+ .confidence
+ .map_or_else(|| "none".to_owned(), |value| value.to_string());
+ lines.push(format!(
+ "{source}\t{}\t{target}\t{resolver}\t{confidence}",
+ edge.kind
+ ));
+ }
+ }
+ lines.sort();
+ lines.dedup();
+ Ok(lines)
+ }
}
#[cfg(test)]
diff --git a/crates/gather-step-cli/tests/cli_commands.rs b/crates/gather-step-cli/tests/cli_commands.rs
index d34fe5b..f11bf51 100644
--- a/crates/gather-step-cli/tests/cli_commands.rs
+++ b/crates/gather-step-cli/tests/cli_commands.rs
@@ -348,6 +348,41 @@ fn write_qa_reference_v4_workspace(root: &Path) {
.expect("notifications fixture source");
}
+#[test]
+fn resolution_snapshot_matches_golden() {
+ use gather_step_analysis::GraphQuery;
+
+ let temp = TempDir::new("resolution-snapshot");
+ write_fixture_workspace(temp.path());
+ run_ok(temp.path(), &["init"]);
+ run_ok(temp.path(), &["--json", "index"]);
+
+ let graph =
+ GraphStoreDb::open(temp.path().join(".gather-step/storage/graph.redb")).expect("graph");
+ let actual = GraphQuery::new(&graph)
+ .resolution_fingerprint()
+ .expect("fingerprint")
+ .join("\n");
+
+ let golden_path =
+ Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/golden/resolution_snapshot.golden");
+ if env::var_os("GATHER_STEP_BLESS_SNAPSHOT").is_some() {
+ fs::create_dir_all(golden_path.parent().expect("golden parent")).expect("golden dir");
+ fs::write(&golden_path, &actual).expect("write golden");
+ }
+
+ let expected = fs::read_to_string(&golden_path).unwrap_or_else(|_| {
+ panic!(
+ "missing golden {}; regenerate with GATHER_STEP_BLESS_SNAPSHOT=1",
+ golden_path.display()
+ )
+ });
+ assert_eq!(
+ actual, expected,
+ "resolution snapshot drifted; if the change is intended, regenerate with GATHER_STEP_BLESS_SNAPSHOT=1"
+ );
+}
+
#[test]
fn read_command_during_graph_lock_succeeds_via_snapshot() {
let temp = TempDir::new("graph-lock");
diff --git a/crates/gather-step-cli/tests/golden/resolution_snapshot.golden b/crates/gather-step-cli/tests/golden/resolution_snapshot.golden
new file mode 100644
index 0000000..8f6c4ae
--- /dev/null
+++ b/crates/gather-step-cli/tests/golden/resolution_snapshot.golden
@@ -0,0 +1,34 @@
+ServiceAController DependsOn __di__EventBusClient none none
+ServiceAController UsesDecorator ServiceAController::@Controller none none
+ServiceAController.constructor UsesDecorator ServiceAController.constructor::@Controller none none
+ServiceAController.handleOrderCreated Consumes __event__kafka__order.created none none
+ServiceAController.handleOrderCreated UsesDecorator ServiceAController.handleOrderCreated::@Controller none none
+ServiceAController.handleOrderCreated UsesDecorator ServiceAController.handleOrderCreated::@MessagePattern none none
+ServiceAController.handleOrderCreated UsesEventFrom __event__kafka__order.created none none
+ServiceAController.listOrders ProducesEventFor __event__kafka__order.created none none
+ServiceAController.listOrders Publishes __event__kafka__order.created none none
+ServiceAController.listOrders Serves __route__GET__/orders none none
+ServiceAController.listOrders UsesDecorator ServiceAController.listOrders::@Controller none none
+ServiceAController.listOrders UsesDecorator ServiceAController.listOrders::@Get none none
+backend_standard::package.json Defines backend_standard::__repo__ none none
+backend_standard::src/controller.ts Defines ServiceAController none none
+backend_standard::src/controller.ts Defines ServiceAController.constructor none none
+backend_standard::src/controller.ts Defines ServiceAController.handleOrderCreated none none
+backend_standard::src/controller.ts Defines ServiceAController.listOrders none none
+backend_standard::src/controller.ts Defines module::backend_standard::src/controller.ts none none
+backend_standard::src/controller.ts Defines src/controller.ts::Controller none none
+backend_standard::src/controller.ts Defines src/controller.ts::Get none none
+backend_standard::src/controller.ts Defines src/controller.ts::MessagePattern none none
+backend_standard::src/controller.ts Imports module-import::@nestjs/common none none
+backend_standard::src/controller.ts Imports module-import::@nestjs/microservices none none
+frontend_standard::package.json Defines frontend_standard::__repo__ none none
+frontend_standard::src/OrderList.test.tsx Defines module::frontend_standard::src/OrderList.test.tsx none none
+frontend_standard::src/OrderList.test.tsx Defines src/OrderList.test.tsx::OrderList none none
+frontend_standard::src/OrderList.test.tsx Imports module-import::./OrderList none none
+frontend_standard::src/OrderList.tsx Defines OrderList none none
+frontend_standard::src/OrderList.tsx Defines module::frontend_standard::src/OrderList.tsx none none
+module::backend_standard::src/controller.ts Exports ServiceAController none none
+module::backend_standard::src/controller.ts Exports ServiceAController.constructor none none
+module::backend_standard::src/controller.ts Exports ServiceAController.handleOrderCreated none none
+module::backend_standard::src/controller.ts Exports ServiceAController.listOrders none none
+module::frontend_standard::src/OrderList.tsx Exports OrderList none none
\ No newline at end of file
From 63f8fb8a8e1b297b70373f5c4a048055111634ea Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 09:46:34 +0700
Subject: [PATCH 26/31] feat: shared-component reuse-opportunity analysis
(design-system audit)
---
crates/gather-step-analysis/src/lib.rs | 4 +
.../src/shared_component_usage.rs | 192 ++++++++++++++++++
2 files changed, 196 insertions(+)
create mode 100644 crates/gather-step-analysis/src/shared_component_usage.rs
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index 0fa084d..b07a887 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -21,6 +21,7 @@ pub mod projection_impact;
pub mod proofs;
pub mod query;
pub mod semantic_health;
+pub mod shared_component_usage;
pub mod shared_contract;
pub mod transport;
@@ -78,6 +79,9 @@ pub use semantic_health::{
SemanticHealthError, SemanticHealthReport, SemanticLinkHealth, semantic_health_for_repo,
semantic_health_for_workspace,
};
+pub use shared_component_usage::{
+ ReuseOpportunity, SharedComponentError, analyze_shared_component_reuse, is_design_system_path,
+};
pub use shared_contract::{
guard_class_name_for_anchor, looks_like_guard_entrypoint, peer_matches_guard_class_name,
shared_contract_candidate_ids,
diff --git a/crates/gather-step-analysis/src/shared_component_usage.rs b/crates/gather-step-analysis/src/shared_component_usage.rs
new file mode 100644
index 0000000..e69b559
--- /dev/null
+++ b/crates/gather-step-analysis/src/shared_component_usage.rs
@@ -0,0 +1,192 @@
+use std::collections::BTreeMap;
+
+use gather_step_core::{NodeData, NodeKind};
+use gather_step_storage::{GraphStore, GraphStoreError};
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum SharedComponentError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+const DESIGN_SYSTEM_MARKERS: &[&str] = &[
+ "shared/",
+ "design-system",
+ "packages/",
+ "/ui/components",
+ "@shared",
+ "common/",
+ "/lib/",
+ "libs/",
+ "internal/",
+ "/pkg/",
+];
+
+#[must_use]
+pub fn is_design_system_path(file_path: &str) -> bool {
+ DESIGN_SYSTEM_MARKERS
+ .iter()
+ .any(|marker| file_path.contains(marker))
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct ReuseOpportunity {
+ pub repo: String,
+ pub local_symbol: String,
+ pub local_file: String,
+ pub shared_symbol: String,
+ pub shared_file: String,
+}
+
+fn is_component_like(node: &NodeData) -> bool {
+ matches!(node.kind, NodeKind::Function | NodeKind::Class)
+}
+
+pub fn analyze_shared_component_reuse(
+ store: &S,
+ repo: &str,
+) -> Result, SharedComponentError> {
+ let nodes = store.nodes_by_repo(repo)?;
+
+ let mut shared: BTreeMap = BTreeMap::new();
+ for node in &nodes {
+ if is_component_like(node) && is_design_system_path(&node.file_path) {
+ shared
+ .entry(node.name.clone())
+ .or_insert_with(|| node.file_path.clone());
+ }
+ }
+
+ let mut opportunities = Vec::new();
+ for node in &nodes {
+ if !is_component_like(node) || is_design_system_path(&node.file_path) {
+ continue;
+ }
+ if let Some(shared_file) = shared.get(&node.name) {
+ opportunities.push(ReuseOpportunity {
+ repo: repo.to_owned(),
+ local_symbol: node.name.clone(),
+ local_file: node.file_path.clone(),
+ shared_symbol: node.name.clone(),
+ shared_file: shared_file.clone(),
+ });
+ }
+ }
+
+ opportunities.sort_by(|left, right| {
+ left.local_file
+ .cmp(&right.local_file)
+ .then(left.local_symbol.cmp(&right.local_symbol))
+ });
+ opportunities.dedup();
+ Ok(opportunities)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{NodeData, NodeKind, SourceSpan, Visibility, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::{analyze_shared_component_reuse, is_design_system_path};
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-shared-component-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn node(repo: &str, file_path: &str, name: &str, ordinal: u32) -> NodeData {
+ NodeData {
+ id: node_id(repo, file_path, NodeKind::Function, name),
+ kind: NodeKind::Function,
+ repo: repo.to_owned(),
+ file_path: file_path.to_owned(),
+ name: name.to_owned(),
+ qualified_name: Some(format!("{repo}::{name}")),
+ external_id: None,
+ signature: None,
+ visibility: Some(Visibility::Public),
+ span: Some(SourceSpan {
+ line_start: ordinal,
+ line_len: 0,
+ column_start: 0,
+ column_len: 1,
+ }),
+ is_virtual: false,
+ }
+ }
+
+ #[test]
+ fn design_system_path_markers() {
+ assert!(is_design_system_path("packages/ui/components/Button.tsx"));
+ assert!(is_design_system_path("src/shared/Card.tsx"));
+ assert!(!is_design_system_path("src/features/orders/Button.tsx"));
+ }
+
+ #[test]
+ fn flags_local_fork_of_a_shared_component() {
+ let temp = TempDb::new("fork");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let shared = node("web", "packages/ui/components/Button.tsx", "Button", 0);
+ let fork = node("web", "src/features/orders/Button.tsx", "Button", 1);
+ let unique = node("web", "src/features/orders/OrderList.tsx", "OrderList", 2);
+ store
+ .bulk_insert(&[shared.clone(), fork.clone(), unique.clone()], &[])
+ .expect("write");
+
+ let opportunities = analyze_shared_component_reuse(&store, "web").expect("analyze");
+ assert_eq!(opportunities.len(), 1);
+ assert_eq!(
+ opportunities[0].local_file,
+ "src/features/orders/Button.tsx"
+ );
+ assert_eq!(
+ opportunities[0].shared_file,
+ "packages/ui/components/Button.tsx"
+ );
+ }
+
+ #[test]
+ fn shared_only_or_no_duplicate_yields_nothing() {
+ let temp = TempDb::new("clean");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let shared = node("web", "packages/ui/components/Modal.tsx", "Modal", 0);
+ let local = node("web", "src/features/orders/OrderRow.tsx", "OrderRow", 1);
+ store.bulk_insert(&[shared, local], &[]).expect("write");
+
+ assert!(
+ analyze_shared_component_reuse(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+}
From 018fda8fa255ba3eb3bd584b941cd126822b445b Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 09:54:55 +0700
Subject: [PATCH 27/31] feat: cross-repo cycle detection via Tarjan SCC
---
crates/gather-step-analysis/src/cycles.rs | 286 ++++++++++++++++++++++
crates/gather-step-analysis/src/lib.rs | 2 +
2 files changed, 288 insertions(+)
create mode 100644 crates/gather-step-analysis/src/cycles.rs
diff --git a/crates/gather-step-analysis/src/cycles.rs b/crates/gather-step-analysis/src/cycles.rs
new file mode 100644
index 0000000..9a6f89d
--- /dev/null
+++ b/crates/gather-step-analysis/src/cycles.rs
@@ -0,0 +1,286 @@
+use gather_step_core::{EdgeKind, NodeKind};
+use gather_step_storage::{GraphStore, GraphStoreError};
+use rustc_hash::FxHashMap;
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum CycleError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Cycle {
+ pub nodes: Vec,
+ pub repos: Vec,
+ pub cross_repo: bool,
+}
+
+pub fn find_cycles(
+ store: &S,
+ edge_kinds: Option<&[EdgeKind]>,
+) -> Result, CycleError> {
+ let mut index_of: FxHashMap<[u8; 16], usize> = FxHashMap::default();
+ let mut labels: Vec = Vec::new();
+ let mut repos: Vec = Vec::new();
+ let mut node_ids = Vec::new();
+ for kind in NodeKind::all() {
+ for node in store.nodes_by_type(*kind)? {
+ let key = node.id.as_bytes();
+ if index_of.contains_key(&key) {
+ continue;
+ }
+ index_of.insert(key, labels.len());
+ labels.push(
+ node.qualified_name
+ .clone()
+ .unwrap_or_else(|| format!("{}::{}", node.repo, node.name)),
+ );
+ repos.push(node.repo.clone());
+ node_ids.push(node.id);
+ }
+ }
+
+ let count = node_ids.len();
+ let mut adjacency: Vec> = vec![Vec::new(); count];
+ let mut self_loop = vec![false; count];
+ for (source_index, node_id) in node_ids.iter().enumerate() {
+ for edge in store.get_outgoing(*node_id)? {
+ if let Some(kinds) = edge_kinds
+ && !kinds.contains(&edge.kind)
+ {
+ continue;
+ }
+ let Some(&target_index) = index_of.get(&edge.target.as_bytes()) else {
+ continue;
+ };
+ if target_index == source_index {
+ self_loop[source_index] = true;
+ }
+ adjacency[source_index].push(target_index);
+ }
+ }
+
+ let sccs = tarjan(&adjacency);
+
+ let mut cycles = Vec::new();
+ for scc in sccs {
+ let is_cycle = scc.len() > 1 || (scc.len() == 1 && self_loop[scc[0]]);
+ if !is_cycle {
+ continue;
+ }
+ let mut nodes: Vec = scc.iter().map(|&i| labels[i].clone()).collect();
+ nodes.sort();
+ let mut cycle_repos: Vec = scc.iter().map(|&i| repos[i].clone()).collect();
+ cycle_repos.sort();
+ cycle_repos.dedup();
+ let cross_repo = cycle_repos.len() > 1;
+ cycles.push(Cycle {
+ nodes,
+ repos: cycle_repos,
+ cross_repo,
+ });
+ }
+ cycles.sort_by(|left, right| left.nodes.cmp(&right.nodes));
+ Ok(cycles)
+}
+
+fn tarjan(adjacency: &[Vec]) -> Vec> {
+ let count = adjacency.len();
+ let mut indices = vec![usize::MAX; count];
+ let mut lowlink = vec![0_usize; count];
+ let mut on_stack = vec![false; count];
+ let mut stack: Vec = Vec::new();
+ let mut sccs: Vec> = Vec::new();
+ let mut next_index = 0_usize;
+
+ for root in 0..count {
+ if indices[root] != usize::MAX {
+ continue;
+ }
+ let mut call_stack: Vec<(usize, usize)> = vec![(root, 0)];
+ while let Some(&(node, child)) = call_stack.last() {
+ if child == 0 {
+ indices[node] = next_index;
+ lowlink[node] = next_index;
+ next_index += 1;
+ stack.push(node);
+ on_stack[node] = true;
+ }
+
+ if child < adjacency[node].len() {
+ let top = call_stack.len() - 1;
+ call_stack[top].1 += 1;
+ let target = adjacency[node][child];
+ if indices[target] == usize::MAX {
+ call_stack.push((target, 0));
+ } else if on_stack[target] {
+ lowlink[node] = lowlink[node].min(indices[target]);
+ }
+ } else {
+ if lowlink[node] == indices[node] {
+ let mut scc = Vec::new();
+ while let Some(member) = stack.pop() {
+ on_stack[member] = false;
+ scc.push(member);
+ if member == node {
+ break;
+ }
+ }
+ sccs.push(scc);
+ }
+ call_stack.pop();
+ if let Some(&(parent, _)) = call_stack.last() {
+ lowlink[parent] = lowlink[parent].min(lowlink[node]);
+ }
+ }
+ }
+ }
+ sccs
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{EdgeData, EdgeKind, EdgeMetadata, NodeData, NodeId, NodeKind, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::find_cycles;
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-cycles-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn func(repo: &str, name: &str) -> NodeData {
+ NodeData {
+ id: node_id(repo, "src/a.ts", NodeKind::Function, name),
+ kind: NodeKind::Function,
+ repo: repo.to_owned(),
+ file_path: "src/a.ts".to_owned(),
+ name: name.to_owned(),
+ qualified_name: Some(format!("{repo}::{name}")),
+ external_id: None,
+ signature: None,
+ visibility: None,
+ span: None,
+ is_virtual: false,
+ }
+ }
+
+ fn calls(source: NodeId, target: NodeId, owner: NodeId) -> EdgeData {
+ EdgeData {
+ source,
+ target,
+ kind: EdgeKind::Calls,
+ metadata: EdgeMetadata::default(),
+ owner_file: owner,
+ is_cross_file: false,
+ }
+ }
+
+ #[test]
+ fn detects_a_simple_cycle_and_ignores_acyclic_chains() {
+ let temp = TempDb::new("simple");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("web", "b");
+ let c = func("web", "c");
+ let d = func("web", "d");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone(), c.clone(), d.clone()],
+ &[
+ calls(a.id, b.id, file.id),
+ calls(b.id, c.id, file.id),
+ calls(c.id, a.id, file.id),
+ calls(c.id, d.id, file.id),
+ ],
+ )
+ .expect("write");
+
+ let cycles = find_cycles(&store, Some(&[EdgeKind::Calls])).expect("cycles");
+ assert_eq!(cycles.len(), 1, "expected one cycle, got {cycles:?}");
+ assert_eq!(cycles[0].nodes, vec!["web::a", "web::b", "web::c"]);
+ assert!(!cycles[0].cross_repo);
+ }
+
+ #[test]
+ fn acyclic_graph_has_no_cycles() {
+ let temp = TempDb::new("acyclic");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("web", "b");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone()],
+ &[calls(a.id, b.id, file.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_cycles(&store, Some(&[EdgeKind::Calls]))
+ .expect("cycles")
+ .is_empty()
+ );
+ }
+
+ #[test]
+ fn flags_cross_repo_cycle() {
+ let temp = TempDb::new("cross-repo");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let file = NodeData {
+ kind: NodeKind::File,
+ ..func("web", "src/a.ts")
+ };
+ let a = func("web", "a");
+ let b = func("api", "b");
+ store
+ .bulk_insert(
+ &[file.clone(), a.clone(), b.clone()],
+ &[calls(a.id, b.id, file.id), calls(b.id, a.id, file.id)],
+ )
+ .expect("write");
+
+ let cycles = find_cycles(&store, Some(&[EdgeKind::Calls])).expect("cycles");
+ assert_eq!(cycles.len(), 1);
+ assert!(cycles[0].cross_repo);
+ assert_eq!(cycles[0].repos, vec!["api", "web"]);
+ }
+}
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index b07a887..481f413 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -9,6 +9,7 @@ pub mod contract_drift;
pub mod conventions;
pub mod cross_repo;
pub mod crud_trace;
+pub mod cycles;
pub mod dead_code;
pub mod deployment_topology;
pub mod event_topology;
@@ -38,6 +39,7 @@ pub use cross_repo::{
pub use crud_trace::{
CrudTrace, CrudTraceEntry, CrudTraceError, CrudTraceRole, trace_crud_route, trace_crud_symbol,
};
+pub use cycles::{Cycle, CycleError, find_cycles};
pub use dead_code::{
ConfidenceBand, DeadCodeError, DeadCodeFinding, DeadCodeReport, DetectorBasis, find_dead_code,
find_dead_code_with_manifest,
From bf4bc752c812855fc9b4cc2befa363fd2411813a Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 10:05:11 +0700
Subject: [PATCH 28/31] feat: detect mock/fixture imports leaking into
production modules
---
crates/gather-step-analysis/src/lib.rs | 2 +
.../gather-step-analysis/src/mock_leakage.rs | 207 ++++++++++++++++++
2 files changed, 209 insertions(+)
create mode 100644 crates/gather-step-analysis/src/mock_leakage.rs
diff --git a/crates/gather-step-analysis/src/lib.rs b/crates/gather-step-analysis/src/lib.rs
index 481f413..07517af 100644
--- a/crates/gather-step-analysis/src/lib.rs
+++ b/crates/gather-step-analysis/src/lib.rs
@@ -15,6 +15,7 @@ pub mod deployment_topology;
pub mod event_topology;
pub mod evidence;
pub mod impact;
+pub mod mock_leakage;
pub mod mongo_query_safety;
pub mod overview;
pub mod pack_assembly;
@@ -58,6 +59,7 @@ pub use event_topology::{
pub use impact::{
BoundaryRole, EvidenceBand, ImpactError, ImpactMap, ImpactedFile, shared_contract_impact,
};
+pub use mock_leakage::{MockLeakage, MockLeakageError, find_mock_leakage, is_mock_path};
pub use mongo_query_safety::{
MongoQueryFinding, RULE_ATLAS_INDEX_DRIFT, RULE_INDEX_DEFEAT, RULE_NULL_PARENT_PATH,
RULE_UNSAFE_COERCION, analyze_atlas_index_drift, analyze_mongo_value,
diff --git a/crates/gather-step-analysis/src/mock_leakage.rs b/crates/gather-step-analysis/src/mock_leakage.rs
new file mode 100644
index 0000000..d2d41bd
--- /dev/null
+++ b/crates/gather-step-analysis/src/mock_leakage.rs
@@ -0,0 +1,207 @@
+use gather_step_core::EdgeKind;
+use gather_step_storage::{GraphStore, GraphStoreError};
+use rustc_hash::FxHashMap;
+use thiserror::Error;
+
+#[derive(Debug, Error)]
+pub enum MockLeakageError {
+ #[error(transparent)]
+ Store(#[from] GraphStoreError),
+}
+
+const MOCK_MARKERS: &[&str] = &[
+ "__mocks__",
+ ".mock.",
+ "/mocks/",
+ ".fixture.",
+ "/fixtures/",
+ ".stub.",
+];
+
+const TEST_MARKERS: &[&str] = &[".test.", ".spec.", "/__tests__/", "/tests/", "/test/"];
+
+#[must_use]
+pub fn is_mock_path(file_path: &str) -> bool {
+ MOCK_MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+#[must_use]
+pub fn is_test_path(file_path: &str) -> bool {
+ TEST_MARKERS.iter().any(|marker| file_path.contains(marker))
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct MockLeakage {
+ pub repo: String,
+ pub importer_file: String,
+ pub mock_file: String,
+}
+
+pub fn find_mock_leakage(
+ store: &S,
+ repo: &str,
+) -> Result, MockLeakageError> {
+ let nodes = store.nodes_by_repo(repo)?;
+ let file_of: FxHashMap<[u8; 16], String> = nodes
+ .iter()
+ .map(|node| (node.id.as_bytes(), node.file_path.clone()))
+ .collect();
+
+ let mut leaks = Vec::new();
+ for node in &nodes {
+ if is_test_path(&node.file_path) || is_mock_path(&node.file_path) {
+ continue;
+ }
+ for edge in store.get_outgoing(node.id)? {
+ if edge.kind != EdgeKind::Imports {
+ continue;
+ }
+ let Some(target_file) = file_of.get(&edge.target.as_bytes()) else {
+ continue;
+ };
+ if is_mock_path(target_file) {
+ leaks.push(MockLeakage {
+ repo: repo.to_owned(),
+ importer_file: node.file_path.clone(),
+ mock_file: target_file.clone(),
+ });
+ }
+ }
+ }
+
+ leaks.sort_by(|left, right| {
+ left.importer_file
+ .cmp(&right.importer_file)
+ .then(left.mock_file.cmp(&right.mock_file))
+ });
+ leaks.dedup();
+ Ok(leaks)
+}
+
+#[cfg(test)]
+mod tests {
+ use std::{
+ env, fs,
+ path::{Path, PathBuf},
+ process,
+ sync::atomic::{AtomicU64, Ordering},
+ };
+
+ use gather_step_core::{EdgeData, EdgeKind, EdgeMetadata, NodeData, NodeId, NodeKind, node_id};
+ use gather_step_storage::{GraphStore, GraphStoreDb};
+
+ use super::find_mock_leakage;
+
+ static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+ struct TempDb {
+ path: PathBuf,
+ }
+
+ impl TempDb {
+ fn new(name: &str) -> Self {
+ let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
+ let path = env::temp_dir().join(format!(
+ "gather-step-mock-leakage-{name}-{}-{id}.redb",
+ process::id()
+ ));
+ Self { path }
+ }
+
+ fn path(&self) -> &Path {
+ &self.path
+ }
+ }
+
+ impl Drop for TempDb {
+ fn drop(&mut self) {
+ let _ = fs::remove_file(&self.path);
+ }
+ }
+
+ fn module(repo: &str, file_path: &str) -> NodeData {
+ NodeData {
+ id: node_id(repo, file_path, NodeKind::File, file_path),
+ kind: NodeKind::File,
+ repo: repo.to_owned(),
+ file_path: file_path.to_owned(),
+ name: file_path.to_owned(),
+ qualified_name: Some(file_path.to_owned()),
+ external_id: None,
+ signature: None,
+ visibility: None,
+ span: None,
+ is_virtual: false,
+ }
+ }
+
+ fn imports(source: NodeId, target: NodeId, owner: NodeId) -> EdgeData {
+ EdgeData {
+ source,
+ target,
+ kind: EdgeKind::Imports,
+ metadata: EdgeMetadata::default(),
+ owner_file: owner,
+ is_cross_file: true,
+ }
+ }
+
+ #[test]
+ fn flags_prod_module_importing_a_mock() {
+ let temp = TempDb::new("leak");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let prod = module("web", "src/features/OrderList.tsx");
+ let mock = module("web", "src/features/__mocks__/orders.mock.ts");
+ store
+ .bulk_insert(
+ &[prod.clone(), mock.clone()],
+ &[imports(prod.id, mock.id, prod.id)],
+ )
+ .expect("write");
+
+ let leaks = find_mock_leakage(&store, "web").expect("analyze");
+ assert_eq!(leaks.len(), 1);
+ assert_eq!(leaks[0].importer_file, "src/features/OrderList.tsx");
+ assert_eq!(leaks[0].mock_file, "src/features/__mocks__/orders.mock.ts");
+ }
+
+ #[test]
+ fn test_file_importing_a_mock_is_not_flagged() {
+ let temp = TempDb::new("test-import");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let spec = module("web", "src/features/OrderList.test.tsx");
+ let mock = module("web", "src/features/__mocks__/orders.mock.ts");
+ store
+ .bulk_insert(
+ &[spec.clone(), mock.clone()],
+ &[imports(spec.id, mock.id, spec.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_mock_leakage(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+
+ #[test]
+ fn prod_importing_prod_is_not_flagged() {
+ let temp = TempDb::new("clean");
+ let store = GraphStoreDb::open(temp.path()).expect("store");
+ let prod = module("web", "src/features/OrderList.tsx");
+ let helper = module("web", "src/features/format.ts");
+ store
+ .bulk_insert(
+ &[prod.clone(), helper.clone()],
+ &[imports(prod.id, helper.id, prod.id)],
+ )
+ .expect("write");
+
+ assert!(
+ find_mock_leakage(&store, "web")
+ .expect("analyze")
+ .is_empty()
+ );
+ }
+}
From a3ea3400aced1e57474734e8eca7cc7fc64894f2 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 10:21:13 +0700
Subject: [PATCH 29/31] feat: surface cycle, mock-leakage, and reuse advisories
in doctor
---
crates/gather-step-cli/src/commands/doctor.rs | 90 ++++++++++++++++++-
1 file changed, 89 insertions(+), 1 deletion(-)
diff --git a/crates/gather-step-cli/src/commands/doctor.rs b/crates/gather-step-cli/src/commands/doctor.rs
index 38c2e65..ea85eaa 100644
--- a/crates/gather-step-cli/src/commands/doctor.rs
+++ b/crates/gather-step-cli/src/commands/doctor.rs
@@ -1,7 +1,11 @@
use anyhow::{Context, Result, bail};
use clap::Args;
use comfy_table::{Cell, ContentArrangement, Table, presets::UTF8_BORDERS_ONLY};
-use gather_step_analysis::{SemanticHealthReport, semantic_health_for_repo};
+use gather_step_analysis::{
+ SemanticHealthReport, analyze_shared_component_reuse, find_cycles, find_mock_leakage,
+ semantic_health_for_repo,
+};
+use gather_step_core::EdgeKind;
use gather_step_core::NodeKind;
use gather_step_core::RegistryStore;
use gather_step_parser::resolve::{ResolutionInput, is_non_actionable_unresolved_call};
@@ -24,6 +28,8 @@ struct DoctorOutput {
issue_count: usize,
pack_metrics: PackDoctorOutput,
repos: Vec,
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
+ quality_advisories: Vec,
}
#[derive(Debug, Serialize)]
@@ -90,12 +96,16 @@ pub(crate) fn execute(
let issue_count = repos.iter().map(|repo| repo.issues.len()).sum();
let pack_metrics = pack_metrics(storage.metadata()).context("computing pack diagnostics")?;
+ let repo_names: Vec = repos.iter().map(|repo| repo.repo.clone()).collect();
+ let quality_advisories = collect_quality_advisories(storage, &repo_names)
+ .context("collecting code-quality advisories")?;
let payload = DoctorOutput {
event: "doctor_completed",
ok: issue_count == 0,
issue_count,
pack_metrics,
repos,
+ quality_advisories,
};
let mut lines = Vec::new();
@@ -136,6 +146,15 @@ pub(crate) fn execute(
payload.pack_metrics.unresolved_packs
));
}
+ if !payload.quality_advisories.is_empty() {
+ lines.push(format!(
+ "Code-quality advisories ({}):",
+ payload.quality_advisories.len()
+ ));
+ for advisory in &payload.quality_advisories {
+ lines.push(format!(" - {advisory}"));
+ }
+ }
let payload_json = json!(payload);
Ok(if issue_count == 0 {
RenderedCommand::success(payload_json, lines)
@@ -148,6 +167,75 @@ pub(crate) fn execute(
})
}
+const MAX_ADVISORIES_PER_CATEGORY: usize = 50;
+
+fn collect_quality_advisories(
+ storage: &StorageCoordinator,
+ repos: &[String],
+) -> Result> {
+ let graph = storage.graph();
+ let mut advisories = Vec::new();
+
+ let cycles = find_cycles(graph, Some(&[EdgeKind::Imports, EdgeKind::Calls]))
+ .context("detecting dependency cycles")?;
+ push_capped(
+ &mut advisories,
+ cycles.iter().map(|cycle| {
+ let scope = if cycle.cross_repo {
+ " (cross-repo)"
+ } else {
+ ""
+ };
+ format!("Dependency cycle{scope}: {}", cycle.nodes.join(" -> "))
+ }),
+ "dependency cycle",
+ );
+
+ for repo in repos {
+ let leaks = find_mock_leakage(graph, repo)
+ .with_context(|| format!("detecting mock leakage in `{repo}`"))?;
+ push_capped(
+ &mut advisories,
+ leaks.iter().map(|leak| {
+ format!(
+ "Mock import in production (`{repo}`): {} imports {}",
+ leak.importer_file, leak.mock_file
+ )
+ }),
+ "mock import in production",
+ );
+
+ let forks = analyze_shared_component_reuse(graph, repo)
+ .with_context(|| format!("auditing shared-component reuse in `{repo}`"))?;
+ push_capped(
+ &mut advisories,
+ forks.iter().map(|fork| {
+ format!(
+ "Reuse opportunity (`{repo}`): {} duplicates shared `{}` ({})",
+ fork.local_file, fork.shared_symbol, fork.shared_file
+ )
+ }),
+ "reuse opportunity",
+ );
+ }
+
+ Ok(advisories)
+}
+
+fn push_capped(advisories: &mut Vec, items: impl Iterator- , label: &str) {
+ let collected: Vec = items.collect();
+ let total = collected.len();
+ for item in collected.into_iter().take(MAX_ADVISORIES_PER_CATEGORY) {
+ advisories.push(item);
+ }
+ if total > MAX_ADVISORIES_PER_CATEGORY {
+ advisories.push(format!(
+ "... and {} more {label} finding(s) not shown",
+ total - MAX_ADVISORIES_PER_CATEGORY
+ ));
+ }
+}
+
fn inspect_repo(
repo: &str,
registered: &gather_step_core::RegisteredRepo,
From 0c73181e48fb924476d326b56fc15444465b7a86 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 10:22:32 +0700
Subject: [PATCH 30/31] docs: note doctor code-quality advisories in 4.3.1
changelog
---
website/src/content/docs/changelog.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/website/src/content/docs/changelog.md b/website/src/content/docs/changelog.md
index 1b4a5a8..d6a5f5c 100644
--- a/website/src/content/docs/changelog.md
+++ b/website/src/content/docs/changelog.md
@@ -13,6 +13,7 @@ Planning- and reuse-quality release. Fixes retrieval recall so reuse search stop
### Added
+- **`gather-step doctor` code-quality advisories** — non-gating findings over the indexed graph: dependency cycles (incl. cross-repo, via Tarjan SCC), mock/fixture imports leaking into production modules, and local forks of shared/design-system components that should be reused.
- **Graph-ranked reuse evidence** in planning packs: reuse candidates are ranked by sibling-consumer count, shared/design-system membership, and cross-repo proof strength before truncation, so a blessed shared component ranks above a bespoke fork (S3).
- **Typed `plan_change` product** with a fixed, contract-checked section set (E1). Sections are always present (possibly empty), with an exclusion ledger recording what was dropped so a capped result is never read as exhaustive.
- **Display-ownership planning dimension** (`display_ownership_checks`): every cross-service reference surfaces the question of whether display fields come from the owner service (snapshot/API) rather than a direct cross-service DB lookup (DSO1).
From 94496e68d8ff5ce1ec57a760b48fd422dd1c3358 Mon Sep 17 00:00:00 2001
From: thedoublejay
Date: Wed, 3 Jun 2026 10:38:07 +0700
Subject: [PATCH 31/31] fix: correct 'mis-read' typo failing the spell-check
lint
---
crates/gather-step-mcp/src/tools/packs.rs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crates/gather-step-mcp/src/tools/packs.rs b/crates/gather-step-mcp/src/tools/packs.rs
index 9af4e91..d949aee 100644
--- a/crates/gather-step-mcp/src/tools/packs.rs
+++ b/crates/gather-step-mcp/src/tools/packs.rs
@@ -250,7 +250,7 @@ const V1_COMPLETENESS_CHECKLIST: [&str; 19] = [
"Partial-migration tail: zero residual call-sites workspace-wide? → verify: grep shows no residual callers.",
"Release-profile divergence: verify flags on the installed profile. → verify: check [profile.dist].",
"Teardown under-spec: disposable artifacts specify creation/reuse-key/safe-delete/in-flight-writer guard. → verify: all four present.",
- "Schema-version forward-compat: a version-mismatch refusal/rebuild path exists. → verify: mismatch is refused, not mis-read.",
+ "Schema-version forward-compat: a version-mismatch refusal/rebuild path exists. → verify: mismatch is refused, not misread.",
"Derived-field blast radius: enumerate source, projection writer, all readers, filters, search/index, backfill. → verify: all six listed.",
"Cross-service ownership: should the owner publish a snapshot/resolver before adopting a join? → verify: question answered.",
"Lossy rollback: enumerate unrecoverable rows/fields. → verify: rollback notes list them.",