Skip to content

Commit 76c14c8

Browse files
committed
[gobby-#314] feat: add indexed grep and graph sync contract
1 parent 1fdbc66 commit 76c14c8

14 files changed

Lines changed: 1158 additions & 36 deletions

File tree

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
## [Unreleased]
1111

12+
## [0.9.5] — gcode
13+
14+
### Added
15+
16+
#### gcode
17+
18+
- **Indexed grep** — `gcode grep <pattern> [PATH ...]` now provides exact
19+
line-oriented search over the indexed `code_content_chunks` corpus. It
20+
supports `-i`, `-F`, `-C/-A/-B`, `-g/--glob`, and `-m/--max-count`, with text
21+
output shaped like grep and JSON output that includes match spans, context,
22+
scan counts, and truncation state.
23+
24+
### Changed
25+
26+
#### gcode
27+
28+
- **Graph sync-file contract** — `gcode graph sync-file` now classifies missing
29+
indexed projects and files from PostgreSQL before FalkorDB access. Human
30+
defaults stay strict with typed JSON errors and exit code `2`, while daemon
31+
and background-worker callers can use `--allow-missing-indexed-file` to turn
32+
stale missing-file work into a skipped payload.
33+
1234
## [0.4.4] — gobby-hooks
1335

1436
### Added

Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ This workspace contains four Gobby CLI tools plus a shared library:
2727

2828
AST-aware code search powered by tree-sitter. Indexes 18 languages plus safe
2929
repo text files into the Gobby PostgreSQL hub, with pg_search BM25 for symbol
30-
lookup, repo-content search across source/docs/config/scripts, file tree
30+
lookup, exact indexed grep over repo content chunks, ranked repo-content search
31+
across source/docs/config/scripts, file tree
3132
navigation, and hybrid ranking. When FalkorDB, Qdrant, and an embeddings
3233
endpoint are configured - typically through Gobby - `gcode` adds graph-aware
3334
search, semantic search, optional graph expansion for exact symbol lookup

crates/gcode/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "gobby-code"
3-
version = "0.9.4"
3+
version = "0.9.5"
44
edition = "2024"
55
rust-version = "1.88"
66
authors = ["Josh Wilhelmi <hello@gobby.ai>"]
@@ -70,6 +70,7 @@ streaming-iterator = "0.1"
7070

7171
# Utilities
7272
glob = "0.3"
73+
regex = "1"
7374
shlex = "1"
7475
sha2 = "0.10"
7576
uuid = { version = "1", features = ["v5"] }

crates/gcode/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ gcode search-symbol "outline" --kind function --language rust
126126
gcode search-symbol "Context" crates/gcode/src
127127
gcode search-text "query" # BM25 on symbol names/signatures
128128
gcode search-text "query" crates/gcode/src
129+
gcode grep "pattern" # Exact indexed content grep
130+
gcode grep "pattern" src -m 50 # Cap matching lines globally
129131
gcode search-content "query" # BM25 on source, comments, skill files, docs/Markdown, configs, CSS, SQL, and extensionless text
130132
gcode search-content "query" docs/**/*.md crates/gcode/src
131133

@@ -145,6 +147,7 @@ gcode blast-radius "handleAuth" --depth 3 # Transitive impact analysis
145147
# Graph lifecycle (requires FalkorDB)
146148
gcode graph clear # Clear current project's graph projection
147149
gcode graph clear --project-id <id> # Clear graph projection by explicit project id
150+
gcode graph sync-file --file src/lib.rs # Sync one indexed file into the graph projection
148151
gcode graph rebuild # Rebuild current project's graph projection
149152

150153
# Project management
@@ -245,6 +248,9 @@ lifecycle depends on Qdrant plus embeddings for sync/rebuild. All code-index
245248
projection lifecycle paths are Rust-owned and scoped to code projection state:
246249
graph clears target code-index FalkorDB labels only, and vector clears target
247250
only `code_symbols_{project_id}` rather than memory vector collections.
251+
`gcode graph sync-file --allow-missing-indexed-file` is reserved for daemon and
252+
background-worker stale work; humans should let the default strict missing-file
253+
error surface stale or incorrect sync requests.
248254

249255
## Language Support
250256

crates/gcode/assets/SKILL.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ This project is indexed. Use `gcode` via Bash for fast code search and navigatio
1616
- `gcode search "query" [PATH ...]` — hybrid search: pg_search BM25 + semantic + graph boost (best for fuzzy or natural-language queries)
1717
- `gcode search-symbol "name" [PATH ...]` — exact-first symbol lookup with deterministic ranking; add `--with-graph` to include FalkorDB graph neighbors when available
1818
- `gcode search-text "query" [PATH ...]` — pg_search BM25 search on symbol names, signatures, and docstrings
19+
- `gcode grep "pattern" [PATH ...]` — exact indexed content grep over `code_content_chunks`; use `gcode grep "pattern" src -m 50` to cap matching lines
1920
- `gcode search-content "query" [PATH ...]` — full-text search across repo text chunks: source, comments, docs/Markdown, skill files, configs, scripts, CSS, SQL, and extensionless text
2021

21-
Search filters compose: `search` and `search-symbol` accept `--kind <kind>`; use `gcode kinds` to discover values. All search commands accept positional path filters after the query (paths or globs, OR semantics), plus `--language <lang>`, `--limit N`, and `--offset N` for scoped or paginated results. Hybrid JSON results include final display `score`, raw `rrf_score`, and deterministic `sources`; path globs that require post-filter fallback surface a hint/warning.
22+
Search filters compose: `search` and `search-symbol` accept `--kind <kind>`; use `gcode kinds` to discover values. Ranked search commands accept positional path filters after the query (paths or globs, OR semantics), plus `--language <lang>`, `--limit N`, and `--offset N` for scoped or paginated results. `gcode grep` accepts positional paths, `-g/--glob`, `-i`, `-F`, `-C/-A/-B`, and `-m/--max-count`; it rejects `--limit`. Hybrid JSON results include final display `score`, raw `rrf_score`, and deterministic `sources`; path globs that require post-filter fallback surface a hint/warning.
2223

2324
## Retrieval
2425

@@ -32,7 +33,7 @@ Symbol IDs must be full stored UUIDs from `gcode search`, `gcode search-symbol`,
3233

3334
When navigating code for context or understanding:
3435

35-
1. **Locate with gcode**: `gcode search "concept"`, `gcode search-symbol "name"`, or `gcode search-content "text"` to find relevant hits.
36+
1. **Locate with gcode**: `gcode grep "exact string"` for exact line matches, `gcode search "concept"`, `gcode search-symbol "name"`, or `gcode search-content "text"` for ranked/fuzzy hits.
3637
2. **Survey file structure**: `gcode outline path/to/file` to see the symbol hierarchy without reading the whole file.
3738
3. **Retrieve exact code**: `gcode symbol <full-uuid>` or `gcode symbols <full-uuid> <full-uuid> ...` using IDs from search or outline.
3839
4. **Fetch tight neighboring context only when needed**: use `sed`/`awk` only for tight neighboring context (1-3 lines) after symbol retrieval.
@@ -64,6 +65,7 @@ Use `gcode` directly for the code-index graph projection via the Gobby daemon.
6465
for the UI, but graph sync/read/lifecycle behavior lives in `gcode`.
6566

6667
- `gcode graph sync-file --file <file>` — sync one indexed file into the graph projection
68+
- `gcode graph sync-file --file <file> --allow-missing-indexed-file` — daemon/background-worker stale-work tolerance only
6769
- `gcode graph clear` — clear the current project's graph projection
6870
- `gcode graph clear --project-id <id>` — clear a projection without resolving a project root
6971
- `gcode graph rebuild` — rebuild it (cheaper than `gcode invalidate` + reindex; doesn't touch PostgreSQL symbol/content rows)
@@ -74,7 +76,8 @@ for the UI, but graph sync/read/lifecycle behavior lives in `gcode`.
7476
|---|---|
7577
| A function or class by concept (fuzzy) | `gcode search "concept"` |
7678
| A symbol you know the exact name of | `gcode search-symbol "name"` |
77-
| A string literal, doc phrase, config value, comment, script line, CSS rule | `gcode search-content "text"` |
79+
| An exact string literal, doc phrase, config value, comment, script line, CSS rule | `gcode grep "pattern" [PATH ...]` |
80+
| Ranked content search across comments/docs/config/source text | `gcode search-content "query" [PATH ...]` |
7881
| Structure of a file without reading it | `gcode outline path/to/file` |
7982
| Source code of a specific symbol | `gcode symbol <full-uuid>` |
8083
| What breaks if I change X | `gcode blast-radius <name>` |

crates/gcode/src/commands/graph.rs

Lines changed: 146 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,66 @@ use serde_json::{Value, json};
1313

1414
const GOBBY_HINT: &str =
1515
"Graph commands require FalkorDB, available with Gobby. See: https://github.com/GobbyAI/gobby";
16+
/// Exit code associated with `graph sync-file` contract errors; this is the
/// value returned by `GraphSyncContractError::exit_code`.
pub const GRAPH_SYNC_CONTRACT_EXIT_CODE: u8 = 2;
17+
18+
/// Typed error for `graph sync-file` contract failures.
///
/// Holds the JSON payload describing the failure. The payload is built by the
/// constructors on this type and exposed through `payload()` and `print()`.
#[derive(Debug)]
19+
pub struct GraphSyncContractError {
20+
payload: Value,
21+
}
22+
23+
impl GraphSyncContractError {
24+
/// Builds the error for a project that is not indexed.
///
/// The payload carries `success: false`, the context's `project_id`, the
/// requested `file_path`, `status: "error"`, `reason: "project_not_indexed"`,
/// and a human-readable `error` message.
fn project_not_indexed(ctx: &Context, file_path: &str) -> Self {
25+
Self {
26+
payload: json!({
27+
"success": false,
28+
"project_id": ctx.project_id,
29+
"file_path": file_path,
30+
"status": "error",
31+
"reason": "project_not_indexed",
32+
"error": format!("project {} is not indexed", ctx.project_id),
33+
}),
34+
}
35+
}
36+
37+
/// Builds the error for a file that has no indexed entry in the project.
///
/// Same payload shape as `project_not_indexed`, with
/// `reason: "indexed_file_not_found"` and an `error` message naming the file.
fn indexed_file_not_found(ctx: &Context, file_path: &str) -> Self {
38+
Self {
39+
payload: json!({
40+
"success": false,
41+
"project_id": ctx.project_id,
42+
"file_path": file_path,
43+
"status": "error",
44+
"reason": "indexed_file_not_found",
45+
"error": format!("indexed file `{file_path}` was not found for project {}", ctx.project_id),
46+
}),
47+
}
48+
}
49+
50+
/// Returns the exit code for this error, always `GRAPH_SYNC_CONTRACT_EXIT_CODE`.
pub fn exit_code(&self) -> u8 {
51+
GRAPH_SYNC_CONTRACT_EXIT_CODE
52+
}
53+
54+
/// Emits the JSON payload through `output::print_json`, propagating its result.
pub fn print(&self) -> anyhow::Result<()> {
55+
output::print_json(&self.payload)
56+
}
57+
58+
/// Borrows the JSON payload describing the failure.
pub fn payload(&self) -> &Value {
59+
&self.payload
60+
}
61+
}
62+
63+
/// Formats the error as `graph sync-file contract error: <reason>`.
impl std::fmt::Display for GraphSyncContractError {
64+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65+
// Use the payload's "reason" string; fall back to a generic label when
// the key is absent or not a string.
let reason = self
66+
.payload
67+
.get("reason")
68+
.and_then(Value::as_str)
69+
.unwrap_or("graph_sync_contract_error");
70+
write!(f, "graph sync-file contract error: {reason}")
71+
}
72+
}
73+
74+
// Marker impl of the standard error trait (no methods overridden); the
// constructors' results are converted with `.into()` in `sync_file_graph`.
impl std::error::Error for GraphSyncContractError {}
75+
1676
fn format_success_text(output: &GraphLifecycleOutput) -> String {
1777
format!(
1878
"{} for project {}: {}",
@@ -54,20 +114,46 @@ fn lifecycle_output(
54114
}
55115
}
56116

57-
struct GraphFileSyncOutcome {
58-
relationships_written: usize,
59-
symbols_synced: usize,
117+
/// Result of a single-file graph sync attempt, as returned by `sync_file_graph`.
enum GraphFileSyncOutcome {
118+
/// The file was synced. `relationships_written` is the count returned by
/// `code_graph::sync_file_graph`; `symbols_synced` is the number of
/// definitions read for the file.
Synced {
119+
relationships_written: usize,
120+
symbols_synced: usize,
121+
},
122+
/// The indexed file was missing and the caller passed
/// `allow_missing_indexed_file`, so the sync was skipped instead of failing.
SkippedMissingIndexedFile,
60123
}
61124

62-
fn sync_file_graph(ctx: &Context, file_path: &str) -> anyhow::Result<GraphFileSyncOutcome> {
63-
code_graph::require_graph_reads(ctx)?;
125+
/// Builds the JSON payload reported when a sync is skipped because the indexed
/// file is missing.
///
/// Contains `project_id`, `file_path`, `status: "skipped"`, and
/// `reason: "indexed_file_not_found"`. Unlike the error payloads it has no
/// `success` or `error` keys.
fn skipped_missing_indexed_file_payload(ctx: &Context, file_path: &str) -> Value {
126+
json!({
127+
"project_id": ctx.project_id,
128+
"file_path": file_path,
129+
"status": "skipped",
130+
"reason": "indexed_file_not_found",
131+
})
132+
}
133+
134+
fn sync_file_graph(
135+
ctx: &Context,
136+
file_path: &str,
137+
allow_missing_indexed_file: bool,
138+
) -> anyhow::Result<GraphFileSyncOutcome> {
64139
let mut conn = db::connect_readwrite(&ctx.database_url)?;
140+
if !db::indexed_project_exists(&mut conn, &ctx.project_id)? {
141+
return Err(GraphSyncContractError::project_not_indexed(ctx, file_path).into());
142+
}
143+
if !db::indexed_file_exists(&mut conn, &ctx.project_id, file_path)? {
144+
if allow_missing_indexed_file {
145+
return Ok(GraphFileSyncOutcome::SkippedMissingIndexedFile);
146+
}
147+
return Err(GraphSyncContractError::indexed_file_not_found(ctx, file_path).into());
148+
}
149+
150+
code_graph::require_graph_reads(ctx)?;
65151
let facts = db::read_graph_file_facts(&mut conn, &ctx.project_id, file_path)?;
66152
if !db::mark_graph_sync_attempted(&mut conn, &ctx.project_id, file_path)? {
67-
anyhow::bail!(
68-
"indexed file `{file_path}` was not found for project {}",
69-
ctx.project_id
70-
);
153+
if allow_missing_indexed_file {
154+
return Ok(GraphFileSyncOutcome::SkippedMissingIndexedFile);
155+
}
156+
return Err(GraphSyncContractError::indexed_file_not_found(ctx, file_path).into());
71157
}
72158
let relationships_written = code_graph::sync_file_graph(
73159
ctx,
@@ -77,7 +163,7 @@ fn sync_file_graph(ctx: &Context, file_path: &str) -> anyhow::Result<GraphFileSy
77163
&facts.calls,
78164
)?;
79165
db::mark_graph_synced(&mut conn, &ctx.project_id, file_path)?;
80-
Ok(GraphFileSyncOutcome {
166+
Ok(GraphFileSyncOutcome::Synced {
81167
relationships_written,
82168
symbols_synced: facts.definitions.len(),
83169
})
@@ -186,10 +272,25 @@ pub fn rebuild(ctx: &Context, format: Format) -> anyhow::Result<()> {
186272
run_lifecycle_action(ctx, GraphLifecycleAction::Rebuild, format)
187273
}
188274

189-
pub fn sync_file(ctx: &Context, file_path: &str, format: Format) -> anyhow::Result<()> {
190-
let sync = sync_file_graph(ctx, file_path)?;
191-
let relationships_written = sync.relationships_written;
192-
let report = ProjectionSyncReport::ok(1, sync.symbols_synced);
275+
pub fn sync_file(
276+
ctx: &Context,
277+
file_path: &str,
278+
allow_missing_indexed_file: bool,
279+
format: Format,
280+
) -> anyhow::Result<()> {
281+
let sync = sync_file_graph(ctx, file_path, allow_missing_indexed_file)?;
282+
let GraphFileSyncOutcome::Synced {
283+
relationships_written,
284+
symbols_synced,
285+
} = sync
286+
else {
287+
let payload = skipped_missing_indexed_file_payload(ctx, file_path);
288+
return match format {
289+
Format::Json => output::print_json(&payload),
290+
Format::Text => output::print_json_compact(&payload),
291+
};
292+
};
293+
let report = ProjectionSyncReport::ok(1, symbols_synced);
193294
let summary = format!("synced {relationships_written} graph relationships for {file_path}");
194295
let payload = json!({
195296
"success": true,
@@ -691,6 +792,37 @@ mod tests {
691792
assert!(!source.contains(&daemon_lifecycle));
692793
}
693794

795+
// Pins the exit code and the payload fields of the "project not indexed" error.
#[test]
796+
fn missing_project_sync_error_has_typed_payload() {
797+
let ctx = make_ctx_no_falkordb();
798+
let error = GraphSyncContractError::project_not_indexed(&ctx, "src/lib.rs");
799+
800+
assert_eq!(error.exit_code(), GRAPH_SYNC_CONTRACT_EXIT_CODE);
801+
// NOTE(review): "test-project" is presumably the project id set by
// `make_ctx_no_falkordb` — that helper is not visible here.
assert_eq!(error.payload()["project_id"], "test-project");
802+
assert_eq!(error.payload()["file_path"], "src/lib.rs");
803+
assert_eq!(error.payload()["status"], "error");
804+
assert_eq!(error.payload()["reason"], "project_not_indexed");
805+
}
806+
807+
// Pins the "indexed file not found" error (exit code and reason) and the exact
// shape of the skipped payload for the same missing file.
#[test]
808+
fn missing_file_sync_error_and_skip_payloads_are_typed() {
809+
let ctx = make_ctx_no_falkordb();
810+
let error = GraphSyncContractError::indexed_file_not_found(&ctx, "src/missing.rs");
811+
let skipped = skipped_missing_indexed_file_payload(&ctx, "src/missing.rs");
812+
813+
assert_eq!(error.exit_code(), GRAPH_SYNC_CONTRACT_EXIT_CODE);
814+
assert_eq!(error.payload()["reason"], "indexed_file_not_found");
815+
// Whole-value comparison: the skipped payload must contain exactly these
// four keys and nothing else.
assert_eq!(
816+
skipped,
817+
json!({
818+
"project_id": "test-project",
819+
"file_path": "src/missing.rs",
820+
"status": "skipped",
821+
"reason": "indexed_file_not_found",
822+
})
823+
);
824+
}
825+
694826
#[test]
695827
fn test_build_lifecycle_url_clear_uses_project_id_query() {
696828
let url = code_graph::build_lifecycle_url(

0 commit comments

Comments
 (0)