Skip to content

Commit 0cf021a

Browse files
committed
feat(review): persist repository graph cache
1 parent 294ad93 commit 0cf021a

File tree

6 files changed

+410
-61
lines changed

6 files changed

+410
-61
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ This roadmap is derived from deep research into Greptile's public docs, blog, MC
6161

6262
## 4. Code Graph and Repository Intelligence
6363

64-
31. [ ] Turn the current symbol graph into a persisted repository graph with durable storage and reload support.
64+
31. [x] Turn the current symbol graph into a persisted repository graph with durable storage and reload support.
6565
32. [x] Add caller/callee expansion APIs for multi-hop impact analysis from changed symbols.
6666
33. [x] Add contract edges between interfaces, implementations, and API endpoints.
6767
34. [x] Add "similar implementation" lookup so repeated patterns and divergences are explicit.

src/core/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,7 @@ pub use semantic::{
4646
load_semantic_feedback_store, refresh_semantic_index, save_semantic_feedback_store,
4747
semantic_context_for_diff, SemanticFeedbackExample, SemanticFeedbackStore,
4848
};
49-
pub use symbol_index::{SymbolContextRetriever, SymbolIndex, SymbolRetrievalPolicy};
49+
pub use symbol_index::{
50+
default_symbol_index_path, load_symbol_index, save_symbol_index, SymbolContextRetriever,
51+
SymbolIndex, SymbolRetrievalPolicy,
52+
};

src/core/symbol_graph.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
use std::collections::{HashMap, HashSet};
22
use std::path::{Path, PathBuf};
33

4+
use serde::{Deserialize, Serialize};
5+
46
use crate::core::symbol_index::SymbolLocation;
57
use crate::core::ContextProvenance;
68

79
/// Relationship between two symbols in the codebase.
8-
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
10+
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
911
pub enum SymbolRelation {
1012
/// This symbol calls the target.
1113
Calls,
@@ -61,7 +63,7 @@ impl SymbolRelation {
6163
}
6264

6365
/// A reference to a related symbol with the relationship type.
64-
#[derive(Debug, Clone)]
66+
#[derive(Debug, Clone, Serialize, Deserialize)]
6567
pub struct SymbolEdge {
6668
pub target: String,
6769
pub relation: SymbolRelation,
@@ -70,7 +72,7 @@ pub struct SymbolEdge {
7072
}
7173

7274
/// A node in the symbol graph representing a single symbol definition.
73-
#[derive(Debug, Clone)]
75+
#[derive(Debug, Clone, Serialize, Deserialize)]
7476
pub struct SymbolNode {
7577
pub name: String,
7678
pub file_path: PathBuf,
@@ -79,7 +81,7 @@ pub struct SymbolNode {
7981
pub edges: Vec<SymbolEdge>,
8082
}
8183

82-
#[derive(Debug, Clone, PartialEq, Eq)]
84+
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
8385
pub enum SymbolKind {
8486
Function,
8587
Struct,
@@ -89,7 +91,7 @@ pub enum SymbolKind {
8991
}
9092

9193
/// Graph-based symbol index that tracks relationships between symbols.
92-
#[derive(Debug, Default)]
94+
#[derive(Debug, Default, Serialize, Deserialize)]
9395
pub struct SymbolGraph {
9496
/// symbol_name -> list of nodes (same name can appear in multiple files)
9597
nodes: HashMap<String, Vec<SymbolNode>>,

src/core/symbol_index.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use git2::Repository;
44
use ignore::WalkBuilder;
55
use once_cell::sync::Lazy;
66
use regex::Regex;
7+
use serde::{Deserialize, Serialize};
78
use serde_json::{json, Value};
89
use std::collections::{HashMap, HashSet};
910
use std::env;
@@ -15,20 +16,23 @@ use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
1516
use crate::core::symbol_graph::SymbolGraph;
1617
use crate::core::ContextProvenance;
1718

19+
#[path = "symbol_index/persistence.rs"]
20+
mod persistence;
1821
#[path = "symbol_index/retrieval.rs"]
1922
mod retrieval;
2023

24+
pub use persistence::{default_symbol_index_path, load_symbol_index, save_symbol_index};
2125
pub use retrieval::{SymbolContextRetriever, SymbolRetrievalPolicy};
2226

23-
#[derive(Debug, Clone)]
27+
#[derive(Debug, Clone, Serialize, Deserialize)]
2428
pub struct SymbolLocation {
2529
pub file_path: PathBuf,
2630
pub line_range: (usize, usize),
2731
pub snippet: String,
2832
pub provenance: Option<ContextProvenance>,
2933
}
3034

31-
#[derive(Debug, Clone, Default)]
35+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
3236
struct SymbolIndexMetadata {
3337
provider: String,
3438
repo_revision: Option<String>,
@@ -160,7 +164,7 @@ impl SymbolIndexFreshnessSnapshot {
160164
}
161165
}
162166

163-
#[derive(Debug, Default)]
167+
#[derive(Debug, Default, Serialize, Deserialize)]
164168
pub struct SymbolIndex {
165169
symbols: HashMap<String, Vec<SymbolLocation>>,
166170
dependency_graph: HashMap<PathBuf, HashSet<PathBuf>>,
@@ -171,7 +175,7 @@ pub struct SymbolIndex {
171175
metadata: SymbolIndexMetadata,
172176
}
173177

174-
#[derive(Debug, Clone)]
178+
#[derive(Debug, Clone, Serialize, Deserialize)]
175179
struct FileSummary {
176180
snippet: String,
177181
line_count: usize,
@@ -555,6 +559,14 @@ impl SymbolIndex {
555559
self.freshness_snapshot(repo_root).trace_details()
556560
}
557561

562+
/// Renders the entire index as pretty-printed JSON text.
///
/// # Errors
/// Returns the underlying `serde_json::Error` when serialization fails.
pub fn to_json(&self) -> Result<String, serde_json::Error> {
    let rendered = serde_json::to_string_pretty(self)?;
    Ok(rendered)
}
565+
566+
/// Rebuilds an index from JSON text, such as the output of `to_json`.
///
/// # Errors
/// Returns the underlying `serde_json::Error` when `content` cannot be
/// deserialized into an index.
pub fn from_json(content: &str) -> Result<Self, serde_json::Error> {
    let index: Self = serde_json::from_str(content)?;
    Ok(index)
}
569+
558570
fn register_file_summary(&mut self, relative: &Path, lines: &[&str]) {
559571
let line_count = lines.len();
560572
let snippet = lines
src/core/symbol_index/persistence.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
use anyhow::Result;
2+
use sha2::{Digest, Sha256};
3+
use std::path::{Path, PathBuf};
4+
5+
use super::SymbolIndex;
6+
7+
pub fn default_symbol_index_path(repo_root: &Path, cache_key: &str) -> PathBuf {
8+
let repo_key = hash_text(&repo_root.to_string_lossy());
9+
let cache_key = if cache_key.trim().is_empty() {
10+
"default".to_string()
11+
} else {
12+
cache_key.trim().to_string()
13+
};
14+
15+
dirs::data_local_dir()
16+
.unwrap_or_else(|| PathBuf::from("."))
17+
.join("diffscope")
18+
.join("symbol-index")
19+
.join(format!(
20+
"{}-{}.json",
21+
&repo_key[..16],
22+
&cache_key[..cache_key.len().min(16)]
23+
))
24+
}
25+
26+
pub fn load_symbol_index(path: &Path) -> Option<SymbolIndex> {
27+
std::fs::read_to_string(path)
28+
.ok()
29+
.and_then(|content| SymbolIndex::from_json(&content).ok())
30+
}
31+
32+
pub fn save_symbol_index(path: &Path, index: &SymbolIndex) -> Result<()> {
33+
atomic_write_string(path, &index.to_json()?)
34+
}
35+
36+
fn atomic_write_string(path: &Path, content: &str) -> Result<()> {
37+
if let Some(parent) = path.parent() {
38+
std::fs::create_dir_all(parent)?;
39+
}
40+
41+
let file_name = path
42+
.file_name()
43+
.and_then(|value| value.to_str())
44+
.unwrap_or("symbol-index.json");
45+
let tmp_path = path.with_file_name(format!("{}.{}.tmp", file_name, std::process::id()));
46+
std::fs::write(&tmp_path, content)?;
47+
std::fs::rename(&tmp_path, path)?;
48+
Ok(())
49+
}
50+
51+
fn hash_text(content: &str) -> String {
52+
format!("{:x}", Sha256::digest(content.as_bytes()))
53+
}
54+
55+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::SymbolIndex;

    /// Builds an index over a one-file repository, saves it, reloads it, and
    /// checks that the reloaded index reports the same counts and still
    /// resolves the indexed symbol.
    #[test]
    fn save_and_load_symbol_index_round_trips() {
        let workspace = tempfile::tempdir().unwrap();
        let cache_path = workspace.path().join("symbol-index.json");

        // Minimal repository: a single source file defining `helper`.
        let repo_root = workspace.path().join("repo");
        let src_dir = repo_root.join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        std::fs::write(src_dir.join("lib.rs"), "pub fn helper() {}\n").unwrap();

        let original = SymbolIndex::build(&repo_root, 16, 128 * 1024, 8, |_path| false).unwrap();
        save_symbol_index(&cache_path, &original).unwrap();

        let reloaded = load_symbol_index(&cache_path).unwrap();
        assert_eq!(reloaded.files_indexed(), original.files_indexed());
        assert_eq!(reloaded.symbols_indexed(), original.symbols_indexed());
        assert!(reloaded.lookup("helper").is_some());
    }
}

0 commit comments

Comments
 (0)