Skip to content

Commit 23dfb0c

Browse files
committed
refactor: split core semantic types and persistence
Move semantic data models and persistence/path helpers into focused modules so later embedding and retrieval refactors can change the index pipeline without reopening basic storage code.

Made-with: Cursor
1 parent 8e7bc81 commit 23dfb0c

File tree

4 files changed

+191
-168
lines changed

4 files changed

+191
-168
lines changed

TODO.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
## Immediate Queue
1212

1313
- [ ] `src/core/semantic.rs`
14-
- Split semantic index/store model types and defaults from persistence I/O helpers.
1514
- Split embedding metadata compatibility and adapter/fallback embedding generation.
1615
- Split source-file discovery and excerpt/query builders from index refresh bookkeeping.
1716
- Split semantic diff retrieval and feedback-example matching from feedback-store maintenance.
@@ -24,7 +23,6 @@
2423
## Core Backlog
2524

2625
- [ ] `src/core/semantic.rs`
27-
- Split JSON/file persistence helpers from default path derivation and atomic write helpers.
2826
- Split semantic chunk hashing/key generation from summary/excerpt assembly.
2927
- Split changed-range filtering and per-query match scoring from context chunk rendering.
3028
- Split feedback embedding-text/fingerprint helpers from feedback-store reconciliation.

src/core/semantic.rs

Lines changed: 14 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use anyhow::Result;
22
use ignore::WalkBuilder;
3-
use serde::{Deserialize, Serialize};
43
use sha2::{Digest, Sha256};
54
use std::collections::{HashMap, HashSet};
65
use std::path::{Path, PathBuf};
@@ -12,126 +11,30 @@ use crate::core::diff_parser::{ChangeType, UnifiedDiff};
1211
use crate::core::function_chunker::chunk_diff_by_functions;
1312
use crate::core::ContextProvenance;
1413

14+
#[path = "semantic/persistence.rs"]
15+
mod persistence;
16+
#[path = "semantic/types.rs"]
17+
mod types;
18+
1519
const MAX_CODE_FILE_BYTES: usize = 512 * 1024;
1620
const FALLBACK_EMBEDDING_DIMENSIONS: usize = 128;
1721
const SUPPORTED_CODE_EXTENSIONS: &[&str] = &[
1822
"rs", "py", "pyi", "js", "jsx", "ts", "tsx", "go", "java", "kt", "cs", "rb", "php", "c", "h",
1923
"cc", "cpp", "cxx", "hpp", "swift", "scala",
2024
];
2125

22-
#[derive(Debug, Clone, Serialize, Deserialize)]
23-
pub struct SemanticChunk {
24-
pub key: String,
25-
pub file_path: PathBuf,
26-
pub symbol_name: String,
27-
pub line_range: (usize, usize),
28-
pub summary: String,
29-
pub embedding_text: String,
30-
pub code_excerpt: String,
31-
pub embedding: Vec<f32>,
32-
pub content_hash: String,
33-
}
34-
35-
#[derive(Debug, Clone, Serialize, Deserialize)]
36-
pub struct SemanticIndex {
37-
pub version: u32,
38-
pub entries: HashMap<String, SemanticChunk>,
39-
#[serde(default)]
40-
pub file_states: HashMap<PathBuf, SemanticFileState>,
41-
#[serde(default)]
42-
pub embedding: SemanticEmbeddingMetadata,
43-
}
44-
45-
#[derive(Debug, Clone)]
46-
pub struct SemanticMatch {
47-
pub chunk: SemanticChunk,
48-
pub similarity: f32,
49-
}
50-
51-
#[derive(Debug, Clone, Serialize, Deserialize)]
52-
pub struct SemanticFeedbackExample {
53-
pub content: String,
54-
pub category: String,
55-
pub file_patterns: Vec<String>,
56-
pub accepted: bool,
57-
pub created_at: String,
58-
pub embedding: Vec<f32>,
59-
}
60-
61-
#[derive(Debug, Clone, Serialize, Deserialize)]
62-
pub struct SemanticFeedbackStore {
63-
pub version: u32,
64-
pub examples: Vec<SemanticFeedbackExample>,
65-
#[serde(default)]
66-
pub embedding: SemanticEmbeddingMetadata,
67-
}
26+
use types::default_embedding_metadata;
6827

69-
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
70-
pub struct SemanticFileState {
71-
pub content_hash: String,
72-
}
73-
74-
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
75-
pub struct SemanticEmbeddingMetadata {
76-
pub strategy: String,
77-
pub model: String,
78-
pub dimensions: usize,
79-
}
80-
81-
impl Default for SemanticEmbeddingMetadata {
82-
fn default() -> Self {
83-
default_embedding_metadata()
84-
}
85-
}
86-
87-
impl Default for SemanticIndex {
88-
fn default() -> Self {
89-
Self {
90-
version: 1,
91-
entries: HashMap::new(),
92-
file_states: HashMap::new(),
93-
embedding: default_embedding_metadata(),
94-
}
95-
}
96-
}
97-
98-
impl Default for SemanticFeedbackStore {
99-
fn default() -> Self {
100-
Self {
101-
version: 1,
102-
examples: Vec::new(),
103-
embedding: default_embedding_metadata(),
104-
}
105-
}
106-
}
107-
108-
fn default_embedding_metadata() -> SemanticEmbeddingMetadata {
109-
SemanticEmbeddingMetadata {
110-
strategy: "hash-v1".to_string(),
111-
model: "local-hash".to_string(),
112-
dimensions: FALLBACK_EMBEDDING_DIMENSIONS,
113-
}
114-
}
115-
116-
impl SemanticIndex {
117-
pub fn to_json(&self) -> Result<String, serde_json::Error> {
118-
serde_json::to_string_pretty(self)
119-
}
120-
121-
pub fn from_json(content: &str) -> Result<Self, serde_json::Error> {
122-
serde_json::from_str(content)
123-
}
124-
}
28+
pub use persistence::{
29+
default_index_path, default_semantic_feedback_path, load_semantic_feedback_store,
30+
load_semantic_index, save_semantic_feedback_store, save_semantic_index,
31+
};
32+
pub use types::{
33+
SemanticChunk, SemanticEmbeddingMetadata, SemanticFeedbackExample, SemanticFeedbackStore,
34+
SemanticFileState, SemanticIndex, SemanticMatch,
35+
};
12536

12637
impl SemanticFeedbackStore {
127-
pub fn to_json(&self) -> Result<String, serde_json::Error> {
128-
serde_json::to_string_pretty(self)
129-
}
130-
131-
pub fn from_json(content: &str) -> Result<Self, serde_json::Error> {
132-
serde_json::from_str(content)
133-
}
134-
13538
pub fn add_example(&mut self, example: SemanticFeedbackExample) {
13639
let fingerprint = feedback_example_fingerprint(
13740
&example.content,
@@ -164,61 +67,6 @@ pub fn align_semantic_feedback_store(
16467
store.embedding = merge_embedding_metadata(&store.embedding, &expected);
16568
}
16669

167-
pub fn default_index_path(repo_root: &Path) -> PathBuf {
168-
let repo_key = hash_text(&repo_root.to_string_lossy());
169-
dirs::data_local_dir()
170-
.unwrap_or_else(|| PathBuf::from("."))
171-
.join("diffscope")
172-
.join("semantic")
173-
.join(format!("{}.json", &repo_key[..16]))
174-
}
175-
176-
pub fn default_semantic_feedback_path(feedback_path: &Path) -> PathBuf {
177-
let parent = feedback_path.parent().unwrap_or_else(|| Path::new("."));
178-
let stem = feedback_path
179-
.file_stem()
180-
.and_then(|value| value.to_str())
181-
.unwrap_or("diffscope.feedback");
182-
parent.join(format!("{}.semantic.json", stem))
183-
}
184-
185-
pub fn load_semantic_index(path: &Path) -> SemanticIndex {
186-
std::fs::read_to_string(path)
187-
.ok()
188-
.and_then(|content| SemanticIndex::from_json(&content).ok())
189-
.unwrap_or_default()
190-
}
191-
192-
pub fn save_semantic_index(path: &Path, index: &SemanticIndex) -> Result<()> {
193-
atomic_write_string(path, &index.to_json()?)
194-
}
195-
196-
pub fn load_semantic_feedback_store(path: &Path) -> SemanticFeedbackStore {
197-
std::fs::read_to_string(path)
198-
.ok()
199-
.and_then(|content| SemanticFeedbackStore::from_json(&content).ok())
200-
.unwrap_or_default()
201-
}
202-
203-
pub fn save_semantic_feedback_store(path: &Path, store: &SemanticFeedbackStore) -> Result<()> {
204-
atomic_write_string(path, &store.to_json()?)
205-
}
206-
207-
fn atomic_write_string(path: &Path, content: &str) -> Result<()> {
208-
if let Some(parent) = path.parent() {
209-
std::fs::create_dir_all(parent)?;
210-
}
211-
212-
let file_name = path
213-
.file_name()
214-
.and_then(|value| value.to_str())
215-
.unwrap_or("semantic.json");
216-
let tmp_path = path.with_file_name(format!("{}.{}.tmp", file_name, std::process::id()));
217-
std::fs::write(&tmp_path, content)?;
218-
std::fs::rename(&tmp_path, path)?;
219-
Ok(())
220-
}
221-
22270
pub async fn embed_texts_with_fallback(
22371
adapter: Option<&dyn LLMAdapter>,
22472
texts: &[String],

src/core/semantic/persistence.rs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
use anyhow::Result;
2+
use std::path::{Path, PathBuf};
3+
4+
use super::{SemanticFeedbackStore, SemanticIndex};
5+
6+
pub fn default_index_path(repo_root: &Path) -> PathBuf {
7+
let repo_key = super::hash_text(&repo_root.to_string_lossy());
8+
dirs::data_local_dir()
9+
.unwrap_or_else(|| PathBuf::from("."))
10+
.join("diffscope")
11+
.join("semantic")
12+
.join(format!("{}.json", &repo_key[..16]))
13+
}
14+
15+
pub fn default_semantic_feedback_path(feedback_path: &Path) -> PathBuf {
16+
let parent = feedback_path.parent().unwrap_or_else(|| Path::new("."));
17+
let stem = feedback_path
18+
.file_stem()
19+
.and_then(|value| value.to_str())
20+
.unwrap_or("diffscope.feedback");
21+
parent.join(format!("{}.semantic.json", stem))
22+
}
23+
24+
pub fn load_semantic_index(path: &Path) -> SemanticIndex {
25+
std::fs::read_to_string(path)
26+
.ok()
27+
.and_then(|content| SemanticIndex::from_json(&content).ok())
28+
.unwrap_or_default()
29+
}
30+
31+
pub fn save_semantic_index(path: &Path, index: &SemanticIndex) -> Result<()> {
32+
atomic_write_string(path, &index.to_json()?)
33+
}
34+
35+
pub fn load_semantic_feedback_store(path: &Path) -> SemanticFeedbackStore {
36+
std::fs::read_to_string(path)
37+
.ok()
38+
.and_then(|content| SemanticFeedbackStore::from_json(&content).ok())
39+
.unwrap_or_default()
40+
}
41+
42+
pub fn save_semantic_feedback_store(path: &Path, store: &SemanticFeedbackStore) -> Result<()> {
43+
atomic_write_string(path, &store.to_json()?)
44+
}
45+
46+
fn atomic_write_string(path: &Path, content: &str) -> Result<()> {
47+
if let Some(parent) = path.parent() {
48+
std::fs::create_dir_all(parent)?;
49+
}
50+
51+
let file_name = path
52+
.file_name()
53+
.and_then(|value| value.to_str())
54+
.unwrap_or("semantic.json");
55+
let tmp_path = path.with_file_name(format!("{}.{}.tmp", file_name, std::process::id()));
56+
std::fs::write(&tmp_path, content)?;
57+
std::fs::rename(&tmp_path, path)?;
58+
Ok(())
59+
}

0 commit comments

Comments
 (0)