11use anyhow:: Result ;
22use ignore:: WalkBuilder ;
3- use serde:: { Deserialize , Serialize } ;
43use sha2:: { Digest , Sha256 } ;
54use std:: collections:: { HashMap , HashSet } ;
65use std:: path:: { Path , PathBuf } ;
@@ -12,126 +11,30 @@ use crate::core::diff_parser::{ChangeType, UnifiedDiff};
1211use crate :: core:: function_chunker:: chunk_diff_by_functions;
1312use crate :: core:: ContextProvenance ;
1413
14+ #[ path = "semantic/persistence.rs" ]
15+ mod persistence;
16+ #[ path = "semantic/types.rs" ]
17+ mod types;
18+
1519const MAX_CODE_FILE_BYTES : usize = 512 * 1024 ;
1620const FALLBACK_EMBEDDING_DIMENSIONS : usize = 128 ;
1721const SUPPORTED_CODE_EXTENSIONS : & [ & str ] = & [
1822 "rs" , "py" , "pyi" , "js" , "jsx" , "ts" , "tsx" , "go" , "java" , "kt" , "cs" , "rb" , "php" , "c" , "h" ,
1923 "cc" , "cpp" , "cxx" , "hpp" , "swift" , "scala" ,
2024] ;
2125
22- #[ derive( Debug , Clone , Serialize , Deserialize ) ]
23- pub struct SemanticChunk {
24- pub key : String ,
25- pub file_path : PathBuf ,
26- pub symbol_name : String ,
27- pub line_range : ( usize , usize ) ,
28- pub summary : String ,
29- pub embedding_text : String ,
30- pub code_excerpt : String ,
31- pub embedding : Vec < f32 > ,
32- pub content_hash : String ,
33- }
34-
35- #[ derive( Debug , Clone , Serialize , Deserialize ) ]
36- pub struct SemanticIndex {
37- pub version : u32 ,
38- pub entries : HashMap < String , SemanticChunk > ,
39- #[ serde( default ) ]
40- pub file_states : HashMap < PathBuf , SemanticFileState > ,
41- #[ serde( default ) ]
42- pub embedding : SemanticEmbeddingMetadata ,
43- }
44-
45- #[ derive( Debug , Clone ) ]
46- pub struct SemanticMatch {
47- pub chunk : SemanticChunk ,
48- pub similarity : f32 ,
49- }
50-
51- #[ derive( Debug , Clone , Serialize , Deserialize ) ]
52- pub struct SemanticFeedbackExample {
53- pub content : String ,
54- pub category : String ,
55- pub file_patterns : Vec < String > ,
56- pub accepted : bool ,
57- pub created_at : String ,
58- pub embedding : Vec < f32 > ,
59- }
60-
61- #[ derive( Debug , Clone , Serialize , Deserialize ) ]
62- pub struct SemanticFeedbackStore {
63- pub version : u32 ,
64- pub examples : Vec < SemanticFeedbackExample > ,
65- #[ serde( default ) ]
66- pub embedding : SemanticEmbeddingMetadata ,
67- }
26+ use types:: default_embedding_metadata;
6827
69- #[ derive( Debug , Clone , Serialize , Deserialize , Default , PartialEq , Eq ) ]
70- pub struct SemanticFileState {
71- pub content_hash : String ,
72- }
73-
74- #[ derive( Debug , Clone , Serialize , Deserialize , PartialEq , Eq ) ]
75- pub struct SemanticEmbeddingMetadata {
76- pub strategy : String ,
77- pub model : String ,
78- pub dimensions : usize ,
79- }
80-
81- impl Default for SemanticEmbeddingMetadata {
82- fn default ( ) -> Self {
83- default_embedding_metadata ( )
84- }
85- }
86-
87- impl Default for SemanticIndex {
88- fn default ( ) -> Self {
89- Self {
90- version : 1 ,
91- entries : HashMap :: new ( ) ,
92- file_states : HashMap :: new ( ) ,
93- embedding : default_embedding_metadata ( ) ,
94- }
95- }
96- }
97-
98- impl Default for SemanticFeedbackStore {
99- fn default ( ) -> Self {
100- Self {
101- version : 1 ,
102- examples : Vec :: new ( ) ,
103- embedding : default_embedding_metadata ( ) ,
104- }
105- }
106- }
107-
108- fn default_embedding_metadata ( ) -> SemanticEmbeddingMetadata {
109- SemanticEmbeddingMetadata {
110- strategy : "hash-v1" . to_string ( ) ,
111- model : "local-hash" . to_string ( ) ,
112- dimensions : FALLBACK_EMBEDDING_DIMENSIONS ,
113- }
114- }
115-
116- impl SemanticIndex {
117- pub fn to_json ( & self ) -> Result < String , serde_json:: Error > {
118- serde_json:: to_string_pretty ( self )
119- }
120-
121- pub fn from_json ( content : & str ) -> Result < Self , serde_json:: Error > {
122- serde_json:: from_str ( content)
123- }
124- }
28+ pub use persistence:: {
29+ default_index_path, default_semantic_feedback_path, load_semantic_feedback_store,
30+ load_semantic_index, save_semantic_feedback_store, save_semantic_index,
31+ } ;
32+ pub use types:: {
33+ SemanticChunk , SemanticEmbeddingMetadata , SemanticFeedbackExample , SemanticFeedbackStore ,
34+ SemanticFileState , SemanticIndex , SemanticMatch ,
35+ } ;
12536
12637impl SemanticFeedbackStore {
127- pub fn to_json ( & self ) -> Result < String , serde_json:: Error > {
128- serde_json:: to_string_pretty ( self )
129- }
130-
131- pub fn from_json ( content : & str ) -> Result < Self , serde_json:: Error > {
132- serde_json:: from_str ( content)
133- }
134-
13538 pub fn add_example ( & mut self , example : SemanticFeedbackExample ) {
13639 let fingerprint = feedback_example_fingerprint (
13740 & example. content ,
@@ -164,61 +67,6 @@ pub fn align_semantic_feedback_store(
16467 store. embedding = merge_embedding_metadata ( & store. embedding , & expected) ;
16568}
16669
167- pub fn default_index_path ( repo_root : & Path ) -> PathBuf {
168- let repo_key = hash_text ( & repo_root. to_string_lossy ( ) ) ;
169- dirs:: data_local_dir ( )
170- . unwrap_or_else ( || PathBuf :: from ( "." ) )
171- . join ( "diffscope" )
172- . join ( "semantic" )
173- . join ( format ! ( "{}.json" , & repo_key[ ..16 ] ) )
174- }
175-
176- pub fn default_semantic_feedback_path ( feedback_path : & Path ) -> PathBuf {
177- let parent = feedback_path. parent ( ) . unwrap_or_else ( || Path :: new ( "." ) ) ;
178- let stem = feedback_path
179- . file_stem ( )
180- . and_then ( |value| value. to_str ( ) )
181- . unwrap_or ( "diffscope.feedback" ) ;
182- parent. join ( format ! ( "{}.semantic.json" , stem) )
183- }
184-
185- pub fn load_semantic_index ( path : & Path ) -> SemanticIndex {
186- std:: fs:: read_to_string ( path)
187- . ok ( )
188- . and_then ( |content| SemanticIndex :: from_json ( & content) . ok ( ) )
189- . unwrap_or_default ( )
190- }
191-
192- pub fn save_semantic_index ( path : & Path , index : & SemanticIndex ) -> Result < ( ) > {
193- atomic_write_string ( path, & index. to_json ( ) ?)
194- }
195-
196- pub fn load_semantic_feedback_store ( path : & Path ) -> SemanticFeedbackStore {
197- std:: fs:: read_to_string ( path)
198- . ok ( )
199- . and_then ( |content| SemanticFeedbackStore :: from_json ( & content) . ok ( ) )
200- . unwrap_or_default ( )
201- }
202-
203- pub fn save_semantic_feedback_store ( path : & Path , store : & SemanticFeedbackStore ) -> Result < ( ) > {
204- atomic_write_string ( path, & store. to_json ( ) ?)
205- }
206-
207- fn atomic_write_string ( path : & Path , content : & str ) -> Result < ( ) > {
208- if let Some ( parent) = path. parent ( ) {
209- std:: fs:: create_dir_all ( parent) ?;
210- }
211-
212- let file_name = path
213- . file_name ( )
214- . and_then ( |value| value. to_str ( ) )
215- . unwrap_or ( "semantic.json" ) ;
216- let tmp_path = path. with_file_name ( format ! ( "{}.{}.tmp" , file_name, std:: process:: id( ) ) ) ;
217- std:: fs:: write ( & tmp_path, content) ?;
218- std:: fs:: rename ( & tmp_path, path) ?;
219- Ok ( ( ) )
220- }
221-
22270pub async fn embed_texts_with_fallback (
22371 adapter : Option < & dyn LLMAdapter > ,
22472 texts : & [ String ] ,
0 commit comments