11use crate :: copy:: CancelToken ;
22use crate :: error:: FsError ;
3+ use grep_regex:: { RegexMatcher , RegexMatcherBuilder } ;
4+ use grep_searcher:: { Searcher , Sink , SinkMatch } ;
35use globset:: { GlobBuilder , GlobMatcher } ;
4- use regex:: { Regex , RegexBuilder } ;
56use serde:: { Deserialize , Serialize } ;
67use std:: collections:: HashSet ;
78use std:: fs;
9+ use std:: io;
810use std:: path:: { Path , PathBuf } ;
911
1012#[ derive( Debug , Clone , Serialize , Deserialize ) ]
@@ -90,8 +92,8 @@ fn build_name_matcher(request: &FileSearchRequest) -> Result<NameMatcher, FsErro
9092 } )
9193}
9294
93- fn build_content_regex ( request : & FileSearchRequest ) -> Result < Option < Regex > , FsError > {
94- let pattern = request. content_pattern . trim ( ) ;
95+ fn build_content_regex ( request : & FileSearchRequest ) -> Result < Option < RegexMatcher > , FsError > {
96+ let pattern = request. content_pattern . as_str ( ) ;
9597 if pattern. is_empty ( ) {
9698 return Ok ( None ) ;
9799 }
@@ -107,11 +109,15 @@ fn build_content_regex(request: &FileSearchRequest) -> Result<Option<Regex>, FsE
107109 source
108110 } ;
109111
110- let regex = RegexBuilder :: new ( & source)
111- . case_insensitive ( !request. case_sensitive_content )
112- . build ( )
112+ let mut builder = RegexMatcherBuilder :: new ( ) ;
113+ builder. case_insensitive ( !request. case_sensitive_content ) ;
114+ // Force line-oriented matching so grep-searcher can stream without
115+ // falling back to whole-file multiline mode.
116+ builder. line_terminator ( Some ( b'\n' ) ) ;
117+ let matcher = builder
118+ . build ( & source)
113119 . map_err ( |_| FsError :: InvalidInput ) ?;
114- Ok ( Some ( regex ) )
120+ Ok ( Some ( matcher ) )
115121}
116122
117123fn is_hidden ( path : & Path ) -> bool {
@@ -132,26 +138,49 @@ fn should_ignore_dir(path: &Path, ignored_dirs: &HashSet<String>) -> bool {
132138 . unwrap_or ( false )
133139}
134140
135- fn read_file_matches ( path : & Path , regex : & Regex , all_charsets : bool ) -> bool {
136- let bytes = match fs:: read ( path) {
137- Ok ( bytes) => bytes,
138- Err ( _) => return false ,
139- } ;
140- if all_charsets {
141- let text = String :: from_utf8_lossy ( & bytes) ;
142- regex. is_match ( & text)
143- } else {
144- std:: str:: from_utf8 ( & bytes)
145- . map ( |text| regex. is_match ( text) )
146- . unwrap_or ( false )
141+ #[ derive( Default ) ]
142+ struct FirstMatchSink {
143+ found : bool ,
144+ }
145+
146+ impl Sink for FirstMatchSink {
147+ type Error = io:: Error ;
148+
149+ fn matched (
150+ & mut self ,
151+ _searcher : & Searcher ,
152+ _mat : & SinkMatch < ' _ > ,
153+ ) -> Result < bool , Self :: Error > {
154+ self . found = true ;
155+ // Stop at the first content match for this file.
156+ Ok ( false )
147157 }
148158}
149159
160+ fn read_file_matches ( path : & Path , matcher : & RegexMatcher , all_charsets : bool ) -> bool {
161+ // Keep old behavior: with "all charsets" disabled, skip files that aren't
162+ // valid UTF-8. This check still reads the file, but matching itself below
163+ // is streaming and doesn't allocate the whole haystack for regex search.
164+ if !all_charsets {
165+ let bytes = match fs:: read ( path) {
166+ Ok ( bytes) => bytes,
167+ Err ( _) => return false ,
168+ } ;
169+ if std:: str:: from_utf8 ( & bytes) . is_err ( ) {
170+ return false ;
171+ }
172+ }
173+
174+ let mut searcher = Searcher :: new ( ) ;
175+ let mut sink = FirstMatchSink :: default ( ) ;
176+ searcher. search_path ( matcher, path, & mut sink) . is_ok ( ) && sink. found
177+ }
178+
150179fn entry_matches (
151180 path : & Path ,
152181 is_directory : bool ,
153182 name_matcher : & NameMatcher ,
154- content_regex : Option < & Regex > ,
183+ content_regex : Option < & RegexMatcher > ,
155184 all_charsets : bool ,
156185) -> bool {
157186 let file_name = path. file_name ( ) . and_then ( |name| name. to_str ( ) ) . unwrap_or ( "" ) ;
0 commit comments