Skip to content

Commit 2b7ac7c

Browse files
committed
feat: integrate grep-regex and grep-searcher for enhanced content matching in search functionality
1 parent 5d1095f commit 2b7ac7c

4 files changed

Lines changed: 111 additions & 21 deletions

File tree

packages/ui/lib/dialogs/FindFilesDialog.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,8 @@ export function FindFilesDialog({
6363
ignoreDirsEnabled,
6464
ignoreDirs: ignoreDirsEnabled ? parseIgnoreDirs(ignoreDirsText) : [],
6565
filePattern: filePattern.trim() || "*",
66-
contentPattern: contentPattern.trim(),
66+
// Preserve leading/trailing whitespace for content search (important for regex patterns like " *The").
67+
contentPattern,
6768
recursive,
6869
followSymlinks,
6970
shellPatterns,

src-tauri/Cargo.lock

Lines changed: 58 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src-tauri/dotdir-core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ serde = { version = "1", features = ["derive"] }
1212
thiserror = "2"
1313
globset = "0.4"
1414
regex = "1"
15+
grep-regex = "0.1"
16+
grep-searcher = "0.1"
1517

1618
[target.'cfg(unix)'.dependencies]
1719
libc = "0.2"

src-tauri/dotdir-core/src/search.rs

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use crate::copy::CancelToken;
22
use crate::error::FsError;
3+
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
4+
use grep_searcher::{Searcher, Sink, SinkMatch};
35
use globset::{GlobBuilder, GlobMatcher};
4-
use regex::{Regex, RegexBuilder};
56
use serde::{Deserialize, Serialize};
67
use std::collections::HashSet;
78
use std::fs;
9+
use std::io;
810
use std::path::{Path, PathBuf};
911

1012
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -90,8 +92,8 @@ fn build_name_matcher(request: &FileSearchRequest) -> Result<NameMatcher, FsErro
9092
})
9193
}
9294

93-
fn build_content_regex(request: &FileSearchRequest) -> Result<Option<Regex>, FsError> {
94-
let pattern = request.content_pattern.trim();
95+
fn build_content_regex(request: &FileSearchRequest) -> Result<Option<RegexMatcher>, FsError> {
96+
let pattern = request.content_pattern.as_str();
9597
if pattern.is_empty() {
9698
return Ok(None);
9799
}
@@ -107,11 +109,15 @@ fn build_content_regex(request: &FileSearchRequest) -> Result<Option<Regex>, FsE
107109
source
108110
};
109111

110-
let regex = RegexBuilder::new(&source)
111-
.case_insensitive(!request.case_sensitive_content)
112-
.build()
112+
let mut builder = RegexMatcherBuilder::new();
113+
builder.case_insensitive(!request.case_sensitive_content);
114+
// Force line-oriented matching so grep-searcher can stream without
115+
// falling back to whole-file multiline mode.
116+
builder.line_terminator(Some(b'\n'));
117+
let matcher = builder
118+
.build(&source)
113119
.map_err(|_| FsError::InvalidInput)?;
114-
Ok(Some(regex))
120+
Ok(Some(matcher))
115121
}
116122

117123
fn is_hidden(path: &Path) -> bool {
@@ -132,26 +138,49 @@ fn should_ignore_dir(path: &Path, ignored_dirs: &HashSet<String>) -> bool {
132138
.unwrap_or(false)
133139
}
134140

135-
fn read_file_matches(path: &Path, regex: &Regex, all_charsets: bool) -> bool {
136-
let bytes = match fs::read(path) {
137-
Ok(bytes) => bytes,
138-
Err(_) => return false,
139-
};
140-
if all_charsets {
141-
let text = String::from_utf8_lossy(&bytes);
142-
regex.is_match(&text)
143-
} else {
144-
std::str::from_utf8(&bytes)
145-
.map(|text| regex.is_match(text))
146-
.unwrap_or(false)
141+
#[derive(Default)]
142+
struct FirstMatchSink {
143+
found: bool,
144+
}
145+
146+
impl Sink for FirstMatchSink {
147+
type Error = io::Error;
148+
149+
fn matched(
150+
&mut self,
151+
_searcher: &Searcher,
152+
_mat: &SinkMatch<'_>,
153+
) -> Result<bool, Self::Error> {
154+
self.found = true;
155+
// Stop at the first content match for this file.
156+
Ok(false)
147157
}
148158
}
149159

160+
fn read_file_matches(path: &Path, matcher: &RegexMatcher, all_charsets: bool) -> bool {
161+
// Keep old behavior: with "all charsets" disabled, skip files that aren't
162+
// valid UTF-8. This check still reads the file, but matching itself below
163+
// is streaming and doesn't allocate the whole haystack for regex search.
164+
if !all_charsets {
165+
let bytes = match fs::read(path) {
166+
Ok(bytes) => bytes,
167+
Err(_) => return false,
168+
};
169+
if std::str::from_utf8(&bytes).is_err() {
170+
return false;
171+
}
172+
}
173+
174+
let mut searcher = Searcher::new();
175+
let mut sink = FirstMatchSink::default();
176+
searcher.search_path(matcher, path, &mut sink).is_ok() && sink.found
177+
}
178+
150179
fn entry_matches(
151180
path: &Path,
152181
is_directory: bool,
153182
name_matcher: &NameMatcher,
154-
content_regex: Option<&Regex>,
183+
content_regex: Option<&RegexMatcher>,
155184
all_charsets: bool,
156185
) -> bool {
157186
let file_name = path.file_name().and_then(|name| name.to_str()).unwrap_or("");

0 commit comments

Comments
 (0)