Skip to content

Commit 8d5449e

Browse files
committed
Merge pull request #868 from DataDog/jf/K9VULN-12862
[K9VULN-12862] Skip crawling of non-UTF-8 paths
1 parent 5a879fc commit 8d5449e

2 files changed

Lines changed: 29 additions & 17 deletions

File tree

crates/bins/src/bin/datadog-static-analyzer.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,7 @@ fn main() -> Result<()> {
405405
subdirectories_to_analyze.clone(),
406406
&path_config,
407407
)
408-
.expect("unable to get the list of files to analyze");
408+
.context("unable to get the list of files to analyze")?;
409409

410410
let num_cores_requested = matches
411411
.opt_str("c")

crates/cli/src/file_utils.rs

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -196,14 +196,12 @@ pub fn get_files(
196196
// repo with a custom rule.
197197
let mut should_include = entry.is_file() && !entry.is_symlink();
198198

199-
let relative_path_str = entry
200-
.strip_prefix(directory)
201-
.ok()
202-
.and_then(|p| p.to_str())
203-
.ok_or_else(|| anyhow::Error::msg("should get the path"))?;
199+
let Ok(Some(rel_path_str)) = entry.strip_prefix(directory).map(|p| p.to_str()) else {
200+
continue;
201+
};
204202

205203
// check if the path is allowed by the configuration.
206-
should_include = should_include && path_config.allows_file(relative_path_str);
204+
should_include = should_include && path_config.allows_file(rel_path_str);
207205

208206
// do not include the git directory.
209207
if entry.starts_with(&git_directory) {
@@ -335,19 +333,16 @@ pub fn filter_files_by_diff_aware_info(
335333
directory_path: &Path,
336334
diff_aware_info: &DiffAwareData,
337335
) -> Vec<PathBuf> {
338-
let files_to_scan: HashSet<&str> =
339-
HashSet::from_iter(diff_aware_info.files.iter().map(|f| f.as_str()));
336+
let files_to_scan: HashSet<&Path> =
337+
HashSet::from_iter(diff_aware_info.files.iter().map(Path::new));
340338

341339
files
342340
.iter()
343-
.filter(|f| {
344-
let p = f
345-
.strip_prefix(directory_path)
346-
.unwrap()
347-
.to_str()
348-
.expect("path contains non-Unicode characters");
349-
350-
files_to_scan.contains(p)
341+
.filter(|file_path| {
342+
let Ok(rel_path) = file_path.strip_prefix(directory_path) else {
343+
return false;
344+
};
345+
files_to_scan.contains(rel_path)
351346
})
352347
.cloned()
353348
.collect()
@@ -915,6 +910,23 @@ mod tests {
915910
);
916911
}
917912

913+
#[cfg(target_os = "linux")]
914+
#[test]
915+
fn get_files_non_utf8_path() {
916+
let tmp = TestDir::new();
917+
918+
let valid_path = tmp.base_path().join("valid.js");
919+
fs::File::create(&valid_path).unwrap();
920+
// (0xFF isn't a valid UTF-8 byte)
921+
let invalid_path = tmp
922+
.base_path()
923+
.join(<std::ffi::OsStr as std::os::unix::ffi::OsStrExt>::from_bytes(b"\xFF.js"));
924+
fs::File::create(&invalid_path).unwrap();
925+
926+
let files = get_files(tmp.base_path(), vec![], &PathConfig::default()).unwrap();
927+
assert_eq!(files, vec![valid_path]);
928+
}
929+
918930
#[test]
919931
fn test_get_language_for_file() {
920932
// extension Java

0 commit comments

Comments
 (0)