Skip to content

Commit 6bdd52d

Browse files
ROX-31266: Implement tests with valid and invalid utf 8 strings (#251)
1 parent 165a21c commit 6bdd52d

10 files changed

Lines changed: 569 additions & 18 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ uuid = { version = "1.17.0", features = ["v4"] }
4646
bindgen = "0.72.0"
4747
tempfile = { version = "3.20.0", default-features = false }
4848
yaml-rust2 = "0.11.0"
49+
regex = "1.11.1"
4950

5051
[profile.release]
5152
debug = "line-tables-only"

fact/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ fact-ebpf = { path = "../fact-ebpf" }
3535

3636
[dev-dependencies]
3737
tempfile = { workspace = true }
38+
regex = { workspace = true }
3839

3940
[build-dependencies]
4041
anyhow = { workspace = true }

fact/src/event/mod.rs

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,3 +358,193 @@ impl From<ChownFileData> for fact_api::FileOwnershipChange {
358358
}
359359
}
360360
}
361+
362+
#[cfg(test)]
363+
mod test_utils {
364+
use std::os::raw::c_char;
365+
366+
/// Helper function to convert raw bytes to a c_char array for testing.
///
/// The returned array starts out zero-filled and receives at most `N - 1`
/// bytes from the front of `bytes`, so for any `N > 0` the last element is
/// always `0` and the buffer stays NUL-terminated. Input longer than that is
/// silently truncated. For `N == 0` an empty array is returned.
pub fn bytes_to_c_char_array<const N: usize>(bytes: &[u8]) -> [c_char; N] {
    let mut array = [0 as c_char; N];
    // `saturating_sub` keeps `N == 0` from underflowing: a plain `N - 1`
    // panics in debug builds and wraps to `usize::MAX` in release builds.
    let len = bytes.len().min(N.saturating_sub(1));
    for (slot, &byte) in array.iter_mut().zip(&bytes[..len]) {
        // `c_char` is `i8` or `u8` depending on the target; the cast keeps the
        // bit pattern so bytes >= 0x80 survive unchanged.
        *slot = byte as c_char;
    }
    array
}
375+
376+
/// Helper function to convert a Rust string to a c_char array for testing
377+
pub fn string_to_c_char_array<const N: usize>(s: &str) -> [c_char; N] {
378+
bytes_to_c_char_array(s.as_bytes())
379+
}
380+
}
381+
382+
#[cfg(test)]
383+
mod tests {
384+
use super::test_utils::*;
385+
use super::*;
386+
387+
/// `slice_to_string` must hand back the original text for valid UTF-8 input
/// drawn from several scripts, each stored in a `PATH_MAX`-sized buffer.
#[test]
fn slice_to_string_valid_utf8() {
    let cases = [
        ("hello", "ASCII"),
        ("café", "French"),
        ("файл", "Cyrillic"),
        ("测试文件", "Chinese"),
        ("test🚀file", "Emoji"),
        ("test-файл-测试-🐛.txt", "Mixed Unicode"),
        ("ملف", "Arabic"),
        ("קובץ", "Hebrew"),
        ("ファイル", "Japanese"),
    ];

    for &(text, label) in &cases {
        let buffer = string_to_c_char_array::<{ PATH_MAX as usize }>(text);
        let decoded = slice_to_string(&buffer).unwrap();
        assert_eq!(decoded, text, "Failed for {}", label);
    }
}
411+
412+
/// `slice_to_string` must return an error for every byte sequence below,
/// none of which is well-formed UTF-8.
#[test]
fn slice_to_string_invalid_utf8() {
    let cases: &[(&[u8], &str)] = &[
        (&[0xFF, 0xFE, 0xFD], "Invalid continuation bytes"),
        (b"test\xE2", "Truncated multi-byte sequence"),
        (&[0xC0, 0x80], "Overlong encoding"),
        (b"hello\x80world", "Invalid start byte"),
        (&[0x80], "Lone continuation byte"),
        (b"test\xFF\xFE", "Mixed valid and invalid bytes"),
    ];

    cases.iter().for_each(|&(raw, label)| {
        let buffer = bytes_to_c_char_array::<{ PATH_MAX as usize }>(raw);
        let outcome = slice_to_string(&buffer);
        assert!(outcome.is_err(), "Should fail for {}", label);
    });
}
432+
433+
/// `sanitize_d_path` must leave valid UTF-8 paths untouched, whatever script
/// the file name is written in.
#[test]
fn sanitize_d_path_valid_utf8() {
    // (input path, expected path, case label)
    let cases = [
        ("/etc/test", "/etc/test", "ASCII"),
        ("/tmp/файл.txt", "/tmp/файл.txt", "Cyrillic"),
        (
            "/home/user/测试文件.log",
            "/home/user/测试文件.log",
            "Chinese",
        ),
        ("/data/🚀rocket.dat", "/data/🚀rocket.dat", "Emoji"),
        (
            "/var/log/app-данные-数据-🐛.log",
            "/var/log/app-данные-数据-🐛.log",
            "Mixed Unicode",
        ),
        ("/home/ملف.txt", "/home/ملف.txt", "Arabic"),
        ("/opt/ファイル.conf", "/opt/ファイル.conf", "Japanese"),
    ];

    for &(raw_path, wanted, label) in &cases {
        let buffer = string_to_c_char_array::<{ PATH_MAX as usize }>(raw_path);
        let actual = sanitize_d_path(&buffer);
        assert_eq!(actual, PathBuf::from(wanted), "Failed for {}", label);
    }
}
463+
464+
/// `sanitize_d_path` must strip a trailing " (deleted)" marker and leave
/// paths without the marker unchanged.
#[test]
fn sanitize_d_path_deleted_suffix() {
    // (input path, expected path, case label)
    let cases = [
        (
            "/tmp/test.txt (deleted)",
            "/tmp/test.txt",
            "ASCII with deleted suffix",
        ),
        (
            "/tmp/файл.txt (deleted)",
            "/tmp/файл.txt",
            "Unicode with deleted suffix",
        ),
        ("/etc/config.yaml", "/etc/config.yaml", "No deleted suffix"),
        (
            "/var/log/app/debug.log (deleted)",
            "/var/log/app/debug.log",
            "Nested path with deleted suffix",
        ),
    ];

    cases.iter().for_each(|&(raw_path, wanted, label)| {
        let buffer = string_to_c_char_array::<{ PATH_MAX as usize }>(raw_path);
        let actual = sanitize_d_path(&buffer);
        assert_eq!(actual, PathBuf::from(wanted), "Failed for {}", label);
    });
}
495+
496+
/// For paths containing malformed UTF-8, the lossy string form of
/// `sanitize_d_path`'s result must match a pattern in which the bad bytes
/// appear as U+FFFD replacement characters and the valid parts are intact.
#[test]
fn sanitize_d_path_invalid_utf8() {
    use regex::Regex;

    // (raw path bytes, regex the sanitized path must match, case label)
    let cases: &[(&[u8], &str, &str)] = &[
        (
            b"/tmp/\xFF\xFE.txt",
            r"^/tmp/\u{FFFD}+\.txt$",
            "Invalid continuation bytes",
        ),
        (
            b"/var/test\xE2\x80",
            r"^/var/test\u{FFFD}+$",
            "Truncated multi-byte sequence",
        ),
        (
            b"/home/file\x80.log",
            r"^/home/file\u{FFFD}\.log$",
            "Invalid start byte",
        ),
        (
            b"/tmp/\xD1\x84\xFF\xD0\xBB.txt",
            r"^/tmp/ф\u{FFFD}л\.txt$",
            "Mixed valid and invalid UTF-8",
        ),
    ];

    for &(raw, pattern, label) in cases {
        let buffer = bytes_to_c_char_array::<{ PATH_MAX as usize }>(raw);
        let sanitized = sanitize_d_path(&buffer);
        let rendered = sanitized.to_string_lossy();

        // Every case has its own pattern, so the regex is built per iteration.
        let matcher = Regex::new(pattern).expect("Invalid regex pattern");
        let matched = matcher.is_match(&rendered);
        assert!(
            matched,
            "Failed for {}: expected pattern '{}', got '{}'",
            label,
            pattern,
            rendered
        );
    }
}
538+
539+
/// A path with both malformed UTF-8 and a trailing " (deleted)" marker must
/// keep its "/tmp/" part, lose the marker, and contain a U+FFFD replacement
/// character.
#[test]
fn sanitize_d_path_invalid_utf8_with_deleted_suffix() {
    let raw: &[u8] = b"/tmp/\xFF\xFE (deleted)";
    let buffer = bytes_to_c_char_array::<{ PATH_MAX as usize }>(raw);
    let sanitized = sanitize_d_path(&buffer);
    let rendered = sanitized.to_string_lossy();

    assert!(rendered.contains("/tmp/"));
    assert!(!rendered.ends_with(" (deleted)"));
    assert!(rendered.contains('\u{FFFD}'));
}
550+
}

0 commit comments

Comments
 (0)