Skip to content

Commit b6fdeb1

Browse files
committed
♻️ Improve Windows path stripping: add tests, consolidate API, fix types
- Add boundary/adversarial tests for strip_absolute_path_bsdtar (empty, separators-only, drive-only, device prefix, terminal dotdot/dot) - Add is_unsafe_link equivalent tests and bypass vector tests - Consolidate pub(crate) strip_absolute_path_bsdtar + has_parent_dir_component into single pub(crate) is_unsafe_link_path, keeping primitives module-private - Simplify RewrittenPath: remove unused lifetime, Cow<str> → String - Add doc comments with security invariants to key functions - Update stale comments on sanitize_preserve_curdir_str and cfg(windows) tests
1 parent 7beda9b commit b6fdeb1

2 files changed

Lines changed: 268 additions & 17 deletions

File tree

cli/src/command/core/path.rs

Lines changed: 267 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,22 @@ impl PathnameEditor {
122122
Some(stripped.into_owned())
123123
}
124124

125+
/// Rewrite the path by stripping Windows-style absolute prefixes (drive letters,
126+
/// UNC/API prefixes) and leading root separators, matching bsdtar's
127+
/// `strip_absolute_path()` behavior. When `absolute_paths` is true, the path
128+
/// is returned unchanged.
125129
#[inline]
126-
fn rewrite_path_for_extraction<'a>(&self, path: &'a Path) -> RewrittenPath<'a> {
130+
fn rewrite_path_for_extraction(&self, path: &Path) -> RewrittenPath {
127131
let raw = path.to_string_lossy().into_owned();
128132
if self.absolute_paths {
129133
RewrittenPath {
130-
path: Cow::Owned(raw),
134+
path: raw,
131135
had_root: false,
132136
}
133137
} else {
134-
let (path, had_root) = strip_absolute_path_bsdtar(&raw);
138+
let (stripped, had_root) = strip_absolute_path_bsdtar(&raw);
135139
RewrittenPath {
136-
path: Cow::Owned(path.into_owned()),
140+
path: stripped.into_owned(),
137141
had_root,
138142
}
139143
}
@@ -155,8 +159,12 @@ impl PathnameEditor {
155159
}
156160
}
157161

158-
struct RewrittenPath<'a> {
159-
path: Cow<'a, str>,
162+
/// Result of [`PathnameEditor::rewrite_path_for_extraction`].
163+
///
164+
/// `had_root` indicates whether any absolute prefix (root separator, drive
165+
/// letter, or Windows API prefix) was consumed during rewriting.
166+
struct RewrittenPath {
167+
path: String,
160168
had_root: bool,
161169
}
162170

@@ -174,11 +182,14 @@ fn strip_components(path: &Path, count: Option<usize>) -> Option<Cow<'_, Path>>
174182
Some(Cow::from(PathBuf::from_iter(components.skip(count))))
175183
}
176184

177-
/// CLI-side sanitization that keeps `CurDir` (`.`) and `ParentDir` (`..`) components.
185+
/// CLI-side sanitization that retains `CurDir` (`.`) and `ParentDir` (`..`)
186+
/// while stripping `RootDir` and `Prefix` components via [`std::path::Path::components`].
178187
///
179-
/// Absolute path prefix handling is performed before this stage; this function
180-
/// only normalizes `.` / `..` under the host path semantics while preserving
181-
/// backslashes as literal bytes on Unix to match bsdtar's POSIX behavior.
188+
/// Absolute path prefix stripping (Windows drive letters, UNC prefixes,
189+
/// leading `/../` sequences) is handled upstream by [`strip_absolute_path_bsdtar`];
190+
/// this function handles residual host-path normalization. On Unix,
191+
/// backslash-containing segments are treated as literal `Normal` components by
192+
/// the standard library, which preserves them as-is.
182193
fn sanitize_preserve_curdir_str(s: &str) -> String {
183194
let path = Path::new(s);
184195
join_components_forward_slash(path.components().filter(|c| {
@@ -201,7 +212,23 @@ fn sanitize_preserve_curdir_reference(reference: EntryReference) -> EntryReferen
201212
EntryReference::from_utf8_preserve_root(&sanitize_preserve_curdir_str(reference.as_str()))
202213
}
203214

204-
pub(crate) fn has_parent_dir_component(s: &str) -> bool {
215+
/// Returns `true` if the path is unsafe as a link reference.
216+
///
217+
/// A link is unsafe if it contains an absolute path component (root separator,
218+
/// drive letter, or Windows API prefix) or a parent directory (`..`) component
219+
/// under either host or Windows path semantics.
220+
pub(crate) fn is_unsafe_link_path(s: &str) -> bool {
221+
let (rewritten, had_root) = strip_absolute_path_bsdtar(s);
222+
had_root || has_parent_dir_component(rewritten.as_ref())
223+
}
224+
225+
/// Returns `true` if the path contains a `..` (parent directory) component
226+
/// under either host path semantics or Windows path semantics.
227+
///
228+
/// The dual check ensures that Windows-style `..` preceded by backslash
229+
/// separators (e.g., `..\\file`) is detected even on non-Windows hosts where
230+
/// `std::path::Path` treats backslashes as literal characters.
231+
fn has_parent_dir_component(s: &str) -> bool {
205232
Path::new(s)
206233
.components()
207234
.any(|c| matches!(c, Component::ParentDir))
@@ -210,7 +237,21 @@ pub(crate) fn has_parent_dir_component(s: &str) -> bool {
210237
.any(|c| matches!(c, Utf8WindowsComponent::ParentDir))
211238
}
212239

213-
pub(crate) fn strip_absolute_path_bsdtar(path: &str) -> (Cow<'_, str>, bool) {
240+
/// bsdtar-compatible stripping of absolute path prefixes.
241+
///
242+
/// Strips Windows API prefixes (`\\?\`, `\\.\`), UNC prefixes (`\\?\UNC\`),
243+
/// drive letters (`C:`), and leading separators (including consuming `/../`
244+
/// and `/./` sequences that follow a root).
245+
///
246+
/// Returns the remaining path after stripping and a flag indicating whether
247+
/// any prefix or root separator was consumed.
248+
///
249+
/// # Security invariant
250+
///
251+
/// This function may leave `..` components in the output (e.g., from `D:..`).
252+
/// Callers **must** check the result with [`has_parent_dir_component`] to
253+
/// detect path traversal, or use [`is_unsafe_link_path`] which combines both.
254+
fn strip_absolute_path_bsdtar(path: &str) -> (Cow<'_, str>, bool) {
214255
let mut rest = path;
215256
let mut had_root = false;
216257

@@ -646,8 +687,10 @@ mod tests {
646687
//
647688
// bsdtar's strip_absolute_path() strips Windows API prefixes, drive letters,
648689
// and leading separators. On Windows, Rust's Path::components() correctly
649-
// parses these as Prefix/RootDir components, which sanitize_preserve_curdir
650-
// filters out — producing the same result as bsdtar.
690+
// parses these as Prefix/RootDir components. The `rewrite_path_for_extraction`
691+
// method calls `strip_absolute_path_bsdtar` to handle this at the string
692+
// level before host-path normalization. The cross-platform tests below this
693+
// section cover the same scenarios without #[cfg(windows)].
651694
//
652695
// These 8 types correspond to bsdtar's test_windows.c mkfullpath() types.
653696

@@ -856,4 +899,214 @@ mod tests {
856899
assert_eq!(reference.as_str(), "etc/hosts");
857900
assert!(had_root);
858901
}
902+
903+
// --- B1: strip_absolute_path_bsdtar boundary values ---
904+
905+
#[test]
906+
fn strip_absolute_path_bsdtar_empty_string() {
907+
let (path, had_root) = strip_absolute_path_bsdtar("");
908+
assert_eq!("", path);
909+
assert!(!had_root);
910+
}
911+
912+
#[test]
913+
fn strip_absolute_path_bsdtar_single_forward_slash() {
914+
let (path, had_root) = strip_absolute_path_bsdtar("/");
915+
assert_eq!("", path);
916+
assert!(had_root);
917+
}
918+
919+
#[test]
920+
fn strip_absolute_path_bsdtar_single_backslash() {
921+
let (path, had_root) = strip_absolute_path_bsdtar("\\");
922+
assert_eq!("", path);
923+
assert!(had_root);
924+
}
925+
926+
#[test]
927+
fn strip_absolute_path_bsdtar_multiple_separators() {
928+
let (path, had_root) = strip_absolute_path_bsdtar("///");
929+
assert_eq!("", path);
930+
assert!(had_root);
931+
932+
let (path, had_root) = strip_absolute_path_bsdtar("\\\\");
933+
assert_eq!("", path);
934+
assert!(had_root);
935+
}
936+
937+
#[test]
938+
fn strip_absolute_path_bsdtar_drive_letter_only() {
939+
let (path, had_root) = strip_absolute_path_bsdtar("c:");
940+
assert_eq!("", path);
941+
assert!(had_root);
942+
}
943+
944+
#[test]
945+
fn strip_absolute_path_bsdtar_terminal_dotdot() {
946+
// bsdtar: "/.."-at-end -- p[3]=='\0' is not a separator, so only "/"
947+
// is stripped, leaving "..". The ".." is caught by has_parent_dir_component.
948+
let (path, had_root) = strip_absolute_path_bsdtar("/..");
949+
assert_eq!("..", path);
950+
assert!(had_root);
951+
952+
let (path, had_root) = strip_absolute_path_bsdtar("\\..");
953+
assert_eq!("..", path);
954+
assert!(had_root);
955+
}
956+
957+
#[test]
958+
fn strip_absolute_path_bsdtar_terminal_dot() {
959+
// bsdtar: "/."-at-end -- p[2]=='\0' is not a separator, so only "/"
960+
// is stripped, leaving ".".
961+
let (path, had_root) = strip_absolute_path_bsdtar("/.");
962+
assert_eq!(".", path);
963+
assert!(had_root);
964+
965+
let (path, had_root) = strip_absolute_path_bsdtar("\\.");
966+
assert_eq!(".", path);
967+
assert!(had_root);
968+
}
969+
970+
#[test]
971+
fn strip_absolute_path_bsdtar_prefix_exactly_4_bytes() {
972+
let (path, had_root) = strip_absolute_path_bsdtar("//?/");
973+
assert_eq!("", path);
974+
assert!(had_root);
975+
}
976+
977+
#[test]
978+
fn strip_absolute_path_bsdtar_unc_prefix_exactly_8_bytes() {
979+
let (path, had_root) = strip_absolute_path_bsdtar("//?/UNC/");
980+
assert_eq!("", path);
981+
assert!(had_root);
982+
}
983+
984+
#[test]
985+
fn strip_absolute_path_bsdtar_safe_relative_paths() {
986+
let (path, had_root) = strip_absolute_path_bsdtar("file.txt");
987+
assert_eq!("file.txt", path);
988+
assert!(!had_root);
989+
990+
let (path, had_root) = strip_absolute_path_bsdtar("a/b/c");
991+
assert_eq!("a/b/c", path);
992+
assert!(!had_root);
993+
994+
let (path, had_root) = strip_absolute_path_bsdtar("./a/b");
995+
assert_eq!("./a/b", path);
996+
assert!(!had_root);
997+
}
998+
999+
// --- B2: device prefix (\\.\) tests ---
1000+
1001+
#[test]
1002+
fn strip_absolute_path_bsdtar_device_prefix_with_drive() {
1003+
// \\.\C:\file -- device prefix (4 bytes stripped), then drive letter
1004+
let (path, had_root) = strip_absolute_path_bsdtar("\\\\.\\C:\\file");
1005+
assert_eq!("file", path);
1006+
assert!(had_root);
1007+
1008+
// //./C:/file -- forward-slash variant
1009+
let (path, had_root) = strip_absolute_path_bsdtar("//./C:/file");
1010+
assert_eq!("file", path);
1011+
assert!(had_root);
1012+
}
1013+
1014+
#[test]
1015+
fn strip_absolute_path_bsdtar_device_prefix_only() {
1016+
// \\.\ alone (4 bytes + trailing separator)
1017+
let (path, had_root) = strip_absolute_path_bsdtar("\\\\.\\");
1018+
assert_eq!("", path);
1019+
assert!(had_root);
1020+
}
1021+
1022+
#[test]
1023+
fn strip_absolute_path_bsdtar_device_prefix_with_traversal() {
1024+
// \\.\..\secret -- device prefix (4 bytes) stripped, "..\secret" remains.
1025+
// The ".." is NOT consumed because it doesn't follow a separator;
1026+
// has_parent_dir_component catches it downstream.
1027+
let (path, had_root) = strip_absolute_path_bsdtar("\\\\.\\..\\secret");
1028+
assert_eq!("..\\secret", path);
1029+
assert!(had_root);
1030+
assert!(has_parent_dir_component(path.as_ref()));
1031+
}
1032+
1033+
// --- B3+B4: is_unsafe_link equivalent (strip + has_parent_dir combined) ---
1034+
// is_unsafe_link is: had_root || has_parent_dir_component(rewritten)
1035+
// We test the same logic directly here.
1036+
1037+
#[test]
1038+
fn strip_then_parent_dir_detects_drive_dotdot_backslash() {
1039+
let (rewritten, had_root) = strip_absolute_path_bsdtar("c:..\\file");
1040+
assert!(had_root || has_parent_dir_component(rewritten.as_ref()));
1041+
}
1042+
1043+
#[test]
1044+
fn strip_then_parent_dir_detects_slash_dotdot_backslash() {
1045+
let (rewritten, had_root) = strip_absolute_path_bsdtar("/..\\file");
1046+
assert!(had_root || has_parent_dir_component(rewritten.as_ref()));
1047+
}
1048+
1049+
#[test]
1050+
fn strip_then_parent_dir_detects_drive_slash_dotdot_backslash() {
1051+
let (rewritten, had_root) = strip_absolute_path_bsdtar("c:/..\\file");
1052+
assert!(had_root || has_parent_dir_component(rewritten.as_ref()));
1053+
}
1054+
1055+
#[test]
1056+
fn strip_then_parent_dir_detects_device_prefix_dotdot() {
1057+
let (rewritten, had_root) = strip_absolute_path_bsdtar("\\\\?\\..\\file");
1058+
assert!(had_root || has_parent_dir_component(rewritten.as_ref()));
1059+
}
1060+
1061+
#[test]
1062+
fn strip_then_parent_dir_allows_safe_path() {
1063+
let (rewritten, had_root) = strip_absolute_path_bsdtar("a/b/c");
1064+
assert!(!had_root);
1065+
assert!(!has_parent_dir_component(rewritten.as_ref()));
1066+
}
1067+
1068+
/// Equivalent to is_unsafe_link logic for various link reference patterns.
1069+
fn is_unsafe_link_equivalent(s: &str) -> bool {
1070+
let (rewritten, had_root) = strip_absolute_path_bsdtar(s);
1071+
had_root || has_parent_dir_component(rewritten.as_ref())
1072+
}
1073+
1074+
#[test]
1075+
fn unsafe_link_equivalent_detects_all_unsafe_patterns() {
1076+
// Windows drive prefix
1077+
assert!(is_unsafe_link_equivalent("C:/file"));
1078+
// POSIX absolute
1079+
assert!(is_unsafe_link_equivalent("/etc/passwd"));
1080+
// Backslash parent traversal
1081+
assert!(is_unsafe_link_equivalent("..\\file"));
1082+
// Forward slash parent traversal
1083+
assert!(is_unsafe_link_equivalent("../file"));
1084+
// Windows UNC with embedded dotdot
1085+
assert!(is_unsafe_link_equivalent("\\\\?\\UNC\\..\\file"));
1086+
// Drive-relative with dotdot
1087+
assert!(is_unsafe_link_equivalent("D:../file"));
1088+
}
1089+
1090+
#[test]
1091+
fn unsafe_link_equivalent_allows_safe_patterns() {
1092+
assert!(!is_unsafe_link_equivalent("a/b/c"));
1093+
assert!(!is_unsafe_link_equivalent("file.txt"));
1094+
assert!(!is_unsafe_link_equivalent("./a/b"));
1095+
}
1096+
1097+
#[test]
1098+
fn has_parent_dir_component_adversarial_cases() {
1099+
// mid-path backslash dotdot
1100+
assert!(has_parent_dir_component("foo\\..\\bar"));
1101+
// standalone dotdot
1102+
assert!(has_parent_dir_component(".."));
1103+
// dotdot with mixed separators
1104+
assert!(has_parent_dir_component("a/b\\../c"));
1105+
// trailing separator after dotdot
1106+
assert!(has_parent_dir_component("../"));
1107+
// NOT parent dir: "..name" is a normal component
1108+
assert!(!has_parent_dir_component("a/..name"));
1109+
// NOT parent dir: "..." is a normal component
1110+
assert!(!has_parent_dir_component("a/.../b"));
1111+
}
8591112
}

cli/src/command/extract.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,7 +1732,5 @@ fn search_group(name: &str, id: u64) -> io::Result<Group> {
17321732

17331733
#[inline]
17341734
fn is_unsafe_link(reference: &EntryReference) -> bool {
1735-
let (rewritten, had_root) =
1736-
crate::command::core::path::strip_absolute_path_bsdtar(reference.as_str());
1737-
had_root || crate::command::core::path::has_parent_dir_component(rewritten.as_ref())
1735+
crate::command::core::path::is_unsafe_link_path(reference.as_str())
17381736
}

0 commit comments

Comments
 (0)