@@ -122,18 +122,22 @@ impl PathnameEditor {
122122 Some ( stripped. into_owned ( ) )
123123 }
124124
125+ /// Rewrite the path by stripping Windows-style absolute prefixes (drive letters,
126+ /// UNC/API prefixes) and leading root separators, matching bsdtar's
127+ /// `strip_absolute_path()` behavior. When `absolute_paths` is true, the path
128+ /// is returned unchanged.
125129 #[ inline]
126- fn rewrite_path_for_extraction < ' a > ( & self , path : & ' a Path ) -> RewrittenPath < ' a > {
130+ fn rewrite_path_for_extraction ( & self , path : & Path ) -> RewrittenPath {
127131 let raw = path. to_string_lossy ( ) . into_owned ( ) ;
128132 if self . absolute_paths {
129133 RewrittenPath {
130- path : Cow :: Owned ( raw) ,
134+ path : raw,
131135 had_root : false ,
132136 }
133137 } else {
134- let ( path , had_root) = strip_absolute_path_bsdtar ( & raw ) ;
138+ let ( stripped , had_root) = strip_absolute_path_bsdtar ( & raw ) ;
135139 RewrittenPath {
136- path : Cow :: Owned ( path . into_owned ( ) ) ,
140+ path : stripped . into_owned ( ) ,
137141 had_root,
138142 }
139143 }
@@ -155,8 +159,12 @@ impl PathnameEditor {
155159 }
156160}
157161
158- struct RewrittenPath < ' a > {
159- path : Cow < ' a , str > ,
162+ /// Result of [`PathnameEditor::rewrite_path_for_extraction`].
163+ ///
164+ /// `had_root` indicates whether any absolute prefix (root separator, drive
165+ /// letter, or Windows API prefix) was consumed during rewriting.
166+ struct RewrittenPath {
167+ path : String ,
160168 had_root : bool ,
161169}
162170
@@ -174,11 +182,14 @@ fn strip_components(path: &Path, count: Option<usize>) -> Option<Cow<'_, Path>>
174182 Some ( Cow :: from ( PathBuf :: from_iter ( components. skip ( count) ) ) )
175183}
176184
177- /// CLI-side sanitization that keeps `CurDir` (`.`) and `ParentDir` (`..`) components.
185+ /// CLI-side sanitization that retains `CurDir` (`.`) and `ParentDir` (`..`)
186+ /// while stripping `RootDir` and `Prefix` components via [`std::path::Path::components`].
178187///
179- /// Absolute path prefix handling is performed before this stage; this function
180- /// only normalizes `.` / `..` under the host path semantics while preserving
181- /// backslashes as literal bytes on Unix to match bsdtar's POSIX behavior.
188+ /// Absolute path prefix stripping (Windows drive letters, UNC prefixes,
189+ /// leading `/../` sequences) is handled upstream by [`strip_absolute_path_bsdtar`];
190+ /// this function handles residual host-path normalization. On Unix,
191+ /// backslash-containing segments are treated as literal `Normal` components by
192+ /// the standard library, which preserves them as-is.
182193fn sanitize_preserve_curdir_str ( s : & str ) -> String {
183194 let path = Path :: new ( s) ;
184195 join_components_forward_slash ( path. components ( ) . filter ( |c| {
@@ -201,7 +212,23 @@ fn sanitize_preserve_curdir_reference(reference: EntryReference) -> EntryReferen
201212 EntryReference :: from_utf8_preserve_root ( & sanitize_preserve_curdir_str ( reference. as_str ( ) ) )
202213}
203214
204- pub ( crate ) fn has_parent_dir_component ( s : & str ) -> bool {
215+ /// Returns `true` if the path is unsafe as a link reference.
216+ ///
217+ /// A link is unsafe if it contains an absolute path component (root separator,
218+ /// drive letter, or Windows API prefix) or a parent directory (`..`) component
219+ /// under either host or Windows path semantics.
220+ pub ( crate ) fn is_unsafe_link_path ( s : & str ) -> bool {
221+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( s) ;
222+ had_root || has_parent_dir_component ( rewritten. as_ref ( ) )
223+ }
224+
225+ /// Returns `true` if the path contains a `..` (parent directory) component
226+ /// under either host path semantics or Windows path semantics.
227+ ///
228+ /// The dual check ensures that Windows-style `..` preceded by backslash
229+ /// separators (e.g., `..\\file`) is detected even on non-Windows hosts where
230+ /// `std::path::Path` treats backslashes as literal characters.
231+ fn has_parent_dir_component ( s : & str ) -> bool {
205232 Path :: new ( s)
206233 . components ( )
207234 . any ( |c| matches ! ( c, Component :: ParentDir ) )
@@ -210,7 +237,21 @@ pub(crate) fn has_parent_dir_component(s: &str) -> bool {
210237 . any ( |c| matches ! ( c, Utf8WindowsComponent :: ParentDir ) )
211238}
212239
213- pub ( crate ) fn strip_absolute_path_bsdtar ( path : & str ) -> ( Cow < ' _ , str > , bool ) {
240+ /// bsdtar-compatible stripping of absolute path prefixes.
241+ ///
242+ /// Strips Windows API prefixes (`\\?\`, `\\.\`), UNC prefixes (`\\?\UNC\`),
243+ /// drive letters (`C:`), and leading separators (including consuming `/../`
244+ /// and `/./` sequences that follow a root).
245+ ///
246+ /// Returns the remaining path after stripping and a flag indicating whether
247+ /// any prefix or root separator was consumed.
248+ ///
249+ /// # Security invariant
250+ ///
251+ /// This function may leave `..` components in the output (e.g., from `D:..`).
252+ /// Callers **must** check the result with [`has_parent_dir_component`] to
253+ /// detect path traversal, or use [`is_unsafe_link_path`] which combines both.
254+ fn strip_absolute_path_bsdtar ( path : & str ) -> ( Cow < ' _ , str > , bool ) {
214255 let mut rest = path;
215256 let mut had_root = false ;
216257
@@ -646,8 +687,10 @@ mod tests {
646687 //
647688 // bsdtar's strip_absolute_path() strips Windows API prefixes, drive letters,
648689 // and leading separators. On Windows, Rust's Path::components() correctly
649- // parses these as Prefix/RootDir components, which sanitize_preserve_curdir
650- // filters out — producing the same result as bsdtar.
690+ // parses these as Prefix/RootDir components. The `rewrite_path_for_extraction`
691+ // method calls `strip_absolute_path_bsdtar` to handle this at the string
692+ // level before host-path normalization. The cross-platform tests below this
693+ // section cover the same scenarios without #[cfg(windows)].
651694 //
652695 // These 8 types correspond to bsdtar's test_windows.c mkfullpath() types.
653696
@@ -856,4 +899,214 @@ mod tests {
856899 assert_eq ! ( reference. as_str( ) , "etc/hosts" ) ;
857900 assert ! ( had_root) ;
858901 }
902+
903+ // --- B1: strip_absolute_path_bsdtar boundary values ---
904+
905+ #[ test]
906+ fn strip_absolute_path_bsdtar_empty_string ( ) {
907+ let ( path, had_root) = strip_absolute_path_bsdtar ( "" ) ;
908+ assert_eq ! ( "" , path) ;
909+ assert ! ( !had_root) ;
910+ }
911+
912+ #[ test]
913+ fn strip_absolute_path_bsdtar_single_forward_slash ( ) {
914+ let ( path, had_root) = strip_absolute_path_bsdtar ( "/" ) ;
915+ assert_eq ! ( "" , path) ;
916+ assert ! ( had_root) ;
917+ }
918+
919+ #[ test]
920+ fn strip_absolute_path_bsdtar_single_backslash ( ) {
921+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ " ) ;
922+ assert_eq ! ( "" , path) ;
923+ assert ! ( had_root) ;
924+ }
925+
926+ #[ test]
927+ fn strip_absolute_path_bsdtar_multiple_separators ( ) {
928+ let ( path, had_root) = strip_absolute_path_bsdtar ( "///" ) ;
929+ assert_eq ! ( "" , path) ;
930+ assert ! ( had_root) ;
931+
932+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ \\ " ) ;
933+ assert_eq ! ( "" , path) ;
934+ assert ! ( had_root) ;
935+ }
936+
937+ #[ test]
938+ fn strip_absolute_path_bsdtar_drive_letter_only ( ) {
939+ let ( path, had_root) = strip_absolute_path_bsdtar ( "c:" ) ;
940+ assert_eq ! ( "" , path) ;
941+ assert ! ( had_root) ;
942+ }
943+
944+ #[ test]
945+ fn strip_absolute_path_bsdtar_terminal_dotdot ( ) {
946+ // bsdtar: "/.."-at-end -- p[3]=='\0' is not a separator, so only "/"
947+ // is stripped, leaving "..". The ".." is caught by has_parent_dir_component.
948+ let ( path, had_root) = strip_absolute_path_bsdtar ( "/.." ) ;
949+ assert_eq ! ( ".." , path) ;
950+ assert ! ( had_root) ;
951+
952+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ .." ) ;
953+ assert_eq ! ( ".." , path) ;
954+ assert ! ( had_root) ;
955+ }
956+
957+ #[ test]
958+ fn strip_absolute_path_bsdtar_terminal_dot ( ) {
959+ // bsdtar: "/."-at-end -- p[2]=='\0' is not a separator, so only "/"
960+ // is stripped, leaving ".".
961+ let ( path, had_root) = strip_absolute_path_bsdtar ( "/." ) ;
962+ assert_eq ! ( "." , path) ;
963+ assert ! ( had_root) ;
964+
965+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ ." ) ;
966+ assert_eq ! ( "." , path) ;
967+ assert ! ( had_root) ;
968+ }
969+
970+ #[ test]
971+ fn strip_absolute_path_bsdtar_prefix_exactly_4_bytes ( ) {
972+ let ( path, had_root) = strip_absolute_path_bsdtar ( "//?/" ) ;
973+ assert_eq ! ( "" , path) ;
974+ assert ! ( had_root) ;
975+ }
976+
977+ #[ test]
978+ fn strip_absolute_path_bsdtar_unc_prefix_exactly_8_bytes ( ) {
979+ let ( path, had_root) = strip_absolute_path_bsdtar ( "//?/UNC/" ) ;
980+ assert_eq ! ( "" , path) ;
981+ assert ! ( had_root) ;
982+ }
983+
984+ #[ test]
985+ fn strip_absolute_path_bsdtar_safe_relative_paths ( ) {
986+ let ( path, had_root) = strip_absolute_path_bsdtar ( "file.txt" ) ;
987+ assert_eq ! ( "file.txt" , path) ;
988+ assert ! ( !had_root) ;
989+
990+ let ( path, had_root) = strip_absolute_path_bsdtar ( "a/b/c" ) ;
991+ assert_eq ! ( "a/b/c" , path) ;
992+ assert ! ( !had_root) ;
993+
994+ let ( path, had_root) = strip_absolute_path_bsdtar ( "./a/b" ) ;
995+ assert_eq ! ( "./a/b" , path) ;
996+ assert ! ( !had_root) ;
997+ }
998+
999+ // --- B2: device prefix (\\.\) tests ---
1000+
1001+ #[ test]
1002+ fn strip_absolute_path_bsdtar_device_prefix_with_drive ( ) {
1003+ // \\.\C:\file -- device prefix (4 bytes stripped), then drive letter
1004+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ \\ .\\ C:\\ file" ) ;
1005+ assert_eq ! ( "file" , path) ;
1006+ assert ! ( had_root) ;
1007+
1008+ // //./C:/file -- forward-slash variant
1009+ let ( path, had_root) = strip_absolute_path_bsdtar ( "//./C:/file" ) ;
1010+ assert_eq ! ( "file" , path) ;
1011+ assert ! ( had_root) ;
1012+ }
1013+
1014+ #[ test]
1015+ fn strip_absolute_path_bsdtar_device_prefix_only ( ) {
1016+ // \\.\ alone (4 bytes + trailing separator)
1017+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ \\ .\\ " ) ;
1018+ assert_eq ! ( "" , path) ;
1019+ assert ! ( had_root) ;
1020+ }
1021+
1022+ #[ test]
1023+ fn strip_absolute_path_bsdtar_device_prefix_with_traversal ( ) {
1024+ // \\.\..\secret -- device prefix (4 bytes) stripped, "..\secret" remains.
1025+ // The ".." is NOT consumed because it doesn't follow a separator;
1026+ // has_parent_dir_component catches it downstream.
1027+ let ( path, had_root) = strip_absolute_path_bsdtar ( "\\ \\ .\\ ..\\ secret" ) ;
1028+ assert_eq ! ( "..\\ secret" , path) ;
1029+ assert ! ( had_root) ;
1030+ assert ! ( has_parent_dir_component( path. as_ref( ) ) ) ;
1031+ }
1032+
1033+ // --- B3+B4: is_unsafe_link equivalent (strip + has_parent_dir combined) ---
1034+ // is_unsafe_link is: had_root || has_parent_dir_component(rewritten)
1035+ // We test the same logic directly here.
1036+
1037+ #[ test]
1038+ fn strip_then_parent_dir_detects_drive_dotdot_backslash ( ) {
1039+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( "c:..\\ file" ) ;
1040+ assert ! ( had_root || has_parent_dir_component( rewritten. as_ref( ) ) ) ;
1041+ }
1042+
1043+ #[ test]
1044+ fn strip_then_parent_dir_detects_slash_dotdot_backslash ( ) {
1045+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( "/..\\ file" ) ;
1046+ assert ! ( had_root || has_parent_dir_component( rewritten. as_ref( ) ) ) ;
1047+ }
1048+
1049+ #[ test]
1050+ fn strip_then_parent_dir_detects_drive_slash_dotdot_backslash ( ) {
1051+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( "c:/..\\ file" ) ;
1052+ assert ! ( had_root || has_parent_dir_component( rewritten. as_ref( ) ) ) ;
1053+ }
1054+
1055+ #[ test]
1056+ fn strip_then_parent_dir_detects_device_prefix_dotdot ( ) {
1057+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( "\\ \\ ?\\ ..\\ file" ) ;
1058+ assert ! ( had_root || has_parent_dir_component( rewritten. as_ref( ) ) ) ;
1059+ }
1060+
1061+ #[ test]
1062+ fn strip_then_parent_dir_allows_safe_path ( ) {
1063+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( "a/b/c" ) ;
1064+ assert ! ( !had_root) ;
1065+ assert ! ( !has_parent_dir_component( rewritten. as_ref( ) ) ) ;
1066+ }
1067+
1068+ /// Equivalent to is_unsafe_link logic for various link reference patterns.
1069+ fn is_unsafe_link_equivalent ( s : & str ) -> bool {
1070+ let ( rewritten, had_root) = strip_absolute_path_bsdtar ( s) ;
1071+ had_root || has_parent_dir_component ( rewritten. as_ref ( ) )
1072+ }
1073+
1074+ #[ test]
1075+ fn unsafe_link_equivalent_detects_all_unsafe_patterns ( ) {
1076+ // Windows drive prefix
1077+ assert ! ( is_unsafe_link_equivalent( "C:/file" ) ) ;
1078+ // POSIX absolute
1079+ assert ! ( is_unsafe_link_equivalent( "/etc/passwd" ) ) ;
1080+ // Backslash parent traversal
1081+ assert ! ( is_unsafe_link_equivalent( "..\\ file" ) ) ;
1082+ // Forward slash parent traversal
1083+ assert ! ( is_unsafe_link_equivalent( "../file" ) ) ;
1084+ // Windows UNC with embedded dotdot
1085+ assert ! ( is_unsafe_link_equivalent( "\\ \\ ?\\ UNC\\ ..\\ file" ) ) ;
1086+ // Drive-relative with dotdot
1087+ assert ! ( is_unsafe_link_equivalent( "D:../file" ) ) ;
1088+ }
1089+
1090+ #[ test]
1091+ fn unsafe_link_equivalent_allows_safe_patterns ( ) {
1092+ assert ! ( !is_unsafe_link_equivalent( "a/b/c" ) ) ;
1093+ assert ! ( !is_unsafe_link_equivalent( "file.txt" ) ) ;
1094+ assert ! ( !is_unsafe_link_equivalent( "./a/b" ) ) ;
1095+ }
1096+
1097+ #[ test]
1098+ fn has_parent_dir_component_adversarial_cases ( ) {
1099+ // mid-path backslash dotdot
1100+ assert ! ( has_parent_dir_component( "foo\\ ..\\ bar" ) ) ;
1101+ // standalone dotdot
1102+ assert ! ( has_parent_dir_component( ".." ) ) ;
1103+ // dotdot with mixed separators
1104+ assert ! ( has_parent_dir_component( "a/b\\ ../c" ) ) ;
1105+ // trailing separator after dotdot
1106+ assert ! ( has_parent_dir_component( "../" ) ) ;
1107+ // NOT parent dir: "..name" is a normal component
1108+ assert ! ( !has_parent_dir_component( "a/..name" ) ) ;
1109+ // NOT parent dir: "..." is a normal component
1110+ assert ! ( !has_parent_dir_component( "a/.../b" ) ) ;
1111+ }
8591112}
0 commit comments