@@ -28,29 +28,6 @@ pub fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Resu
2828 }
2929 #[ cfg( windows) ]
3030 fn inner ( original : & Path , link : & Path ) -> io:: Result < ( ) > {
31- use std:: borrow:: Cow ;
32- use std:: ffi:: OsString ;
33- use std:: os:: windows:: ffi:: { OsStrExt , OsStringExt } ;
34- use std:: path:: PathBuf ;
35-
36- fn normalize_windows_separators ( path : & Path ) -> Cow < ' _ , Path > {
37- let wide: Vec < u16 > = path. as_os_str ( ) . encode_wide ( ) . collect ( ) ;
38- if !wide. iter ( ) . any ( |& unit| unit == u16:: from ( b'/' ) ) {
39- return Cow :: Borrowed ( path) ;
40- }
41- let normalized = wide
42- . into_iter ( )
43- . map ( |unit| {
44- if unit == u16:: from ( b'/' ) {
45- u16:: from ( b'\\' )
46- } else {
47- unit
48- }
49- } )
50- . collect :: < Vec < _ > > ( ) ;
51- Cow :: Owned ( PathBuf :: from ( OsString :: from_wide ( & normalized) ) )
52- }
53-
5431 let original = normalize_windows_separators ( original) ;
5532 let link = normalize_windows_separators ( link) ;
5633 // Symlink targets are resolved relative to the link's parent directory,
@@ -77,6 +54,37 @@ pub fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(original: P, link: Q) -> io::Resu
7754 inner ( original. as_ref ( ) , link. as_ref ( ) )
7855}
7956
/// Rewrites every `/` separator in `path` as `\` for use with Windows path APIs.
///
/// Windows symlink reparse points store the target verbatim; non-canonical
/// `/` separators break resolution under `\\?\` extended-length paths and
/// confuse downstream tools that read the reparse buffer (e.g. bsdtar,
/// GNU tar, 7-Zip all normalize on extract).
///
/// The rewrite works on UTF-16 code units rather than on `str`, so paths that
/// are not valid Unicode (WTF-16, e.g. lone surrogates) keep every unit other
/// than `/` exactly as it was.
///
/// Returns `Cow::Borrowed(path)` without allocating when `path` contains no
/// `/`; otherwise returns an owned `PathBuf` holding the rewritten path.
#[cfg(windows)]
fn normalize_windows_separators(path: &Path) -> std::borrow::Cow<'_, Path> {
    use std::borrow::Cow;
    use std::ffi::OsString;
    use std::os::windows::ffi::{OsStrExt, OsStringExt};
    use std::path::PathBuf;

    let slash = u16::from(b'/');
    let backslash = u16::from(b'\\');
    let os_str = path.as_os_str();

    // Scan the lazy UTF-16 iterator first: the common "already normalized"
    // case must stay allocation-free, which is the whole point of returning Cow.
    if !os_str.encode_wide().any(|unit| unit == slash) {
        return Cow::Borrowed(path);
    }

    // At least one `/` is present: build the rewritten unit sequence once.
    let normalized: Vec<u16> = os_str
        .encode_wide()
        .map(|unit| if unit == slash { backslash } else { unit })
        .collect();
    Cow::Owned(PathBuf::from(OsString::from_wide(&normalized)))
}

87+
8088/// Removes a path by dispatching based on file type.
8189///
8290/// - Symlinks: removed via `remove_file` (or `remove_dir` for directory symlinks on Windows)
@@ -170,3 +178,78 @@ pub fn remove_path_all<P: AsRef<Path>>(path: P) -> io::Result<()> {
170178pub fn remove_path < P : AsRef < Path > > ( path : P ) -> io:: Result < ( ) > {
171179 remove_path_with ( path. as_ref ( ) , fs:: remove_dir)
172180}
181+
/// Unit tests for `normalize_windows_separators` (Windows only).
///
/// Separator assertions compare raw UTF-16 code units instead of `Path`
/// values: `Path` equality is component-based and, on Windows, treats `/` and
/// `\` as the same separator, so `Path::new("a/b") == Path::new(r"a\b")`
/// holds even when no rewriting happened.
#[cfg(all(test, windows))]
mod windows_tests {
    use super::normalize_windows_separators;
    use std::borrow::Cow;
    use std::ffi::OsString;
    use std::os::windows::ffi::{OsStrExt, OsStringExt};
    use std::path::{Path, PathBuf};

    /// Returns the exact UTF-16 code units backing `path`.
    fn wide_units_of(path: &Path) -> Vec<u16> {
        path.as_os_str().encode_wide().collect()
    }

    /// Asserts that normalizing `input` yields exactly the units of `expected`.
    fn assert_normalizes_to(input: &str, expected: &str) {
        let result = normalize_windows_separators(Path::new(input));
        assert_eq!(
            wide_units_of(result.as_ref()),
            wide_units_of(Path::new(expected)),
            "normalizing {input:?} should yield {expected:?}"
        );
    }

    #[test]
    fn returns_borrowed_when_no_forward_slash() {
        let input = Path::new(r"foo\bar\baz");
        let result = normalize_windows_separators(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert_eq!(wide_units_of(result.as_ref()), wide_units_of(input));
    }

    #[test]
    fn converts_basic_forward_slash_to_backslash() {
        let result = normalize_windows_separators(Path::new("foo/bar"));
        assert!(matches!(result, Cow::Owned(_)));
        assert_eq!(
            wide_units_of(result.as_ref()),
            wide_units_of(Path::new(r"foo\bar"))
        );
    }

    #[test]
    fn preserves_existing_backslashes_in_mixed_input() {
        assert_normalizes_to(r"a/b\c/d", r"a\b\c\d");
    }

    #[test]
    fn empty_path_returns_borrowed() {
        let input = Path::new("");
        let result = normalize_windows_separators(input);
        assert!(matches!(result, Cow::Borrowed(_)));
        assert!(wide_units_of(result.as_ref()).is_empty());
    }

    #[test]
    fn single_forward_slash_is_converted() {
        assert_normalizes_to("/", r"\");
    }

    #[test]
    fn extended_length_path_with_forward_slashes_is_normalized() {
        // \\?\ prefix rejects forward slashes at resolution time; this is the
        // headline regression case the caller relies on this helper to fix.
        assert_normalizes_to(r"\\?\C:/foo/bar", r"\\?\C:\foo\bar");
    }

    #[test]
    fn lone_surrogate_is_preserved_while_slash_is_converted() {
        // 0xD800 is a lone high surrogate (valid only in WTF-16). The helper
        // must round-trip it unit-for-unit while still rewriting U+002F.
        let units: [u16; 3] = [0xD800, u16::from(b'/'), u16::from(b'a')];
        let input = PathBuf::from(OsString::from_wide(&units));
        let result = normalize_windows_separators(&input);
        assert_eq!(
            wide_units_of(result.as_ref()),
            vec![0xD800, u16::from(b'\\'), u16::from(b'a')]
        );
    }

    #[test]
    fn unicode_characters_are_preserved() {
        assert_normalizes_to("日本語/フォルダ", r"日本語\フォルダ");
    }
}
0 commit comments