Skip to content

Commit 01fa883

Browse files
authored
Add function to strip trailing path separators and preserve root paths (microsoft#279)
Fixes microsoft#278
1 parent 4b67cdd commit 01fa883

File tree

1 file changed

+179
-9
lines changed

1 file changed

+179
-9
lines changed

crates/pet-fs/src/path.rs

Lines changed: 179 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,97 @@
33

44
use std::{
55
env,
6-
path::{Path, PathBuf},
6+
path::{Path, PathBuf, MAIN_SEPARATOR},
77
};
88

9-
// Normalizes the case of a path on Windows without resolving junctions/symlinks.
10-
// Uses GetLongPathNameW which normalizes case but preserves junction paths.
11-
// For unix, this is a noop.
12-
// Note: On Windows, case normalization only works for existing paths. For non-existent
13-
// paths, the function falls back to the absolute path without case normalization.
14-
// See: https://github.com/microsoft/python-environment-tools/issues/186
9+
/// Strips trailing path separators from a path, preserving root paths.
///
/// Trailing `/` characters (and, on Windows, trailing `\` characters as well) are removed.
/// The result is never shortened below the length of a root path: one byte on Unix (`/`)
/// and three bytes on Windows (`C:\`).
///
/// # Examples
/// - `/home/user/` → `/home/user`
/// - `/home/user///` → `/home/user`
/// - `C:\Users\` → `C:\Users`
/// - `/` → `/` (preserved)
/// - `C:\` → `C:\` (preserved)
///
/// # Platform notes
/// - **Unix**: works on the raw bytes of the path, so file names that are not valid UTF-8
///   are returned byte-for-byte (minus the stripped separators).
/// - **Windows**: the path is converted with `to_string_lossy()`, so a path containing
///   unpaired surrogates is returned with those replaced by U+FFFD.
/// - **Other targets**: falls back to the Unix rule (`MAIN_SEPARATOR`, keep at least one
///   character) on the lossy string form, so the function still compiles there.
///
/// # Use Cases
/// Use this when path data comes from external sources that may include trailing separators:
/// - Windows Registry entries (e.g., `C:\...\x64\`)
/// - Configuration files (environments.txt, .condarc)
/// - Environment variables
///
/// # Related
/// - `norm_case()` - Full path normalization (includes trailing separator stripping on Windows)
pub fn strip_trailing_separator<P: AsRef<Path>>(path: P) -> PathBuf {
    let path = path.as_ref();

    #[cfg(windows)]
    {
        // Anything of three bytes or fewer is left alone so that drive roots such as
        // "C:\" keep their separator.
        let mut result = path.to_string_lossy().into_owned();
        while result.len() > 3 && (result.ends_with('\\') || result.ends_with('/')) {
            result.pop();
        }
        PathBuf::from(result)
    }

    #[cfg(unix)]
    {
        use std::ffi::OsStr;
        use std::os::unix::ffi::OsStrExt;

        // Trim on the raw bytes rather than a lossy `String`: a lossy round-trip would
        // rewrite invalid UTF-8 sequences and hand back a path to a different file.
        let mut kept = path.as_os_str().as_bytes();
        while let [head @ .., last] = kept {
            // An empty `head` means only one byte is left; keep it so "/" survives.
            if head.is_empty() || char::from(*last) != MAIN_SEPARATOR {
                break;
            }
            kept = head;
        }
        PathBuf::from(OsStr::from_bytes(kept))
    }

    #[cfg(not(any(windows, unix)))]
    {
        // Without this branch the function would have no return value on targets that
        // are neither Windows nor Unix.
        let mut result = path.to_string_lossy().into_owned();
        while result.len() > 1 && result.ends_with(MAIN_SEPARATOR) {
            result.pop();
        }
        PathBuf::from(result)
    }
}
51+
52+
/// Normalizes path case on Windows without resolving symlinks/junctions.
53+
///
54+
/// # Behavior by Platform
55+
///
56+
/// ## Windows
57+
/// - Normalizes path case to match the actual filesystem casing
58+
/// - Converts relative paths to absolute paths
59+
/// - Converts forward slashes to backslashes
60+
/// - Strips trailing path separators (except for root paths like `C:\`)
61+
/// - Removes the extended-length path prefix (`\\?\`, sometimes loosely called the "UNC prefix") if the original path didn't have it
62+
/// - **Does NOT resolve symlinks or junctions** (uses `GetLongPathNameW`)
63+
/// - For non-existent paths, returns the absolute path without case normalization
64+
///
65+
/// ## Unix
66+
/// - Returns the path unchanged (no-op)
67+
/// - Path case is significant on Unix, so no normalization is performed
68+
///
69+
/// # Use Cases
70+
///
71+
/// This function is typically used for:
72+
///
73+
/// 1. **Path Comparison/Hashing**: Ensures consistent path representation for cache keys
74+
/// and hash generation (e.g., Poetry environment name hashing, fs_cache)
75+
///
76+
/// 2. **Sanitizing External Path Sources**: Normalizes paths from external sources like:
77+
/// - Windows Registry entries (may have trailing slashes)
78+
/// - Configuration files (environments.txt, .condarc)
79+
/// - Environment variables (VIRTUAL_ENV, WORKON_HOME)
80+
///
81+
/// 3. **Storing/Displaying Paths**: Ensures paths are in a canonical form for storage
82+
/// and display (e.g., `PythonEnvironment.executable`, `PythonEnvironment.prefix`)
83+
///
84+
/// # Important Notes
85+
///
86+
/// - On Windows, this function uses `GetLongPathNameW` which **preserves junction paths**
87+
/// unlike `fs::canonicalize` which would resolve them to their target.
88+
/// - For symlink resolution, use `resolve_symlink()` instead.
89+
///
90+
/// # Related
91+
/// - `strip_trailing_separator()` - Just removes trailing separators
92+
/// - `resolve_symlink()` - Resolves symlinks to their target
93+
/// - `expand_path()` - Expands `~` and environment variables
94+
///
95+
/// See: <https://github.com/microsoft/python-environment-tools/issues/186>
96+
/// See: <https://github.com/microsoft/python-environment-tools/issues/278>
1597
pub fn norm_case<P: AsRef<Path>>(path: P) -> PathBuf {
1698
// On unix do not use canonicalize, results in weird issues with homebrew paths
1799
// Even readlink does the same thing
@@ -113,8 +195,23 @@ fn normalize_case_windows(path: &Path) -> Option<PathBuf> {
113195
Some(PathBuf::from(result_str))
114196
}
115197

116-
// Resolves symlinks to the real file.
117-
// If the real file == exe, then it is not a symlink.
198+
/// Resolves a symlink to its real file path.
199+
///
200+
/// Returns `None` if the path is not a symlink or cannot be resolved.
201+
/// If the real file equals the input, returns `None` (the path is not a symlink).
202+
///
203+
/// # Filtering
204+
/// This function only resolves symlinks for Python and Conda related executables:
205+
/// - Files starting with `python` or `conda` (e.g., `python3.12`, `conda`)
206+
/// - Excludes files ending with `-config` or `-build` (e.g., `python3-config`)
207+
///
208+
/// # Use Cases
209+
/// - Identifying the actual Python executable behind symbolic links
210+
/// - Homebrew Python symlinks: `/opt/homebrew/bin/python3.12` → actual Cellar path
211+
/// - Tracking all symlink variants of a Python installation
212+
///
213+
/// # Related
214+
/// - `norm_case()` - Normalizes path case without resolving symlinks
118215
pub fn resolve_symlink<T: AsRef<Path>>(exe: &T) -> Option<PathBuf> {
119216
let name = exe.as_ref().file_name()?.to_string_lossy();
120217
// In bin directory of homebrew, we have files like python-build, python-config, python3-config
@@ -143,6 +240,28 @@ pub fn resolve_symlink<T: AsRef<Path>>(exe: &T) -> Option<PathBuf> {
143240
}
144241
}
145242

243+
/// Expands `~` (home directory) and environment variables in a path.
244+
///
245+
/// This function handles:
246+
/// - `~` prefix: Expands to the user's home directory
247+
/// - `${USERNAME}`: Expands to the current username
248+
/// - `${HOME}`: Expands to the home directory
249+
///
250+
/// # Examples
251+
/// - `~/envs` → `/home/user/envs`
252+
/// - `${HOME}/.conda` → `/home/user/.conda`
253+
///
254+
/// # Environment Variables
255+
/// - On Unix: Uses `$HOME` for home directory, `$USER` for username
256+
/// - On Windows: Uses `%USERPROFILE%` for home directory, `%USERNAME%` for username
257+
///
258+
/// # Use Cases
259+
/// Used primarily for expanding paths from conda rc files which support
260+
/// [environment variable expansion](https://docs.conda.io/projects/conda/en/23.1.x/user-guide/configuration/use-condarc.html#expansion-of-environment-variables).
261+
///
262+
/// # Related
263+
/// - `norm_case()` - Normalizes path case
264+
/// - `strip_trailing_separator()` - Removes trailing path separators
146265
pub fn expand_path(path: PathBuf) -> PathBuf {
147266
if path.starts_with("~") {
148267
if let Some(ref home) = get_user_home() {
@@ -186,6 +305,57 @@ fn get_user_home() -> Option<PathBuf> {
186305
mod tests {
187306
use super::*;
188307

308+
// ==================== strip_trailing_separator tests ====================
309+
310+
#[test]
fn test_strip_trailing_separator_no_trailing() {
    // Paths without trailing separators should remain unchanged.
    //
    // The string form is compared rather than the `PathBuf` itself: `Path` equality is
    // component-based and ignores trailing separators, so a `PathBuf` comparison would
    // pass regardless of what the function did to the tail of the path.
    for input in ["/home/user", "/home/user/envs"] {
        assert_eq!(strip_trailing_separator(input).to_str(), Some(input));
    }
}
322+
323+
#[test]
#[cfg(unix)]
fn test_strip_trailing_separator_unix() {
    // NOTE: `PathBuf::from("/home/user/") == PathBuf::from("/home/user")` is true because
    // `Path` equality compares components, so comparing `PathBuf`s cannot detect a
    // separator that was left in place. Compare the string form instead.

    // Strip trailing slash
    assert_eq!(
        strip_trailing_separator("/home/user/").to_str(),
        Some("/home/user")
    );
    // Multiple trailing slashes
    assert_eq!(
        strip_trailing_separator("/home/user///").to_str(),
        Some("/home/user")
    );
    // Root path should be preserved
    assert_eq!(strip_trailing_separator("/").to_str(), Some("/"));
}
339+
340+
#[test]
#[cfg(windows)]
fn test_strip_trailing_separator_windows() {
    // NOTE: `Path` equality compares components, so `C:\Users\` and `C:\Users` compare
    // equal as `PathBuf`s. Compare the string form so a separator that was left in place
    // actually fails the test.

    // Strip trailing backslash
    assert_eq!(
        strip_trailing_separator("C:\\Users\\").to_str(),
        Some("C:\\Users")
    );
    // Strip trailing forward slash (also valid on Windows)
    assert_eq!(
        strip_trailing_separator("C:\\Users/").to_str(),
        Some("C:\\Users")
    );
    // Root path should be preserved
    assert_eq!(strip_trailing_separator("C:\\").to_str(), Some("C:\\"));
}
356+
357+
// ==================== norm_case tests ====================
358+
189359
#[test]
190360
#[cfg(unix)]
191361
fn test_norm_case_returns_path_for_nonexistent_unix() {

0 commit comments

Comments
 (0)