Skip to content

Commit af17720

Browse files
committed
⚡ Add EntryReference::sanitize and EntryReference::from_{utf8,path,path_lossy}_preserve_root
1 parent c87cf63 commit af17720

1 file changed

Lines changed: 152 additions & 16 deletions

File tree

lib/src/entry/reference.rs

Lines changed: 152 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,7 @@ pub struct EntryReference(String);
2525

2626
impl EntryReference {
2727
/// Builds a reference from `path` by wrapping its string form and then passing it
/// through [`EntryReference::sanitize`].
fn new_from_utf8path(path: &Utf8Path) -> Self {
28-
let has_root = path.has_root();
29-
let has_prefix = path
30-
.components()
31-
.any(|it| matches!(&it, Utf8Component::Prefix(_)));
32-
let p = path.components().filter_map(|it| match it {
33-
Utf8Component::Prefix(p) => Some(p.as_str()),
34-
Utf8Component::RootDir => None,
35-
Utf8Component::CurDir => Some("."),
36-
Utf8Component::ParentDir => Some(".."),
37-
Utf8Component::Normal(n) => Some(n),
38-
});
39-
let mut s = join_with_capacity(p, "/", path.as_str().len());
40-
if !has_prefix && has_root {
41-
s.insert(0, '/');
42-
};
43-
Self(s)
28+
// The inline rebuild removed above is the same component walk that `sanitize`
// performs, so delegating keeps the result of this constructor unchanged.
Self::new_from_utf8path_preserve_root(path).sanitize()
4429
}
4530

4631
#[inline]
@@ -80,6 +65,89 @@ impl EntryReference {
8065
Self::new_from_utf8(&path.to_string_lossy())
8166
}
8267

68+
/// Creates an [EntryReference] from a UTF-8 string while preserving absolute
69+
/// roots, prefixes, and parent components.
70+
///
/// The input is stored exactly as given: no separator normalization and no
/// component filtering is applied by this constructor.
///
71+
/// # Examples
72+
///
73+
/// ```
74+
/// use libpna::EntryReference;
75+
///
76+
/// assert_eq!("/foo.txt", EntryReference::from_utf8_preserve_root("/foo.txt"));
77+
/// assert_eq!("bar/../foo.txt", EntryReference::from_utf8_preserve_root("bar/../foo.txt"));
78+
/// assert_eq!("../foo.txt", EntryReference::from_utf8_preserve_root("../foo.txt"));
79+
/// ```
80+
#[inline]
81+
pub fn from_utf8_preserve_root(path: &str) -> Self {
82+
Self::new_from_utf8path_preserve_root(Utf8Path::new(path))
83+
}
84+
85+
/// Wraps the string form of `path` unchanged, without inspecting its components.
#[inline]
86+
fn new_from_utf8path_preserve_root(path: &Utf8Path) -> Self {
87+
Self(path.as_str().to_owned())
88+
}
89+
90+
/// Creates an [EntryReference] from a path, preserving absolute path components.
91+
///
/// The path's string form is stored unchanged once it has been validated as UTF-8.
///
92+
/// # Errors
93+
///
94+
/// Returns an [`EntryReferenceError`] if the path cannot be represented as valid UTF-8.
95+
///
96+
/// # Examples
97+
///
98+
/// ```
99+
/// use libpna::EntryReference;
100+
///
101+
/// assert_eq!("/foo.txt", EntryReference::from_path_preserve_root("/foo.txt".as_ref()).unwrap());
102+
/// assert_eq!("../foo.txt", EntryReference::from_path_preserve_root("../foo.txt".as_ref()).unwrap());
103+
/// ```
104+
#[inline]
105+
pub fn from_path_preserve_root(path: &Path) -> Result<Self, EntryReferenceError> {
106+
// Validate the raw OS-string bytes as UTF-8; a failure is propagated by `?`.
let path = str::from_utf8(path.as_os_str().as_encoded_bytes())?;
107+
Ok(Self::new_from_utf8path_preserve_root(Utf8Path::new(path)))
108+
}
109+
110+
/// Creates an [EntryReference] from a path, preserving absolute path components.
111+
///
112+
/// The path is converted with [`Path::to_string_lossy`], so any invalid UTF-8
/// sequences are replaced rather than reported as an error. The converted string
/// is then stored unchanged.
113+
///
114+
/// # Examples
115+
///
116+
/// ```
117+
/// use libpna::EntryReference;
118+
///
119+
/// assert_eq!("/foo.txt", EntryReference::from_path_lossy_preserve_root("/foo.txt".as_ref()));
120+
/// ```
121+
#[inline]
122+
pub fn from_path_lossy_preserve_root(path: &Path) -> Self {
123+
Self::new_from_utf8path_preserve_root(Utf8Path::new(&path.to_string_lossy()))
124+
}
125+
126+
/// Returns a copy of this reference rebuilt from its path components, re-joined via
/// `join_with_capacity` with `/` as the separator.
127+
///
128+
/// Despite the name, this does not produce a relative, traversal-free path: a
/// prefix, `.` and `..` components are all kept, and a root is written back as a
/// leading `/` unless the path also has a prefix. The result can therefore still be
/// absolute or contain parent-directory components.
129+
/// The only component dropped outright is the root separator of a path that has a prefix.
130+
#[inline]
131+
pub fn sanitize(&self) -> Self {
132+
let path = Utf8Path::new(&self.0);
133+
let has_root = path.has_root();
134+
let has_prefix = path
135+
.components()
136+
.any(|it| matches!(&it, Utf8Component::Prefix(_)));
137+
// `RootDir` is filtered out of the join; it is restored below when there is no prefix.
let p = path.components().filter_map(|it| match it {
138+
Utf8Component::Prefix(p) => Some(p.as_str()),
139+
Utf8Component::RootDir => None,
140+
Utf8Component::CurDir => Some("."),
141+
Utf8Component::ParentDir => Some(".."),
142+
Utf8Component::Normal(n) => Some(n),
143+
});
144+
let mut s = join_with_capacity(p, "/", path.as_str().len());
145+
if !has_prefix && has_root {
146+
s.insert(0, '/');
147+
};
148+
Self(s)
149+
}
150+
83151
#[inline]
84152
pub(crate) fn as_bytes(&self) -> &[u8] {
85153
self.0.as_bytes()
@@ -399,6 +467,28 @@ mod tests {
399467
assert_eq!("/", EntryReference::from("///"));
400468
}
401469

470+
// Checks that `from_utf8_preserve_root` keeps absolute, parent-relative and
// drive-style inputs.
#[test]
471+
fn preserve_root_variants() {
472+
assert_eq!(
473+
"/abs/path",
474+
EntryReference::from_utf8_preserve_root("/abs/path").as_str()
475+
);
476+
assert_eq!(
477+
"../rel/path",
478+
EntryReference::from_utf8_preserve_root("../rel/path").as_str()
479+
);
480+
// NOTE(review): `from_utf8_preserve_root` copies its input verbatim, so on Windows
// this presumably yields "C:\\drive\\path" rather than "C:/drive/path" — confirm
// whether the expectation or the constructor should change.
#[cfg(windows)]
481+
assert_eq!(
482+
"C:/drive/path",
483+
EntryReference::from_utf8_preserve_root("C:\\drive\\path").as_str()
484+
);
485+
#[cfg(not(windows))]
486+
assert_eq!(
487+
"C:\\drive\\path",
488+
EntryReference::from_utf8_preserve_root("C:\\drive\\path").as_str()
489+
);
490+
}
491+
402492
#[test]
403493
fn remove_last() {
404494
assert_eq!("test", EntryReference::from("test/"));
@@ -472,6 +562,52 @@ mod tests {
472562
assert_eq!("/test/test.txt", EntryReference::from("///test///test.txt"));
473563
}
474564

565+
// `from_utf8_preserve_root` is expected to hand back each degenerate input unchanged.
#[test]
566+
fn preserve_root_edge_cases() {
567+
// Empty string
568+
assert_eq!("", EntryReference::from_utf8_preserve_root(""));
569+
// Only parent dir
570+
assert_eq!("..", EntryReference::from_utf8_preserve_root(".."));
571+
// Only current dir
572+
assert_eq!(".", EntryReference::from_utf8_preserve_root("."));
573+
// Only root
574+
assert_eq!("/", EntryReference::from_utf8_preserve_root("/"));
575+
// Multiple parent dirs
576+
assert_eq!(
577+
"../../..",
578+
EntryReference::from_utf8_preserve_root("../../..")
579+
);
580+
}
581+
582+
#[test]
583+
fn sanitize_edge_cases() {
584+
// Empty string remains empty
585+
assert_eq!("", EntryReference::from_utf8_preserve_root("").sanitize());
586+
// Only parent dir
587+
assert_eq!(
588+
"..",
589+
EntryReference::from_utf8_preserve_root("..").sanitize()
590+
);
591+
// Only current dir
592+
assert_eq!(".", EntryReference::from_utf8_preserve_root(".").sanitize());
593+
// Only root
594+
assert_eq!("/", EntryReference::from_utf8_preserve_root("/").sanitize());
595+
// Multiple parent dirs
596+
assert_eq!(
597+
"../../..",
598+
EntryReference::from_utf8_preserve_root("../../..").sanitize()
599+
);
600+
// Mixed with normal component
601+
assert_eq!(
602+
"/../foo",
603+
EntryReference::from_utf8_preserve_root("/../foo").sanitize()
604+
);
605+
assert_eq!(
606+
"./foo",
607+
EntryReference::from_utf8_preserve_root("./foo").sanitize()
608+
);
609+
}
610+
475611
#[cfg(unix)]
476612
#[test]
477613
fn unix_error_cases() {

0 commit comments

Comments
 (0)