|
| 1 | +//! Tagged identity for a file. The two arms encode where the file |
| 2 | +//! lives: |
| 3 | +//! |
| 4 | +//! - [`FilePath::File`] wraps an [`AbsPathBuf`] (a UTF-8 absolute path |
| 5 | +//! with lexical normalisation applied at construction). This is the |
| 6 | +//! identity HashMaps key on for anything that has a filesystem |
| 7 | +//! representation. |
| 8 | +//! - [`FilePath::Virtual`] wraps a [`VirtualUri`] (a non-`file:` URI |
| 9 | +//! preserved byte for byte). Identity is exact string equality. |
| 10 | +//! |
| 11 | +//! No filesystem I/O happens in construction. Bridging across symlinks |
| 12 | +//! is the job of secondary canonical-path indexes at the specific call |
| 13 | +//! sites that need it, never of this type. |
| 14 | +
|
| 15 | +use std::path::PathBuf; |
| 16 | + |
| 17 | +use camino::Utf8Component; |
| 18 | +use camino::Utf8Path; |
| 19 | +use camino::Utf8PathBuf; |
| 20 | +use stdext::result::ResultExt; |
| 21 | +use url::Url; |
| 22 | + |
| 23 | +/// Tagged identity for a file. See module docs. |
| 24 | +#[derive(Debug, Clone, PartialEq, Eq, Hash)] |
| 25 | +pub enum FilePath { |
| 26 | + /// A real filesystem file. Identity is the lexically normalised |
| 27 | + /// absolute path. |
| 28 | + File(AbsPathBuf), |
| 29 | + /// A URI with any scheme other than `file:`. Identity is the |
| 30 | + /// verbatim URI. |
| 31 | + Virtual(VirtualUri), |
| 32 | +} |
| 33 | + |
| 34 | +impl FilePath { |
| 35 | + /// Convert a URL into a `FilePath`. |
| 36 | + /// |
| 37 | + /// Dispatches by scheme. `file:` URLs build a [`FilePath::File`]; |
| 38 | + /// everything else builds a [`FilePath::Virtual`] that preserves |
| 39 | + /// the URL verbatim. |
| 40 | + pub fn from_url(url: &Url) -> Self { |
| 41 | + if url.scheme() == "file" { |
| 42 | + if let Some(path) = AbsPathBuf::from_url(url) { |
| 43 | + return Self::File(path); |
| 44 | + } |
| 45 | + // Fall through: a `file:` URL we can't extract a path from |
| 46 | + // stays as Virtual so the input isn't lost. Rare in practice. |
| 47 | + } |
| 48 | + Self::Virtual(VirtualUri::new(url.clone())) |
| 49 | + } |
| 50 | + |
| 51 | + /// Build a [`FilePath::File`] from a filesystem path. Returns `None` |
| 52 | + /// if the path can't be expressed as a UTF-8 absolute path. |
| 53 | + pub fn from_path_buf(path: PathBuf) -> Option<Self> { |
| 54 | + AbsPathBuf::from_path_buf(path).map(Self::File) |
| 55 | + } |
| 56 | + |
| 57 | + /// Parse a URI string into a [`FilePath`]. `file:` URIs become |
| 58 | + /// [`FilePath::File`]; everything else becomes [`FilePath::Virtual`]. |
| 59 | + pub fn parse(s: &str) -> anyhow::Result<Self> { |
| 60 | + let url = Url::parse(s)?; |
| 61 | + Ok(Self::from_url(&url)) |
| 62 | + } |
| 63 | + |
| 64 | + /// Reconstruct a [`Url`]. |
| 65 | + /// |
| 66 | + /// `File` arms rebuild a `file:` URL from the stored path; `Virtual` |
| 67 | + /// arms return the stored URL verbatim. Note that `File` round-trips |
| 68 | + /// can produce a URL that differs in bytes from the original input |
| 69 | + /// (drive-letter casing, encoded `:`). When that matters, store the |
| 70 | + /// original URL alongside in a separate field instead of relying on |
| 71 | + /// this method. |
| 72 | + pub fn to_url(&self) -> Url { |
| 73 | + match self { |
| 74 | + Self::File(path) => path.to_url(), |
| 75 | + Self::Virtual(uri) => uri.as_url().clone(), |
| 76 | + } |
| 77 | + } |
| 78 | + |
| 79 | + /// `true` for the `File` arm. |
| 80 | + pub fn is_file(&self) -> bool { |
| 81 | + matches!(self, Self::File(_)) |
| 82 | + } |
| 83 | + |
| 84 | + /// Borrow the inner [`AbsPathBuf`] for the `File` arm. |
| 85 | + pub fn as_file(&self) -> Option<&AbsPathBuf> { |
| 86 | + match self { |
| 87 | + Self::File(p) => Some(p), |
| 88 | + Self::Virtual(_) => None, |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + /// Borrow the filesystem path for the `File` arm. `None` for `Virtual`. |
| 93 | + pub fn as_path(&self) -> Option<&Utf8Path> { |
| 94 | + self.as_file().map(AbsPathBuf::as_path) |
| 95 | + } |
| 96 | + |
| 97 | + /// Borrow the inner [`VirtualUri`] for the `Virtual` arm. |
| 98 | + pub fn as_virtual(&self) -> Option<&VirtualUri> { |
| 99 | + match self { |
| 100 | + Self::Virtual(u) => Some(u), |
| 101 | + Self::File(_) => None, |
| 102 | + } |
| 103 | + } |
| 104 | +} |
| 105 | + |
| 106 | +impl std::fmt::Display for FilePath { |
| 107 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 108 | + // `File` arms format as a `file:` URL so the output matches |
| 109 | + // what we'd send on the wire, not as a bare path. The path |
| 110 | + // form is reachable via `as_path()` for callers that want it. |
| 111 | + match self { |
| 112 | + Self::File(p) => p.to_url().fmt(f), |
| 113 | + Self::Virtual(u) => u.fmt(f), |
| 114 | + } |
| 115 | + } |
| 116 | +} |
| 117 | + |
| 118 | +/// Lexically normalised absolute UTF-8 path. Identity for filesystem |
| 119 | +/// files inside [`FilePath::File`]. |
| 120 | +/// |
| 121 | +/// Normalisation applied at construction: |
| 122 | +/// - `.` segments dropped, `..` resolved lexically, repeated separators |
| 123 | +/// and trailing slashes collapsed (via `Utf8Path::components()`). |
| 124 | +/// - Windows drive letter uppercased. |
| 125 | +/// |
| 126 | +/// No filesystem I/O. The same input produces the same `AbsPathBuf` |
| 127 | +/// regardless of whether the file exists on disk. |
| 128 | +#[derive(Debug, Clone, PartialEq, Eq, Hash)] |
| 129 | +pub struct AbsPathBuf(Utf8PathBuf); |
| 130 | + |
| 131 | +impl AbsPathBuf { |
| 132 | + /// Build from a `file:` URL. Returns `None` for non-`file:` URLs |
| 133 | + /// or for `file:` URLs whose path can't be extracted as UTF-8 |
| 134 | + /// absolute. |
| 135 | + pub fn from_url(url: &Url) -> Option<Self> { |
| 136 | + if url.scheme() != "file" { |
| 137 | + return None; |
| 138 | + } |
| 139 | + let path = url |
| 140 | + .to_file_path() |
| 141 | + .map_err(|()| anyhow::anyhow!("URL has no file path: {url}")) |
| 142 | + .warn_on_err()?; |
| 143 | + Self::from_path_buf(path) |
| 144 | + } |
| 145 | + |
| 146 | + /// Build from a filesystem path. Returns `None` if the path can't |
| 147 | + /// be represented as UTF-8 or is not absolute. |
| 148 | + pub fn from_path_buf(path: PathBuf) -> Option<Self> { |
| 149 | + let utf8 = Utf8PathBuf::from_path_buf(path) |
| 150 | + .map_err(|p| anyhow::anyhow!("Path is not valid UTF-8: {}", p.display())) |
| 151 | + .warn_on_err()?; |
| 152 | + Self::from_utf8_path_buf(utf8) |
| 153 | + } |
| 154 | + |
| 155 | + /// Build from a UTF-8 path. Returns `None` if the path is not |
| 156 | + /// absolute. |
| 157 | + pub fn from_utf8_path_buf(path: Utf8PathBuf) -> Option<Self> { |
| 158 | + if !path.is_absolute() { |
| 159 | + return None; |
| 160 | + } |
| 161 | + Some(Self(normalise(path))) |
| 162 | + } |
| 163 | + |
| 164 | + /// Reconstruct a `file:` URL. |
| 165 | + pub fn to_url(&self) -> Url { |
| 166 | + Url::from_file_path(self.0.as_std_path()) |
| 167 | + .expect("AbsPathBuf is absolute: Url::from_file_path can't fail") |
| 168 | + } |
| 169 | + |
| 170 | + /// Underlying UTF-8 path. |
| 171 | + pub fn as_path(&self) -> &Utf8Path { |
| 172 | + &self.0 |
| 173 | + } |
| 174 | +} |
| 175 | + |
| 176 | +impl std::fmt::Display for AbsPathBuf { |
| 177 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 178 | + self.0.fmt(f) |
| 179 | + } |
| 180 | +} |
| 181 | + |
| 182 | +/// A URI with any scheme other than `file:`, preserved verbatim. |
| 183 | +/// Identity for [`FilePath::Virtual`]. |
| 184 | +#[derive(Debug, Clone, PartialEq, Eq, Hash)] |
| 185 | +pub struct VirtualUri(Url); |
| 186 | + |
| 187 | +impl VirtualUri { |
| 188 | + pub fn new(url: Url) -> Self { |
| 189 | + Self(url) |
| 190 | + } |
| 191 | + |
| 192 | + pub fn as_url(&self) -> &Url { |
| 193 | + &self.0 |
| 194 | + } |
| 195 | + |
| 196 | + pub fn into_url(self) -> Url { |
| 197 | + self.0 |
| 198 | + } |
| 199 | +} |
| 200 | + |
| 201 | +impl std::fmt::Display for VirtualUri { |
| 202 | + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| 203 | + self.0.fmt(f) |
| 204 | + } |
| 205 | +} |
| 206 | + |
| 207 | +/// Lexical normalisation: collapse `.` / `..` / repeated separators / |
| 208 | +/// trailing slashes, uppercase the Windows drive letter. Adapted from |
| 209 | +/// the routine cargo and rust-analyzer use. |
| 210 | +fn normalise(path: Utf8PathBuf) -> Utf8PathBuf { |
| 211 | + let mut components = path.components().peekable(); |
| 212 | + |
| 213 | + // Handle prefix first and uppercase it |
| 214 | + let mut out = if let Some(c @ Utf8Component::Prefix(_)) = components.peek().copied() { |
| 215 | + components.next(); |
| 216 | + Utf8PathBuf::from(uppercase_disk_prefix(c.as_str())) |
| 217 | + } else { |
| 218 | + Utf8PathBuf::new() |
| 219 | + }; |
| 220 | + |
| 221 | + for component in components { |
| 222 | + match component { |
| 223 | + Utf8Component::Prefix(_) => unreachable!("Prefix only appears as the first component"), |
| 224 | + Utf8Component::RootDir => out.push(component.as_str()), |
| 225 | + Utf8Component::CurDir => {}, |
| 226 | + Utf8Component::ParentDir => { |
| 227 | + out.pop(); |
| 228 | + }, |
| 229 | + Utf8Component::Normal(c) => out.push(c), |
| 230 | + } |
| 231 | + } |
| 232 | + out |
| 233 | +} |
| 234 | + |
/// Uppercase the drive letter of a Windows path prefix given in string
/// form, e.g. `c:` becomes `C:` and `\\?\c:` becomes `\\?\C:`.
///
/// Concretely: the first `:` in `prefix` is located, and if the byte
/// immediately before it is an ASCII lowercase letter, that one byte is
/// uppercased. Everything else is copied unchanged, so prefixes without a
/// `:` (such as UNC `\\server\share`), prefixes that start with `:`, and
/// prefixes whose letter is already uppercase come back as-is.
///
/// Working on the string form avoids having to reconstruct the prefix
/// from `Utf8Prefix` variants.
fn uppercase_disk_prefix(prefix: &str) -> String {
    let mut out = prefix.to_owned();

    // Index of the byte just before the first `:`; `None` when there is
    // no colon or the colon is the first byte.
    let drive_idx = prefix.find(':').and_then(|colon_idx| colon_idx.checked_sub(1));

    if let Some(idx) = drive_idx {
        // `get_mut` yields a one-byte `&mut str` only when that byte is a
        // complete character, i.e. ASCII; a byte belonging to a multi-byte
        // character gives `None` and is left untouched. Uppercasing is a
        // no-op for anything that is not an ASCII lowercase letter.
        if let Some(letter) = out.get_mut(idx..=idx) {
            letter.make_ascii_uppercase();
        }
    }

    out
}
| 258 | + |
| 259 | +#[cfg(test)] |
| 260 | +mod tests; |
0 commit comments