Skip to content

Commit 7d82537

Browse files
authored
Refactor UrlId into FilePath enum (#1251)
Branched from #1250 (see that PR first for context). Progress towards #1212. - Rename `UrlId` to `FilePath` - Make it our own custom enum instead of piggybacking on the `Url` enum. Variants: `File` (lexically normalised) and `Virtual` (wraps a `Uri`, e.g. for untitled files). We could add `Vendored` later for vendored base R sources, mirroring ty. This refactor better reflects that the main representation for our files is an on-disk path.
2 parents 3d81f02 + 1c94827 commit 7d82537

55 files changed

Lines changed: 1071 additions & 1027 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 15 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ aether_factory = { git = "https://github.com/posit-dev/air", package = "air_r_fa
2929
aether_lsp_utils = { git = "https://github.com/posit-dev/air", rev = "d2659d5b158374bf486b594625ca50abbd0ac879" }
3030
aether_parser = { git = "https://github.com/posit-dev/air", package = "air_r_parser", rev = "d2659d5b158374bf486b594625ca50abbd0ac879" }
3131
aether_syntax = { git = "https://github.com/posit-dev/air", package = "air_r_syntax", rev = "d2659d5b158374bf486b594625ca50abbd0ac879" }
32-
aether_url = { path = "crates/aether_url" }
32+
aether_path = { path = "crates/aether_path" }
3333
amalthea = { path = "crates/amalthea" }
3434
anyhow = "1.0.102"
3535
ark = { path = "crates/ark" }
@@ -43,6 +43,7 @@ biome_rowan = { git = "https://github.com/lionel-/biome", rev = "a1296ea6ba363d8
4343
biome_text_size = { git = "https://github.com/lionel-/biome", rev = "a1296ea6ba363d8b8d8f02181b2a4ce9315c5ef9" }
4444
blake3 = "1.8.5"
4545
bus = "2.4.1"
46+
camino = "1.1"
4647
cc = "1.2.61"
4748
cfg-if = "1.0.4"
4849
chrono = "0.4.44"
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
[package]
2-
name = "aether_url"
2+
name = "aether_path"
33
version = "0.1.0"
4-
description = "Canonicalised file URL identity (`UrlId`) for matching the same file across heterogeneous URI sources."
4+
description = "Tagged identity types for files (filesystem paths and non-`file:` URIs)."
55
authors.workspace = true
66
edition.workspace = true
77
rust-version.workspace = true
88
license.workspace = true
99

1010
[dependencies]
1111
anyhow.workspace = true
12+
camino.workspace = true
1213
log.workspace = true
1314
stdext.workspace = true
1415
url.workspace = true
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
//! Tagged identity for a file. The two arms encode where the file
2+
//! lives:
3+
//!
4+
//! - [`FilePath::File`] wraps an [`AbsPathBuf`] (a UTF-8 absolute path
5+
//! with lexical normalisation applied at construction). This is the
6+
//! identity HashMaps key on for anything that has a filesystem
7+
//! representation.
8+
//! - [`FilePath::Virtual`] wraps a [`VirtualUri`] (a non-`file:` URI
9+
//! preserved byte for byte). Identity is exact string equality.
10+
//!
11+
//! No filesystem I/O happens in construction. Bridging across symlinks
12+
//! is the job of secondary canonical-path indexes at the specific call
13+
//! sites that need it, never of this type.
14+
15+
use std::path::PathBuf;
16+
17+
use camino::Utf8Component;
18+
use camino::Utf8Path;
19+
use camino::Utf8PathBuf;
20+
use stdext::result::ResultExt;
21+
use url::Url;
22+
23+
/// Tagged identity for a file. See module docs.
24+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25+
pub enum FilePath {
26+
/// A real filesystem file. Identity is the lexically normalised
27+
/// absolute path.
28+
File(AbsPathBuf),
29+
/// A URI with any scheme other than `file:`. Identity is the
30+
/// verbatim URI.
31+
Virtual(VirtualUri),
32+
}
33+
34+
impl FilePath {
35+
/// Convert a URL into a `FilePath`.
36+
///
37+
/// Dispatches by scheme. `file:` URLs build a [`FilePath::File`];
38+
/// everything else builds a [`FilePath::Virtual`] that preserves
39+
/// the URL verbatim.
40+
pub fn from_url(url: &Url) -> Self {
41+
if url.scheme() == "file" {
42+
if let Some(path) = AbsPathBuf::from_url(url) {
43+
return Self::File(path);
44+
}
45+
// Fall through: a `file:` URL we can't extract a path from
46+
// stays as Virtual so the input isn't lost. Rare in practice.
47+
}
48+
Self::Virtual(VirtualUri::new(url.clone()))
49+
}
50+
51+
/// Build a [`FilePath::File`] from a filesystem path. Returns `None`
52+
/// if the path can't be expressed as a UTF-8 absolute path.
53+
pub fn from_path_buf(path: PathBuf) -> Option<Self> {
54+
AbsPathBuf::from_path_buf(path).map(Self::File)
55+
}
56+
57+
/// Parse a URI string into a [`FilePath`]. `file:` URIs become
58+
/// [`FilePath::File`]; everything else becomes [`FilePath::Virtual`].
59+
pub fn parse(s: &str) -> anyhow::Result<Self> {
60+
let url = Url::parse(s)?;
61+
Ok(Self::from_url(&url))
62+
}
63+
64+
/// Reconstruct a [`Url`].
65+
///
66+
/// `File` arms rebuild a `file:` URL from the stored path; `Virtual`
67+
/// arms return the stored URL verbatim. Note that `File` round-trips
68+
/// can produce a URL that differs in bytes from the original input
69+
/// (drive-letter casing, encoded `:`). When that matters, store the
70+
/// original URL alongside in a separate field instead of relying on
71+
/// this method.
72+
pub fn to_url(&self) -> Url {
73+
match self {
74+
Self::File(path) => path.to_url(),
75+
Self::Virtual(uri) => uri.as_url().clone(),
76+
}
77+
}
78+
79+
/// `true` for the `File` arm.
80+
pub fn is_file(&self) -> bool {
81+
matches!(self, Self::File(_))
82+
}
83+
84+
/// Borrow the inner [`AbsPathBuf`] for the `File` arm.
85+
pub fn as_file(&self) -> Option<&AbsPathBuf> {
86+
match self {
87+
Self::File(p) => Some(p),
88+
Self::Virtual(_) => None,
89+
}
90+
}
91+
92+
/// Borrow the filesystem path for the `File` arm. `None` for `Virtual`.
93+
pub fn as_path(&self) -> Option<&Utf8Path> {
94+
self.as_file().map(AbsPathBuf::as_path)
95+
}
96+
97+
/// Borrow the inner [`VirtualUri`] for the `Virtual` arm.
98+
pub fn as_virtual(&self) -> Option<&VirtualUri> {
99+
match self {
100+
Self::Virtual(u) => Some(u),
101+
Self::File(_) => None,
102+
}
103+
}
104+
}
105+
106+
impl std::fmt::Display for FilePath {
107+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108+
// `File` arms format as a `file:` URL so the output matches
109+
// what we'd send on the wire, not as a bare path. The path
110+
// form is reachable via `as_path()` for callers that want it.
111+
match self {
112+
Self::File(p) => p.to_url().fmt(f),
113+
Self::Virtual(u) => u.fmt(f),
114+
}
115+
}
116+
}
117+
118+
/// Lexically normalised absolute UTF-8 path. Identity for filesystem
119+
/// files inside [`FilePath::File`].
120+
///
121+
/// Normalisation applied at construction:
122+
/// - `.` segments dropped, `..` resolved lexically, repeated separators
123+
/// and trailing slashes collapsed (via `Utf8Path::components()`).
124+
/// - Windows drive letter uppercased.
125+
///
126+
/// No filesystem I/O. The same input produces the same `AbsPathBuf`
127+
/// regardless of whether the file exists on disk.
128+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
129+
pub struct AbsPathBuf(Utf8PathBuf);
130+
131+
impl AbsPathBuf {
132+
/// Build from a `file:` URL. Returns `None` for non-`file:` URLs
133+
/// or for `file:` URLs whose path can't be extracted as UTF-8
134+
/// absolute.
135+
pub fn from_url(url: &Url) -> Option<Self> {
136+
if url.scheme() != "file" {
137+
return None;
138+
}
139+
let path = url
140+
.to_file_path()
141+
.map_err(|()| anyhow::anyhow!("URL has no file path: {url}"))
142+
.warn_on_err()?;
143+
Self::from_path_buf(path)
144+
}
145+
146+
/// Build from a filesystem path. Returns `None` if the path can't
147+
/// be represented as UTF-8 or is not absolute.
148+
pub fn from_path_buf(path: PathBuf) -> Option<Self> {
149+
let utf8 = Utf8PathBuf::from_path_buf(path)
150+
.map_err(|p| anyhow::anyhow!("Path is not valid UTF-8: {}", p.display()))
151+
.warn_on_err()?;
152+
Self::from_utf8_path_buf(utf8)
153+
}
154+
155+
/// Build from a UTF-8 path. Returns `None` if the path is not
156+
/// absolute.
157+
pub fn from_utf8_path_buf(path: Utf8PathBuf) -> Option<Self> {
158+
if !path.is_absolute() {
159+
return None;
160+
}
161+
Some(Self(normalise(path)))
162+
}
163+
164+
/// Reconstruct a `file:` URL.
165+
pub fn to_url(&self) -> Url {
166+
Url::from_file_path(self.0.as_std_path())
167+
.expect("AbsPathBuf is absolute: Url::from_file_path can't fail")
168+
}
169+
170+
/// Underlying UTF-8 path.
171+
pub fn as_path(&self) -> &Utf8Path {
172+
&self.0
173+
}
174+
}
175+
176+
impl std::fmt::Display for AbsPathBuf {
177+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
178+
self.0.fmt(f)
179+
}
180+
}
181+
182+
/// A URI with any scheme other than `file:`, preserved verbatim.
183+
/// Identity for [`FilePath::Virtual`].
184+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
185+
pub struct VirtualUri(Url);
186+
187+
impl VirtualUri {
188+
pub fn new(url: Url) -> Self {
189+
Self(url)
190+
}
191+
192+
pub fn as_url(&self) -> &Url {
193+
&self.0
194+
}
195+
196+
pub fn into_url(self) -> Url {
197+
self.0
198+
}
199+
}
200+
201+
impl std::fmt::Display for VirtualUri {
202+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203+
self.0.fmt(f)
204+
}
205+
}
206+
207+
/// Lexical normalisation: collapse `.` / `..` / repeated separators /
208+
/// trailing slashes, uppercase the Windows drive letter. Adapted from
209+
/// the routine cargo and rust-analyzer use.
210+
fn normalise(path: Utf8PathBuf) -> Utf8PathBuf {
211+
let mut components = path.components().peekable();
212+
213+
// Handle prefix first and uppercase it
214+
let mut out = if let Some(c @ Utf8Component::Prefix(_)) = components.peek().copied() {
215+
components.next();
216+
Utf8PathBuf::from(uppercase_disk_prefix(c.as_str()))
217+
} else {
218+
Utf8PathBuf::new()
219+
};
220+
221+
for component in components {
222+
match component {
223+
Utf8Component::Prefix(_) => unreachable!("Prefix only appears as the first component"),
224+
Utf8Component::RootDir => out.push(component.as_str()),
225+
Utf8Component::CurDir => {},
226+
Utf8Component::ParentDir => {
227+
out.pop();
228+
},
229+
Utf8Component::Normal(c) => out.push(c),
230+
}
231+
}
232+
out
233+
}
234+
235+
/// Return `prefix` with its Windows drive letter uppercased.
///
/// The drive letter is taken to be the single ASCII character directly
/// before the first `:` in `prefix`, which covers both `c:` and
/// `\\?\c:`. A prefix with no `:` (for example a UNC prefix), a `:` in
/// first position, or a non-ASCII character before the `:` is returned
/// unchanged. Working on the string form avoids having to rebuild the
/// prefix from `Utf8Prefix` variants.
fn uppercase_disk_prefix(prefix: &str) -> String {
    let mut out = prefix.to_owned();
    if let Some(colon_idx) = out.find(':') {
        // `get_mut` yields `None` unless the byte before the colon is a
        // complete one-byte (ASCII) character, so no `unsafe` byte
        // patching is needed and the string always stays valid UTF-8.
        let drive = colon_idx
            .checked_sub(1)
            .and_then(|drive_idx| out.get_mut(drive_idx..colon_idx));
        if let Some(drive) = drive {
            drive.make_ascii_uppercase();
        }
    }
    out
}
258+
259+
#[cfg(test)]
260+
mod tests;

0 commit comments

Comments
 (0)