Skip to content

Commit 05fd708

Browse files
committed
First pass at base package cache
1 parent fc0a201 commit 05fd708

8 files changed

Lines changed: 509 additions & 146 deletions

File tree

crates/ark/src/lsp/diagnostics.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,7 @@ foo
16471647
version: "1.0.0".to_string(),
16481648
depends: vec![],
16491649
repository: None,
1650+
priority: None,
16501651
fields: Dcf::new(),
16511652
};
16521653
let package = Package::from_parts(PathBuf::from("/mock/path"), description, namespace);
@@ -1744,6 +1745,7 @@ foo
17441745
version: "1.0.0".to_string(),
17451746
depends: vec![],
17461747
repository: None,
1748+
priority: None,
17471749
fields: Dcf::new(),
17481750
};
17491751
let package1 =
@@ -1760,6 +1762,7 @@ foo
17601762
version: "1.0.0".to_string(),
17611763
depends: vec![],
17621764
repository: None,
1765+
priority: None,
17631766
fields: Dcf::new(),
17641767
};
17651768
let package2 =
@@ -1818,6 +1821,7 @@ foo
18181821
version: "1.0.0".to_string(),
18191822
depends: vec![],
18201823
repository: None,
1824+
priority: None,
18211825
fields: Dcf::new(),
18221826
};
18231827
let package = Package::from_parts(PathBuf::from("/mock/path"), description, namespace);

crates/oak_package/src/package_description.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ pub struct Description {
4040

4141
pub repository: Option<Repository>,
4242

43+
pub priority: Option<Priority>,
44+
4345
/// Raw DCF fields
4446
pub fields: Dcf,
4547
}
@@ -49,6 +51,12 @@ pub enum Repository {
4951
CRAN,
5052
}
5153

54+
/// Value of the `Priority` field of a DESCRIPTION file.
///
/// Only the two values recognised by [`Description::parse`] are modelled. Any other
/// value, or a missing field, is represented as `None` on [`Description::priority`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Priority {
    /// `Priority: base`
    Base,
    /// `Priority: recommended`
    Recommended,
}
59+
5260
impl Description {
5361
/// Parse a DESCRIPTION file in DCF format
5462
pub fn parse(contents: &str) -> anyhow::Result<Self> {
@@ -84,11 +92,22 @@ impl Description {
8492
None
8593
});
8694

95+
let priority = fields.get("Priority").and_then(|priority| {
96+
if priority == "base" {
97+
return Some(Priority::Base);
98+
}
99+
if priority == "recommended" {
100+
return Some(Priority::Recommended);
101+
}
102+
None
103+
});
104+
87105
Ok(Description {
88106
name,
89107
version,
90108
depends,
91109
repository,
110+
priority,
92111
fields,
93112
})
94113
}
@@ -214,6 +233,26 @@ Repository: CRAN"#;
214233
assert_eq!(parsed.repository, Some(Repository::CRAN));
215234
}
216235

236+
#[test]
237+
fn parses_description_with_priority() {
238+
let desc = r#"Package: utils
239+
Version: 4.5.0
240+
Priority: base"#;
241+
let parsed = Description::parse(desc).unwrap();
242+
assert_eq!(parsed.priority, Some(Priority::Base));
243+
244+
let desc = r#"Package: MASS
245+
Version: 7.3-65
246+
Priority: recommended"#;
247+
let parsed = Description::parse(desc).unwrap();
248+
assert_eq!(parsed.priority, Some(Priority::Recommended));
249+
250+
let desc = r#"Package: mypkg
251+
Version: 1.0.0"#;
252+
let parsed = Description::parse(desc).unwrap();
253+
assert!(parsed.priority.is_none());
254+
}
255+
217256
#[test]
218257
fn parses_description_with_unknown_repository() {
219258
let desc = r#"Package: mypackage

crates/oak_sources/src/base.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
use std::io::Cursor;
2+
3+
use flate2::read::GzDecoder;
4+
use oak_fs::file_lock::FileLock;
5+
6+
/// Names of the R base packages, i.e. the packages that are part of R itself and carry
7+
/// `Priority: base` in their DESCRIPTION (unlike `Priority: recommended` ones such as MASS).
8+
pub(crate) const BASE_PACKAGES: &[&str] = &[
9+
"base",
10+
"compiler",
11+
"datasets",
12+
"graphics",
13+
"grDevices",
14+
"grid",
15+
"methods",
16+
"parallel",
17+
"splines",
18+
"stats",
19+
"stats4",
20+
"tcltk",
21+
"tools",
22+
"utils",
23+
];
24+
25+
/// Download the R source tarball for R {version} from CRAN's archive.
26+
///
27+
/// Base R packages (e.g. `base`, `utils`, `stats`) are not distributed at the standard
28+
/// `src/contrib/` location on CRAN. Instead, we must retrieve them from the base R
29+
/// sources themselves, which lives at `src/base/R-{major}/R-{version}.tar.gz`. Each
30+
/// package is located inside that tarball at `src/library/{package}/`.
31+
///
32+
/// Returns `Ok(None)` if the tarball is not on CRAN (e.g. a development R version), which
33+
/// we treat as "source unavailable" rather than an error.
34+
pub(crate) fn download(version: &str) -> anyhow::Result<Option<reqwest::blocking::Response>> {
35+
let major = version
36+
.split('.')
37+
.next()
38+
.ok_or_else(|| anyhow::anyhow!("Invalid R version for base source download: {version}"))?;
39+
40+
let mirrors = ["https://cran.r-project.org", "https://cran.rstudio.com"];
41+
let suffix = format!("src/base/R-{major}/R-{version}.tar.gz");
42+
let response = crate::download::download_with_mirrors(&suffix, &mirrors)?;
43+
44+
if response.status() == reqwest::StatusCode::NOT_FOUND {
45+
return Ok(None);
46+
}
47+
48+
if !response.status().is_success() {
49+
return Err(anyhow::anyhow!(
50+
"Failed to download R {version} source. HTTP {status}. Reason: {reason}",
51+
status = response.status(),
52+
reason = response.status().canonical_reason().unwrap_or("Unknown")
53+
));
54+
}
55+
56+
Ok(Some(response))
57+
}
58+
59+
/// Extract a single base package's R files from the R source tarball bytes.
60+
///
61+
/// Writes `R-{version}/src/library/{package}/R/*.R` entries into an `R/` folder inside
62+
/// the directory `destination_lock` lives in. Files are marked read only to match the
63+
/// rest of the cache.
64+
pub(crate) fn extract(
65+
package: &str,
66+
version: &str,
67+
bytes: &[u8],
68+
destination_lock: &FileLock,
69+
) -> anyhow::Result<()> {
70+
let destination = destination_lock.parent().join("R");
71+
std::fs::create_dir(&destination)?;
72+
73+
let cursor = Cursor::new(bytes);
74+
let gz = GzDecoder::new(cursor);
75+
let mut archive = tar::Archive::new(gz);
76+
77+
let prefix = format!("R-{version}/src/library/{package}/R/");
78+
79+
for entry in archive.entries()? {
80+
let mut entry = entry?;
81+
let path = entry.path()?;
82+
83+
let Some(relative) = path.strip_prefix(&prefix).ok() else {
84+
continue;
85+
};
86+
87+
if relative
88+
.extension()
89+
.is_none_or(|ext| ext != "R" && ext != "r")
90+
{
91+
continue;
92+
}
93+
94+
let absolute = destination.join(relative);
95+
96+
// Some base packages (e.g. `utils`) have platform-specific subdirs under `R/`
97+
// like `R/windows/` and `R/unix/` (their `Makefile` handles them at install
98+
// time). Create parents if one is required so `unpack()` can write nested files.
99+
if let Some(parent) = relative.parent().filter(|p| !p.as_os_str().is_empty()) {
100+
std::fs::create_dir_all(destination.join(parent))?;
101+
}
102+
103+
entry.unpack(&absolute)?;
104+
crate::fs::set_readonly(&absolute)?;
105+
}
106+
107+
Ok(())
108+
}
109+
110+
#[cfg(test)]
111+
mod tests {
112+
use oak_fs::file_lock::Filesystem;
113+
use tempfile::TempDir;
114+
115+
use crate::base::download;
116+
use crate::base::extract;
117+
118+
/// Requires internet access and downloads a large tarball of the R sources
119+
#[ignore = "Downloads a 40mb tarball"]
120+
#[test]
121+
fn test_base_download_and_extract() {
122+
let response = download("4.5.0").unwrap().expect("R 4.5.0 source to exist");
123+
let bytes = response.bytes().unwrap();
124+
125+
let destination_tempdir = TempDir::new().unwrap();
126+
let destination = Filesystem::new(destination_tempdir.path().to_path_buf());
127+
let destination_lock = destination.open_rw_exclusive_create(".lock").unwrap();
128+
129+
extract("utils", "4.5.0", &bytes, &destination_lock).unwrap();
130+
131+
// Spot check: `utils` has a well-known `help.R` file
132+
let help = destination_lock.parent().join("R").join("help.R");
133+
assert!(help.exists());
134+
assert!(help.metadata().unwrap().permissions().readonly());
135+
}
136+
137+
#[test]
138+
fn test_base_download_unknown_version_returns_none() {
139+
let response = download("0.0.0").unwrap();
140+
assert!(response.is_none());
141+
}
142+
}

crates/oak_sources/src/cran.rs

Lines changed: 18 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use std::io::Cursor;
2-
use std::path::Path;
32

43
use flate2::read::GzDecoder;
54
use oak_fs::file_lock::FileLock;
@@ -21,15 +20,13 @@ pub(crate) fn cache_cran(
2120
// But anything else is
2221
if !response.status().is_success() {
2322
return Err(anyhow::anyhow!(
24-
"Failed to download {package} {version}: HTTP {status}",
25-
status = response.status()
23+
"Failed to download {package} {version}. HTTP {status}. Reason: {reason}",
24+
status = response.status(),
25+
reason = response.status().canonical_reason().unwrap_or("Unknown")
2626
));
2727
}
2828

29-
let destination = destination_lock.parent().join("R");
30-
std::fs::create_dir(&destination)?;
31-
32-
extract(package, response, destination.as_path())?;
29+
extract(package, response, destination_lock)?;
3330

3431
Ok(true)
3532
}
@@ -38,16 +35,18 @@ fn download(package: &str, version: &str) -> anyhow::Result<reqwest::blocking::R
3835
let mirrors = ["https://cran.r-project.org", "https://cran.rstudio.com"];
3936

4037
// Try released version
41-
let response =
42-
download_with_mirrors(&format!("src/contrib/{package}_{version}.tar.gz"), &mirrors)?;
38+
let response = crate::download::download_with_mirrors(
39+
&format!("src/contrib/{package}_{version}.tar.gz"),
40+
&mirrors,
41+
)?;
4342

4443
if response.status() != reqwest::StatusCode::NOT_FOUND {
4544
// Found it
4645
return Ok(response);
4746
}
4847

4948
// Try archive
50-
let response = download_with_mirrors(
49+
let response = crate::download::download_with_mirrors(
5150
&format!("src/contrib/Archive/{package}/{package}_{version}.tar.gz"),
5251
&mirrors,
5352
)?;
@@ -56,41 +55,14 @@ fn download(package: &str, version: &str) -> anyhow::Result<reqwest::blocking::R
5655
Ok(response)
5756
}
5857

59-
fn download_with_mirrors(
60-
suffix: &str,
61-
mirrors: &[&str],
62-
) -> anyhow::Result<reqwest::blocking::Response> {
63-
if mirrors.is_empty() {
64-
panic!("`mirrors` can't be empty.");
65-
}
66-
67-
let mut out = None;
68-
69-
for mirror in mirrors {
70-
let url = format!("{mirror}/{suffix}");
71-
let response = reqwest::blocking::get(&url)?;
72-
let status = response.status();
73-
74-
out = Some(response);
75-
76-
if status == reqwest::StatusCode::SERVICE_UNAVAILABLE {
77-
// Try next mirror, this one is temporarily unavailable
78-
continue;
79-
} else {
80-
// We got an actual response of some kind from this mirror, return it
81-
break;
82-
}
83-
}
84-
85-
// Safety: We guarantee that there is at least 1 mirror
86-
Ok(out.unwrap())
87-
}
88-
8958
fn extract(
9059
package: &str,
9160
response: reqwest::blocking::Response,
92-
destination: &Path,
61+
destination_lock: &FileLock,
9362
) -> anyhow::Result<()> {
63+
let destination = destination_lock.parent().join("R");
64+
std::fs::create_dir(&destination)?;
65+
9466
// Pass response bytes of the `.tar.gz` into a gzip decoder, wrapped in a tar archive
9567
// reader, this abstracts away all the details, so we can just iterate over the
9668
// entries
@@ -104,19 +76,16 @@ fn extract(
10476

10577
for entry in archive.entries()? {
10678
let mut entry = entry?;
107-
10879
let path = entry.path()?;
109-
let path = path.to_string_lossy();
110-
111-
if !path.starts_with(&prefix) {
112-
continue;
113-
}
11480

115-
let Some(relative) = path.strip_prefix(&prefix) else {
81+
let Some(relative) = path.strip_prefix(&prefix).ok() else {
11682
continue;
11783
};
11884

119-
if !relative.ends_with(".R") && !relative.ends_with(".r") {
85+
if relative
86+
.extension()
87+
.is_none_or(|ext| ext != "R" && ext != "r")
88+
{
12089
continue;
12190
}
12291

crates/oak_sources/src/download.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/// Request `{mirror}/{suffix}` from each mirror in turn and return the first usable
/// response.
///
/// A mirror is skipped in favour of the next one when it answers with
/// `503 Service Unavailable` or when the request to it fails outright (e.g. a connection
/// error). Any other response is returned as is, whatever its status code, so callers
/// are responsible for checking the status.
///
/// The last mirror has nothing to fall back to: its response is returned even if it is
/// a 503, and a failed request to it is returned as the error.
///
/// # Panics
///
/// Panics if `mirrors` is empty.
pub(crate) fn download_with_mirrors(
    suffix: &str,
    mirrors: &[&str],
) -> anyhow::Result<reqwest::blocking::Response> {
    let Some((last_mirror, earlier_mirrors)) = mirrors.split_last() else {
        panic!("`mirrors` can't be empty.");
    };

    for mirror in earlier_mirrors {
        let url = format!("{mirror}/{suffix}");

        match reqwest::blocking::get(&url) {
            // We got an actual response of some kind from this mirror, return it
            Ok(response) if response.status() != reqwest::StatusCode::SERVICE_UNAVAILABLE => {
                return Ok(response);
            }
            // This mirror is temporarily unavailable or unreachable, try the next one
            _ => continue,
        }
    }

    // Nothing left to fall back to, so whatever the last mirror gives us is the result
    let url = format!("{last_mirror}/{suffix}");
    Ok(reqwest::blocking::get(&url)?)
}

0 commit comments

Comments
 (0)