Skip to content

Commit d49ea97

Browse files
committed
First pass at base package cache
1 parent 97f1f48 commit d49ea97

6 files changed

Lines changed: 455 additions & 97 deletions

File tree

crates/ark/src/lsp/diagnostics.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,7 @@ foo
16471647
version: "1.0.0".to_string(),
16481648
depends: vec![],
16491649
repository: None,
1650+
priority: None,
16501651
fields: Dcf::new(),
16511652
};
16521653
let package = Package::from_parts(PathBuf::from("/mock/path"), description, namespace);
@@ -1744,6 +1745,7 @@ foo
17441745
version: "1.0.0".to_string(),
17451746
depends: vec![],
17461747
repository: None,
1748+
priority: None,
17471749
fields: Dcf::new(),
17481750
};
17491751
let package1 =
@@ -1760,6 +1762,7 @@ foo
17601762
version: "1.0.0".to_string(),
17611763
depends: vec![],
17621764
repository: None,
1765+
priority: None,
17631766
fields: Dcf::new(),
17641767
};
17651768
let package2 =
@@ -1818,6 +1821,7 @@ foo
18181821
version: "1.0.0".to_string(),
18191822
depends: vec![],
18201823
repository: None,
1824+
priority: None,
18211825
fields: Dcf::new(),
18221826
};
18231827
let package = Package::from_parts(PathBuf::from("/mock/path"), description, namespace);

crates/oak_package/src/package_description.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ pub struct Description {
4040

4141
pub repository: Option<Repository>,
4242

43+
pub priority: Option<Priority>,
44+
4345
/// Raw DCF fields
4446
pub fields: Dcf,
4547
}
@@ -49,6 +51,12 @@ pub enum Repository {
4951
CRAN,
5052
}
5153

54+
/// Value of the `Priority` field of a DESCRIPTION file.
///
/// Only the two values recognized by [`Description::parse`] are modelled; any
/// other value (or a missing field) is represented as `None` on
/// `Description::priority`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Priority {
    /// `Priority: base`
    Base,
    /// `Priority: recommended`
    Recommended,
}
59+
5260
impl Description {
5361
/// Parse a DESCRIPTION file in DCF format
5462
pub fn parse(contents: &str) -> anyhow::Result<Self> {
@@ -84,11 +92,22 @@ impl Description {
8492
None
8593
});
8694

95+
let priority = fields.get("Priority").and_then(|priority| {
96+
if priority == "base" {
97+
return Some(Priority::Base);
98+
}
99+
if priority == "recommended" {
100+
return Some(Priority::Recommended);
101+
}
102+
None
103+
});
104+
87105
Ok(Description {
88106
name,
89107
version,
90108
depends,
91109
repository,
110+
priority,
92111
fields,
93112
})
94113
}
@@ -214,6 +233,26 @@ Repository: CRAN"#;
214233
assert_eq!(parsed.repository, Some(Repository::CRAN));
215234
}
216235

236+
#[test]
fn parses_description_with_priority() {
    // Parse a DESCRIPTION and keep only the recognized `Priority`, if any
    let priority_of = |contents: &str| Description::parse(contents).unwrap().priority;

    let base = r#"Package: utils
Version: 4.5.0
Priority: base"#;
    assert_eq!(priority_of(base), Some(Priority::Base));

    let recommended = r#"Package: MASS
Version: 7.3-65
Priority: recommended"#;
    assert_eq!(priority_of(recommended), Some(Priority::Recommended));

    // No `Priority` field at all
    let absent = r#"Package: mypkg
Version: 1.0.0"#;
    assert!(priority_of(absent).is_none());
}
255+
217256
#[test]
218257
fn parses_description_with_unknown_repository() {
219258
let desc = r#"Package: mypackage

crates/oak_sources/src/base.rs

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
use std::io::Cursor;
2+
use std::io::Read;
3+
4+
use flate2::read::GzDecoder;
5+
use oak_fs::file_lock::FileLock;
6+
7+
use crate::download::Outcome;
8+
9+
/// Names of the R base packages, i.e. everything that ships with R and carries
10+
/// `Priority: base` in its DESCRIPTION.
11+
pub(crate) const BASE_PACKAGES: &[&str] = &[
12+
"base",
13+
"compiler",
14+
"datasets",
15+
"graphics",
16+
"grDevices",
17+
"grid",
18+
"methods",
19+
"parallel",
20+
"splines",
21+
"stats",
22+
"stats4",
23+
"tcltk",
24+
"tools",
25+
"utils",
26+
];
27+
28+
/// Download the R source tarball for R {version} from CRAN's archive.
29+
///
30+
/// Base R packages (e.g. `base`, `utils`, `stats`) are not distributed at the standard
31+
/// `src/contrib/` location on CRAN. Instead, we must retrieve them from the base R
32+
/// sources themselves, which lives at `src/base/R-{major}/R-{version}.tar.gz`. Each
33+
/// package is located inside that tarball at `src/library/{package}/`.
34+
///
35+
/// Returns `Ok(None)` if the tarball is not on CRAN (e.g. a development R version), which
36+
/// we treat as "source unavailable" rather than an error.
37+
pub(crate) fn download(version: &str) -> anyhow::Result<Option<Vec<u8>>> {
38+
let major = version
39+
.split('.')
40+
.next()
41+
.ok_or_else(|| anyhow::anyhow!("Invalid R version for base source download: {version}"))?;
42+
43+
let mirrors = ["https://cran.r-project.org", "https://cran.rstudio.com"];
44+
let suffix = format!("src/base/R-{major}/R-{version}.tar.gz");
45+
46+
match crate::download::download_with_mirrors(&suffix, &mirrors)? {
47+
Outcome::Success(response) => {
48+
let mut bytes = Vec::new();
49+
response.into_body().into_reader().read_to_end(&mut bytes)?;
50+
Ok(Some(bytes))
51+
},
52+
Outcome::NotFound => Ok(None),
53+
}
54+
}
55+
56+
/// Extract a single base package's R files from the R source tarball bytes.
57+
///
58+
/// Writes `R-{version}/src/library/{package}/R/*.R` entries into an `R/` folder inside
59+
/// the directory `destination_lock` lives in. Files are marked read only to match the
60+
/// rest of the cache.
61+
pub(crate) fn extract(
62+
package: &str,
63+
version: &str,
64+
bytes: &[u8],
65+
destination_lock: &FileLock,
66+
) -> anyhow::Result<()> {
67+
let destination = destination_lock.parent().join("R");
68+
std::fs::create_dir(&destination)?;
69+
70+
let cursor = Cursor::new(bytes);
71+
let gz = GzDecoder::new(cursor);
72+
let mut archive = tar::Archive::new(gz);
73+
74+
let prefix = format!("R-{version}/src/library/{package}/R/");
75+
76+
for entry in archive.entries()? {
77+
let mut entry = entry?;
78+
let path = entry.path()?;
79+
80+
let Some(relative) = path.strip_prefix(&prefix).ok() else {
81+
continue;
82+
};
83+
84+
if relative
85+
.extension()
86+
.is_none_or(|ext| ext != "R" && ext != "r")
87+
{
88+
continue;
89+
}
90+
91+
let absolute = destination.join(relative);
92+
93+
// Some base packages (e.g. `utils`) have platform-specific subdirs under `R/`
94+
// like `R/windows/` and `R/unix/` (their `Makefile` handles them at install
95+
// time). Create parents if one is required so `unpack()` can write nested files.
96+
if let Some(parent) = relative.parent().filter(|p| !p.as_os_str().is_empty()) {
97+
std::fs::create_dir_all(destination.join(parent))?;
98+
}
99+
100+
entry.unpack(&absolute)?;
101+
crate::fs::set_readonly(&absolute)?;
102+
}
103+
104+
Ok(())
105+
}
106+
107+
#[cfg(test)]
mod tests {
    use oak_fs::file_lock::Filesystem;
    use tempfile::TempDir;

    use crate::base::download;
    use crate::base::extract;

    /// End-to-end check against the real R 4.5.0 sources. Needs internet access and
    /// fetches the full R source tarball, so it is ignored by default.
    #[ignore = "Downloads a 40mb tarball"]
    #[test]
    fn test_base_download_and_extract() {
        let tarball = download("4.5.0").unwrap().expect("R 4.5.0 source to exist");

        let tempdir = TempDir::new().unwrap();
        let filesystem = Filesystem::new(tempdir.path().to_path_buf());
        let lock = filesystem.open_rw_exclusive_create(".lock").unwrap();

        extract("utils", "4.5.0", &tarball, &lock).unwrap();

        // Spot check: `utils` has a well-known `help.R` file
        let help_file = lock.parent().join("R").join("help.R");
        assert!(help_file.exists());

        let permissions = help_file.metadata().unwrap().permissions();
        assert!(permissions.readonly());
    }

    /// A version with no tarball on CRAN is reported as `None`, not as an error.
    #[test]
    fn test_base_download_unknown_version_returns_none() {
        let tarball = download("0.0.0").unwrap();
        assert!(tarball.is_none());
    }
}

crates/oak_sources/src/hash.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use sha2::Digest;
2+
use sha2::Sha256;
3+
4+
/// Retain 8 ASCII characters for each hash fragment
5+
pub(crate) fn hash(contents: &str) -> String {
6+
let mut hash = hex::encode(Sha256::digest(contents));
7+
hash.truncate(8);
8+
hash
9+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
use std::fs::read_to_string;
2+
use std::path::Path;
3+
use std::path::PathBuf;
4+
5+
use oak_package::package_description::Description;
6+
7+
pub(crate) struct InstalledPackage {
8+
key: String,
9+
name: String,
10+
library_path: PathBuf,
11+
description: Description,
12+
description_hash: String,
13+
}
14+
15+
impl InstalledPackage {
16+
pub(crate) fn find(package: &str, library_paths: &[PathBuf]) -> anyhow::Result<Option<Self>> {
17+
let mut library_path = None;
18+
19+
for library_path_candidate in library_paths {
20+
if library_path_candidate.join(package).exists() {
21+
library_path = Some(library_path_candidate);
22+
break;
23+
}
24+
}
25+
26+
let Some(library_path) = library_path else {
27+
// Not installed
28+
return Ok(None);
29+
};
30+
31+
let package_path = library_path.join(package);
32+
33+
let description_path = package_path.join("DESCRIPTION");
34+
let description_contents = read_to_string(&description_path)?;
35+
let description = Description::parse(&description_contents)?;
36+
37+
let library_path_hash = crate::hash::hash(library_path.to_string_lossy().as_ref());
38+
let description_hash = crate::hash::hash(&description_contents);
39+
40+
// Flat key unique enough to handle:
41+
// - The same R package across multiple libpaths
42+
// - Reinstalling a dev R package without changing the version (0.1.0.9000)
43+
let key = format!(
44+
"{name}_{version}_libpath-{library_path_hash}_description-{description_hash}",
45+
name = package,
46+
version = &description.version,
47+
library_path_hash = &library_path_hash,
48+
description_hash = &description_hash
49+
);
50+
51+
Ok(Some(Self {
52+
key,
53+
name: package.to_string(),
54+
library_path: library_path.clone(),
55+
description,
56+
description_hash,
57+
}))
58+
}
59+
60+
pub(crate) fn name(&self) -> &str {
61+
&self.name
62+
}
63+
64+
pub(crate) fn version(&self) -> &str {
65+
&self.description().version
66+
}
67+
68+
pub(crate) fn description(&self) -> &Description {
69+
&self.description
70+
}
71+
72+
// Flat key unique enough to handle:
73+
// - The same R package across multiple libpaths
74+
// - Reinstalling a dev R package without changing the version (0.1.0.9000)
75+
pub(crate) fn key(&self) -> &str {
76+
&self.key
77+
}
78+
79+
pub(crate) fn library_path(&self) -> &Path {
80+
self.library_path.as_path()
81+
}
82+
83+
pub(crate) fn package_path(&self) -> PathBuf {
84+
self.library_path.join(&self.name)
85+
}
86+
87+
pub(crate) fn description_path(&self) -> PathBuf {
88+
self.package_path().join("DESCRIPTION")
89+
}
90+
91+
pub(crate) fn namespace_path(&self) -> PathBuf {
92+
self.package_path().join("NAMESPACE")
93+
}
94+
95+
pub(crate) fn description_hash(&self) -> &str {
96+
&self.description_hash
97+
}
98+
}

0 commit comments

Comments
 (0)