Skip to content

Commit c5bd2c8

Browse files
committed
fix: cache prefix info in staging metadata instead of reinstalling host packages
Instead of reinstalling the staging cache's host packages during install_environments (which is slow), cache the PrefixInfo data (path-to-package and package nature mappings) in the staging cache metadata. When linking checks run, merge this cached info into the current prefix's PrefixInfo so shared libraries from the staging cache's host packages can be correctly attributed. This avoids the performance cost of re-downloading and installing packages while still providing the linking check with the information it needs. https://claude.ai/code/session_014mGW5MYfcfu8NFWxUJbU1J
1 parent 37eb1ef commit c5bd2c8

8 files changed

Lines changed: 109 additions & 62 deletions

File tree

py-rattler-build/rust/src/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ pub fn build_rendered_variant_py(
225225
finalized_sources: None,
226226
finalized_cache_dependencies: None,
227227
finalized_cache_sources: None,
228+
cached_prefix_info: None,
228229
build_summary: Arc::new(Mutex::new(BuildSummary::default())),
229230
system_tools: SystemTools::default(),
230231
extra_meta: None,

src/build.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,12 @@ pub async fn run_build(
137137
// This will build or restore staging caches and return their dependencies/sources if inherited
138138
let staging_result = output.process_staging_caches(tool_configuration).await?;
139139

140-
// If we inherit from a staging cache, store its dependencies and sources
141-
if let Some((deps, sources)) = staging_result {
140+
// If we inherit from a staging cache, store its dependencies, sources,
141+
// and cached prefix info (for linking checks)
142+
if let Some((deps, sources, cached_prefix_info)) = staging_result {
142143
output.finalized_cache_dependencies = Some(deps);
143144
output.finalized_cache_sources = Some(sources);
145+
output.cached_prefix_info = Some(cached_prefix_info);
144146
}
145147

146148
// Fetch sources for this output

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ pub async fn get_build_output(
600600
finalized_sources: None,
601601
finalized_cache_dependencies: None,
602602
finalized_cache_sources: None,
603+
cached_prefix_info: None,
603604
system_tools: SystemTools::new(),
604605
build_summary: Arc::new(Mutex::new(BuildSummary::default())),
605606
extra_meta: Some(

src/post_process/checks.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,15 @@ pub fn perform_linking_checks(
528528
let dynamic_linking = &output.recipe.build().dynamic_linking;
529529
let system_libs = find_system_libs(output)?;
530530

531-
let prefix_info = PrefixInfo::from_prefix(output.prefix())?;
531+
let mut prefix_info = PrefixInfo::from_prefix(output.prefix())?;
532+
533+
// Merge cached prefix info from the staging cache (if any).
534+
// The staging cache's host packages are not physically installed in the
535+
// prefix (their conda-meta records are absent), so we merge the cached
536+
// mappings to allow the linking checks to attribute shared libraries.
537+
if let Some(cached) = &output.cached_prefix_info {
538+
prefix_info.merge_cached(cached);
539+
}
532540

533541
let host_dso_packages = host_run_export_dso_packages(output, &prefix_info.package_to_nature);
534542
tracing::trace!("Host run_export DSO packages: {host_dso_packages:#?}",);

src/post_process/package_nature.rs

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,11 @@ use std::{
1818
hash::Hash,
1919
ops::Sub,
2020
path::{Path, PathBuf},
21+
str::FromStr,
2122
};
2223

2324
/// The nature of a package
24-
#[derive(Debug, PartialEq, Eq)]
25+
#[derive(Debug, PartialEq, Eq, Clone, serde::Serialize, serde::Deserialize)]
2526
pub enum PackageNature {
2627
/// Libraries
2728
RunExportsLibrary,
@@ -213,6 +214,57 @@ impl PrefixInfo {
213214

214215
Ok(prefix_info)
215216
}
217+
218+
/// Merge cached prefix info (from a staging cache) into this PrefixInfo.
219+
/// Cached entries are only added if they don't already exist, so any
/// mapping already present in `self` takes precedence over the cached one.
///
/// Both maps in `cached` are keyed by plain strings. Every package name is
/// parsed back with `PackageName::from_str`; an entry whose name fails to
/// parse is skipped silently (nothing is returned or logged for it here).
220+
pub fn merge_cached(&mut self, cached: &CachedPrefixInfo) {
221+
// Package natures: `entry(..).or_insert(..)` leaves an existing nature untouched.
for (name_str, nature) in &cached.package_to_nature {
222+
if let Ok(name) = PackageName::from_str(name_str) {
223+
self.package_to_nature.entry(name).or_insert(nature.clone());
224+
}
225+
}
226+
// File ownership: same "first writer wins" rule, keyed by path.
for (path_str, name_str) in &cached.path_to_package {
227+
if let Ok(name) = PackageName::from_str(name_str) {
228+
// Rebuild the map key from the cached string.
// NOTE(review): `CaseInsensitivePathBuf` presumably compares paths
// case-insensitively; confirm against its definition.
let path_buf: CaseInsensitivePathBuf = PathBuf::from(path_str).into();
229+
self.path_to_package.entry(path_buf).or_insert(name);
230+
}
231+
}
232+
}
233+
}
234+
235+
/// Serializable prefix info for storing in staging cache metadata.
236+
/// Maps file paths to their owning packages and packages to their nature,
237+
/// so that linking checks can attribute libraries without needing the
238+
/// original conda-meta records installed.
///
/// Paths and package names are stored as plain `String`s rather than as
/// typed paths / package names. The derived `Default` yields two empty maps.
239+
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
240+
pub struct CachedPrefixInfo {
241+
/// Maps file paths (relative to prefix) to package names.
/// Key: path as a string; value: name of the package that owns the file.
242+
pub path_to_package: HashMap<String, String>,
243+
/// Maps package names to their nature.
/// Key: package name as a string; value: the `PackageNature` recorded for it.
244+
pub package_to_nature: HashMap<String, PackageNature>,
245+
}
246+
247+
impl CachedPrefixInfo {
248+
/// Build a CachedPrefixInfo from a PrefixInfo.
///
/// Copies both maps of `info` into their string-keyed, serializable form.
/// `info` is only borrowed; natures are cloned and names/paths are
/// converted to owned `String`s.
249+
pub(crate) fn from_prefix_info(info: &PrefixInfo) -> Self {
250+
Self {
251+
path_to_package: info
252+
.path_to_package
253+
.iter()
254+
.map(|(path, name)| {
255+
(
256+
// `to_string_lossy` substitutes U+FFFD for any non-UTF-8
// sequence, so such a path would not round-trip exactly.
path.path.to_string_lossy().to_string(),
257+
// Store the normalized form of the package name.
name.as_normalized().to_string(),
258+
)
259+
})
260+
.collect(),
261+
package_to_nature: info
262+
.package_to_nature
263+
.iter()
264+
// Same normalized-name key as above, paired with a clone of the nature.
.map(|(name, nature)| (name.as_normalized().to_string(), nature.clone()))
265+
.collect(),
266+
}
267+
}
216268
}
217269

218270
#[cfg(test)]

src/render/resolved_dependencies.rs

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,58 +1145,6 @@ impl Output {
11451145
return Ok(());
11461146
}
11471147

1148-
// When inheriting from a staging cache, the cache's host packages
1149-
// (e.g. zlib, libiconv) must also be installed in the host prefix.
1150-
// Without them the linking checks cannot attribute shared libraries
1151-
// to their providing packages and will report false overlinking.
1152-
if let Some(cache_deps) = &self.finalized_cache_dependencies {
1153-
if let Some(cache_host) = &cache_deps.host {
1154-
let mut merged = dependencies.clone();
1155-
1156-
let host = merged.host.get_or_insert_with(|| ResolvedDependencies {
1157-
specs: Vec::new(),
1158-
resolved: Vec::new(),
1159-
});
1160-
1161-
// Add cache host packages that aren't already present
1162-
let existing_names: std::collections::HashSet<_> = host
1163-
.resolved
1164-
.iter()
1165-
.map(|r| r.package_record.name.clone())
1166-
.collect();
1167-
1168-
for record in &cache_host.resolved {
1169-
if !existing_names.contains(&record.package_record.name) {
1170-
host.resolved.push(record.clone());
1171-
// Also add the spec so the package is tracked as explicit
1172-
for spec in &cache_host.specs {
1173-
if spec.spec().name.as_ref()
1174-
== Some(&rattler_conda_types::PackageNameMatcher::Exact(
1175-
record.package_record.name.clone(),
1176-
))
1177-
{
1178-
host.specs.push(spec.clone());
1179-
break;
1180-
}
1181-
}
1182-
}
1183-
}
1184-
1185-
tracing::info!(
1186-
"Merged {} host packages from staging cache into host environment",
1187-
cache_host.resolved.len().saturating_sub(
1188-
cache_host
1189-
.resolved
1190-
.iter()
1191-
.filter(|r| existing_names.contains(&r.package_record.name))
1192-
.count()
1193-
)
1194-
);
1195-
1196-
return install_environments(self, &merged, tool_configuration).await;
1197-
}
1198-
}
1199-
12001148
install_environments(self, dependencies, tool_configuration).await
12011149
}
12021150
}

src/staging.rs

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use crate::{
2323
env_vars,
2424
metadata::{Output, build_reindexed_channels},
2525
packaging::Files,
26+
post_process::package_nature::{CachedPrefixInfo, PrefixInfo},
2627
render::resolved_dependencies::{
2728
FinalizedDependencies, RunExportsDownload, install_environments, resolve_dependencies,
2829
},
@@ -72,6 +73,13 @@ pub struct StagingCacheMetadata {
7273

7374
/// The variant configuration that was used
7475
pub variant: BTreeMap<NormalizedKey, Variable>,
76+
77+
/// Cached prefix info (path-to-package and package nature mappings)
78+
/// from the host environment at staging cache build time.
79+
/// This allows linking checks to attribute shared libraries to their
80+
/// providing packages without needing the conda-meta records installed.
81+
#[serde(default)]
82+
pub cached_prefix_info: CachedPrefixInfo,
7583
}
7684

7785
impl Output {
@@ -142,7 +150,7 @@ impl Output {
142150
/// 2. If yes, restore the cached files to the prefix
143151
/// 3. If no, build the staging cache and save it
144152
///
145-
/// Returns the finalized dependencies and sources from the staging cache
153+
/// Returns the finalized dependencies, sources, and cached prefix info
146154
pub async fn build_or_restore_staging_cache(
147155
&self,
148156
staging: &StagingCache,
@@ -151,6 +159,7 @@ impl Output {
151159
(
152160
FinalizedDependencies,
153161
Vec<rattler_build_recipe::stage1::Source>,
162+
CachedPrefixInfo,
154163
),
155164
miette::Error,
156165
> {
@@ -217,6 +226,7 @@ impl Output {
217226
(
218227
FinalizedDependencies,
219228
Vec<rattler_build_recipe::stage1::Source>,
229+
CachedPrefixInfo,
220230
),
221231
miette::Error,
222232
> {
@@ -368,6 +378,12 @@ impl Output {
368378
.run()
369379
.into_diagnostic()?;
370380

381+
// Capture prefix info (path-to-package and package nature mappings)
382+
// from the host environment while conda-meta records are still present.
383+
// This data is needed by linking checks in inheriting outputs.
384+
let prefix_info = PrefixInfo::from_prefix(self.prefix()).into_diagnostic()?;
385+
let cached_prefix_info = CachedPrefixInfo::from_prefix_info(&prefix_info);
386+
371387
// Save metadata
372388
let metadata = StagingCacheMetadata {
373389
name: staging.name.clone(),
@@ -377,6 +393,7 @@ impl Output {
377393
work_dir_files: copied_work_dir.copied_paths().to_vec(),
378394
prefix: self.prefix().to_path_buf(),
379395
variant: staging.used_variant.clone(),
396+
cached_prefix_info,
380397
};
381398

382399
let metadata_json = serde_json::to_string_pretty(&metadata).into_diagnostic()?;
@@ -388,7 +405,11 @@ impl Output {
388405
metadata.work_dir_files.len()
389406
);
390407

391-
Ok((finalized_dependencies, finalized_sources))
408+
Ok((
409+
finalized_dependencies,
410+
finalized_sources,
411+
metadata.cached_prefix_info,
412+
))
392413
}
393414

394415
/// Restore a staging cache from disk
@@ -400,6 +421,7 @@ impl Output {
400421
(
401422
FinalizedDependencies,
402423
Vec<rattler_build_recipe::stage1::Source>,
424+
CachedPrefixInfo,
403425
),
404426
miette::Error,
405427
> {
@@ -439,7 +461,11 @@ impl Output {
439461
metadata.name
440462
);
441463

442-
Ok((metadata.finalized_dependencies, metadata.finalized_sources))
464+
Ok((
465+
metadata.finalized_dependencies,
466+
metadata.finalized_sources,
467+
metadata.cached_prefix_info,
468+
))
443469
}
444470

445471
/// Process all staging caches for this output
@@ -454,6 +480,7 @@ impl Output {
454480
Option<(
455481
FinalizedDependencies,
456482
Vec<rattler_build_recipe::stage1::Source>,
483+
CachedPrefixInfo,
457484
)>,
458485
miette::Error,
459486
> {
@@ -470,7 +497,7 @@ impl Output {
470497
"Building or restoring staging cache: {}",
471498
staging_cache.name
472499
);
473-
let (_deps, _sources) = self
500+
let (_deps, _sources, _prefix_info) = self
474501
.build_or_restore_staging_cache(staging_cache, tool_configuration)
475502
.await?;
476503
}
@@ -491,11 +518,11 @@ impl Output {
491518
})?;
492519

493520
// Get or build the cache
494-
let (deps, sources) = self
521+
let (deps, sources, cached_prefix_info) = self
495522
.build_or_restore_staging_cache(staging, tool_configuration)
496523
.await?;
497524

498-
Ok(Some((deps, sources)))
525+
Ok(Some((deps, sources, cached_prefix_info)))
499526
} else {
500527
Ok(None)
501528
}

src/types/build_output.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use std::{
2020

2121
use crate::{
2222
console_utils::github_integration_enabled,
23+
post_process::package_nature::CachedPrefixInfo,
2324
render::resolved_dependencies::FinalizedDependencies,
2425
system_tools::SystemTools,
2526
types::{BuildConfiguration, BuildSummary, PlatformWithVirtualPackages},
@@ -50,6 +51,13 @@ pub struct BuildOutput {
5051
#[serde(skip_serializing_if = "Option::is_none")]
5152
pub finalized_cache_sources: Option<Vec<Source>>,
5253

54+
/// Cached prefix info from the staging cache's host environment.
55+
/// Used by linking checks to attribute shared libraries to packages
56+
/// that were installed during the staging cache build but are not
57+
/// physically present in the current host prefix.
58+
#[serde(skip_serializing_if = "Option::is_none")]
59+
pub cached_prefix_info: Option<CachedPrefixInfo>,
60+
5361
/// Summary of the build
5462
#[serde(skip)]
5563
pub build_summary: Arc<Mutex<BuildSummary>>,

0 commit comments

Comments
 (0)