77fcp (fetch conda packages) module
88"""
99
10+ from __future__ import annotations
11+
1012import logging
1113import os
1214import shutil
3840)
3941
4042if TYPE_CHECKING :
43+ from collections .abc import Iterable
44+ from typing import Literal
45+
4146 from .conda_interface import PackageCacheRecord
4247
4348logger = logging .getLogger (__name__ )
@@ -144,15 +149,47 @@ def _fetch(download_dir, precs):
144149 return list (dict .fromkeys (PrefixGraph (pc .iter_records ()).graph ))
145150
146151
147- def check_duplicates_files (pc_recs , platform , duplicate_files = "error" ):
152+ def check_duplicates_files (
153+ pc_recs : Iterable [PackageCacheRecord ],
154+ platform : str ,
155+ duplicate_files : Literal ["error" , "warn" , "skip" ] = "error" ,
156+ env_prefixes : dict [PackageCacheRecord , str ] | None = None ,
157+ ) -> tuple [int , int , int ]:
158+ """
159+ Check for duplicate files across packages and compute size/path metrics.
160+
161+ Iterates through all files in the provided package cache records to:
162+ 1. Detect duplicate files (same path in multiple packages)
163+ 2. Compute approximate tarball and extracted sizes
164+ 3. Track the longest relative file path (for MAX_PATH validation on Windows)
165+
166+ Args:
167+ pc_recs: Package cache records to check.
168+ platform: Target platform string (e.g., "win-64", "linux-64").
169+ duplicate_files: How to handle duplicates - "error", "warn", or "skip".
170+ env_prefixes: Optional dict mapping PackageCacheRecord -> path prefix string.
171+ Used to account for extra_envs paths which are installed under
172+ "envs/<name>/" rather than the base install directory. Records not
173+ in this dict are assumed to be in the base environment (no prefix).
174+ A trailing separator is added automatically if missing.
175+
176+ Returns:
177+ Tuple of (approx_tarball_size, approx_extracted_size, max_relative_path_length)
178+ """
148179 assert duplicate_files in ("warn" , "skip" , "error" )
180+ if env_prefixes is None :
181+ env_prefixes = {}
182+ for env , prefix in env_prefixes .items ():
183+ if prefix and not prefix .endswith ("/" ):
184+ env_prefixes [env ] += "/"
149185
150186 map_members_scase = defaultdict (set )
151187 map_members_icase = defaultdict (lambda : {"files" : set (), "fns" : set ()})
152188
153189 # Start both totals at a minimum buffer of 50 MiB (52428800 bytes)
154190 total_tarball_size = 52428800
155191 total_extracted_pkgs_size = 52428800
192+ max_relative_path_length = 0
156193
157194 for pc_rec in pc_recs :
158195 fn = pc_rec .fn
@@ -161,8 +198,12 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
161198 total_tarball_size += int (pc_rec .get ("size" , 0 ))
162199
163200 paths_data = read_paths_json (extracted_package_dir ).paths
201+ env_prefix_len = len (env_prefixes .get (pc_rec , "" ))
164202 for path_data in paths_data :
165203 short_path = path_data .path
204+ max_relative_path_length = max (
205+ max_relative_path_length , env_prefix_len + len (short_path )
206+ )
166207 try :
167208 size = path_data .size_in_bytes or getsize (join (extracted_package_dir , short_path ))
168209 except AttributeError :
@@ -176,7 +217,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
176217 map_members_icase [short_path_lower ]["fns" ].add (fn )
177218
178219 if duplicate_files == "skip" :
179- return total_tarball_size , total_extracted_pkgs_size
220+ return total_tarball_size , total_extracted_pkgs_size , max_relative_path_length
180221
181222 logger .info ("Checking for duplicate files ..." )
182223 for member in map_members_scase :
@@ -201,7 +242,7 @@ def check_duplicates_files(pc_recs, platform, duplicate_files="error"):
201242 else :
202243 sys .exit (f"Error: { msg_str } " )
203244
204- return total_tarball_size , total_extracted_pkgs_size
245+ return total_tarball_size , total_extracted_pkgs_size , max_relative_path_length
205246
206247
207248def _precs_from_environment (environment , input_dir ):
@@ -443,18 +484,24 @@ def _main(
443484 input_dir = input_dir ,
444485 )
445486 if dry_run :
446- return None , None , None , None , None , None , None , None
487+ return None , None , None , None , None , None , None , None , None
447488 pc_recs , _urls , dists , has_conda = _fetch_precs (
448489 precs , download_dir , transmute_file_type = transmute_file_type
449490 )
450491 all_pc_recs = pc_recs .copy ()
451492
452493 extra_envs_data = {}
494+ env_prefixes = {} # Maps pc_rec -> "envs/<name>/" prefix for max path calculation
453495 for env_name , env_precs in extra_envs_precs .items ():
454496 env_pc_recs , env_urls , env_dists , _ = _fetch_precs (
455497 env_precs , download_dir , transmute_file_type = transmute_file_type
456498 )
457499 extra_envs_data [env_name ] = {"_urls" : env_urls , "_dists" : env_dists , "_records" : env_precs }
500+ env_prefix = f"envs/{ env_name } /"
501+ for pc_rec in env_pc_recs :
502+ existing_prefix = env_prefixes .get (pc_rec , "" )
503+ if len (env_prefix ) > len (existing_prefix ):
504+ env_prefixes [pc_rec ] = env_prefix
458505 all_pc_recs += env_pc_recs
459506
460507 duplicate_files = "warn" if ignore_duplicate_files else "error"
@@ -463,8 +510,12 @@ def _main(
463510 duplicate_files = "skip"
464511
465512 all_pc_recs = list ({rec : None for rec in all_pc_recs }) # deduplicate
466- approx_tarballs_size , approx_pkgs_size = check_duplicates_files (
467- pc_recs , platform , duplicate_files = duplicate_files
513+ # Pass all_pc_recs (base + extra_envs) to check_duplicates_files:
514+ # - When extra_envs exists, duplicate_files="skip" so only sizes and max path are computed
515+ # - When no extra_envs, all_pc_recs == pc_recs
516+ # - env_prefixes dict ensures max path accounts for "envs/<name>/" prefix in extra_envs
517+ approx_tarballs_size , approx_pkgs_size , max_relative_path_length = check_duplicates_files (
518+ all_pc_recs , platform , duplicate_files = duplicate_files , env_prefixes = env_prefixes
468519 )
469520
470521 return (
@@ -476,6 +527,7 @@ def _main(
476527 approx_pkgs_size ,
477528 has_conda ,
478529 extra_envs_data ,
530+ max_relative_path_length ,
479531 )
480532
481533
@@ -529,6 +581,7 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"):
529581 approx_pkgs_size ,
530582 has_conda ,
531583 extra_envs_info ,
584+ max_relative_path_length ,
532585 ) = _main (
533586 name ,
534587 version ,
@@ -561,3 +614,4 @@ def main(info, verbose=True, dry_run=False, conda_exe="conda.exe"):
561614 info ["_has_conda" ] = has_conda
562615 # contains {env_name: {"_urls": ..., "_dists": ..., "_records": ...}} for each extra environment
563616 info ["_extra_envs_info" ] = extra_envs_info
617+ info ["_max_relative_path_length" ] = max_relative_path_length
0 commit comments