11import logging
2+ import os
23from collections import defaultdict
34from gettext import gettext as _
45from itertools import groupby
@@ -119,18 +120,35 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
119120 .first ()
120121 .artifact
121122 )
122- new_data = artifact_to_python_content_data (package .filename , main_artifact , domain )
123+ # For wheels, keep the temp file so we can reuse it for metadata artifact
124+ is_wheel = package .filename .endswith (".whl" )
125+ if is_wheel :
126+ new_data , temp_path = artifact_to_python_content_data (
127+ package .filename , main_artifact , domain , keep_temp_file = True
128+ )
129+ else :
130+ new_data = artifact_to_python_content_data (
131+ package .filename , main_artifact , domain
132+ )
133+ temp_path = None
134+
123135 total_metadata_repaired += update_metadata_artifact_if_needed (
124136 package ,
125137 new_data .get ("metadata_sha256" ),
126138 main_artifact ,
127139 metadata_batch ,
128140 pkgs_metadata_not_repaired ,
141+ temp_path = temp_path ,
129142 )
130143 total_repaired += update_package_if_needed (
131144 package , new_data , batch , set_of_update_fields
132145 )
133146
147+ # Clean up temp file and close artifact file handle
148+ if temp_path and os .path .exists (temp_path ):
149+ os .unlink (temp_path )
150+ main_artifact .file .close ()
151+
134152 # For on-demand content, we expect that:
135153 # 1. PythonPackageContent always has correct name and version
136154 # 2. RemoteArtifact always has correct sha256
@@ -240,6 +258,7 @@ def update_metadata_artifact_if_needed(
240258 main_artifact : Artifact ,
241259 metadata_batch : list [tuple ],
242260 pkgs_metadata_not_repaired : set [str ],
261+ temp_path : str | None = None ,
243262) -> int :
244263 """
245264 Repairs metadata artifacts for wheel packages by creating missing metadata artifacts
@@ -252,6 +271,7 @@ def update_metadata_artifact_if_needed(
252271 main_artifact: The main package artifact used to generate metadata.
253272 metadata_batch: List of tuples for batch processing (updated in-place).
254273 pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).
274+ temp_path: Path to already-extracted temp wheel file, avoids re-reading from S3.
255275
256276 Returns:
257277 Number of repaired metadata artifacts (only when batch is flushed at BULK_SIZE).
@@ -266,13 +286,13 @@ def update_metadata_artifact_if_needed(
266286
267287 # Create missing
268288 if not cas :
269- metadata_batch .append ((package , main_artifact ))
289+ metadata_batch .append ((package , main_artifact , temp_path ))
270290 # Fix existing
271291 elif new_metadata_sha256 != original_metadata_sha256 :
272292 ca = cas .first ()
273293 metadata_artifact = ca .artifact
274294 if metadata_artifact is None or (metadata_artifact .sha256 != new_metadata_sha256 ):
275- metadata_batch .append ((package , main_artifact ))
295+ metadata_batch .append ((package , main_artifact , temp_path ))
276296
277297 if len (metadata_batch ) == BULK_SIZE :
278298 not_repaired = _process_metadata_batch (metadata_batch )
@@ -289,16 +309,21 @@ def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
289309 and their corresponding ContentArtifacts.
290310
291311 Args:
292- metadata_batch: List of (package, main_artifact) tuples.
312+ metadata_batch: List of (package, main_artifact, temp_path ) tuples.
293313
294314 Returns:
295315 Set of package PKs for which metadata artifacts could not be created.
296316 """
297317 not_repaired = set ()
298318 content_artifacts = []
299319
300- for package , main_artifact in metadata_batch :
301- metadata_artifact = artifact_to_metadata_artifact (package .filename , main_artifact )
320+ for package , main_artifact , temp_path in metadata_batch :
321+ if temp_path and os .path .exists (temp_path ):
322+ metadata_artifact = artifact_to_metadata_artifact_from_path (
323+ package .filename , temp_path
324+ )
325+ else :
326+ metadata_artifact = artifact_to_metadata_artifact (package .filename , main_artifact )
302327 if metadata_artifact :
303328 ca = ContentArtifact (
304329 artifact = metadata_artifact ,
0 commit comments