11import logging
2+ import os
23from collections import defaultdict
34from gettext import gettext as _
45from itertools import groupby
89from django .db .models .query import QuerySet
910from pulp_python .app .models import PythonPackageContent , PythonRepository
1011from pulp_python .app .utils import (
11- artifact_to_metadata_artifact ,
1212 artifact_to_python_content_data ,
13+ copy_artifact_to_temp_file ,
14+ extract_wheel_metadata ,
1315 fetch_json_release_metadata ,
16+ metadata_content_to_artifact ,
1417 parse_metadata ,
1518)
16- from pulpcore .plugin .models import Artifact , ContentArtifact , ProgressReport
19+ from pulpcore .plugin .models import ContentArtifact , ProgressReport
1720from pulpcore .plugin .util import get_domain
1821
1922log = logging .getLogger (__name__ )
2023
2124
22- BULK_SIZE = 1000
25+ BULK_SIZE = 250
2326
2427
2528def repair (repository_pk : UUID ) -> None :
@@ -118,11 +121,21 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
118121 .first ()
119122 .artifact
120123 )
121- new_data = artifact_to_python_content_data (package .filename , main_artifact , domain )
124+ # Copy the artifact to a temp file once and reuse it to extract both the content data and (for wheels) the metadata
125+ temp_path = copy_artifact_to_temp_file (main_artifact , package .filename )
126+ try :
127+ new_data = artifact_to_python_content_data (
128+ package .filename , main_artifact , domain , temp_path = temp_path
129+ )
130+ metadata_content = (
131+ extract_wheel_metadata (temp_path ) if package .filename .endswith (".whl" ) else None
132+ )
133+ finally :
134+ os .unlink (temp_path )
122135 total_metadata_repaired += update_metadata_artifact_if_needed (
123136 package ,
124137 new_data .get ("metadata_sha256" ),
125- main_artifact ,
138+ metadata_content ,
126139 metadata_batch ,
127140 pkgs_metadata_not_repaired ,
128141 )
@@ -236,7 +249,7 @@ def update_package_if_needed(
236249def update_metadata_artifact_if_needed (
237250 package : PythonPackageContent ,
238251 new_metadata_sha256 : str | None ,
239- main_artifact : Artifact ,
252+ metadata_content : bytes | None ,
240253 metadata_batch : list [tuple ],
241254 pkgs_metadata_not_repaired : set [str ],
242255) -> int :
@@ -248,7 +261,7 @@ def update_metadata_artifact_if_needed(
248261 Args:
249262 package: Package to check for metadata changes.
250263 new_metadata_sha256: The correct metadata_sha256 extracted from the main artifact, or None.
251- main_artifact: The main package artifact used to generate metadata .
264+ metadata_content: Raw metadata bytes extracted from the wheel, or None.
252265 metadata_batch: List of tuples for batch processing (updated in-place).
253266 pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).
254267
@@ -265,13 +278,13 @@ def update_metadata_artifact_if_needed(
265278
266279 # Create missing
267280 if not cas :
268- metadata_batch .append ((package , main_artifact ))
281+ metadata_batch .append ((package , metadata_content ))
269282 # Fix existing
270283 elif new_metadata_sha256 != original_metadata_sha256 :
271284 ca = cas .first ()
272285 metadata_artifact = ca .artifact
273286 if metadata_artifact is None or (metadata_artifact .sha256 != new_metadata_sha256 ):
274- metadata_batch .append ((package , main_artifact ))
287+ metadata_batch .append ((package , metadata_content ))
275288
276289 if len (metadata_batch ) == BULK_SIZE :
277290 not_repaired = _process_metadata_batch (metadata_batch )
@@ -288,16 +301,16 @@ def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
288301 and their corresponding ContentArtifacts.
289302
290303 Args:
291- metadata_batch: List of (package, main_artifact ) tuples.
304+ metadata_batch: List of (package, metadata_content) tuples.
292305
293306 Returns:
294307 Set of package PKs for which metadata artifacts could not be created.
295308 """
296309 not_repaired = set ()
297310 content_artifacts = []
298311
299- for package , main_artifact in metadata_batch :
300- metadata_artifact = artifact_to_metadata_artifact ( package . filename , main_artifact )
312+ for package , metadata_content in metadata_batch :
313+ metadata_artifact = metadata_content_to_artifact ( metadata_content )
301314 if metadata_artifact :
302315 ca = ContentArtifact (
303316 artifact = metadata_artifact ,
0 commit comments