Skip to content

Commit 01e7240

Browse files
committed
fix: don't pass temp_path through metadata batch
The temp file is deleted at the end of each loop iteration (in the finally block), but the metadata batch is flushed later, so any temp_path stored in the batch points to an already-deleted file. Fix: use temp_path only for artifact_to_python_content_data (this still avoids one S3 read per package) and stop storing it in the metadata batch. When the batch is processed, artifact_to_metadata_artifact is called without temp_path and falls back to its original behavior, reading the main artifact a second time. Combined with BULK_SIZE=250, this is still a major memory improvement. JIRA: PULP-1573
1 parent 9cda7a0 commit 01e7240

1 file changed

Lines changed: 5 additions & 10 deletions

File tree

pulp_python/app/tasks/repair.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
132132
main_artifact,
133133
metadata_batch,
134134
pkgs_metadata_not_repaired,
135-
temp_path=temp_path,
136135
)
137136
total_repaired += update_package_if_needed(
138137
package, new_data, batch, set_of_update_fields
@@ -249,7 +248,6 @@ def update_metadata_artifact_if_needed(
249248
main_artifact: Artifact,
250249
metadata_batch: list[tuple],
251250
pkgs_metadata_not_repaired: set[str],
252-
temp_path: str | None = None,
253251
) -> int:
254252
"""
255253
Repairs metadata artifacts for wheel packages by creating missing metadata artifacts
@@ -262,7 +260,6 @@ def update_metadata_artifact_if_needed(
262260
main_artifact: The main package artifact used to generate metadata.
263261
metadata_batch: List of tuples for batch processing (updated in-place).
264262
pkgs_metadata_not_repaired: Set of package PKs that failed repair (updated in-place).
265-
temp_path: Path to already-extracted temp wheel file, avoids re-reading from S3.
266263
267264
Returns:
268265
Number of repaired metadata artifacts (only when batch is flushed at BULK_SIZE).
@@ -277,13 +274,13 @@ def update_metadata_artifact_if_needed(
277274

278275
# Create missing
279276
if not cas:
280-
metadata_batch.append((package, main_artifact, temp_path))
277+
metadata_batch.append((package, main_artifact))
281278
# Fix existing
282279
elif new_metadata_sha256 != original_metadata_sha256:
283280
ca = cas.first()
284281
metadata_artifact = ca.artifact
285282
if metadata_artifact is None or (metadata_artifact.sha256 != new_metadata_sha256):
286-
metadata_batch.append((package, main_artifact, temp_path))
283+
metadata_batch.append((package, main_artifact))
287284

288285
if len(metadata_batch) == BULK_SIZE:
289286
not_repaired = _process_metadata_batch(metadata_batch)
@@ -300,18 +297,16 @@ def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
300297
and their corresponding ContentArtifacts.
301298
302299
Args:
303-
metadata_batch: List of (package, main_artifact, temp_path) tuples.
300+
metadata_batch: List of (package, main_artifact) tuples.
304301
305302
Returns:
306303
Set of package PKs for which metadata artifacts could not be created.
307304
"""
308305
not_repaired = set()
309306
content_artifacts = []
310307

311-
for package, main_artifact, temp_path in metadata_batch:
312-
metadata_artifact = artifact_to_metadata_artifact(
313-
package.filename, main_artifact, temp_path=temp_path
314-
)
308+
for package, main_artifact in metadata_batch:
309+
metadata_artifact = artifact_to_metadata_artifact(package.filename, main_artifact)
315310
if metadata_artifact:
316311
ca = ContentArtifact(
317312
artifact=metadata_artifact,

0 commit comments

Comments
 (0)