Skip to content

Commit 9cda7a0

Browse files
committed
refactor: use helper + optional temp_path instead of separate functions
Address review feedback: - Add copy_artifact_to_temp_file helper that copies artifact to disk - artifact_to_python_content_data accepts optional temp_path param - artifact_to_metadata_artifact accepts optional temp_path param - Remove artifact_to_metadata_artifact_from_path (no longer needed) - Repair loop creates temp file once via helper, passes to both functions - Use try/finally for temp file cleanup JIRA: PULP-1573
1 parent afa4fe8 commit 9cda7a0

File tree

2 files changed

+63
-92
lines changed

2 files changed

+63
-92
lines changed

pulp_python/app/tasks/repair.py

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from pulp_python.app.models import PythonPackageContent, PythonRepository
1111
from pulp_python.app.utils import (
1212
artifact_to_metadata_artifact,
13-
artifact_to_metadata_artifact_from_path,
1413
artifact_to_python_content_data,
14+
copy_artifact_to_temp_file,
1515
fetch_json_release_metadata,
1616
parse_metadata,
1717
)
@@ -120,30 +120,24 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> tuple[int, set[s
120120
.first()
121121
.artifact
122122
)
123-
# For wheels, keep the temp file so we can reuse it for metadata artifact
124-
is_wheel = package.filename.endswith(".whl")
125-
if is_wheel:
126-
new_data, temp_path = artifact_to_python_content_data(
127-
package.filename, main_artifact, domain, keep_temp_file=True
123+
# Copy artifact to temp file once, reuse for both content data and metadata
124+
temp_path = copy_artifact_to_temp_file(main_artifact, package.filename)
125+
try:
126+
new_data = artifact_to_python_content_data(
127+
package.filename, main_artifact, domain, temp_path=temp_path
128128
)
129-
else:
130-
new_data = artifact_to_python_content_data(package.filename, main_artifact, domain)
131-
temp_path = None
132-
133-
total_metadata_repaired += update_metadata_artifact_if_needed(
134-
package,
135-
new_data.get("metadata_sha256"),
136-
main_artifact,
137-
metadata_batch,
138-
pkgs_metadata_not_repaired,
139-
temp_path=temp_path,
140-
)
141-
total_repaired += update_package_if_needed(
142-
package, new_data, batch, set_of_update_fields
143-
)
144-
145-
# Clean up temp file after processing
146-
if temp_path and os.path.exists(temp_path):
129+
total_metadata_repaired += update_metadata_artifact_if_needed(
130+
package,
131+
new_data.get("metadata_sha256"),
132+
main_artifact,
133+
metadata_batch,
134+
pkgs_metadata_not_repaired,
135+
temp_path=temp_path,
136+
)
137+
total_repaired += update_package_if_needed(
138+
package, new_data, batch, set_of_update_fields
139+
)
140+
finally:
147141
os.unlink(temp_path)
148142

149143
# For on-demand content, we expect that:
@@ -315,10 +309,9 @@ def _process_metadata_batch(metadata_batch: list[tuple]) -> set[str]:
315309
content_artifacts = []
316310

317311
for package, main_artifact, temp_path in metadata_batch:
318-
if temp_path and os.path.exists(temp_path):
319-
metadata_artifact = artifact_to_metadata_artifact_from_path(package.filename, temp_path)
320-
else:
321-
metadata_artifact = artifact_to_metadata_artifact(package.filename, main_artifact)
312+
metadata_artifact = artifact_to_metadata_artifact(
313+
package.filename, main_artifact, temp_path=temp_path
314+
)
322315
if metadata_artifact:
323316
ca = ContentArtifact(
324317
artifact=metadata_artifact,

pulp_python/app/utils.py

Lines changed: 42 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -240,59 +240,67 @@ def compute_metadata_sha256(filename: str) -> str | None:
240240
return hashlib.sha256(metadata_content).hexdigest() if metadata_content else None
241241

242242

243-
def artifact_to_python_content_data(filename, artifact, domain=None, keep_temp_file=False):
243+
def copy_artifact_to_temp_file(artifact, filename, tmp_dir="."):
244+
"""
245+
Copy an artifact's file to a temporary file on disk.
246+
247+
Returns the path to the temp file. The caller is responsible for cleanup.
248+
"""
249+
temp_file = tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename, delete=False)
250+
artifact.file.seek(0)
251+
shutil.copyfileobj(artifact.file, temp_file)
252+
temp_file.flush()
253+
temp_file.close()
254+
return temp_file.name
255+
256+
257+
def artifact_to_python_content_data(filename, artifact, domain=None, temp_path=None):
244258
"""
245259
Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
246260
247-
If keep_temp_file is True, returns a tuple of (data, temp_file_path) instead of just data.
248-
The caller is responsible for cleaning up the temp file.
249-
"""
250-
if keep_temp_file:
251-
temp_file = tempfile.NamedTemporaryFile("wb", dir=".", suffix=filename, delete=False)
252-
artifact.file.seek(0)
253-
shutil.copyfileobj(artifact.file, temp_file)
254-
temp_file.flush()
255-
temp_file.close()
256-
metadata = get_project_metadata_from_file(temp_file.name)
257-
data = parse_project_metadata(vars(metadata))
258-
data["sha256"] = artifact.sha256
259-
data["size"] = artifact.size
260-
data["filename"] = filename
261-
data["pulp_domain"] = domain or artifact.pulp_domain
262-
data["_pulp_domain"] = data["pulp_domain"]
263-
return data, temp_file.name
261+
If temp_path is provided, uses it instead of copying the artifact to a new temp file.
262+
"""
263+
# Copy file to a temp directory under the user provided filename, we do this
264+
# because pkginfo validates that the filename has a valid extension before
265+
# reading it
266+
if temp_path:
267+
metadata = get_project_metadata_from_file(temp_path)
264268
else:
265-
# Copy file to a temp directory under the user provided filename, we do this
266-
# because pkginfo validates that the filename has a valid extension before
267-
# reading it
268269
with tempfile.NamedTemporaryFile("wb", dir=".", suffix=filename) as temp_file:
269270
artifact.file.seek(0)
270271
shutil.copyfileobj(artifact.file, temp_file)
271272
temp_file.flush()
272273
metadata = get_project_metadata_from_file(temp_file.name)
273-
data = parse_project_metadata(vars(metadata))
274-
data["sha256"] = artifact.sha256
275-
data["size"] = artifact.size
276-
data["filename"] = filename
277-
data["pulp_domain"] = domain or artifact.pulp_domain
278-
data["_pulp_domain"] = data["pulp_domain"]
279-
return data
274+
data = parse_project_metadata(vars(metadata))
275+
data["sha256"] = artifact.sha256
276+
data["size"] = artifact.size
277+
data["filename"] = filename
278+
data["pulp_domain"] = domain or artifact.pulp_domain
279+
data["_pulp_domain"] = data["pulp_domain"]
280+
return data
280281

281282

282283
def artifact_to_metadata_artifact(
283-
filename: str, artifact: Artifact, tmp_dir: str = "."
284+
filename: str, artifact: Artifact, tmp_dir: str = ".", temp_path: str | None = None
284285
) -> Artifact | None:
285286
"""
286287
Creates artifact for metadata from the provided wheel artifact.
288+
289+
If temp_path is provided, uses it instead of copying the artifact to a new temp file.
287290
"""
288291
if not filename.endswith(".whl"):
289292
return None
290293

291-
with tempfile.NamedTemporaryFile("wb", dir=tmp_dir, suffix=filename, delete=False) as temp_file:
292-
temp_wheel_path = temp_file.name
293-
artifact.file.seek(0)
294-
shutil.copyfileobj(artifact.file, temp_file)
295-
temp_file.flush()
294+
if temp_path:
295+
temp_wheel_path = temp_path
296+
else:
297+
with tempfile.NamedTemporaryFile(
298+
"wb", dir=tmp_dir, suffix=filename, delete=False
299+
) as temp_file:
300+
temp_wheel_path = temp_file.name
301+
artifact.file.seek(0)
302+
shutil.copyfileobj(artifact.file, temp_file)
303+
temp_file.flush()
296304

297305
metadata_content = extract_wheel_metadata(temp_wheel_path)
298306
if not metadata_content:
@@ -315,36 +323,6 @@ def artifact_to_metadata_artifact(
315323
return metadata_artifact
316324

317325

318-
def artifact_to_metadata_artifact_from_path(filename: str, temp_wheel_path: str) -> Artifact | None:
319-
"""
320-
Creates artifact for metadata from an already-extracted wheel temp file.
321-
322-
Unlike artifact_to_metadata_artifact, this avoids re-reading the artifact
323-
from storage by using a temp file that was already written during metadata
324-
extraction.
325-
"""
326-
if not filename.endswith(".whl"):
327-
return None
328-
329-
metadata_content = extract_wheel_metadata(temp_wheel_path)
330-
if not metadata_content:
331-
return None
332-
333-
with tempfile.NamedTemporaryFile("wb", dir=".", suffix=".metadata", delete=False) as temp_md:
334-
temp_metadata_path = temp_md.name
335-
temp_md.write(metadata_content)
336-
temp_md.flush()
337-
338-
metadata_artifact = Artifact.init_and_validate(temp_metadata_path)
339-
try:
340-
metadata_artifact.save()
341-
except IntegrityError:
342-
metadata_artifact = Artifact.objects.get(
343-
sha256=metadata_artifact.sha256, pulp_domain=get_domain()
344-
)
345-
return metadata_artifact
346-
347-
348326
def fetch_json_release_metadata(name: str, version: str, remotes: set[Remote]) -> dict:
349327
"""
350328
Fetches metadata for a specific release from PyPI's JSON API. A release can contain

0 commit comments

Comments
 (0)