Skip to content

Commit c94eede

Browse files
committed
Quote all files if original RECORD had all files quoted
1 parent b47b92b commit c94eede

2 files changed

Lines changed: 36 additions & 30 deletions

File tree

python/private/pypi/repack_whl.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,17 +151,21 @@ def main(sys_argv):
151151
logging.debug(f"Found dist-info dir: {distinfo_dir}")
152152
record_path = distinfo_dir / "RECORD"
153153
record_contents = record_path.read_text() if record_path.exists() else ""
154+
quote_files = all(line.startswith('"') for line in record_contents.splitlines())
154155
distribution_prefix = distinfo_dir.with_suffix("").name
155156

156157
with _WhlFile(
157-
args.output, mode="w", distribution_prefix=distribution_prefix
158+
args.output,
159+
mode="w",
160+
distribution_prefix=distribution_prefix,
161+
quote_all_filenames=quote_files,
158162
) as out:
159163
for p in _files_to_pack(patched_wheel_dir, record_contents):
160164
rel_path = p.relative_to(patched_wheel_dir)
161165
out.add_file(str(rel_path), p)
162166

163167
logging.debug(f"Writing RECORD file")
164-
got_record = out.add_recordfile().decode("utf-8", "surrogateescape")
168+
got_record = out.add_recordfile()
165169

166170
if got_record == record_contents:
167171
logging.info(f"Created a whl file: {args.output}")

tools/wheelmaker.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,17 @@ def __init__(
132132
distribution_prefix: str,
133133
strip_path_prefixes=None,
134134
compression=zipfile.ZIP_DEFLATED,
135+
quote_all_filenames: bool = False,
135136
**kwargs,
136137
):
137138
self._distribution_prefix = distribution_prefix
138139

139140
self._strip_path_prefixes = strip_path_prefixes or []
140-
# Entries for the RECORD file as (filename, hash, size) tuples.
141-
self._record = []
141+
# Entries for the RECORD file as (filename, digest, size) tuples.
142+
self._record: list[tuple[str, str, str]] = []
143+
# Whether to quote filenames in the RECORD file (for compatibility with
144+
# some wheels like torch that have quoted filenames in their RECORD).
145+
self.quote_all_filenames = quote_all_filenames
142146

143147
super().__init__(filename, mode=mode, compression=compression, **kwargs)
144148

@@ -192,16 +196,15 @@ def add_string(self, filename, contents):
192196
hash.update(contents)
193197
self._add_to_record(filename, self._serialize_digest(hash), len(contents))
194198

195-
def _serialize_digest(self, hash):
199+
def _serialize_digest(self, hash) -> str:
196200
# https://www.python.org/dev/peps/pep-0376/#record
197201
# "base64.urlsafe_b64encode(digest) with trailing = removed"
198202
digest = base64.urlsafe_b64encode(hash.digest())
199203
digest = b"sha256=" + digest.rstrip(b"=")
200-
return digest
204+
return digest.decode("utf-8", "surrogateescape")
201205

202-
def _add_to_record(self, filename, hash, size):
203-
size = str(size).encode("ascii")
204-
self._record.append((filename, hash, size))
206+
def _add_to_record(self, filename: str, hash: str, size: int) -> None:
207+
self._record.append((filename, hash, str(size)))
205208

206209
def _zipinfo(self, filename):
207210
"""Construct deterministic ZipInfo entry for a file named filename"""
@@ -223,29 +226,28 @@ def _zipinfo(self, filename):
223226
zinfo.compress_type = self.compression
224227
return zinfo
225228

226-
def add_recordfile(self):
229+
def _quote_filename(self, filename: str) -> str:
    """Return ``filename`` encoded as the first field of a RECORD row.

    Leading ``/`` characters are removed, then the name is CSV-encoded so
    that names containing ``,``, ``"`` or line breaks are quoted.  When
    ``self.quote_all_filenames`` is true the name is quoted unconditionally;
    some RECORD files (torch's, for example) quote *every* filename and the
    regenerated RECORD should differ from the original as little as possible.

    Args:
        filename: Path of the file inside the wheel.

    Returns:
        The encoded filename, without a trailing line terminator.
    """
    quoting = csv.QUOTE_ALL if self.quote_all_filenames else csv.QUOTE_MINIMAL
    # Keep the csv module's default terminator so that names containing
    # "\r" or "\n" are still quoted, then cut off exactly that terminator.
    # A blanket str.strip() would also eat leading/trailing whitespace that
    # is genuinely part of the filename.
    terminator = "\r\n"
    with io.StringIO() as buf:
        writer = csv.writer(buf, quoting=quoting, lineterminator=terminator)
        writer.writerow([filename.lstrip("/")])
        encoded = buf.getvalue()
    return encoded[: -len(terminator)]
239+
240+
def add_recordfile(self) -> str:
227241
"""Write RECORD file to the distribution."""
228242
record_path = self.distinfo_path("RECORD")
229-
entries = self._record + [(record_path, b"", b"")]
230-
with io.StringIO() as contents_io:
231-
writer = csv.writer(contents_io, lineterminator="\n")
232-
for filename, digest, size in entries:
233-
if isinstance(filename, str):
234-
filename = filename.lstrip("/")
235-
writer.writerow(
236-
(
237-
(
238-
c
239-
if isinstance(c, str)
240-
else c.decode("utf-8", "surrogateescape")
241-
)
242-
for c in (filename, digest, size)
243-
)
244-
)
245-
246-
contents = contents_io.getvalue()
247-
self.add_string(record_path, contents)
248-
return contents.encode("utf-8", "surrogateescape")
243+
entries = self._record + [(record_path, "", "")]
244+
entries = [
245+
(self._quote_filename(fname), digest, size)
246+
for fname, digest, size in entries
247+
]
248+
contents = "\n".join(",".join(entry) for entry in entries) + "\n"
249+
self.add_string(record_path, contents)
250+
return contents
249251

250252

251253
class WheelMaker(object):

0 commit comments

Comments
 (0)