@@ -132,13 +132,17 @@ def __init__(
132132 distribution_prefix : str ,
133133 strip_path_prefixes = None ,
134134 compression = zipfile .ZIP_DEFLATED ,
135+ quote_all_filenames : bool = False ,
135136 ** kwargs ,
136137 ):
137138 self ._distribution_prefix = distribution_prefix
138139
139140 self ._strip_path_prefixes = strip_path_prefixes or []
140- # Entries for the RECORD file as (filename, hash, size) tuples.
141- self ._record = []
141+ # Entries for the RECORD file as (filename, digest, size) tuples.
142+ self ._record : list [tuple [str , str , str ]] = []
143+ # Whether to quote filenames in the RECORD file (for compatibility with
144+ # some wheels like torch that have quoted filenames in their RECORD).
145+ self .quote_all_filenames = quote_all_filenames
142146
143147 super ().__init__ (filename , mode = mode , compression = compression , ** kwargs )
144148
@@ -192,16 +196,15 @@ def add_string(self, filename, contents):
192196 hash .update (contents )
193197 self ._add_to_record (filename , self ._serialize_digest (hash ), len (contents ))
194198
195- def _serialize_digest (self , hash ):
199+ def _serialize_digest (self , hash ) -> str :
196200 # https://www.python.org/dev/peps/pep-0376/#record
197201 # "base64.urlsafe_b64encode(digest) with trailing = removed"
198202 digest = base64 .urlsafe_b64encode (hash .digest ())
199203 digest = b"sha256=" + digest .rstrip (b"=" )
200- return digest
204+ return digest . decode ( "utf-8" , "surrogateescape" )
201205
202- def _add_to_record (self , filename , hash , size ):
203- size = str (size ).encode ("ascii" )
204- self ._record .append ((filename , hash , size ))
206+ def _add_to_record (self , filename : str , hash : str , size : int ) -> None :
207+ self ._record .append ((filename , hash , str (size )))
205208
206209 def _zipinfo (self , filename ):
207210 """Construct deterministic ZipInfo entry for a file named filename"""
@@ -223,29 +226,27 @@ def _zipinfo(self, filename):
223226 zinfo .compress_type = self .compression
224227 return zinfo
225228
226- def add_recordfile (self ):
229+ def _quote_filename (self , filename : str ) -> str :
230+ """Return a possibly quoted filename for RECORD file."""
231+ filename = filename .lstrip ("/" )
232+ # Some RECORDs like torch have *all* filenames quoted and we must minimize diff.
233+ # Otherwise, we quote only when necessary (e.g. for filenames with commas).
234+ quoting = csv .QUOTE_ALL if self .quote_all_filenames else csv .QUOTE_MINIMAL
235+ with io .StringIO () as buf :
236+ csv .writer (buf , quoting = quoting ).writerow ([filename ])
237+ return buf .getvalue ().strip ()
238+
def add_recordfile(self) -> str:
    """Write RECORD file to the distribution.

    Each queued entry becomes one ``filename,digest,size`` line. The
    filename goes through ``self._quote_filename``; digest and size are
    written verbatim. A final line for the RECORD file itself is appended
    with empty digest and size fields.

    Returns:
        The text of the RECORD file, which is also stored in the archive
        via ``self.add_string``.
    """
    record_path = self.distinfo_path("RECORD")
    # RECORD lists itself last, with empty digest and size.
    entries = [*self._record, (record_path, "", "")]
    lines = [
        ",".join((self._quote_filename(fname), digest, size))
        for fname, digest, size in entries
    ]
    contents = "\n".join(lines) + "\n"
    self.add_string(record_path, contents)
    return contents
249250
250251
251252class WheelMaker (object ):
0 commit comments