@@ -396,6 +396,54 @@ def decode_unicode_escape(value):
396396if (__version_info__ [3 ] is None ):
397397 __version__ = str (__version_info__ [0 ]) + "." + str (__version_info__ [1 ]) + "." + str (__version_info__ [2 ])
398398
# ===== Module-level type code table & helpers (reuse anywhere) =====

# Symbolic name -> numeric ftype code as stored in an entry's 'ftype' field.
# No name is assigned to code 12 in this table.
FT = {
    "FILE": 0,
    "HARDLINK": 1,
    "SYMLINK": 2,
    "CHAR": 3,
    "BLOCK": 4,
    "DIR": 5,
    "FIFO": 6,
    "FILE_ALT": 7,  # treated like regular file
    "SOCK": 8,
    "DOOR": 9,
    "PORT": 10,
    "WHT": 11,
    "JUNCTION": 13,
}

# Base category for each concrete ftype (no unions here).
# Keyed through FT so the numeric codes are written down in exactly one place.
BASE_CATEGORY_BY_CODE = {
    FT["FILE"]: "files",
    FT["FILE_ALT"]: "files",
    FT["HARDLINK"]: "hardlinks",
    FT["SYMLINK"]: "symlinks",
    FT["CHAR"]: "character",
    FT["BLOCK"]: "block",
    FT["DIR"]: "directories",
    FT["FIFO"]: "fifo",
    FT["SOCK"]: "sockets",
    FT["DOOR"]: "doors",
    FT["PORT"]: "ports",
    FT["WHT"]: "whiteouts",
    FT["JUNCTION"]: "junctions",
}

# Union categories defined by which base codes should populate them.
# Each rule is (category name, codes); an entry whose ftype is in `codes` is
# listed under the union category in addition to its base category.
# frozenset keeps these shared tables from being modified by accident while
# still supporting `in` and comparing equal to an ordinary set.
UNION_RULES = [
    ("links", frozenset([FT["HARDLINK"], FT["SYMLINK"]])),
    ("devices", frozenset([FT["CHAR"], FT["BLOCK"]])),
]

# Deterministic category order (handy for consistent output/printing).
# Base categories come first, union categories last.
CATEGORY_ORDER = [
    "files", "hardlinks", "symlinks", "character", "block",
    "directories", "fifo", "sockets", "doors", "ports",
    "whiteouts", "junctions", "links", "devices",
]
446+
399447# Robust bitness detection
400448# Works on Py2 & Py3, all platforms
401449
@@ -7727,72 +7775,122 @@ def ListDirToArray(infiles, dirlistfromtxt=False, fmttype=__file_format_default_
77277775 return listarrayfiles
77287776
77297777
7778+ # ===== Function (keeps inarray schema; returns entries + indexes) =====
7779+
def ArchiveFileArrayToArrayIndex(inarray, returnfp=False):
    """
    Build name <-> id lookup indexes over an archive listing.

    The input dict is not modified; it is referenced (not copied) from the
    result under 'list'. Python 2/3 compatible, no external deps.

    Input:
        inarray: dict with
            - 'ffilelist': list of dicts, each providing 'fname', 'fid' and
              'ftype'. Records that are not dicts, or that lack any of these
              three values (missing or None), are skipped.
            - optional 'fp': passed through when returnfp is true.
          Every record in 'ffilelist' is indexed; 'fnumfiles' is not
          consulted, so a stale or malformed count cannot drop entries.
        returnfp: when true, 'fp' in the result is inarray.get('fp');
            otherwise it is None.

    Returns:
        False when inarray is not a dict or is empty. Otherwise:
        {
            'list': inarray,
            'fp': <see returnfp>,
            'entries': { fid: {'name': fname, 'type': ftype} },
            'indexes': {
                'by_name': { fname: fid },
                'by_type': {
                    <category>: {
                        'by_name': { fname: fid },
                        'by_id': { fid: fname },
                        'count': <number of names in the category>
                    }, ...            # one bucket per CATEGORY_ORDER item
                }
            },
            'counts': {
                'total': <number of records indexed (skipped ones excluded)>,
                'by_type': { <category>: <int>, ... }
            },
            'unknown_types': { <ftype>: [fname, ...] },

            # Flat layout kept for callers that read the older key names.
            # These share dict objects with 'indexes' where possible.
            'filetoid': { fname: fid },
            'idtofile': { fid: fname },
            'filetypes': { <category>: {'filetoid': {...}, 'idtofile': {...}} }
        }

    When the same fname or fid occurs more than once, the later record
    overwrites the earlier mapping.
    """
    if not isinstance(inarray, dict) or not inarray:
        return False

    by_type = {}
    for cat in CATEGORY_ORDER:
        by_type[cat] = {"by_name": {}, "by_id": {}, "count": 0}

    entries = {}
    name_to_id = {}
    id_to_name = {}
    unknown_types = {}
    indexed = 0

    def _add(cat, name, fid):
        # Record one entry in both directions of a category bucket.
        bucket = by_type[cat]
        bucket["by_name"][name] = fid
        bucket["by_id"][fid] = name

    for entry in inarray.get("ffilelist") or []:
        if not isinstance(entry, dict):
            continue
        name = entry.get("fname")
        fid = entry.get("fid")
        ftype = entry.get("ftype")
        if name is None or fid is None or ftype is None:
            continue

        indexed += 1
        entries[fid] = {"name": name, "type": ftype}
        name_to_id[name] = fid
        id_to_name[fid] = name

        base_cat = BASE_CATEGORY_BY_CODE.get(ftype)
        if base_cat is None:
            # Keep unrecognised codes visible instead of dropping them.
            names = unknown_types.setdefault(ftype, [])
            if name not in names:
                names.append(name)
        else:
            _add(base_cat, name, fid)

        # Union categories list the entry in addition to its base category.
        for union_name, codes in UNION_RULES:
            if ftype in codes:
                _add(union_name, name, fid)

    # Per-category counts are the number of distinct names in each bucket;
    # the older flat view reuses the very same dict objects.
    type_counts = {}
    legacy_types = {}
    for cat in CATEGORY_ORDER:
        bucket = by_type[cat]
        bucket["count"] = len(bucket["by_name"])
        type_counts[cat] = bucket["count"]
        legacy_types[cat] = {
            "filetoid": bucket["by_name"],
            "idtofile": bucket["by_id"],
        }

    return {
        "list": inarray,
        "fp": inarray.get("fp") if returnfp else None,
        "entries": entries,
        "indexes": {
            "by_name": name_to_id,
            "by_type": by_type,
        },
        "counts": {"total": indexed, "by_type": type_counts},
        "unknown_types": unknown_types,
        "filetoid": name_to_id,
        "idtofile": id_to_name,
        "filetypes": legacy_types,
    }
77967894
77977895
77987896def RePackArchiveFile (infile , outfile , fmttype = "auto" , compression = "auto" , compresswholefile = True , compressionlevel = None , compressionuselist = compressionlistalt , followlink = False , filestart = 0 , seekstart = 0 , seekend = 0 , checksumtype = ["crc32" , "crc32" , "crc32" , "crc32" ], skipchecksum = False , extradata = [], jsondata = {}, formatspecs = __file_format_dict__ , seektoend = False , verbose = False , returnfp = False ):
0 commit comments