@@ -2455,6 +2455,121 @@ def GetBinaryFileType(infile, filestart=0, closefp=True):
24552455 # -------------- FALLBACK --------------
24562456 return False
24572457
def _get_seek_consts():
    """
    Look up the sparse-file seek constants on the ``os`` module.

    Returns:
        (os.SEEK_DATA, os.SEEK_HOLE) when the ``os`` module defines both
        attributes, otherwise (None, None).
    """
    found = tuple(getattr(os, attr, None) for attr in ("SEEK_DATA", "SEEK_HOLE"))
    # One constant without the other is useless to callers, so report
    # "unsupported" unless both are present.
    if any(const is None for const in found):
        return None, None
    return found
2465+
def _pack_sparse_by_seek(f, out_fp, logical_size, seek_data, seek_hole, bufsize):
    """Copy the data extents reported by lseek(SEEK_DATA/SEEK_HOLE) from `f` into `out_fp`."""
    fd = f.fileno()
    extents = []
    stored = 0
    pos = 0
    while pos < logical_size:
        try:
            data_off = os.lseek(fd, pos, seek_data)
        except OSError:
            # Treated as "no more data at or after pos": the rest is a hole.
            break
        try:
            hole_off = os.lseek(fd, data_off, seek_hole)
        except OSError:
            hole_off = logical_size
        # Never describe data beyond the size we are going to report.
        hole_off = min(hole_off, logical_size)

        length = hole_off - data_off
        if length <= 0:
            # Always move forward so a degenerate answer cannot loop forever.
            pos = max(pos + 1, hole_off)
            continue

        # `f` is opened unbuffered by the caller, so positioning the
        # descriptor directly also positions the next f.read().
        os.lseek(fd, data_off, os.SEEK_SET)
        copied = 0
        while copied < length:
            chunk = f.read(min(bufsize, length - copied))
            if not chunk:
                break  # hit end of file before the extent was fully read
            out_fp.write(chunk)
            copied += len(chunk)

        if copied:
            # Record what was actually copied so the extent list always
            # matches the bytes present in out_fp, even after a short read.
            extents.append((data_off, copied))
            stored += copied

        pos = hole_off
    return extents, stored


def _pack_sparse_by_scan(f, out_fp, logical_size, block=4096):
    """Copy runs of blocks containing any non-zero byte from `f` into `out_fp`."""
    extents = []
    stored = 0
    pos = 0
    run_start = None  # logical offset where the current non-zero run began
    while pos < logical_size:
        chunk = f.read(min(block, logical_size - pos))
        if not chunk:
            break
        if chunk.count(b"\0") != len(chunk):
            if run_start is None:
                run_start = pos
            # Stream each block straight out instead of buffering the whole
            # run, so memory use stays bounded for large dense files.
            out_fp.write(chunk)
            stored += len(chunk)
        elif run_start is not None:
            extents.append((run_start, pos - run_start))
            run_start = None
        pos += len(chunk)
    if run_start is not None:
        extents.append((run_start, pos - run_start))
    return extents, stored


def pack_sparse_to_stream(path, out_fp, bufsize=1024 * 1024):
    """
    Write ONLY the data extents of sparse file `path` into `out_fp`.

    When the platform exposes SEEK_DATA/SEEK_HOLE the extents come from
    os.lseek(); otherwise the file is scanned in 4096-byte blocks and every
    run of blocks containing a non-zero byte is treated as data (so stored
    zeros cannot be told apart from holes in that mode).

    Args:
        path: path of the file to read.
        out_fp: writable, seekable binary file-like object; it is rewound to
            offset 0 before returning.
        bufsize: maximum number of bytes per read while copying an extent;
            a non-positive value falls back to 1 MiB.

    Returns: (logical_size, extents, stored_bytes)
        logical_size: size of the opened file, taken from os.fstat()
        extents: list of (offset, length) in the logical file, in file order
        stored_bytes: total bytes written to out_fp
    """
    if bufsize <= 0:
        # A zero-sized read would look like end of file and drop every extent.
        bufsize = 1024 * 1024

    seek_data, seek_hole = _get_seek_consts()

    with open(path, "rb", buffering=0) as f:
        # Size the file through the descriptor we actually read from, so the
        # size and the data always describe the same object.
        logical_size = int(os.fstat(f.fileno()).st_size)
        if seek_data is not None and seek_hole is not None:
            extents, stored = _pack_sparse_by_seek(
                f, out_fp, logical_size, seek_data, seek_hole, bufsize
            )
        else:
            extents, stored = _pack_sparse_by_scan(f, out_fp, logical_size)

    out_fp.seek(0, os.SEEK_SET)
    return logical_size, extents, stored
2544+
def write_sparse_to_fileobj(out_fp, logical_size, extents, in_fp, bufsize=1024 * 1024):
    """
    Recreate a sparse file layout inside an already-open writable file object.

    `out_fp` is resized to `logical_size`, then the payload of each extent is
    read sequentially from `in_fp` and written at that extent's offset.
    Regions not covered by an extent are never written.

    Args:
        out_fp: writable, seekable binary file object supporting truncate().
        logical_size: final size of the file in bytes.
        extents: iterable of (offset, length) pairs, in the order their
            payloads appear in `in_fp`.
        in_fp: readable binary file-like object positioned at the first
            extent's payload.
        bufsize: maximum number of bytes per read; a non-positive value
            falls back to 1 MiB.

    Raises:
        ValueError: if an extent has a negative offset or length.
        EOFError: if `in_fp` ends before an extent is fully read.
    """
    if bufsize <= 0:
        # read(0) would look like a truncated archive, and read(<0) would
        # slurp the rest of the stream into a single extent.
        bufsize = 1024 * 1024

    out_fp.seek(0)
    out_fp.truncate(int(logical_size))

    for off, length in extents:
        off = int(off)
        remaining = int(length)
        if off < 0 or remaining < 0:
            # Extents come from archive metadata; a negative length would
            # otherwise turn the read below into "read until EOF".
            raise ValueError(
                "Invalid sparse extent (offset=%d, length=%d)" % (off, remaining)
            )
        out_fp.seek(off, os.SEEK_SET)
        while remaining > 0:
            chunk = in_fp.read(min(bufsize, remaining))
            if not chunk:
                raise EOFError("Archive ended while reading sparse extent data")
            out_fp.write(chunk)
            remaining -= len(chunk)
2561+
def unpack_sparse_to_path(in_fp, out_path, logical_size, extents, bufsize=1024 * 1024):
    """
    Recreate a sparse file at `out_path` from extent data read from `in_fp`.

    Missing parent directories of `out_path` are created first. The file is
    opened with mode "wb", so an existing file at that path is overwritten.

    Args:
        in_fp: readable binary file-like object positioned at the first
            extent's payload.
        out_path: destination path of the file to create.
        logical_size: final size of the file in bytes.
        extents: iterable of (offset, length) pairs describing the data.
        bufsize: maximum number of bytes per read, passed through to
            write_sparse_to_fileobj().
    """
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

    with open(out_path, "wb") as f:
        write_sparse_to_fileobj(f, logical_size, extents, in_fp, bufsize)

        # Syncing to disk is best-effort and must happen while the file is
        # still open. Only OS-level failures are ignored, so programming
        # errors are no longer hidden.
        try:
            f.flush()
            os.fsync(f.fileno())
        except OSError:
            pass
24582573
24592574def _is_valid_zlib_header (cmf , flg ):
24602575 """
@@ -5995,7 +6110,8 @@ def AppendFilesWithContentToList(infiles, dirlistfromtxt=False, extradata=[], js
59956110 # Types that should be considered zero-length in the archive context:
59966111 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
59976112 # Types that have actual data to read:
5998- data_types = {0 , 7 , 12 }
6113+ data_types = {0 , 7 }
6114+ sparse_types = {12 }
59996115 if ftype in zero_length_types :
60006116 fsize = format (int ("0" ), 'x' ).lower ()
60016117 elif ftype in data_types :
@@ -6312,7 +6428,8 @@ def AppendFilesWithContentFromTarFileToList(infile, extradata=[], jsondata={}, c
63126428 # Types that should be considered zero-length in the archive context:
63136429 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
63146430 # Types that have actual data to read:
6315- data_types = {0 , 7 , 12 }
6431+ data_types = {0 , 7 }
6432+ sparse_types = {12 }
63166433 if ftype in zero_length_types :
63176434 fsize = format (int ("0" ), 'x' ).lower ()
63186435 elif ftype in data_types :
0 commit comments