@@ -476,6 +476,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
476476 band_count = samples if (planar == 2 and samples > 1 ) else 1
477477 tiles_per_band = tiles_across * tiles_down
478478
479+ # Build list of tiles to decode
480+ tile_jobs = []
479481 for band_idx in range (band_count ):
480482 band_tile_offset = band_idx * tiles_per_band if band_count > 1 else 0
481483 tile_samples = 1 if band_count > 1 else samples
@@ -485,37 +487,55 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
485487 tile_idx = band_tile_offset + tr * tiles_across + tc
486488 if tile_idx >= len (offsets ):
487489 continue
488-
489- tile_data = data [offsets [tile_idx ]:offsets [tile_idx ] + byte_counts [tile_idx ]]
490- tile_pixels = _decode_strip_or_tile (
491- tile_data , compression , tw , th , tile_samples ,
492- bps , bytes_per_sample , is_sub_byte , dtype , pred ,
493- byte_order = header .byte_order )
494-
495- tile_r0 = tr * th
496- tile_c0 = tc * tw
497-
498- src_r0 = max (r0 - tile_r0 , 0 )
499- src_c0 = max (c0 - tile_c0 , 0 )
500- src_r1 = min (r1 - tile_r0 , th )
501- src_c1 = min (c1 - tile_c0 , tw )
502-
503- dst_r0 = max (tile_r0 - r0 , 0 )
504- dst_c0 = max (tile_c0 - c0 , 0 )
505-
506- actual_tile_h = min (th , height - tile_r0 )
507- actual_tile_w = min (tw , width - tile_c0 )
508- src_r1 = min (src_r1 , actual_tile_h )
509- src_c1 = min (src_c1 , actual_tile_w )
510- dst_r1 = dst_r0 + (src_r1 - src_r0 )
511- dst_c1 = dst_c0 + (src_c1 - src_c0 )
512-
513- if dst_r1 > dst_r0 and dst_c1 > dst_c0 :
514- src_slice = tile_pixels [src_r0 :src_r1 , src_c0 :src_c1 ]
515- if band_count > 1 :
516- result [dst_r0 :dst_r1 , dst_c0 :dst_c1 , band_idx ] = src_slice
517- else :
518- result [dst_r0 :dst_r1 , dst_c0 :dst_c1 ] = src_slice
490+ tile_jobs .append ((band_idx , tr , tc , tile_idx , tile_samples ))
491+
492+ # Decode tiles -- parallel for compressed, sequential for uncompressed
493+ n_tiles = len (tile_jobs )
494+ use_parallel = (compression != 1 and n_tiles > 4 ) # 1 = COMPRESSION_NONE
495+
496+ def _decode_one (job ):
497+ band_idx , tr , tc , tile_idx , tile_samples = job
498+ tile_data = data [offsets [tile_idx ]:offsets [tile_idx ] + byte_counts [tile_idx ]]
499+ return _decode_strip_or_tile (
500+ tile_data , compression , tw , th , tile_samples ,
501+ bps , bytes_per_sample , is_sub_byte , dtype , pred ,
502+ byte_order = header .byte_order )
503+
504+ if use_parallel :
505+ from concurrent .futures import ThreadPoolExecutor
506+ import os as _os
507+ n_workers = min (n_tiles , _os .cpu_count () or 4 )
508+ with ThreadPoolExecutor (max_workers = n_workers ) as pool :
509+ decoded = list (pool .map (_decode_one , tile_jobs ))
510+ else :
511+ decoded = [_decode_one (job ) for job in tile_jobs ]
512+
513+ # Place decoded tiles into the output array
514+ for (band_idx , tr , tc , tile_idx , tile_samples ), tile_pixels in zip (tile_jobs , decoded ):
515+ tile_r0 = tr * th
516+ tile_c0 = tc * tw
517+
518+ src_r0 = max (r0 - tile_r0 , 0 )
519+ src_c0 = max (c0 - tile_c0 , 0 )
520+ src_r1 = min (r1 - tile_r0 , th )
521+ src_c1 = min (c1 - tile_c0 , tw )
522+
523+ dst_r0 = max (tile_r0 - r0 , 0 )
524+ dst_c0 = max (tile_c0 - c0 , 0 )
525+
526+ actual_tile_h = min (th , height - tile_r0 )
527+ actual_tile_w = min (tw , width - tile_c0 )
528+ src_r1 = min (src_r1 , actual_tile_h )
529+ src_c1 = min (src_c1 , actual_tile_w )
530+ dst_r1 = dst_r0 + (src_r1 - src_r0 )
531+ dst_c1 = dst_c0 + (src_c1 - src_c0 )
532+
533+ if dst_r1 > dst_r0 and dst_c1 > dst_c0 :
534+ src_slice = tile_pixels [src_r0 :src_r1 , src_c0 :src_c1 ]
535+ if band_count > 1 :
536+ result [dst_r0 :dst_r1 , dst_c0 :dst_c1 , band_idx ] = src_slice
537+ else :
538+ result [dst_r0 :dst_r1 , dst_c0 :dst_c1 ] = src_slice
519539
520540 return result
521541
0 commit comments