@@ -892,14 +892,28 @@ def __hash__(self) -> int:
892892 return hash (self .manifest_path )
893893
894894
895+ # Global cache for ManifestFile objects, keyed by manifest_path.
896+ # This deduplicates ManifestFile objects across manifest lists, which commonly
897+ # share manifests after append operations.
895898_DEFAULT_MANIFEST_CACHE_SIZE = 128
896- _manifest_cache_size = Config ().get_int ("manifest-cache-size" ) or _DEFAULT_MANIFEST_CACHE_SIZE
899+ _configured_manifest_cache_size = Config ().get_int ("manifest-cache-size" )
900+ _manifest_cache_size = (
901+ _configured_manifest_cache_size if _configured_manifest_cache_size is not None else _DEFAULT_MANIFEST_CACHE_SIZE
902+ )
903+
904+ # Re-entrant lock; hold it while reading or mutating _manifest_cache.
897905_manifest_cache_lock = threading .RLock ()
898- _manifest_cache : LRUCache [str , ManifestFile ] = LRUCache (maxsize = _manifest_cache_size )
906+ _manifest_cache : LRUCache [str , ManifestFile ] | dict [str , ManifestFile ] = (
907+ LRUCache (maxsize = _manifest_cache_size ) if _manifest_cache_size > 0 else {}
908+ )
899909
900910
901911def clear_manifest_cache () -> None :
902- """Clear the manifest cache."""
912+ """Clear cached ManifestFile objects.
913+
914+ This is primarily useful in long-lived or memory-sensitive processes that
915+ want to release cached manifest metadata between bursts of table reads.
916+ """
903917 with _manifest_cache_lock :
904918 _manifest_cache .clear ()
905919
@@ -931,6 +945,9 @@ def _manifests(io: FileIO, manifest_list: str) -> tuple[ManifestFile, ...]:
931945 file = io .new_input (manifest_list )
932946 manifest_files = list (read_manifest_list (file ))
933947
948+ if _manifest_cache_size == 0 :
949+ return tuple (manifest_files )
950+
934951 result = []
935952 with _manifest_cache_lock :
936953 for manifest_file in manifest_files :
0 commit comments