Skip to content

Commit 65c0c9b

Browse files
committed
Add option to cache unknown bucket type
1 parent 991faba commit 65c0c9b

2 files changed

Lines changed: 57 additions & 2 deletions

File tree

gcsfs/extended_gcsfs.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ def __init__(self, *args, finalize_on_close=False, **kwargs):
9696
finalize_on_close : bool, default False
9797
By default, files in zonal buckets are left unfinalized to allow appends.
9898
**kwargs : dict
99+
- cache_unknown_buckets : bool, default False
100+
Whether to cache UNKNOWN bucket types. Useful when the caller lacks permission
101+
to use the Storage Control API, since it avoids repeating slow, failing lookups.
99102
Additional arguments passed to GCSFileSystem.
100103
Supports retry configuration overrides for Storage Control API:
101104
- retry_timeout: Total time to spend retrying (seconds).
@@ -104,6 +107,7 @@ def __init__(self, *args, finalize_on_close=False, **kwargs):
104107
- retry_multiplier: Multiplier for delay between retries.
105108
These map to `google.api_core.retry.AsyncRetry` arguments (without 'retry_' prefix).
106109
"""
110+
self._cache_unknown_buckets = kwargs.pop("cache_unknown_buckets", False)
107111
valid_keys = DEFAULT_RETRY_CONFIG.keys()
108112
self.retry_config = {
109113
k[6:]: v
@@ -194,8 +198,7 @@ async def _lookup_bucket_type(self, bucket):
194198
if bucket in self._storage_layout_cache:
195199
return self._storage_layout_cache[bucket]
196200
bucket_type = await self._get_bucket_type(bucket)
197-
# Don't cache UNKNOWN type
198-
if bucket_type == BucketType.UNKNOWN:
201+
if bucket_type == BucketType.UNKNOWN and not self._cache_unknown_buckets:
199202
return bucket_type
200203
self._storage_layout_cache[bucket] = bucket_type
201204
return self._storage_layout_cache[bucket]

gcsfs/tests/test_extended_gcsfs.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1611,3 +1611,55 @@ async def test_cat_file_non_zonal_fallback(extended_gcsfs):
16111611
mock_super_cat.assert_awaited_once_with(
16121612
"standard_bucket/obj", start=10, end=20, concurrency=2, custom_arg="val"
16131613
)
1614+
1615+
1616+
@pytest.mark.asyncio
1617+
async def test_lookup_bucket_type_not_cached_unknown(extended_gcsfs):
1618+
"""Test that BucketType.UNKNOWN is not cached when _cache_unknown_buckets is False."""
1619+
fs = extended_gcsfs
1620+
fs._cache_unknown_buckets = False
1621+
1622+
# Clear cache just in case
1623+
fs._storage_layout_cache.clear()
1624+
1625+
# Mock _get_bucket_type to return UNKNOWN
1626+
with mock.patch.object(
1627+
fs, "_get_bucket_type", new_callable=mock.AsyncMock
1628+
) as mock_get_type:
1629+
mock_get_type.return_value = BucketType.UNKNOWN
1630+
1631+
# First lookup
1632+
type1 = await fs._lookup_bucket_type("my-bucket")
1633+
assert type1 == BucketType.UNKNOWN
1634+
assert mock_get_type.call_count == 1
1635+
1636+
# Second lookup should call _get_bucket_type again because it's not cached
1637+
type2 = await fs._lookup_bucket_type("my-bucket")
1638+
assert type2 == BucketType.UNKNOWN
1639+
assert mock_get_type.call_count == 2
1640+
1641+
1642+
@pytest.mark.asyncio
1643+
async def test_lookup_bucket_type_cached_unknown(extended_gcsfs):
1644+
"""Test that BucketType.UNKNOWN is cached when _cache_unknown_buckets is True."""
1645+
fs = extended_gcsfs
1646+
fs._cache_unknown_buckets = True
1647+
1648+
# Clear cache just in case
1649+
fs._storage_layout_cache.clear()
1650+
1651+
# Mock _get_bucket_type to return UNKNOWN
1652+
with mock.patch.object(
1653+
fs, "_get_bucket_type", new_callable=mock.AsyncMock
1654+
) as mock_get_type:
1655+
mock_get_type.return_value = BucketType.UNKNOWN
1656+
1657+
# First lookup
1658+
type1 = await fs._lookup_bucket_type("my-bucket")
1659+
assert type1 == BucketType.UNKNOWN
1660+
assert mock_get_type.call_count == 1
1661+
1662+
# Second lookup should NOT call _get_bucket_type again because it's cached
1663+
type2 = await fs._lookup_bucket_type("my-bucket")
1664+
assert type2 == BucketType.UNKNOWN
1665+
assert mock_get_type.call_count == 1

0 commit comments

Comments
 (0)