Skip to content

Commit 991faba

Browse files
authored
Improve test speed and emulator compatibility (#843)
* Improve test speed with ADC timeout handling and an isolated multi-threaded test file fixture
* Add cache_unknown_buckets config to allow caching unknown bucket types in tests
* Add tests
* Introduce is_real_gcs
* Remove _cache_unknown_buckets
1 parent c21f4e0 commit 991faba

11 files changed

Lines changed: 131 additions & 57 deletions

gcsfs/tests/conftest.py

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
TEST_VERSIONED_BUCKET,
2626
TEST_ZONAL_BUCKET,
2727
)
28+
from gcsfs.tests.utils import is_real_gcs
2829

2930
files = {
3031
"test/accounts.1.json": (
@@ -60,13 +61,6 @@
6061
"zonal/test/c": b"ab\n" + b"a" * (2**18) + b"\nab",
6162
}
6263

63-
_MULTI_THREADED_TEST_DATA_SIZE = 5 * 1024 * 1024 # 5MB
64-
pattern = b"0123456789abcdef"
65-
text_files["multi_threaded_test_file"] = (
66-
pattern * (_MULTI_THREADED_TEST_DATA_SIZE // len(pattern))
67-
+ pattern[: _MULTI_THREADED_TEST_DATA_SIZE % len(pattern)]
68-
)
69-
7064
allfiles = dict(**files, **csv_files, **text_files)
7165
a = TEST_BUCKET + "/tmp/test/a"
7266
b = TEST_BUCKET + "/tmp/test/b"
@@ -82,6 +76,37 @@
8276
}
8377

8478

79+
@pytest.fixture(autouse=True)
def _avoid_adc_timeout(monkeypatch):
    """Keep tests from stalling on ADC lookups and Metadata Server requests.

    The environment is only adjusted when the run is not targeting real GCS;
    against real GCS the fixture leaves everything untouched. All changes go
    through ``monkeypatch``.
    """
    if not is_real_gcs():
        # Switch off the GCE metadata check in google-auth and gcsfs.
        monkeypatch.setenv("NO_GCE_CHECK", "true")

        # Provide a placeholder project so project-ID discovery cannot time
        # out; an explicitly configured project is left as-is.
        if "GOOGLE_CLOUD_PROJECT" not in os.environ:
            monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "dummy-project")

    yield
95+
96+
97+
@pytest.fixture(autouse=True)
def _mock_get_bucket_type_on_emulator():
    """Patch ``_get_bucket_type`` to return ``BucketType.UNKNOWN`` on emulator runs.

    When the tests run against real GCS nothing is patched and the real
    method stays in place.
    """
    if is_real_gcs():
        yield
        return

    # The patch is active only for the duration of the test that uses it.
    bucket_type_patch = mock.patch(
        "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._get_bucket_type",
        return_value=BucketType.UNKNOWN,
    )
    with bucket_type_patch:
        yield
108+
109+
85110
def stop_docker(container):
86111
cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container)
87112
cid = subprocess.check_output(cmd).strip().decode()
@@ -92,6 +117,8 @@ def stop_docker(container):
92117
@pytest.fixture(scope="session")
93118
def docker_gcs():
94119
if "STORAGE_EMULATOR_HOST" in os.environ:
120+
if not is_real_gcs():
121+
params["token"] = "anon"
95122
# assume using real API or otherwise have a server already set up
96123
yield os.getenv("STORAGE_EMULATOR_HOST")
97124
return
@@ -290,9 +317,6 @@ def final_cleanup(gcs_factory, buckets_to_delete):
290317
def gcs_versioned(gcs_factory, buckets_to_delete):
291318
gcs = gcs_factory()
292319
gcs.version_aware = True
293-
is_real_gcs = (
294-
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
295-
)
296320
try: # ensure we're empty.
297321
# The versioned bucket might be created by `is_versioning_enabled`
298322
# in test_core_versioned.py. We must register it for cleanup only if
@@ -306,7 +330,7 @@ def gcs_versioned(gcs_factory, buckets_to_delete):
306330
buckets_to_delete.add(TEST_VERSIONED_BUCKET)
307331
except ImportError:
308332
pass # test_core_versioned is not being run
309-
if is_real_gcs:
333+
if is_real_gcs():
310334
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
311335
else:
312336
# For emulators, we delete and recreate the bucket for a clean state
@@ -321,7 +345,7 @@ def gcs_versioned(gcs_factory, buckets_to_delete):
321345
finally:
322346
# Ensure the bucket is empty after the test.
323347
try:
324-
if is_real_gcs:
348+
if is_real_gcs():
325349
cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
326350
except Exception as e:
327351
logging.warning(
@@ -367,13 +391,9 @@ def cleanup_versioned_bucket(gcs, bucket_name, prefix=None):
367391

368392

369393
def _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate_bucket, **kwargs):
370-
is_real_gcs = (
371-
os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
372-
)
373-
374394
extended_gcsfs = gcs_factory(**kwargs)
375395
# Only create/delete/populate the bucket if we are NOT using the real GCS endpoint.
376-
if not is_real_gcs:
396+
if not is_real_gcs():
377397
if not extended_gcsfs.exists(TEST_ZONAL_BUCKET):
378398
extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
379399
buckets_to_delete.add(TEST_ZONAL_BUCKET)
@@ -433,7 +453,7 @@ def gcs_hns(gcs_factory, buckets_to_delete):
433453
def zonal_write_mocks():
434454
"""A fixture for mocking Zonal bucket write functionality."""
435455

436-
if os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com":
456+
if is_real_gcs():
437457
yield None
438458
return
439459

gcsfs/tests/integration/test_async_gcsfs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from gcsfs.extended_gcsfs import ExtendedGcsFileSystem
2222
from gcsfs.tests.settings import TEST_HNS_BUCKET
23+
from gcsfs.tests.utils import is_real_gcs
2324

2425
REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"
2526

@@ -34,7 +35,7 @@
3435
reason=f"Skipping tests: {REQUIRED_ENV_VAR} env variable is not set",
3536
),
3637
pytest.mark.skipif(
37-
os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com",
38+
not is_real_gcs(),
3839
reason="Skipping tests on emulator, requires real GCS.",
3940
),
4041
]

gcsfs/tests/integration/test_extended_hns.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
from gcsfs.extended_gcsfs import BucketType, ExtendedGcsFileSystem
2121
from gcsfs.tests.settings import TEST_HNS_BUCKET, TEST_PROJECT
22+
from gcsfs.tests.utils import is_real_gcs
2223

2324
should_run_hns = os.getenv("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false").lower() in (
2425
"true",
@@ -27,8 +28,7 @@
2728

2829
# Skip these tests if not running against a real GCS backend or if experimentation flag is not set.
2930
pytestmark = pytest.mark.skipif(
30-
os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com"
31-
or not should_run_hns,
31+
not is_real_gcs() or not should_run_hns,
3232
reason="This test class is for real GCS HNS buckets only and requires experimental flag.",
3333
)
3434

gcsfs/tests/test_core.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,8 +487,7 @@ def test_rm_recursive(gcs):
487487

488488
def test_rm_chunked_batch(gcs):
489489
files = [f"{TEST_BUCKET}/t{i}" for i in range(303)]
490-
for fn in files:
491-
gcs.touch(fn)
490+
gcs.pipe({fn: b"" for fn in files})
492491

493492
files_created = gcs.find(TEST_BUCKET)
494493
for fn in files:
@@ -511,8 +510,7 @@ def test_rm_wildcards_in_directory(gcs):
511510
f"{base_dir}/b1.dat",
512511
f"{base_dir}/subdir/nested.txt",
513512
]
514-
for f in files:
515-
gcs.touch(f)
513+
gcs.pipe({f: b"" for f in files})
516514

517515
# 1. Test '?' wildcard (non-recursive)
518516
gcs.rm(f"{base_dir}/file?.txt")
@@ -2019,7 +2017,6 @@ def test_find_dircache(gcs):
20192017
f"{TEST_BUCKET}/2014-01-01.csv",
20202018
f"{TEST_BUCKET}/2014-01-02.csv",
20212019
f"{TEST_BUCKET}/2014-01-03.csv",
2022-
f"{TEST_BUCKET}/multi_threaded_test_file",
20232020
f"{TEST_BUCKET}/zonal",
20242021
}
20252022
assert set(gcs.ls(f"{TEST_BUCKET}/nested")) == {

gcsfs/tests/test_core_versioned.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from gcsfs import GCSFileSystem
99
from gcsfs.tests.settings import TEST_VERSIONED_BUCKET
10+
from gcsfs.tests.utils import is_real_gcs
1011

1112
a = TEST_VERSIONED_BUCKET + "/tmp/test/a"
1213
b = TEST_VERSIONED_BUCKET + "/tmp/test/b"
@@ -22,7 +23,7 @@ def is_versioning_enabled():
2223
"""
2324
# Don't skip when using an emulator, as we create the versioned bucket ourselves.
2425
global _VERSIONED_BUCKET_CREATED_BY_TESTS
25-
if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com":
26+
if not is_real_gcs():
2627
return True, ""
2728
try:
2829
gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project"))

gcsfs/tests/test_credentials.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,43 @@ def test_connect_google_default_uses_request():
4545
assert isinstance(kwargs["request"], Request)
4646

4747

48+
def test_connect_cloud_success():
    """``token="cloud"`` with ``on_google=True`` adopts the compute-engine credentials.

    Checks that the patched credentials object is stored on the instance,
    that ``refresh`` was invoked on it, and that ``method`` reports "cloud".
    """
    with patch("google.auth.compute_engine.Credentials") as credentials_cls:
        fake_credentials = Mock()
        credentials_cls.return_value = fake_credentials

        cred = GoogleCredentials(
            project="my-project",
            access="read_only",
            token="cloud",
            on_google=True,
        )

        assert cred.credentials == fake_credentials
        assert fake_credentials.refresh.called
        assert cred.method == "cloud"
60+
61+
62+
def test_connect_cloud_failure():
    """A ``RefreshError`` from the compute-engine credentials becomes ``ValueError``.

    The patched credentials raise on ``refresh``; constructing
    ``GoogleCredentials`` with ``token="cloud"`` must then fail with a
    ``ValueError`` whose message matches "Invalid gcloud credentials".
    """
    import google.auth.exceptions

    refresh_error = google.auth.exceptions.RefreshError("mock error")

    with patch("google.auth.compute_engine.Credentials") as credentials_cls:
        failing_credentials = Mock()
        failing_credentials.refresh.side_effect = refresh_error
        credentials_cls.return_value = failing_credentials

        with pytest.raises(ValueError, match="Invalid gcloud credentials"):
            GoogleCredentials(
                project="my-project",
                access="read_only",
                token="cloud",
                on_google=True,
            )
76+
77+
78+
def test_connect_cloud_not_on_google():
    """``token="cloud"`` with ``on_google=False`` is rejected with ``ValueError``."""
    with pytest.raises(ValueError):
        GoogleCredentials(
            project="my-project",
            access="read_only",
            token="cloud",
            on_google=False,
        )
83+
84+
4885
@pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100])
4986
def test_credentials_from_raw_token(token):
5087
with patch.dict(os.environ, {"FETCH_RAW_TOKEN_EXPIRY": "false"}):

0 commit comments

Comments
 (0)