perf: update microbenchmarks and use concurrent download_ranges

zhixiangli · zhixiangli · commit 73fec60af409 · 2026-04-01T11:04:06.000Z
diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.py
@@ -83,7 +83,7 @@ def _get_params() -> Dict[str, List[TimeBasedReadParameters]]:
             num_files = num_processes * num_coros
 
             # Create a descriptive name for the parameter set
-            name = f"{pattern}_{bucket_type}_{num_processes}p_{file_size_mib}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"
+            name = f"{pattern}_{bucket_type}_{num_processes}p_{num_coros}c_{file_size_mib}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"
 
             params[workload_name].append(
                 TimeBasedReadParameters(
diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.yaml b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.yaml
@@ -5,7 +5,7 @@ common:
   file_sizes_mib:
     - 10240 # 10GiB
   chunk_sizes_kib: [64] # 64KiB
-  num_ranges: [1]
+  num_ranges: [1, 16] # Number of ranges generated per iteration, distributed among `coros` concurrent tasks.
   rounds: 1
   duration: 30 # seconds
   warmup_duration: 5 # seconds
@@ -14,15 +14,15 @@ workload:
   ############# multi process multi coroutine #########
   - name: "read_seq_multi_process"
     pattern: "seq"
-    coros: [1]
+    coros: [16] # Number of concurrent asyncio tasks per process calling download_ranges.
     processes: [96]
 
 
   - name: "read_rand_multi_process"
     pattern: "rand"
-    coros: [1]
+    coros: [16] # Number of concurrent asyncio tasks per process calling download_ranges.
     processes: [1]
 
 defaults:
   DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb"
-  DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb"
+  DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb"
diff --git a/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py b/packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py
@@ -120,6 +120,8 @@ async def _download_time_based_async(client, filename, params):
     mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
     await mrd.open()
 
+    lock = asyncio.Lock()
+
     offset = 0
     is_warming_up = True
     start_time = time.monotonic()
@@ -146,7 +148,22 @@ async def _download_time_based_async(client, filename, params):
                 if offset + params.chunk_size_bytes > params.file_size_bytes:
                     offset = 0  # Reset offset if end of file is reached
 
-        await mrd.download_ranges(ranges)
+        # Split the ranges into up to `num_coros` chunks and download them concurrently;
+        # concurrent mrd.download_ranges calls are expected to multiplex over one stream.
+        chunk_base, remainder = divmod(len(ranges), params.num_coros)
+        chunks = []
+        start_idx = 0
+        for i in range(params.num_coros):
+            end_idx = start_idx + chunk_base + (1 if i < remainder else 0)
+            chunks.append(ranges[start_idx:end_idx])
+            start_idx = end_idx
+
+        tasks = [
+            asyncio.create_task(mrd.download_ranges(chunk, lock))
+            for chunk in chunks
+            if chunk
+        ]
+        await asyncio.gather(*tasks)
 
         bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
         assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges