Skip to content

Commit 73fec60

Browse files
committed
perf: update microbenchmarks and use concurrent download_ranges
1 parent 14abfd5 commit 73fec60

File tree

3 files changed

+23
-6
lines changed

3 files changed

+23
-6
lines changed

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def _get_params() -> Dict[str, List[TimeBasedReadParameters]]:
8383
num_files = num_processes * num_coros
8484

8585
# Create a descriptive name for the parameter set
86-
name = f"{pattern}_{bucket_type}_{num_processes}p_{file_size_mib}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"
86+
name = f"{pattern}_{bucket_type}_{num_processes}p_{num_coros}c_{file_size_mib}MiB_{chunk_size_kib}KiB_{num_ranges_val}ranges"
8787

8888
params[workload_name].append(
8989
TimeBasedReadParameters(

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ common:
55
file_sizes_mib:
66
- 10240 # 10GiB
77
chunk_sizes_kib: [64] # 64KiB
8-
num_ranges: [1]
8+
num_ranges: [1, 16] # Number of ranges generated per iteration, distributed among `coros` concurrent tasks.
99
rounds: 1
1010
duration: 30 # seconds
1111
warmup_duration: 5 # seconds
@@ -14,15 +14,15 @@ workload:
1414
############# multi process multi coroutine #########
1515
- name: "read_seq_multi_process"
1616
pattern: "seq"
17-
coros: [1]
17+
coros: [16] # Number of concurrent asyncio tasks per process calling download_ranges.
1818
processes: [96]
1919

2020

2121
- name: "read_rand_multi_process"
2222
pattern: "rand"
23-
coros: [1]
23+
coros: [16] # Number of concurrent asyncio tasks per process calling download_ranges.
2424
processes: [1]
2525

2626
defaults:
2727
DEFAULT_RAPID_ZONAL_BUCKET: "chandrasiri-benchmarks-zb"
28-
DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb"
28+
DEFAULT_STANDARD_BUCKET: "chandrasiri-benchmarks-rb"

packages/google-cloud-storage/tests/perf/microbenchmarks/time_based/reads/test_reads.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ async def _download_time_based_async(client, filename, params):
120120
mrd = AsyncMultiRangeDownloader(client, params.bucket_name, filename)
121121
await mrd.open()
122122

123+
lock = asyncio.Lock()
124+
123125
offset = 0
124126
is_warming_up = True
125127
start_time = time.monotonic()
@@ -146,7 +148,22 @@ async def _download_time_based_async(client, filename, params):
146148
if offset + params.chunk_size_bytes > params.file_size_bytes:
147149
offset = 0 # Reset offset if end of file is reached
148150

149-
await mrd.download_ranges(ranges)
151+
# Test async mrd.download_ranges: multiple concurrent calls to mrd.download_ranges
152+
# are expected to use the same underlying stream with true multiplexing.
153+
chunk_base, remainder = divmod(len(ranges), params.num_coros)
154+
chunks = []
155+
start_idx = 0
156+
for i in range(params.num_coros):
157+
end_idx = start_idx + chunk_base + (1 if i < remainder else 0)
158+
chunks.append(ranges[start_idx:end_idx])
159+
start_idx = end_idx
160+
161+
tasks = [
162+
asyncio.create_task(mrd.download_ranges(chunk, lock))
163+
for chunk in chunks
164+
if chunk
165+
]
166+
await asyncio.gather(*tasks)
150167

151168
bytes_in_buffers = sum(r[2].getbuffer().nbytes for r in ranges)
152169
assert bytes_in_buffers == params.chunk_size_bytes * params.num_ranges

0 commit comments

Comments
 (0)