Skip to content

Commit f26764c

Browse files
committed
Avoid using too much memory for large arrays
1 parent c6d6002 commit f26764c

2 files changed

Lines changed: 116 additions & 12 deletions

File tree

bench/indexing/duckdb_query_bench.py

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from __future__ import annotations
99

1010
import argparse
11+
import gc
1112
import math
1213
import os
1314
import re
@@ -271,6 +272,9 @@ def build_duckdb_file(
271272
*,
272273
layout: str,
273274
batch_size: int,
275+
duckdb_threads: int,
276+
duckdb_memory_limit: str | None,
277+
preserve_insertion_order: bool,
274278
) -> float:
275279
path.parent.mkdir(parents=True, exist_ok=True)
276280
_remove_duckdb_path(path)
@@ -283,7 +287,10 @@ def build_duckdb_file(
283287
start_time = time.perf_counter()
284288
con = duckdb.connect(str(path))
285289
try:
286-
con.execute("PRAGMA threads=8")
290+
con.execute(f"PRAGMA threads={int(duckdb_threads)}")
291+
con.execute(f"SET preserve_insertion_order={'true' if preserve_insertion_order else 'false'}")
292+
if duckdb_memory_limit is not None:
293+
con.execute(f"SET memory_limit = {duckdb_memory_limit!r}")
287294
con.execute(f"CREATE TABLE data (id {id_type}, payload FLOAT)")
288295
for start in range(0, size, batch_size):
289296
stop = min(start + batch_size, size)
@@ -305,6 +312,16 @@ def build_duckdb_file(
305312
con.execute("INSERT INTO data SELECT * FROM batch_arrow")
306313
con.unregister("batch_arrow")
307314

315+
# For random distributions, random_ids can be very large (e.g. 8 GB
316+
# for 1G float64 rows). Release it, and the final Arrow batch objects,
317+
# before building DuckDB's ART index; otherwise both the source payload
318+
# and DuckDB's index builder compete for memory.
319+
random_ids = None
320+
ids = None
321+
payload = None
322+
batch = None
323+
gc.collect()
324+
308325
if layout == "art-index":
309326
con.execute("CREATE INDEX data_id_idx ON data(id)")
310327
elif layout != "zonemap":
@@ -324,10 +341,23 @@ def _open_or_build_duckdb_file(
324341
*,
325342
layout: str,
326343
batch_size: int,
344+
duckdb_threads: int,
345+
duckdb_memory_limit: str | None,
346+
preserve_insertion_order: bool,
327347
) -> float:
328348
if _valid_duckdb_file(path, layout):
329349
return 0.0
330-
return build_duckdb_file(size, dist, id_dtype, path, layout=layout, batch_size=batch_size)
350+
return build_duckdb_file(
351+
size,
352+
dist,
353+
id_dtype,
354+
path,
355+
layout=layout,
356+
batch_size=batch_size,
357+
duckdb_threads=duckdb_threads,
358+
duckdb_memory_limit=duckdb_memory_limit,
359+
preserve_insertion_order=preserve_insertion_order,
360+
)
331361

332362

333363
def _query_bounds(size: int, query_width: int, dtype: np.dtype) -> tuple[object, object]:
@@ -421,9 +451,22 @@ def benchmark_layout(
421451
batch_size: int,
422452
repeats: int,
423453
exact_query: bool,
454+
duckdb_threads: int,
455+
duckdb_memory_limit: str | None,
456+
preserve_insertion_order: bool,
424457
) -> dict:
425458
path = duckdb_path(outdir, size, dist, id_dtype, layout, batch_size)
426-
create_s = _open_or_build_duckdb_file(size, dist, id_dtype, path, layout=layout, batch_size=batch_size)
459+
create_s = _open_or_build_duckdb_file(
460+
size,
461+
dist,
462+
id_dtype,
463+
path,
464+
layout=layout,
465+
batch_size=batch_size,
466+
duckdb_threads=duckdb_threads,
467+
duckdb_memory_limit=duckdb_memory_limit,
468+
preserve_insertion_order=preserve_insertion_order,
469+
)
427470
lo, hi = _query_bounds(size, query_width, id_dtype)
428471

429472
cold_scan_elapsed, warm_scan_elapsed, third_scan_elapsed, scan_rows = benchmark_scan_once(
@@ -469,7 +512,7 @@ def benchmark_layout(
469512

470513
def parse_human_int(value: str) -> int:
    """Parse a human-friendly integer such as '1.25m', '10k', '1g', or '5_000'.

    Suffixes are case-insensitive decimal multipliers (k=1e3, m=1e6, g=1e9);
    underscores and surrounding whitespace are ignored. Raises ValueError for
    text that is not a number (including the empty string).
    """
    text = value.strip().lower().replace("_", "")
    # text[-1:] is "" for empty input, which simply misses the lookup below.
    scale = {"k": 1_000, "m": 1_000_000, "g": 1_000_000_000}.get(text[-1:])
    if scale is None:
        return int(text)
    return int(float(text[:-1]) * scale)
@@ -484,11 +527,16 @@ def print_results(
484527
query_width: int,
485528
id_dtype: np.dtype,
486529
exact_query: bool,
530+
duckdb_threads: int,
531+
duckdb_memory_limit: str | None,
532+
preserve_insertion_order: bool,
487533
) -> None:
488534
print("DuckDB range-query benchmark via SQL filtered reads")
489535
print(
490536
f"batch_size={batch_size:,}, repeats={repeats}, dist={dist}, query_width={query_width:,}, "
491-
f"dtype={id_dtype.name}, query_single_value={exact_query}"
537+
f"dtype={id_dtype.name}, query_single_value={exact_query}, duckdb_threads={duckdb_threads}, "
538+
f"duckdb_memory_limit={'auto' if duckdb_memory_limit is None else duckdb_memory_limit}, "
539+
f"preserve_insertion_order={preserve_insertion_order}"
492540
)
493541
print("Note: 'zonemap' is DuckDB's default table layout with automatic min/max pruning.")
494542
print(" 'art-index' adds an explicit secondary index on id.")
@@ -561,10 +609,29 @@ def main() -> None:
561609
help="Batch size used while loading the table. Default: 1.25M.",
562610
)
563611
parser.add_argument("--repeats", type=int, default=DEFAULT_REPEATS, help="Benchmark repeats. Default: 3.")
612+
parser.add_argument(
613+
"--duckdb-threads",
614+
type=int,
615+
default=8,
616+
help="DuckDB thread count used while building tables/indexes. Default: 8.",
617+
)
618+
parser.add_argument(
619+
"--duckdb-memory-limit",
620+
default=None,
621+
help="Optional DuckDB memory_limit setting, e.g. '12GB'. Default: DuckDB auto limit.",
622+
)
623+
parser.add_argument(
624+
"--preserve-insertion-order",
625+
action=argparse.BooleanOptionalAction,
626+
default=False,
627+
help="DuckDB preserve_insertion_order setting during build. Default: false to reduce memory.",
628+
)
564629
args = parser.parse_args()
565630

566631
if args.query_single_value and args.query_width != 1:
567632
raise ValueError("--query-single-value requires --query-width 1")
633+
if args.duckdb_threads <= 0:
634+
raise ValueError("--duckdb-threads must be positive")
568635

569636
id_dtype = np.dtype(args.dtype)
570637
sizes = SIZES if args.size == "all" else (parse_human_int(args.size),)
@@ -586,6 +653,9 @@ def main() -> None:
586653
args.batch_size,
587654
args.repeats,
588655
args.query_single_value,
656+
args.duckdb_threads,
657+
args.duckdb_memory_limit,
658+
args.preserve_insertion_order,
589659
)
590660
)
591661

@@ -597,6 +667,9 @@ def main() -> None:
597667
query_width=args.query_width,
598668
id_dtype=id_dtype,
599669
exact_query=args.query_single_value,
670+
duckdb_threads=args.duckdb_threads,
671+
duckdb_memory_limit=args.duckdb_memory_limit,
672+
preserve_insertion_order=args.preserve_insertion_order,
600673
)
601674

602675

bench/indexing/index_query_bench.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,42 @@ def _fill_permuted_ids(ids: np.ndarray, size: int, start: int, stop: int, step:
255255
ids[:] = ordered_ids_from_positions(shuffled_positions, size, ids.dtype)
256256

257257

258-
def _randomized_ids(size: int, dtype: np.dtype) -> np.ndarray:
259-
ids = make_ordered_ids(size, dtype)
260-
np.random.default_rng(RNG_SEED).shuffle(ids)
261-
return ids
258+
def _feistel_permute_uint64(values: np.ndarray, nbits: int) -> np.ndarray:
259+
half_bits = (nbits + 1) // 2
260+
left_bits = nbits - half_bits
261+
right_mask = np.uint64((1 << half_bits) - 1)
262+
left_mask = np.uint64((1 << left_bits) - 1)
263+
left = (values >> np.uint64(half_bits)) & left_mask
264+
right = values & right_mask
265+
# Fixed deterministic odd constants. This is not cryptographic; it is a
266+
# vectorized bijective mixer for benchmark data generation.
267+
keys = (0x9E3779B97F4A7C15, 0xBF58476D1CE4E5B9, 0x94D049BB133111EB, 0xD2B74407B1CE6E93)
268+
for key in keys:
269+
mixed = (right * np.uint64(key) + np.uint64(key >> 17)) & left_mask
270+
left, right = right, (left ^ mixed) & left_mask
271+
return (left << np.uint64(half_bits)) | right
272+
273+
274+
def _randomized_positions_slice(size: int, start: int, stop: int) -> np.ndarray:
    """Return pseudo-random permutation positions for rows [start, stop).

    Uses a Feistel permutation over the smallest even-bit power-of-two domain
    covering *size*, so any slice of the permutation can be generated without
    materializing the full shuffled array. Deterministic for a given size.
    """
    count = stop - start
    if size <= 1:
        # Degenerate domain: every position is 0.
        return np.zeros(count, dtype=np.int64)
    bits = max(2, int(size - 1).bit_length())
    bits += bits & 1  # round up to even so the Feistel halves stay balanced
    out = _feistel_permute_uint64(np.arange(start, stop, dtype=np.uint64), bits)
    # Cycle-walk the small fraction that lands outside [0, size). For the
    # benchmark sizes used here this normally needs at most one extra pass.
    limit = np.uint64(size)
    oob = out >= limit
    while oob.any():
        out[oob] = _feistel_permute_uint64(out[oob], bits)
        oob = out >= limit
    return out.astype(np.int64, copy=False)
290+
291+
292+
def _fill_randomized_ids(ids: np.ndarray, size: int, start: int, stop: int) -> None:
    """Fill *ids* in place with randomized id values for rows [start, stop)."""
    positions = _randomized_positions_slice(size, start, stop)
    ids[:] = ordered_ids_from_positions(positions, size, ids.dtype)
262294

263295

264296
def build_persistent_array(
@@ -275,18 +307,17 @@ def build_persistent_array(
275307
block_len = int(arr.blocks[0])
276308
block_order = _block_order(size, block_len) if dist == "block-shuffled" else None
277309
permuted_step, permuted_offset = _permuted_position_params(size) if dist == "permuted" else (1, 0)
278-
random_ids = _randomized_ids(size, id_dtype) if dist == "random" else None
279310
for start in range(0, size, chunk_len):
280311
stop = min(start + chunk_len, size)
281312
chunk = np.zeros(stop - start, dtype=dtype)
282-
if dist == "full":
313+
if dist == "sorted":
283314
chunk["id"] = ordered_id_slice(size, start, stop, id_dtype)
284315
elif dist == "block-shuffled":
285316
_fill_block_shuffled_ids(chunk["id"], size, start, stop, block_len, block_order)
286317
elif dist == "permuted":
287318
_fill_permuted_ids(chunk["id"], size, start, stop, permuted_step, permuted_offset)
288319
elif dist == "random":
289-
chunk["id"] = random_ids[start:stop]
320+
_fill_randomized_ids(chunk["id"], size, start, stop)
290321
else:
291322
raise ValueError(f"unsupported distribution {dist!r}")
292323
chunk["payload"] = payload_slice(start, stop)

0 commit comments

Comments
 (0)