Skip to content

Commit ac1cab5

Browse files
committed
test: consolidate categorical fixtures with factory pattern
Reduce repetition in the categorical test fixtures by using a config-driven factory pattern instead of a separate fixture group for each category size.

Changes:
- Replace 15 individual fixtures with 3 generated fixtures + 1 data fixture
- Consolidate n50 and n100 into a single n100 config (serves both use cases)
- Use the `_make_cat_fixture()` factory for zarr/h5ad parametrization
- Update tests to use the new fixture names (cat_n3_store, cat_n100_store)

Addresses review feedback about fixture repetitiveness.
1 parent f740784 commit ac1cab5

1 file changed

Lines changed: 72 additions & 175 deletions

File tree

tests/lazy/test_read.py

Lines changed: 72 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -249,176 +249,73 @@ def test_chunks_df(
249249
assert arr.chunksize == expected_chunks
250250

251251

252-
# Session-scoped fixtures for categorical data (write once, read many)
253-
# Each category type has zarr and h5ad path fixtures, plus a parametrized store fixture
252+
# Categorical fixtures generated from (name, n_categories, ordered, category_names) configs
253+
# Data is written once per session; stores are opened per-test with backend parametrization
254254

255-
256-
def _write_categorical_zarr(tmp_path_factory, name: str, cat: pd.Categorical) -> Path:
257-
"""Helper to write categorical to zarr and return path."""
258-
path = tmp_path_factory.mktemp(f"{name}.zarr")
259-
store = zarr.open(path, mode="w")
260-
write_elem(store, "cat", cat)
261-
return path
262-
263-
264-
def _write_categorical_h5ad(tmp_path_factory, name: str, cat: pd.Categorical) -> Path:
265-
"""Helper to write categorical to h5ad and return path."""
266-
path = tmp_path_factory.mktemp(name) / "cat.h5ad"
267-
with h5py.File(path, mode="w") as f:
268-
write_elem(f, "cat", cat)
269-
return path
270-
271-
272-
def _open_categorical_store(path: Path, backend: str):
273-
"""Helper to open categorical store for either backend."""
274-
if backend == "zarr":
275-
return zarr.open(path, mode="r")["cat"]
276-
else:
277-
return h5py.File(path, mode="r")["cat"]
278-
279-
280-
# Small categorical ['a', 'b', 'c']
281-
@pytest.fixture(scope="session")
282-
def cat_small_path_zarr(tmp_path_factory) -> Path:
283-
return _write_categorical_zarr(
284-
tmp_path_factory, "cat_small", pd.Categorical(["a", "b", "c"])
285-
)
255+
# Categorical fixture configurations, one tuple per dataset:
#   (name, n_categories, ordered, category_names)
# `name` keys the files written by `cat_data_paths`. When `category_names` is
# None, labels are generated as cat_00, cat_01, ... for range(n_categories).
_CAT_CONFIGS: list[tuple[str, int, bool, list[str] | None]] = [
    # basic tests, equality, hashing
    ("n3", 3, False, ["a", "b", "c"]),
    # truncation, n_categories, head/tail
    ("n100", 100, False, None),
    # ordered categories
    ("ordered", 3, True, ["low", "medium", "high"]),
]
286261

287262

288263
@pytest.fixture(scope="session")
289-
def cat_small_path_h5ad(tmp_path_factory) -> Path:
290-
return _write_categorical_h5ad(
291-
tmp_path_factory, "cat_small", pd.Categorical(["a", "b", "c"])
292-
)
293-
294-
295-
@pytest.fixture(params=["zarr", "h5ad"])
296-
def cat_small_store(request, cat_small_path_zarr: Path, cat_small_path_h5ad: Path):
297-
"""Parametrized fixture: small categorical ['a', 'b', 'c'] for both backends."""
298-
path = cat_small_path_zarr if request.param == "zarr" else cat_small_path_h5ad
299-
store = _open_categorical_store(path, request.param)
300-
yield store
301-
if request.param == "h5ad":
302-
store.file.close()
303-
304-
305-
# Medium categorical ['a', 'b', 'c', 'd', 'e']
306-
@pytest.fixture(scope="session")
307-
def cat_medium_path_zarr(tmp_path_factory) -> Path:
308-
return _write_categorical_zarr(
309-
tmp_path_factory, "cat_medium", pd.Categorical(["a", "b", "c", "d", "e"])
310-
)
311-
264+
def cat_data_paths(tmp_path_factory) -> dict[tuple[str, str], Path]:
265+
"""Create all categorical test data once per session, return paths dict."""
266+
base = tmp_path_factory.mktemp("categorical_data")
267+
paths: dict[tuple[str, str], Path] = {}
312268

313-
@pytest.fixture(scope="session")
314-
def cat_medium_path_h5ad(tmp_path_factory) -> Path:
315-
return _write_categorical_h5ad(
316-
tmp_path_factory, "cat_medium", pd.Categorical(["a", "b", "c", "d", "e"])
317-
)
269+
for name, n_cat, ordered, cat_names in _CAT_CONFIGS:
270+
categories = cat_names or [f"cat_{i:02d}" for i in range(n_cat)]
271+
cat = pd.Categorical(categories, categories=categories, ordered=ordered)
318272

273+
# Write zarr
274+
zarr_path = base / f"{name}.zarr"
275+
store = zarr.open(zarr_path, mode="w")
276+
write_elem(store, "cat", cat)
277+
paths[(name, "zarr")] = zarr_path
319278

320-
@pytest.fixture(params=["zarr", "h5ad"])
321-
def cat_medium_store(request, cat_medium_path_zarr: Path, cat_medium_path_h5ad: Path):
322-
"""Parametrized fixture: medium categorical for both backends."""
323-
path = cat_medium_path_zarr if request.param == "zarr" else cat_medium_path_h5ad
324-
store = _open_categorical_store(path, request.param)
325-
yield store
326-
if request.param == "h5ad":
327-
store.file.close()
279+
# Write h5ad
280+
h5_path = base / f"{name}.h5ad"
281+
with h5py.File(h5_path, mode="w") as f:
282+
write_elem(f, "cat", cat)
283+
paths[(name, "h5ad")] = h5_path
328284

285+
return paths
329286

330-
# Large categorical with 100 categories
331-
@pytest.fixture(scope="session")
332-
def cat_large_path_zarr(tmp_path_factory) -> Path:
333-
categories = [f"cat_{i}" for i in range(100)]
334-
return _write_categorical_zarr(
335-
tmp_path_factory, "cat_large", pd.Categorical(categories)
336-
)
337287

338-
339-
@pytest.fixture(scope="session")
340-
def cat_large_path_h5ad(tmp_path_factory) -> Path:
341-
categories = [f"cat_{i}" for i in range(100)]
342-
return _write_categorical_h5ad(
343-
tmp_path_factory, "cat_large", pd.Categorical(categories)
344-
)
345-
346-
347-
@pytest.fixture(params=["zarr", "h5ad"])
348-
def cat_large_store(request, cat_large_path_zarr: Path, cat_large_path_h5ad: Path):
349-
"""Parametrized fixture: large categorical (100 categories) for both backends."""
350-
path = cat_large_path_zarr if request.param == "zarr" else cat_large_path_h5ad
351-
store = _open_categorical_store(path, request.param)
352-
yield store
353-
if request.param == "h5ad":
354-
store.file.close()
355-
356-
357-
# Ordered categorical ['low', 'medium', 'high']
358-
@pytest.fixture(scope="session")
359-
def cat_ordered_path_zarr(tmp_path_factory) -> Path:
360-
cat = pd.Categorical(
361-
["low", "medium", "high"] * 3 + ["low"],
362-
categories=["low", "medium", "high"],
363-
ordered=True,
364-
)
365-
return _write_categorical_zarr(tmp_path_factory, "cat_ordered", cat)
366-
367-
368-
@pytest.fixture(scope="session")
369-
def cat_ordered_path_h5ad(tmp_path_factory) -> Path:
370-
cat = pd.Categorical(
371-
["low", "medium", "high"] * 3 + ["low"],
372-
categories=["low", "medium", "high"],
373-
ordered=True,
374-
)
375-
return _write_categorical_h5ad(tmp_path_factory, "cat_ordered", cat)
288+
def _open_cat_store(path: Path, backend: str):
    """Open the ``"cat"`` element of a categorical test store read-only.

    Parameters
    ----------
    path
        Location of the store written for the given backend.
    backend
        Either ``"zarr"`` or ``"h5ad"``.

    Returns
    -------
    The ``"cat"`` entry of the opened zarr group or HDF5 file. For
    ``"h5ad"`` the underlying file stays open; the caller is responsible
    for closing it (e.g. ``store.file.close()``).

    Raises
    ------
    ValueError
        If ``backend`` is not one of the two supported names.
    """
    if backend == "zarr":
        return zarr.open(path, mode="r")["cat"]
    if backend != "h5ad":
        # Fail loudly instead of treating every non-zarr name as HDF5.
        msg = f"Unknown backend {backend!r}; expected 'zarr' or 'h5ad'"
        raise ValueError(msg)
    h5_file = h5py.File(path, mode="r")
    try:
        return h5_file["cat"]
    except BaseException:
        # Do not leak the open handle when the element cannot be read.
        h5_file.close()
        raise
376293

377294

378-
@pytest.fixture(params=["zarr", "h5ad"])
379-
def cat_ordered_store(
380-
request, cat_ordered_path_zarr: Path, cat_ordered_path_h5ad: Path
381-
):
382-
"""Parametrized fixture: ordered categorical for both backends."""
383-
path = cat_ordered_path_zarr if request.param == "zarr" else cat_ordered_path_h5ad
384-
store = _open_categorical_store(path, request.param)
385-
yield store
386-
if request.param == "h5ad":
387-
store.file.close()
295+
def _make_cat_fixture(config_name: str):
296+
"""Factory to create categorical store fixtures with zarr/h5ad parametrization."""
388297

298+
@pytest.fixture(params=["zarr", "h5ad"])
299+
def _fixture(request, cat_data_paths):
300+
path = cat_data_paths[(config_name, request.param)]
301+
store = _open_cat_store(path, request.param)
302+
yield store
303+
if request.param == "h5ad":
304+
store.file.close()
389305

390-
# 50 categories for head/tail testing
391-
@pytest.fixture(scope="session")
392-
def cat_fifty_path_zarr(tmp_path_factory) -> Path:
393-
categories = [f"Type_{i:02d}" for i in range(50)]
394-
return _write_categorical_zarr(
395-
tmp_path_factory, "cat_fifty", pd.Categorical(categories)
396-
)
306+
return _fixture
397307

398308

399-
@pytest.fixture(scope="session")
400-
def cat_fifty_path_h5ad(tmp_path_factory) -> Path:
401-
categories = [f"Type_{i:02d}" for i in range(50)]
402-
return _write_categorical_h5ad(
403-
tmp_path_factory, "cat_fifty", pd.Categorical(categories)
404-
)
405-
309+
# One store fixture per config name; tests request these module-level names
# as fixture arguments (e.g. ``def test_x(cat_n3_store)``).
cat_n3_store = _make_cat_fixture("n3")
cat_n100_store = _make_cat_fixture("n100")
cat_ordered_store = _make_cat_fixture("ordered")
406312

407-
@pytest.fixture(params=["zarr", "h5ad"])
408-
def cat_fifty_store(request, cat_fifty_path_zarr: Path, cat_fifty_path_h5ad: Path):
409-
"""Parametrized fixture: 50 categories for head/tail testing, both backends."""
410-
path = cat_fifty_path_zarr if request.param == "zarr" else cat_fifty_path_h5ad
411-
store = _open_categorical_store(path, request.param)
412-
yield store
413-
if request.param == "h5ad":
414-
store.file.close()
415313

416-
417-
def test_lazy_categorical_dtype_n_categories(cat_large_store):
314+
def test_lazy_categorical_dtype_n_categories(cat_n100_store):
418315
"""Test n_categories is cheap (metadata only) and uses cache when loaded."""
419316
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
420317

421-
lazy_cat = read_elem_lazy(cat_large_store)
318+
lazy_cat = read_elem_lazy(cat_n100_store)
422319
dtype = lazy_cat.dtype
423320
assert isinstance(dtype, LazyCategoricalDtype)
424321

@@ -438,11 +335,11 @@ def test_lazy_categorical_dtype_n_categories(cat_large_store):
438335
assert dtype.n_categories == 3 # Returns cached length, not disk length
439336

440337

441-
def test_lazy_categorical_dtype_head_tail_categories(cat_fifty_store):
338+
def test_lazy_categorical_dtype_head_tail_categories(cat_n100_store):
442339
"""Test head_categories and tail_categories perform partial reads without loading all."""
443340
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
444341

445-
lazy_cat = read_elem_lazy(cat_fifty_store)
342+
lazy_cat = read_elem_lazy(cat_n100_store)
446343
dtype = lazy_cat.dtype
447344
assert isinstance(dtype, LazyCategoricalDtype)
448345

@@ -452,42 +349,42 @@ def test_lazy_categorical_dtype_head_tail_categories(cat_fifty_store):
452349
# Test head_categories (first n) - should NOT load all categories
453350
first5 = dtype.head_categories(5)
454351
assert len(first5) == 5
455-
assert list(first5) == [f"Type_{i:02d}" for i in range(5)]
352+
assert list(first5) == [f"cat_{i:02d}" for i in range(5)]
456353
assert "categories" not in dtype.__dict__ # Still not fully loaded
457354

458355
# Test head_categories default (first 5)
459356
default_head = dtype.head_categories()
460357
assert len(default_head) == 5
461-
assert list(default_head) == [f"Type_{i:02d}" for i in range(5)]
358+
assert list(default_head) == [f"cat_{i:02d}" for i in range(5)]
462359
assert "categories" not in dtype.__dict__ # Still not fully loaded
463360

464361
# Test tail_categories (last n) - should NOT load all categories
465362
last3 = dtype.tail_categories(3)
466363
assert len(last3) == 3
467-
assert list(last3) == [f"Type_{i:02d}" for i in range(47, 50)]
364+
assert list(last3) == [f"cat_{i:02d}" for i in range(97, 100)]
468365
assert "categories" not in dtype.__dict__ # Still not fully loaded
469366

470367
# Test tail_categories default (last 5)
471368
default_tail = dtype.tail_categories()
472369
assert len(default_tail) == 5
473-
assert list(default_tail) == [f"Type_{i:02d}" for i in range(45, 50)]
370+
assert list(default_tail) == [f"cat_{i:02d}" for i in range(95, 100)]
474371
assert "categories" not in dtype.__dict__ # Still not fully loaded
475372

476373
# Test requesting more than available
477-
all_head = dtype.head_categories(100)
478-
assert len(all_head) == 50
479-
assert list(all_head) == [f"Type_{i:02d}" for i in range(50)]
374+
all_head = dtype.head_categories(200)
375+
assert len(all_head) == 100
376+
assert list(all_head) == [f"cat_{i:02d}" for i in range(100)]
480377

481-
all_tail = dtype.tail_categories(100)
482-
assert len(all_tail) == 50
483-
assert list(all_tail) == [f"Type_{i:02d}" for i in range(50)]
378+
all_tail = dtype.tail_categories(200)
379+
assert len(all_tail) == 100
380+
assert list(all_tail) == [f"cat_{i:02d}" for i in range(100)]
484381

485382

486-
def test_lazy_categorical_dtype_categories_caching(cat_medium_store):
383+
def test_lazy_categorical_dtype_categories_caching(cat_n3_store):
487384
"""Test that categories are cached after full load."""
488385
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
489386

490-
lazy_cat = read_elem_lazy(cat_medium_store)
387+
lazy_cat = read_elem_lazy(cat_n3_store)
491388
dtype = lazy_cat.dtype
492389
assert isinstance(dtype, LazyCategoricalDtype)
493390

@@ -497,7 +394,7 @@ def test_lazy_categorical_dtype_categories_caching(cat_medium_store):
497394
# Load categories
498395
cats = dtype.categories
499396
assert cats is not None
500-
assert list(cats) == ["a", "b", "c", "d", "e"]
397+
assert list(cats) == ["a", "b", "c"]
501398

502399
# After loading, should be cached in __dict__ (cached_property pattern)
503400
assert "categories" in dtype.__dict__
@@ -523,24 +420,24 @@ def test_lazy_categorical_dtype_ordered(cat_ordered_store):
523420
assert list(dtype.categories) == ["low", "medium", "high"]
524421

525422

526-
def test_lazy_categorical_dtype_repr(cat_large_store, cat_small_store):
423+
def test_lazy_categorical_dtype_repr(cat_n100_store, cat_n3_store):
527424
"""Test LazyCategoricalDtype repr shows truncated categories."""
528425
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
529426

530427
# Test large number of categories (truncated repr)
531-
lazy_cat = read_elem_lazy(cat_large_store)
428+
lazy_cat = read_elem_lazy(cat_n100_store)
532429
dtype = lazy_cat.dtype
533430
assert isinstance(dtype, LazyCategoricalDtype)
534431

535432
repr_str = repr(dtype)
536433
assert "LazyCategoricalDtype" in repr_str
537434
assert "n=100" in repr_str
538435
assert "..." in repr_str # Truncation indicator
539-
assert "cat_0" in repr_str # Head category
436+
assert "cat_00" in repr_str # Head category
540437
assert "cat_99" in repr_str # Tail category
541438

542439
# Test small number of categories (full repr)
543-
small_lazy_cat = read_elem_lazy(cat_small_store)
440+
small_lazy_cat = read_elem_lazy(cat_n3_store)
544441
small_dtype = small_lazy_cat.dtype
545442

546443
small_repr = repr(small_dtype)
@@ -551,11 +448,11 @@ def test_lazy_categorical_dtype_repr(cat_large_store, cat_small_store):
551448
assert "'c'" in small_repr
552449

553450

554-
def test_lazy_categorical_dtype_equality(cat_small_store):
451+
def test_lazy_categorical_dtype_equality(cat_n3_store):
555452
"""Test LazyCategoricalDtype equality comparisons and basic properties."""
556453
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
557454

558-
lazy_cat = read_elem_lazy(cat_small_store)
455+
lazy_cat = read_elem_lazy(cat_n3_store)
559456
dtype = lazy_cat.dtype
560457
assert isinstance(dtype, LazyCategoricalDtype)
561458

@@ -591,7 +488,7 @@ def test_lazy_categorical_dtype_equality(cat_small_store):
591488

592489
@pytest.mark.parametrize("backend", ["zarr", "h5ad"])
593490
def test_lazy_categorical_dtype_equality_no_load(
594-
cat_small_path_zarr: Path, cat_small_path_h5ad: Path, backend: str
491+
cat_data_paths: dict[tuple[str, str], Path], backend: str
595492
):
596493
"""Test same-location equality doesn't load category data.
597494
@@ -601,14 +498,14 @@ def test_lazy_categorical_dtype_equality_no_load(
601498
"""
602499
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
603500

501+
path = cat_data_paths[("n3", backend)]
502+
604503
if backend == "zarr":
605-
path = cat_small_path_zarr
606504

607505
def open_store(p):
608506
return zarr.open(p, mode="r")["cat"]
609507

610508
else:
611-
path = cat_small_path_h5ad
612509
# Keep h5py files open for the duration of the test
613510
open_store = lambda p: h5py.File(p, mode="r")["cat"]
614511

@@ -696,11 +593,11 @@ def test_lazy_categorical_roundtrip_via_anndata(tmp_path: Path):
696593
assert loaded.obs["ordered_cat"].equals(adata.obs["ordered_cat"])
697594

698595

699-
def test_lazy_categorical_dtype_hash(cat_small_store):
596+
def test_lazy_categorical_dtype_hash(cat_n3_store):
700597
"""Test LazyCategoricalDtype is hashable."""
701598
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
702599

703-
lazy_cat = read_elem_lazy(cat_small_store)
600+
lazy_cat = read_elem_lazy(cat_n3_store)
704601
dtype = lazy_cat.dtype
705602
assert isinstance(dtype, LazyCategoricalDtype)
706603

0 commit comments

Comments
 (0)