@@ -249,176 +249,73 @@ def test_chunks_df(
249249 assert arr .chunksize == expected_chunks
250250
251251
252- # Session-scoped fixtures for categorical data (write once, read many )
253- # Each category type has zarr and h5ad path fixtures, plus a parametrized store fixture
252+ # Session-scoped categorical test data: one dataset per entry in _CAT_CONFIGS (name, n_categories, ordered, category_names)
253+ # Data is written once per session; stores are opened per test, parametrized over the zarr and h5ad backends
254254
255-
256- def _write_categorical_zarr (tmp_path_factory , name : str , cat : pd .Categorical ) -> Path :
257- """Helper to write categorical to zarr and return path."""
258- path = tmp_path_factory .mktemp (f"{ name } .zarr" )
259- store = zarr .open (path , mode = "w" )
260- write_elem (store , "cat" , cat )
261- return path
262-
263-
264- def _write_categorical_h5ad (tmp_path_factory , name : str , cat : pd .Categorical ) -> Path :
265- """Helper to write categorical to h5ad and return path."""
266- path = tmp_path_factory .mktemp (name ) / "cat.h5ad"
267- with h5py .File (path , mode = "w" ) as f :
268- write_elem (f , "cat" , cat )
269- return path
270-
271-
272- def _open_categorical_store (path : Path , backend : str ):
273- """Helper to open categorical store for either backend."""
274- if backend == "zarr" :
275- return zarr .open (path , mode = "r" )["cat" ]
276- else :
277- return h5py .File (path , mode = "r" )["cat" ]
278-
279-
280- # Small categorical ['a', 'b', 'c']
281- @pytest .fixture (scope = "session" )
282- def cat_small_path_zarr (tmp_path_factory ) -> Path :
283- return _write_categorical_zarr (
284- tmp_path_factory , "cat_small" , pd .Categorical (["a" , "b" , "c" ])
285- )
255+ # Configuration: (name, n_categories, ordered, category_names); when category_names is None, names are generated as cat_00, cat_01, ...
256+ _CAT_CONFIGS : list [tuple [str , int , bool , list [str ] | None ]] = [
257+ ("n3" , 3 , False , ["a" , "b" , "c" ]), # basic tests, equality, hashing
258+ ("n100" , 100 , False , None ), # truncation, n_categories, head/tail
259+ ("ordered" , 3 , True , ["low" , "medium" , "high" ]), # ordered categories
260+ ]
286261
287262
288263@pytest .fixture (scope = "session" )
289- def cat_small_path_h5ad (tmp_path_factory ) -> Path :
290- return _write_categorical_h5ad (
291- tmp_path_factory , "cat_small" , pd .Categorical (["a" , "b" , "c" ])
292- )
293-
294-
295- @pytest .fixture (params = ["zarr" , "h5ad" ])
296- def cat_small_store (request , cat_small_path_zarr : Path , cat_small_path_h5ad : Path ):
297- """Parametrized fixture: small categorical ['a', 'b', 'c'] for both backends."""
298- path = cat_small_path_zarr if request .param == "zarr" else cat_small_path_h5ad
299- store = _open_categorical_store (path , request .param )
300- yield store
301- if request .param == "h5ad" :
302- store .file .close ()
303-
304-
305- # Medium categorical ['a', 'b', 'c', 'd', 'e']
306- @pytest .fixture (scope = "session" )
307- def cat_medium_path_zarr (tmp_path_factory ) -> Path :
308- return _write_categorical_zarr (
309- tmp_path_factory , "cat_medium" , pd .Categorical (["a" , "b" , "c" , "d" , "e" ])
310- )
311-
264+ def cat_data_paths (tmp_path_factory ) -> dict [tuple [str , str ], Path ]:
265+ """Write every categorical in _CAT_CONFIGS to zarr and h5ad once per session; return a dict mapping (config name, backend) to the written path."""
266+ base = tmp_path_factory .mktemp ("categorical_data" )
267+ paths : dict [tuple [str , str ], Path ] = {}
312268
313- @pytest .fixture (scope = "session" )
314- def cat_medium_path_h5ad (tmp_path_factory ) -> Path :
315- return _write_categorical_h5ad (
316- tmp_path_factory , "cat_medium" , pd .Categorical (["a" , "b" , "c" , "d" , "e" ])
317- )
269+ for name , n_cat , ordered , cat_names in _CAT_CONFIGS :
270+ categories = cat_names or [f"cat_{ i :02d} " for i in range (n_cat )]
271+ cat = pd .Categorical (categories , categories = categories , ordered = ordered )
318272
273+ # Write zarr
274+ zarr_path = base / f"{ name } .zarr"
275+ store = zarr .open (zarr_path , mode = "w" )
276+ write_elem (store , "cat" , cat )
277+ paths [(name , "zarr" )] = zarr_path
319278
320- @pytest .fixture (params = ["zarr" , "h5ad" ])
321- def cat_medium_store (request , cat_medium_path_zarr : Path , cat_medium_path_h5ad : Path ):
322- """Parametrized fixture: medium categorical for both backends."""
323- path = cat_medium_path_zarr if request .param == "zarr" else cat_medium_path_h5ad
324- store = _open_categorical_store (path , request .param )
325- yield store
326- if request .param == "h5ad" :
327- store .file .close ()
279+ # Write h5ad
280+ h5_path = base / f"{ name } .h5ad"
281+ with h5py .File (h5_path , mode = "w" ) as f :
282+ write_elem (f , "cat" , cat )
283+ paths [(name , "h5ad" )] = h5_path
328284
285+ return paths
329286
330- # Large categorical with 100 categories
331- @pytest .fixture (scope = "session" )
332- def cat_large_path_zarr (tmp_path_factory ) -> Path :
333- categories = [f"cat_{ i } " for i in range (100 )]
334- return _write_categorical_zarr (
335- tmp_path_factory , "cat_large" , pd .Categorical (categories )
336- )
337287
338-
339- @pytest .fixture (scope = "session" )
340- def cat_large_path_h5ad (tmp_path_factory ) -> Path :
341- categories = [f"cat_{ i } " for i in range (100 )]
342- return _write_categorical_h5ad (
343- tmp_path_factory , "cat_large" , pd .Categorical (categories )
344- )
345-
346-
347- @pytest .fixture (params = ["zarr" , "h5ad" ])
348- def cat_large_store (request , cat_large_path_zarr : Path , cat_large_path_h5ad : Path ):
349- """Parametrized fixture: large categorical (100 categories) for both backends."""
350- path = cat_large_path_zarr if request .param == "zarr" else cat_large_path_h5ad
351- store = _open_categorical_store (path , request .param )
352- yield store
353- if request .param == "h5ad" :
354- store .file .close ()
355-
356-
357- # Ordered categorical ['low', 'medium', 'high']
358- @pytest .fixture (scope = "session" )
359- def cat_ordered_path_zarr (tmp_path_factory ) -> Path :
360- cat = pd .Categorical (
361- ["low" , "medium" , "high" ] * 3 + ["low" ],
362- categories = ["low" , "medium" , "high" ],
363- ordered = True ,
364- )
365- return _write_categorical_zarr (tmp_path_factory , "cat_ordered" , cat )
366-
367-
368- @pytest .fixture (scope = "session" )
369- def cat_ordered_path_h5ad (tmp_path_factory ) -> Path :
370- cat = pd .Categorical (
371- ["low" , "medium" , "high" ] * 3 + ["low" ],
372- categories = ["low" , "medium" , "high" ],
373- ordered = True ,
374- )
375- return _write_categorical_h5ad (tmp_path_factory , "cat_ordered" , cat )
288+ def _open_cat_store (path : Path , backend : str ):
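289+ """Open the "cat" element at *path* read-only, via zarr when backend is "zarr" and via h5py otherwise (the h5py file is left open; the caller must close it)."""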
290+ if backend == "zarr" :
291+ return zarr .open (path , mode = "r" )["cat" ]
292+ return h5py .File (path , mode = "r" )["cat" ]
376293
377294
378- @pytest .fixture (params = ["zarr" , "h5ad" ])
379- def cat_ordered_store (
380- request , cat_ordered_path_zarr : Path , cat_ordered_path_h5ad : Path
381- ):
382- """Parametrized fixture: ordered categorical for both backends."""
383- path = cat_ordered_path_zarr if request .param == "zarr" else cat_ordered_path_h5ad
384- store = _open_categorical_store (path , request .param )
385- yield store
386- if request .param == "h5ad" :
387- store .file .close ()
295+ def _make_cat_fixture (config_name : str ):
296+ """Factory: build a fixture for the dataset *config_name*, parametrized over zarr and h5ad, that yields the opened "cat" element and closes the h5py file on teardown."""
388297
298+ @pytest .fixture (params = ["zarr" , "h5ad" ])
299+ def _fixture (request , cat_data_paths ):
300+ path = cat_data_paths [(config_name , request .param )]
301+ store = _open_cat_store (path , request .param )
302+ yield store
303+ if request .param == "h5ad" :
304+ store .file .close ()
389305
390- # 50 categories for head/tail testing
391- @pytest .fixture (scope = "session" )
392- def cat_fifty_path_zarr (tmp_path_factory ) -> Path :
393- categories = [f"Type_{ i :02d} " for i in range (50 )]
394- return _write_categorical_zarr (
395- tmp_path_factory , "cat_fifty" , pd .Categorical (categories )
396- )
306+ return _fixture
397307
398308
399- @pytest .fixture (scope = "session" )
400- def cat_fifty_path_h5ad (tmp_path_factory ) -> Path :
401- categories = [f"Type_{ i :02d} " for i in range (50 )]
402- return _write_categorical_h5ad (
403- tmp_path_factory , "cat_fifty" , pd .Categorical (categories )
404- )
405-
309+ cat_n3_store = _make_cat_fixture ("n3" )
310+ cat_n100_store = _make_cat_fixture ("n100" )
311+ cat_ordered_store = _make_cat_fixture ("ordered" )
406312
407- @pytest .fixture (params = ["zarr" , "h5ad" ])
408- def cat_fifty_store (request , cat_fifty_path_zarr : Path , cat_fifty_path_h5ad : Path ):
409- """Parametrized fixture: 50 categories for head/tail testing, both backends."""
410- path = cat_fifty_path_zarr if request .param == "zarr" else cat_fifty_path_h5ad
411- store = _open_categorical_store (path , request .param )
412- yield store
413- if request .param == "h5ad" :
414- store .file .close ()
415313
416-
417- def test_lazy_categorical_dtype_n_categories (cat_large_store ):
314+ def test_lazy_categorical_dtype_n_categories (cat_n100_store ):
418315 """Test n_categories is cheap (metadata only) and uses cache when loaded."""
419316 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
420317
421- lazy_cat = read_elem_lazy (cat_large_store )
318+ lazy_cat = read_elem_lazy (cat_n100_store )
422319 dtype = lazy_cat .dtype
423320 assert isinstance (dtype , LazyCategoricalDtype )
424321
@@ -438,11 +335,11 @@ def test_lazy_categorical_dtype_n_categories(cat_large_store):
438335 assert dtype .n_categories == 3 # Returns cached length, not disk length
439336
440337
441- def test_lazy_categorical_dtype_head_tail_categories (cat_fifty_store ):
338+ def test_lazy_categorical_dtype_head_tail_categories (cat_n100_store ):
442339 """Test head_categories and tail_categories perform partial reads without loading all."""
443340 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
444341
445- lazy_cat = read_elem_lazy (cat_fifty_store )
342+ lazy_cat = read_elem_lazy (cat_n100_store )
446343 dtype = lazy_cat .dtype
447344 assert isinstance (dtype , LazyCategoricalDtype )
448345
@@ -452,42 +349,42 @@ def test_lazy_categorical_dtype_head_tail_categories(cat_fifty_store):
452349 # Test head_categories (first n) - should NOT load all categories
453350 first5 = dtype .head_categories (5 )
454351 assert len (first5 ) == 5
455- assert list (first5 ) == [f"Type_ { i :02d} " for i in range (5 )]
352+ assert list (first5 ) == [f"cat_ { i :02d} " for i in range (5 )]
456353 assert "categories" not in dtype .__dict__ # Still not fully loaded
457354
458355 # Test head_categories default (first 5)
459356 default_head = dtype .head_categories ()
460357 assert len (default_head ) == 5
461- assert list (default_head ) == [f"Type_ { i :02d} " for i in range (5 )]
358+ assert list (default_head ) == [f"cat_ { i :02d} " for i in range (5 )]
462359 assert "categories" not in dtype .__dict__ # Still not fully loaded
463360
464361 # Test tail_categories (last n) - should NOT load all categories
465362 last3 = dtype .tail_categories (3 )
466363 assert len (last3 ) == 3
467- assert list (last3 ) == [f"Type_ { i :02d} " for i in range (47 , 50 )]
364+ assert list (last3 ) == [f"cat_ { i :02d} " for i in range (97 , 100 )]
468365 assert "categories" not in dtype .__dict__ # Still not fully loaded
469366
470367 # Test tail_categories default (last 5)
471368 default_tail = dtype .tail_categories ()
472369 assert len (default_tail ) == 5
473- assert list (default_tail ) == [f"Type_ { i :02d} " for i in range (45 , 50 )]
370+ assert list (default_tail ) == [f"cat_ { i :02d} " for i in range (95 , 100 )]
474371 assert "categories" not in dtype .__dict__ # Still not fully loaded
475372
476373 # Test requesting more than available
477- all_head = dtype .head_categories (100 )
478- assert len (all_head ) == 50
479- assert list (all_head ) == [f"Type_ { i :02d} " for i in range (50 )]
374+ all_head = dtype .head_categories (200 )
375+ assert len (all_head ) == 100
376+ assert list (all_head ) == [f"cat_ { i :02d} " for i in range (100 )]
480377
481- all_tail = dtype .tail_categories (100 )
482- assert len (all_tail ) == 50
483- assert list (all_tail ) == [f"Type_ { i :02d} " for i in range (50 )]
378+ all_tail = dtype .tail_categories (200 )
379+ assert len (all_tail ) == 100
380+ assert list (all_tail ) == [f"cat_ { i :02d} " for i in range (100 )]
484381
485382
486- def test_lazy_categorical_dtype_categories_caching (cat_medium_store ):
383+ def test_lazy_categorical_dtype_categories_caching (cat_n3_store ):
487384 """Test that categories are cached after full load."""
488385 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
489386
490- lazy_cat = read_elem_lazy (cat_medium_store )
387+ lazy_cat = read_elem_lazy (cat_n3_store )
491388 dtype = lazy_cat .dtype
492389 assert isinstance (dtype , LazyCategoricalDtype )
493390
@@ -497,7 +394,7 @@ def test_lazy_categorical_dtype_categories_caching(cat_medium_store):
497394 # Load categories
498395 cats = dtype .categories
499396 assert cats is not None
500- assert list (cats ) == ["a" , "b" , "c" , "d" , "e" ]
397+ assert list (cats ) == ["a" , "b" , "c" ]
501398
502399 # After loading, should be cached in __dict__ (cached_property pattern)
503400 assert "categories" in dtype .__dict__
@@ -523,24 +420,24 @@ def test_lazy_categorical_dtype_ordered(cat_ordered_store):
523420 assert list (dtype .categories ) == ["low" , "medium" , "high" ]
524421
525422
526- def test_lazy_categorical_dtype_repr (cat_large_store , cat_small_store ):
423+ def test_lazy_categorical_dtype_repr (cat_n100_store , cat_n3_store ):
527424 """Test LazyCategoricalDtype repr shows truncated categories."""
528425 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
529426
530427 # Test large number of categories (truncated repr)
531- lazy_cat = read_elem_lazy (cat_large_store )
428+ lazy_cat = read_elem_lazy (cat_n100_store )
532429 dtype = lazy_cat .dtype
533430 assert isinstance (dtype , LazyCategoricalDtype )
534431
535432 repr_str = repr (dtype )
536433 assert "LazyCategoricalDtype" in repr_str
537434 assert "n=100" in repr_str
538435 assert "..." in repr_str # Truncation indicator
539- assert "cat_0 " in repr_str # Head category
436+ assert "cat_00 " in repr_str # Head category
540437 assert "cat_99" in repr_str # Tail category
541438
542439 # Test small number of categories (full repr)
543- small_lazy_cat = read_elem_lazy (cat_small_store )
440+ small_lazy_cat = read_elem_lazy (cat_n3_store )
544441 small_dtype = small_lazy_cat .dtype
545442
546443 small_repr = repr (small_dtype )
@@ -551,11 +448,11 @@ def test_lazy_categorical_dtype_repr(cat_large_store, cat_small_store):
551448 assert "'c'" in small_repr
552449
553450
554- def test_lazy_categorical_dtype_equality (cat_small_store ):
451+ def test_lazy_categorical_dtype_equality (cat_n3_store ):
555452 """Test LazyCategoricalDtype equality comparisons and basic properties."""
556453 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
557454
558- lazy_cat = read_elem_lazy (cat_small_store )
455+ lazy_cat = read_elem_lazy (cat_n3_store )
559456 dtype = lazy_cat .dtype
560457 assert isinstance (dtype , LazyCategoricalDtype )
561458
@@ -591,7 +488,7 @@ def test_lazy_categorical_dtype_equality(cat_small_store):
591488
592489@pytest .mark .parametrize ("backend" , ["zarr" , "h5ad" ])
593490def test_lazy_categorical_dtype_equality_no_load (
594- cat_small_path_zarr : Path , cat_small_path_h5ad : Path , backend : str
491+ cat_data_paths : dict [ tuple [ str , str ], Path ] , backend : str
595492):
596493 """Test same-location equality doesn't load category data.
597494
@@ -601,14 +498,14 @@ def test_lazy_categorical_dtype_equality_no_load(
601498 """
602499 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
603500
501+ path = cat_data_paths [("n3" , backend )]
502+
604503 if backend == "zarr" :
605- path = cat_small_path_zarr
606504
607505 def open_store (p ):
608506 return zarr .open (p , mode = "r" )["cat" ]
609507
610508 else :
611- path = cat_small_path_h5ad
612509 # Keep h5py files open for the duration of the test
613510 open_store = lambda p : h5py .File (p , mode = "r" )["cat" ]
614511
@@ -696,11 +593,11 @@ def test_lazy_categorical_roundtrip_via_anndata(tmp_path: Path):
696593 assert loaded .obs ["ordered_cat" ].equals (adata .obs ["ordered_cat" ])
697594
698595
699- def test_lazy_categorical_dtype_hash (cat_small_store ):
596+ def test_lazy_categorical_dtype_hash (cat_n3_store ):
700597 """Test LazyCategoricalDtype is hashable."""
701598 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
702599
703- lazy_cat = read_elem_lazy (cat_small_store )
600+ lazy_cat = read_elem_lazy (cat_n3_store )
704601 dtype = lazy_cat .dtype
705602 assert isinstance (dtype , LazyCategoricalDtype )
706603
0 commit comments