@@ -492,58 +492,41 @@ def test_lazy_categorical_dtype_equality_no_load(
492492):
493493 """Test same-location equality doesn't load category data.
494494
495- Both h5py (HDF5 object ID comparison) and zarr 3.x (StorePath comparison) use
496- location-based equality that doesn't read array contents. This test verifies
497- that behavior by patching __getitem__ to raise if called.
495+ LazyCategoricalDtype uses location-based comparison to avoid loading categories:
496+ - zarr: StorePath comparison
497+ - h5py: HDF5 object ID comparison
498+
499+ We patch read_elem to verify no data is loaded during comparison.
498500 """
499501 from anndata .experimental .backed ._lazy_arrays import LazyCategoricalDtype
500502
501503 path = cat_data_paths [("n3" , backend )]
502504
503505 if backend == "zarr" :
504-
505- def open_store (p ):
506- return zarr .open (p , mode = "r" )["cat" ]
507-
506+ open_store = lambda p : zarr .open (p , mode = "r" )["cat" ]
508507 else :
509- # Keep h5py files open for the duration of the test
510508 open_store = lambda p : h5py .File (p , mode = "r" )["cat" ]
511509
512- # Open the same file twice to get different Python objects pointing to same location
513510 store1 = open_store (path )
514511 store2 = open_store (path )
515512 dtype1 = read_elem_lazy (store1 ).dtype
516513 dtype2 = read_elem_lazy (store2 ).dtype
517514
518515 assert isinstance (dtype1 , LazyCategoricalDtype )
519516 assert isinstance (dtype2 , LazyCategoricalDtype )
520- # Verify these are different Python objects
521517 assert dtype1 ._categories_elem is not dtype2 ._categories_elem
522518
523- # Patch __getitem__ to raise if data is loaded during comparison
524- cat_arr1 = dtype1 . _get_categories_array ()
525- cat_arr2 = dtype2 . _get_categories_array ()
519+ # Same-location comparison should NOT call read_elem
520+ with patch ( "anndata.io.read_elem" , side_effect = AssertionError ( "read_elem called" )):
521+ assert dtype1 == dtype2
526522
523+ # Positive control: comparison with regular CategoricalDtype DOES call read_elem
527524 with (
528- patch .object (
529- cat_arr1 ,
530- "__getitem__" ,
531- side_effect = AssertionError ("Data was loaded from arr1" ),
532- ),
533- patch .object (
534- cat_arr2 ,
535- "__getitem__" ,
536- side_effect = AssertionError ("Data was loaded from arr2" ),
537- ),
525+ pytest .raises (AssertionError , match = "read_elem called" ),
526+ patch ("anndata.io.read_elem" , side_effect = AssertionError ("read_elem called" )),
538527 ):
539- # This should use location-based comparison without triggering __getitem__
540- assert dtype1 == dtype2
541-
542- # Also verify our cache wasn't populated
543- assert "categories" not in dtype1 .__dict__
544- assert "categories" not in dtype2 .__dict__
528+ dtype1 == pd .CategoricalDtype (categories = ["a" , "b" , "c" ]) # noqa: B015
545529
546- # Clean up h5py file handles
547530 if backend == "h5ad" :
548531 store1 .file .close ()
549532 store2 .file .close ()
0 commit comments