Skip to content

Commit edb04fc

Browse files
committed
test: improve equality_no_load test with read_elem patching
- Switch from patching __getitem__ to patching read_elem (more reliable).
- Add a positive control: comparison with pd.CategoricalDtype triggers read_elem.
- Together these prove both that the optimization works AND that the patch detects loads.
1 parent ac1cab5 commit edb04fc

1 file changed

Lines changed: 13 additions & 30 deletions

File tree

tests/lazy/test_read.py

Lines changed: 13 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -492,58 +492,41 @@ def test_lazy_categorical_dtype_equality_no_load(
492492
):
493493
"""Test same-location equality doesn't load category data.
494494
495-
Both h5py (HDF5 object ID comparison) and zarr 3.x (StorePath comparison) use
496-
location-based equality that doesn't read array contents. This test verifies
497-
that behavior by patching __getitem__ to raise if called.
495+
LazyCategoricalDtype uses location-based comparison to avoid loading categories:
496+
- zarr: StorePath comparison
497+
- h5py: HDF5 object ID comparison
498+
499+
We patch read_elem to verify no data is loaded during comparison.
498500
"""
499501
from anndata.experimental.backed._lazy_arrays import LazyCategoricalDtype
500502

501503
path = cat_data_paths[("n3", backend)]
502504

503505
if backend == "zarr":
504-
505-
def open_store(p):
506-
return zarr.open(p, mode="r")["cat"]
507-
506+
open_store = lambda p: zarr.open(p, mode="r")["cat"]
508507
else:
509-
# Keep h5py files open for the duration of the test
510508
open_store = lambda p: h5py.File(p, mode="r")["cat"]
511509

512-
# Open the same file twice to get different Python objects pointing to same location
513510
store1 = open_store(path)
514511
store2 = open_store(path)
515512
dtype1 = read_elem_lazy(store1).dtype
516513
dtype2 = read_elem_lazy(store2).dtype
517514

518515
assert isinstance(dtype1, LazyCategoricalDtype)
519516
assert isinstance(dtype2, LazyCategoricalDtype)
520-
# Verify these are different Python objects
521517
assert dtype1._categories_elem is not dtype2._categories_elem
522518

523-
# Patch __getitem__ to raise if data is loaded during comparison
524-
cat_arr1 = dtype1._get_categories_array()
525-
cat_arr2 = dtype2._get_categories_array()
519+
# Same-location comparison should NOT call read_elem
520+
with patch("anndata.io.read_elem", side_effect=AssertionError("read_elem called")):
521+
assert dtype1 == dtype2
526522

523+
# Positive control: comparison with regular CategoricalDtype DOES call read_elem
527524
with (
528-
patch.object(
529-
cat_arr1,
530-
"__getitem__",
531-
side_effect=AssertionError("Data was loaded from arr1"),
532-
),
533-
patch.object(
534-
cat_arr2,
535-
"__getitem__",
536-
side_effect=AssertionError("Data was loaded from arr2"),
537-
),
525+
pytest.raises(AssertionError, match="read_elem called"),
526+
patch("anndata.io.read_elem", side_effect=AssertionError("read_elem called")),
538527
):
539-
# This should use location-based comparison without triggering __getitem__
540-
assert dtype1 == dtype2
541-
542-
# Also verify our cache wasn't populated
543-
assert "categories" not in dtype1.__dict__
544-
assert "categories" not in dtype2.__dict__
528+
dtype1 == pd.CategoricalDtype(categories=["a", "b", "c"]) # noqa: B015
545529

546-
# Clean up h5py file handles
547530
if backend == "h5ad":
548531
store1.file.close()
549532
store2.file.close()

0 commit comments

Comments
 (0)