Skip to content

Commit 32c53fe

Browse files
committed
fix: update scdl tests for anndata 0.12.11 compatibility
anndata 0.12.11 now validates that raw X rows match n_obs when constructing AnnData objects. Tests that used AnnData(X=None, raw=...) failed because n_obs defaulted to 0 while raw X had rows.

Fix: pass obs=pd.DataFrame(index=range(n_rows)) so anndata infers the correct n_obs from the obs DataFrame.

Signed-off-by: svc-bionemo <267129667+svc-bionemo@users.noreply.github.com>
1 parent 2ab3700 commit 32c53fe

2 files changed

Lines changed: 11 additions & 4 deletions

File tree

sub-packages/bionemo-scdl/tests/bionemo/scdl/conftest.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,12 @@ def _make(tmp_path, dtype1: str, dtype2: str):
139139

140140
h1 = tmp_path / "var1.h5ad"
141141
h2 = tmp_path / "var2.h5ad"
142-
ad.AnnData(X=None, var=pd.DataFrame(index=np.arange(X1.shape[1])), raw={"X": X1}).write_h5ad(h1)
143-
ad.AnnData(X=None, var=pd.DataFrame(index=np.arange(X2.shape[1])), raw={"X": X2}).write_h5ad(h2)
142+
ad.AnnData(
143+
obs=pd.DataFrame(index=range(X1.shape[0])), var=pd.DataFrame(index=np.arange(X1.shape[1])), raw={"X": X1}
144+
).write_h5ad(h1)
145+
ad.AnnData(
146+
obs=pd.DataFrame(index=range(X2.shape[0])), var=pd.DataFrame(index=np.arange(X2.shape[1])), raw={"X": X2}
147+
).write_h5ad(h2)
144148

145149
ds1 = SingleCellMemMapDataset(tmp_path / "var_ds1", h5ad_path=h1, data_dtype=dtype1)
146150
ds2 = SingleCellMemMapDataset(tmp_path / "var_ds2", h5ad_path=h2, data_dtype=dtype2)
@@ -164,6 +168,7 @@ def _make(tmp_path):
164168
indices_small_vals = np.array([0, 11, 5, 7], dtype=np.int64)
165169
indptr_small_vals = np.array([0, 0, 2, 2, 4], dtype=np.int64)
166170
X_small = ad.AnnData(
171+
obs=pd.DataFrame(index=range(n_rows_small)),
167172
var=pd.DataFrame(index=np.arange(n_cols_small)),
168173
raw={
169174
"X": sp.csr_matrix(
@@ -180,6 +185,7 @@ def _make(tmp_path):
180185
indices_large_vals = np.array([10, 65_537], dtype=np.int64)
181186
indptr_large_vals = np.array([0, 1, 1, 2], dtype=np.int64)
182187
X_large = ad.AnnData(
188+
obs=pd.DataFrame(index=range(n_rows_large)),
183189
var=pd.DataFrame(index=np.arange(n_cols_large)),
184190
raw={
185191
"X": sp.csr_matrix(

sub-packages/bionemo-scdl/tests/bionemo/scdl/io/test_single_cell_memmap_dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import anndata as ad
1919
import numpy as np
20+
import pandas as pd
2021
import pytest
2122
import scipy.sparse as sp
2223

@@ -112,7 +113,7 @@ def big_int_h5ad(tmp_path, big_h5ad_data):
112113
"""Create and return the path to an h5ad with large values/columns for dtype promotion tests."""
113114
d = big_h5ad_data
114115
X = sp.csr_matrix((d["data"].astype(np.uint32), d["indices"], d["indptr"]), shape=(d["n_rows"], d["n_cols"]))
115-
a = ad.AnnData(X=None, raw={"X": X})
116+
a = ad.AnnData(obs=pd.DataFrame(index=range(d["n_rows"])), raw={"X": X})
116117
h5ad_path = tmp_path / "big_dtype.h5ad"
117118
a.write_h5ad(h5ad_path)
118119
return h5ad_path
@@ -123,7 +124,7 @@ def big_float_h5ad(tmp_path, big_h5ad_data):
123124
"""Create and return the path to an h5ad with large values/columns for dtype promotion tests."""
124125
d = big_h5ad_data
125126
X = sp.csr_matrix((d["data"].astype("float32"), d["indices"], d["indptr"]), shape=(d["n_rows"], d["n_cols"]))
126-
a = ad.AnnData(X=None, raw={"X": X})
127+
a = ad.AnnData(obs=pd.DataFrame(index=range(d["n_rows"])), raw={"X": X})
127128
h5ad_path = tmp_path / "big_dtype.h5ad"
128129
a.write_h5ad(h5ad_path)
129130
return h5ad_path

0 commit comments

Comments (0)