unblock CI (#695)

Intron7 · web-flow · commit 85ea3d364ccc · 2026-06-17T11:17:55.000+02:00
* unblock CI

* update rapids-pre

* test a10

* loosen dask

* fix dask hangs

* use zeros instead of nan

* pin numpy

* more pinning

* update numba pin
diff --git a/.cirun.yml b/.cirun.yml
@@ -1,19 +1,19 @@
 runners:
-  # Primary: T4. Cheapest, usually has capacity.
-  - name: aws-gpu-runner-g4dn
+  # Primary: A10G. ~3-4x faster than the T4 (Ampere, tensor cores, ~600 GB/s).
+  - name: aws-gpu-runner-g5
     cloud: aws
-    instance_type: g4dn.xlarge
+    instance_type: g5.xlarge
     machine_image: ami-067a4ba2816407ee9
     region: eu-north-1
     preemptible:
       - true
       - false
     labels:
       - cirun-aws-gpu
-  # Fallback: A10G. cirun picks this when g4dn spot is dry.
-  - name: aws-gpu-runner-g5
+  # Fallback: T4. Cheapest; cirun picks this when g5 spot is dry.
+  - name: aws-gpu-runner-g4dn
     cloud: aws
-    instance_type: g5.xlarge
+    instance_type: g4dn.xlarge
     machine_image: ami-067a4ba2816407ee9
     region: eu-north-1
     preemptible:
diff --git a/docs/release-notes/0.15.2.md b/docs/release-notes/0.15.2.md
@@ -2,7 +2,7 @@
 
 ```{rubric} Features
 ```
-* Add `ptg.GuideAssignment.assign_mixture_model` for crispat-style Poisson-Gaussian guide assignment. The CUDA/nanobind implementation writes pertpy-compatible labels to `adata.obs` and stores both pertpy-style and crispat-style model readouts in `adata.var` {pr}`637` {smaller}`S Dicks`
+* Add GPU-accelerated {class}`~rapids_singlecell.ptg.GuideAssignment` (``assign_by_threshold``, ``assign_to_max_guide``, ``assign_mixture_model``), mirroring `pertpy.pp.GuideAssignment` {pr}`637` {smaller}`S Dicks`
 * Add pseudobulk based distance metrics to {class}`~rapids_singlecell.ptg.Distance`: ``euclidean``, ``root_mean_squared_error``, ``mse``, ``mean_absolute_error``, ``pearson_distance``, ``cosine_distance``, ``r2_distance``. Matches ``pertpy.tl.Distance`` {pr}`676` {smaller}`S Dicks`
 * Add bootstrap support (``bootstrap=True``) to the pseudobulk distance metrics of {class}`~rapids_singlecell.ptg.Distance` for ``pairwise`` and ``onesided_distances``, plus array-level ``Distance.bootstrap``. Each iteration resamples cells per group on the GPU and recomputes the group-mean distances {pr}`684` {smaller}`S Dicks`
 * Add ``wasserstein`` metric to {class}`~rapids_singlecell.ptg.Distance` {pr}`683` {smaller}`S Dicks`
diff --git a/hatch.toml b/hatch.toml
@@ -38,16 +38,35 @@ overrides.matrix.deps.extra-dependencies = [
     { if = [
         "dev",
     ], value = "scanpy @ git+https://github.com/scverse/scanpy.git" },
+    # numpy 2.5 removed `np.row_stack`, which numba-cuda still calls -> the whole
+    # suite fails at collection. Pin <2.5 for the prerelease-allowing envs until
+    # numba-cuda drops the call; remove this once that lands.
+    { if = [
+        "dev",
+        "rapids_prerelease",
+    ], value = "numpy<2.5" },
+    # UV_PRERELEASE=allow otherwise drags in mutually-incompatible numba/numba-cuda
+    # prereleases (numba 0.63.0b1 -> removed np.trapz; numba 0.66.0rc2 + numba-cuda
+    # 0.30.2 -> missing numba.cuda.types.NPDatetime). Pin the known-good stable pair
+    # (numba 0.64.x + numba-cuda 0.28.x) so only RAPIDS uses nightlies.
+    { if = [
+        "dev",
+        "rapids_prerelease",
+    ], value = "numba>=0.64,<0.65" },
+    { if = [
+        "dev",
+        "rapids_prerelease",
+    ], value = "numba-cuda<0.30" },
 ]
 overrides.matrix.cuda.extra-dependencies = [
-    { if = [ "13" ], value = "cuml-cu13<26.8" },
-    { if = [ "13" ], value = "cudf-cu13<26.8" },
-    { if = [ "13" ], value = "cugraph-cu13<26.8" },
-    { if = [ "13" ], value = "cuvs-cu13<26.8" },
-    { if = [ "12" ], value = "cuml-cu12<26.8" },
-    { if = [ "12" ], value = "cudf-cu12<26.8" },
-    { if = [ "12" ], value = "cugraph-cu12<26.8" },
-    { if = [ "12" ], value = "cuvs-cu12<26.8" },
+    { if = [ "13" ], value = "cuml-cu13<26.10" },
+    { if = [ "13" ], value = "cudf-cu13<26.10" },
+    { if = [ "13" ], value = "cugraph-cu13<26.10" },
+    { if = [ "13" ], value = "cuvs-cu13<26.10" },
+    { if = [ "12" ], value = "cuml-cu12<26.10" },
+    { if = [ "12" ], value = "cudf-cu12<26.10" },
+    { if = [ "12" ], value = "cugraph-cu12<26.10" },
+    { if = [ "12" ], value = "cuvs-cu12<26.10" },
 ]
 
 ## For prerelease we rely on UV_PRERELEASE + nightly index; features select cu12/cu13
diff --git a/pyproject.toml b/pyproject.toml
@@ -123,6 +123,7 @@ addopts = [
 ]
 markers = [
     "gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
+    "array_api: array-API tests (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
 ]
 
 [tool.setuptools_scm]
diff --git a/src/rapids_singlecell/pertpy_gpu/_guide_assignment.py b/src/rapids_singlecell/pertpy_gpu/_guide_assignment.py
@@ -25,11 +25,6 @@
     "mix_probs_0",
     "mix_probs_1",
     "threshold",
-    "weight_Poisson",
-    "weight_Normal",
-    "lambda",
-    "mu",
-    "scale",
 ]
 
 
@@ -38,9 +33,8 @@ class GuideAssignment:
 
     Provides threshold-based and mixture-model-based methods for assigning
     cells to guide RNAs, compatible with pertpy's ``GuideAssignment`` API.
-    The mixture model follows crispat's Poisson-Gaussian assignment rule
-    while using batched EM on GPU instead of per-guide Pyro SVI, yielding
-    orders-of-magnitude speedup.
+    The mixture model fits a Poisson-Gaussian mixture per guide with batched
+    EM on GPU, yielding orders-of-magnitude speedup.
     """
 
     def assign_by_threshold(
@@ -150,10 +144,9 @@ def assign_mixture_model(
 
         Fits a two-component mixture (Poisson background + Gaussian signal)
         to the log₂-transformed non-zero counts of each guide simultaneously
-        using batched Expectation-Maximization on GPU. Like crispat's
-        Poisson-Gaussian assignment, the fitted model is converted to an
-        integer raw-count threshold. The default posterior cutoff matches
-        pertpy's crispat-style threshold rule.
+        using batched Expectation-Maximization on GPU. The fitted model is
+        converted to an integer raw-count threshold; the default posterior
+        cutoff matches pertpy's threshold rule.
 
         Parameters
         ----------
@@ -255,11 +248,6 @@ def assign_mixture_model(
         adata.var.loc[valid_var_index, "mix_probs_0"] = pi0_cpu
         adata.var.loc[valid_var_index, "mix_probs_1"] = 1.0 - pi0_cpu
         adata.var.loc[valid_var_index, "threshold"] = thresholds_cpu
-        adata.var.loc[valid_var_index, "weight_Poisson"] = pi0_cpu
-        adata.var.loc[valid_var_index, "weight_Normal"] = 1.0 - pi0_cpu
-        adata.var.loc[valid_var_index, "lambda"] = lam_cpu
-        adata.var.loc[valid_var_index, "mu"] = mu_cpu
-        adata.var.loc[valid_var_index, "scale"] = sigma_cpu
 
         adata.obs[assigned_guides_key] = series_values
         return None
diff --git a/src/rapids_singlecell/preprocessing/_neighbors/_neighbors.py b/src/rapids_singlecell/preprocessing/_neighbors/_neighbors.py
@@ -144,7 +144,8 @@ def _get_connectivities_umap(
     """UMAP fuzzy simplicial set connectivities."""
     set_op_mix_ratio = 1.0
     local_connectivity = 1.0
-    X_conn = cp.empty((n_obs, 1), dtype=np.float32)
+
+    X_conn = cp.zeros((n_obs, 1), dtype=np.float32)
     logger_level = _get_logger_level(logger)
     connectivities = fuzzy_simplicial_set(
         X_conn,
diff --git a/tests/dask/conftest.py b/tests/dask/conftest.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import dask
 import pytest
 from dask.distributed import Client
 from dask_cuda import LocalCUDACluster
@@ -29,8 +30,12 @@ def dist_client(cluster):
     gets an isolated client (connecting to the shared cluster is cheap).
     """
     client = Client(cluster)
-    yield client
-    client.close()
+    try:
+        yield client
+    finally:
+        # Always deregister the global-default client, even if the test fails,
+        # so it can't leak into later `client`-fixture (synchronous) tests.
+        client.close()
 
 
 @pytest.fixture(scope="function")
@@ -42,5 +47,11 @@ def client():
     never touch the client object. Handing them ``None`` avoids spinning up a
     LocalCUDACluster and skips the distributed serialization round-trips of
     cupy chunks, which are pure overhead on the tiny test arrays.
+
+    Forces the synchronous scheduler so these tests can never be hijacked by a
+    distributed client left as the global default by an earlier ``dist_client``
+    test (which would route ``.compute()`` through the shared cluster and stall
+    on a GIL-holding cupy op -> the random 60s pytest-timeout hangs).
     """
-    yield None
+    with dask.config.set(scheduler="synchronous"):
+        yield None
diff --git a/tests/dask/test_dask_regress_out.py b/tests/dask/test_dask_regress_out.py
@@ -49,7 +49,7 @@ def test_regress_out_categorical_dask(client, data_kind, dtype):
 
     dask_X = dask_data.X.compute()
 
-    atol = 1e-5 if dtype == "float32" else 1e-7
+    atol = 5e-5 if dtype == "float32" else 1e-7
     cp.testing.assert_allclose(dask_X, ref.X, atol=atol)
 
 
@@ -75,7 +75,7 @@ def test_regress_out_continuous_dask(client, data_kind, dtype):
 
     dask_X = dask_data.X.compute()
 
-    atol = 1e-5 if dtype == "float32" else 1e-7
+    atol = 5e-5 if dtype == "float32" else 1e-7
     cp.testing.assert_allclose(dask_X, ref.X, atol=atol)
 
 
diff --git a/tests/pertpy/test_guide_assignment.py b/tests/pertpy/test_guide_assignment.py
@@ -190,11 +190,6 @@ def test_mixture_model_stores_params(guide_adata: AnnData) -> None:
         "mix_probs_0",
         "mix_probs_1",
         "threshold",
-        "weight_Poisson",
-        "weight_Normal",
-        "lambda",
-        "mu",
-        "scale",
     ]:
         assert col in guide_adata.var.columns, f"Missing column: {col}"
 
@@ -209,41 +204,20 @@ def test_mixture_model_stores_params(guide_adata: AnnData) -> None:
     means = guide_adata.var["gaussian_mean"].dropna()
     assert (rates < means).all(), "Poisson rate should be < Gaussian mean"
 
-    # Crispat-compatible aliases should mirror the pertpy-style parameter names.
-    np.testing.assert_allclose(
-        guide_adata.var["weight_Poisson"].dropna(),
-        guide_adata.var["mix_probs_0"].dropna(),
-    )
-    np.testing.assert_allclose(
-        guide_adata.var["weight_Normal"].dropna(),
-        guide_adata.var["mix_probs_1"].dropna(),
-    )
-    np.testing.assert_allclose(
-        guide_adata.var["lambda"].dropna(),
-        guide_adata.var["poisson_rate"].dropna(),
-    )
-    np.testing.assert_allclose(
-        guide_adata.var["mu"].dropna(),
-        guide_adata.var["gaussian_mean"].dropna(),
-    )
-    np.testing.assert_allclose(
-        guide_adata.var["scale"].dropna(),
-        guide_adata.var["gaussian_std"].dropna(),
-    )
     assert guide_adata.var["threshold"].dropna().ge(1).all()
 
 
 def test_mixture_model_overwrites_existing_var_columns(guide_adata: AnnData) -> None:
     guide_adata.var["threshold"] = pd.Categorical(["old"] * guide_adata.n_vars)
-    guide_adata.var["lambda"] = "old"
+    guide_adata.var["poisson_rate"] = "old"
 
     ga = rsc.ptg.GuideAssignment()
     ga.assign_mixture_model(guide_adata)
 
     assert pd.api.types.is_float_dtype(guide_adata.var["threshold"])
-    assert pd.api.types.is_float_dtype(guide_adata.var["lambda"])
+    assert pd.api.types.is_float_dtype(guide_adata.var["poisson_rate"])
     assert guide_adata.var["threshold"].dropna().ge(1).all()
-    assert np.isfinite(guide_adata.var["lambda"].dropna()).all()
+    assert np.isfinite(guide_adata.var["poisson_rate"].dropna()).all()
 
 
 def test_mixture_model_sparse_input(guide_adata_sparse: AnnData) -> None:
@@ -375,7 +349,7 @@ def test_mixture_model_skip_low_count() -> None:
 
 
 def test_mixture_model_skip_max_count_below_two() -> None:
-    """Crispat skips guides whose non-zero counts never reach 2 UMIs."""
+    """Guides whose non-zero counts never reach 2 UMIs are skipped."""
     X = np.zeros((50, 2), dtype=np.float32)
     X[:25, :] = 1.0
 

Original file line number	Diff line number	Diff line change
`@@ -123,6 +123,7 @@ addopts = [`
`123`	`123`	`]`
`124`	`124`	`markers = [`
`125`	`125`	`"gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",`
	`126`	`+ "array_api: array-API tests (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",`
`126`	`127`	`]`
`127`	`128`
`128`	`129`	`[tool.setuptools_scm]`