Skip to content

Commit 87dc1ec

Browse files
authored
chore!: remove use_highly_variable (#4039)
1 parent 6e7232c commit 87dc1ec

9 files changed

Lines changed: 48 additions & 120 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Remove `use_highly_variable` parameter from {func}`scanpy.pp.pca` and {func}`scanpy.experimental.pp.normalize_pearson_residuals_pca` {smaller}`P Angerer`

src/scanpy/experimental/pp/_normalization.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
doc_layer,
2121
doc_pca_chunk,
2222
)
23-
from ...get import _get_obs_rep, _set_obs_rep
24-
from ...preprocessing._docs import doc_mask_var_hvg
25-
from ...preprocessing._pca import _handle_mask_var, pca
23+
from ...get import _check_mask, _get_obs_rep, _set_obs_rep
24+
from ...preprocessing._docs import doc_mask_var
25+
from ...preprocessing._pca import pca
2626

2727
if TYPE_CHECKING:
2828
from collections.abc import Mapping
@@ -158,7 +158,7 @@ def normalize_pearson_residuals(
158158
adata=doc_adata,
159159
dist_params=doc_dist_params,
160160
pca_chunk=doc_pca_chunk,
161-
mask_var_hvg=doc_mask_var_hvg,
161+
mask_var=doc_mask_var,
162162
check_values=doc_check_values,
163163
inplace=doc_inplace,
164164
)
@@ -171,8 +171,9 @@ def normalize_pearson_residuals_pca(
171171
n_comps: int | None = 50,
172172
rng: SeedLike | RNGLike | None = None,
173173
kwargs_pca: Mapping[str, Any] = MappingProxyType({}),
174-
mask_var: np.ndarray | str | None | Default = Default("'highly_variable'"),
175-
use_highly_variable: bool | None = None,
174+
mask_var: np.ndarray | str | None | Default = Default(
175+
"adata.var.get('highly_variable')"
176+
),
176177
check_values: bool = True,
177178
inplace: bool = True,
178179
) -> AnnData | None:
@@ -190,7 +191,7 @@ def normalize_pearson_residuals_pca(
190191
{adata}
191192
{dist_params}
192193
{pca_chunk}
193-
{mask_var_hvg}
194+
{mask_var}
194195
{check_values}
195196
{inplace}
196197
@@ -211,19 +212,17 @@ def normalize_pearson_residuals_pca(
211212
residual normalization.
212213
`.varm['PCs']`
213214
The principal components containing the loadings. When `inplace=True` and
214-
`use_highly_variable=True`, this will contain empty rows for the genes not
215+
`mask_var is not None`, this will contain empty rows for the genes not
215216
selected.
216217
`.uns['pca']['variance_ratio']`
217218
Ratio of explained variance.
218219
`.uns['pca']['variance']`
219220
Explained variance, equivalent to the eigenvalues of the covariance matrix.
220221
221222
"""
222-
# Unify new mask argument and deprecated use_highly_varible argument
223-
_, mask_var = _handle_mask_var(
224-
adata, mask_var, use_highly_variable=use_highly_variable
225-
)
226-
del use_highly_variable
223+
if isinstance(mask_var, Default):
224+
mask_var = "highly_variable" if "highly_variable" in adata.var else None
225+
mask_var = _check_mask(adata, mask_var, "var")
227226

228227
if mask_var is not None:
229228
adata_sub = adata[:, mask_var].copy()

src/scanpy/preprocessing/_docs.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,11 @@
1515
If True, use `adata.raw.X` for expression values instead of `adata.X`.\
1616
"""
1717

18-
doc_mask_var_hvg = """\
18+
doc_mask_var = """\
1919
mask_var
2020
To run only on a certain set of genes given by a boolean array
2121
or a string referring to an array in :attr:`~anndata.AnnData.var`.
2222
By default, uses `.var['highly_variable']` if available, else everything.
23-
use_highly_variable
24-
Whether to use highly variable genes only, stored in
25-
`.var['highly_variable']`.
26-
By default uses them if they have been determined beforehand.
27-
28-
.. deprecated:: 1.10.0
29-
Use `mask_var` instead
3023
"""
3124

3225
doc_obs_qc_args = """\

src/scanpy/preprocessing/_pca/__init__.py

Lines changed: 12 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ..._utils import _doc_params, get_literal_vals, is_backed_type
1414
from ..._utils.random import _accepts_legacy_random_state, _legacy_random_state
1515
from ...get import _check_mask, _get_obs_rep
16-
from .._docs import doc_mask_var_hvg
16+
from .._docs import doc_mask_var
1717
from ._compat import _pca_compat_sparse
1818

1919
if TYPE_CHECKING:
@@ -51,7 +51,7 @@
5151
type SvdSolver = SvdSolvDaskML | SvdSolvSkearn | SvdSolvPCACustom
5252

5353

54-
@_doc_params(mask_var_hvg=doc_mask_var_hvg, rng=doc_rng)
54+
@_doc_params(mask_var=doc_mask_var, rng=doc_rng)
5555
@_accepts_legacy_random_state(0)
5656
def pca( # noqa: PLR0912, PLR0913, PLR0915
5757
data: AnnData | np.ndarray | CSBase,
@@ -65,8 +65,9 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
6565
chunk_size: int | None = None,
6666
rng: SeedLike | RNGLike | None = None,
6767
return_info: bool = False,
68-
mask_var: NDArray[np.bool] | str | None | Default = Default("'highly_variable'"),
69-
use_highly_variable: bool | None = None,
68+
mask_var: NDArray[np.bool] | str | None | Default = Default(
69+
"adata.var.get('highly_variable')"
70+
),
7071
dtype: DTypeLike = "float32",
7172
key_added: str | None | Default = Default(preset=("pca", "key_added")),
7273
copy: bool = False,
@@ -160,7 +161,7 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
160161
return_info
161162
Only relevant when not passing an :class:`~anndata.AnnData`:
162163
see “Returns”.
163-
{mask_var_hvg}
164+
{mask_var}
164165
layer
165166
Layer of `adata` to use as expression values.
166167
dtype
@@ -224,11 +225,12 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
224225
else:
225226
adata = AnnData(data)
226227

227-
# Unify new mask argument and deprecated use_highly_varible argument
228-
mask_var_param, mask_var = _handle_mask_var(
229-
adata, mask_var, obsm=obsm, use_highly_variable=use_highly_variable
230-
)
231-
del use_highly_variable
228+
if isinstance(mask_var, Default):
229+
mask_var = "highly_variable" if "highly_variable" in adata.var else None
230+
elif mask_var is not None and obsm is not None:
231+
msg = "Argument `mask_var` is incompatible with `obsm`."
232+
raise ValueError(msg)
233+
mask_var_param, mask_var = mask_var, _check_mask(adata, mask_var, "var")
232234
adata_comp = adata[:, mask_var] if mask_var is not None else adata
233235

234236
if n_comps is None:
@@ -355,7 +357,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
355357
adata.uns[key_uns] = dict(
356358
params=dict(
357359
zero_center=zero_center,
358-
use_highly_variable=mask_var_param == "highly_variable",
359360
mask_var=mask_var_param,
360361
**(dict(layer=layer) if layer is not None else {}),
361362
**(dict(obsm=obsm) if obsm is not None else {}),
@@ -387,49 +388,6 @@ def pca( # noqa: PLR0912, PLR0913, PLR0915
387388
return x_pca
388389

389390

390-
def _handle_mask_var(
391-
adata: AnnData,
392-
mask_var: NDArray[np.bool] | str | None | Default,
393-
*,
394-
obsm: str | None = None,
395-
use_highly_variable: bool | None,
396-
) -> tuple[np.ndarray | str | None, np.ndarray | None]:
397-
"""Unify new mask argument and deprecated use_highly_varible argument.
398-
399-
Returns both the normalized mask parameter and the validated mask array.
400-
"""
401-
if obsm:
402-
if not isinstance(mask_var, Default) and mask_var is not None:
403-
msg = "Argument `mask_var` is incompatible with `obsm`."
404-
raise ValueError(msg)
405-
return None, None
406-
407-
# First, verify and possibly warn
408-
if use_highly_variable is not None:
409-
hint = (
410-
'Use_highly_variable=True can be called through mask_var="highly_variable". '
411-
"Use_highly_variable=False can be called through mask_var=None"
412-
)
413-
msg = f"Argument `use_highly_variable` is deprecated, consider using the mask argument. {hint}"
414-
warn(msg, FutureWarning)
415-
if not isinstance(mask_var, Default):
416-
msg = f"These arguments are incompatible. {hint}"
417-
raise ValueError(msg)
418-
419-
# Handle default case and explicit use_highly_variable=True
420-
if use_highly_variable or (
421-
use_highly_variable is None
422-
and isinstance(mask_var, Default)
423-
and "highly_variable" in adata.var.columns
424-
):
425-
mask_var = "highly_variable"
426-
427-
# Without highly variable genes, we don’t use a mask by default
428-
if isinstance(mask_var, Default) or mask_var is None:
429-
return None, None
430-
return mask_var, _check_mask(adata, mask_var, "var")
431-
432-
433391
@overload
434392
def _handle_dask_ml_args(
435393
svd_solver: str | None, method: type[dmld.PCA | dmld.IncrementalPCA]

src/scanpy/tools/_ingest.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ class Ingest:
231231
_umap: UMAP
232232
# pca
233233
_pca_centered: bool
234-
_pca_use_hvg: bool
234+
_pca_mask: str | None
235235
_pca_basis: np.ndarray
236236
# adata
237237
_adata_ref: AnnData
@@ -338,15 +338,14 @@ def _init_neighbors(self, adata: AnnData, neighbors_key: str | None) -> None:
338338

339339
def _init_pca(self, adata: AnnData) -> None:
340340
self._pca_centered = adata.uns["pca"]["params"]["zero_center"]
341-
self._pca_use_hvg = adata.uns["pca"]["params"]["use_highly_variable"]
341+
self._pca_mask = adata.uns["pca"]["params"]["mask_var"]
342342

343-
mask = "highly_variable"
344-
if self._pca_use_hvg and mask not in adata.var.columns:
345-
msg = f"Did not find `adata.var[{mask!r}']`."
343+
if self._pca_mask and self._pca_mask not in adata.var.columns:
344+
msg = f"Did not find `adata.var[{self._pca_mask!r}']`."
346345
raise ValueError(msg)
347346

348-
if self._pca_use_hvg:
349-
self._pca_basis = adata.varm["PCs"][adata.var[mask]]
347+
if self._pca_mask:
348+
self._pca_basis = adata.varm["PCs"][adata.var[self._pca_mask]]
350349
else:
351350
self._pca_basis = adata.varm["PCs"]
352351

@@ -402,7 +401,7 @@ def __init__(
402401
def _pca(self, n_pcs=None):
403402
x = self._adata_new.X
404403
x = x.toarray() if isinstance(x, CSBase) else x.copy()
405-
if self._pca_use_hvg:
404+
if self._pca_mask:
406405
x = x[:, self._adata_ref.var["highly_variable"]]
407406
if self._pca_centered:
408407
x -= x.mean(axis=0)

tests/test_deprecations.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,3 @@ def test_deprecate_multicore_tsne() -> None:
1919
pytest.warns(ImportWarning, match=r"MulticoreTSNE"),
2020
):
2121
sc.tl.tsne(pbmc, use_fast_tsne=True)
22-
23-
24-
def test_deprecate_use_highly_variable_genes():
25-
pbmc = pbmc68k_reduced()
26-
27-
with pytest.warns(
28-
FutureWarning, match="Argument `use_highly_variable` is deprecated"
29-
):
30-
sc.pp.pca(pbmc, use_highly_variable=True)

tests/test_neighbors.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from pytest_mock import MockerFixture
2020

21+
2122
# the input data
2223
X = [[1, 0], [3, 0], [5, 6], [0, 4]]
2324
n_neighbors = 3 # includes data points themselves

tests/test_normalization.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from contextlib import nullcontext
43
from functools import partial
54
from typing import TYPE_CHECKING
65

@@ -216,9 +215,7 @@ def _check_pearson_pca_fields(ad, n_cells, n_comps):
216215
[
217216
pytest.param(False, dict(), "n_genes", id="no_hvg"),
218217
pytest.param(True, dict(), "n_hvgs", id="hvg_default"),
219-
pytest.param(
220-
True, dict(use_highly_variable=False), "n_genes", id="hvg_opt_out"
221-
),
218+
pytest.param(True, dict(mask_var=None), "n_genes", id="hvg_opt_out"),
222219
pytest.param(False, dict(mask_var="test_mask"), "n_unmasked", id="mask"),
223220
],
224221
)
@@ -245,19 +242,14 @@ def test_normalize_pearson_residuals_pca(
245242
adata, flavor="pearson_residuals", n_top_genes=n_hvgs
246243
)
247244

248-
ctx = (
249-
pytest.warns(FutureWarning, match=r"use_highly_variable.*deprecated")
250-
if "use_highly_variable" in params
251-
else nullcontext()
245+
# inplace=False
246+
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
247+
adata.copy(), inplace=False, n_comps=n_comps, **params
248+
)
249+
# inplace=True modifies the input adata object
250+
sc.experimental.pp.normalize_pearson_residuals_pca(
251+
adata, inplace=True, n_comps=n_comps, **params
252252
)
253-
with ctx: # inplace=False
254-
adata_pca = sc.experimental.pp.normalize_pearson_residuals_pca(
255-
adata.copy(), inplace=False, n_comps=n_comps, **params
256-
)
257-
with ctx: # inplace=True modifies the input adata object
258-
sc.experimental.pp.normalize_pearson_residuals_pca(
259-
adata, inplace=True, n_comps=n_comps, **params
260-
)
261253

262254
for ad, n_var_ret in (
263255
(adata_pca, n_var_copy),

tests/test_pca.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -403,20 +403,14 @@ def test_pca_n_pcs():
403403
# We use all possible array types here since this error should be raised before
404404
# PCA can realize that it got a Dask array
405405
@pytest.mark.parametrize("array_type", ARRAY_TYPES_ALL)
406-
def test_mask_highly_var_error(array_type):
407-
"""Check if use_highly_variable=True throws an error if the annotation is missing."""
406+
def test_mask_var_error(array_type):
407+
"""Check if mask_var="..." throws an error if the annotation is missing."""
408408
adata = AnnData(array_type(A_list).astype("float32"))
409-
with (
410-
pytest.warns(
411-
FutureWarning,
412-
match=r"Argument `use_highly_variable` is deprecated, consider using the mask argument\.",
413-
),
414-
pytest.raises(
415-
ValueError,
416-
match=r"Did not find `adata\.var\['highly_variable'\]`\.",
417-
),
409+
with pytest.raises(
410+
ValueError,
411+
match=r"Did not find `adata\.var\['highly_variable'\]`\.",
418412
):
419-
sc.pp.pca(adata, use_highly_variable=True)
413+
sc.pp.pca(adata, mask_var="highly_variable")
420414

421415

422416
def test_mask_length_error():

0 commit comments

Comments
 (0)