Skip to content

Commit 467da56

Browse files
committed
CLR: add a flavor argument and add flavors stoeckius and standard (closes #144)
The Seurat implementation differs from both the standard definition of CLR and the version of CLR applied in the original CITE-Seq paper. Therefore, allow the user to choose the CLR flavor.
1 parent 55ef22c commit 467da56

1 file changed

Lines changed: 47 additions & 20 deletions

File tree

muon/_prot/preproc.py

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
from typing import Optional, Iterable, Tuple, Union
1+
from typing import Optional, Iterable, Tuple, Union, Literal
22
from numbers import Integral, Real
33
from warnings import warn
44

55
import numpy as np
66
import pandas as pd
7-
from scipy.sparse import issparse, csc_matrix, csr_matrix
7+
from scipy.sparse import issparse, csc_matrix, csr_matrix, csc_array, csr_array
8+
from scipy.stats import gmean
89
from sklearn.mixture import GaussianMixture
910
from sklearn.decomposition import PCA
1011
from sklearn.linear_model import LinearRegression
@@ -198,7 +199,12 @@ def dsb(
198199
return toreturn
199200

200201

201-
def clr(adata: AnnData, inplace: bool = True, axis: int = 0) -> Union[None, AnnData]:
202+
def clr(
203+
adata: AnnData,
204+
inplace: bool = True,
205+
axis: int = 0,
206+
flavor: Literal["seurat", "stoeckius", "standard"] = "seurat",
207+
) -> AnnData | None:
202208
"""
203209
Apply the centered log ratio (CLR) transformation
204210
to normalize counts in adata.X.
@@ -207,6 +213,19 @@ def clr(adata: AnnData, inplace: bool = True, axis: int = 0) -> Union[None, AnnD
207213
data: AnnData object with protein expression counts.
208214
inplace: Whether to update adata.X inplace.
209215
axis: Axis across which CLR is performed.
216+
flavor: How to perform the CLR transformation.
217+
218+
- seurat: Uses log1p transformations throughout. This results in non-negative values and preserves
219+
sparse matrices.
220+
- stoeckius: Follows the original CITE-Seq paper by adding a pseudocount of 1 to the data before
221+
performing any transformations and using the standard log transform. This adheres more closely
222+
to the standard definition of the CLR transform, but can yield negative values and does not
223+
preserve sparse matrices (the result is always a dense matrix.)
224+
- standard: The standard CLR transform without any pseudocounts. Does not preserve sparse matrices
225+
and may yield infinite values if the input contains zeros.
226+
227+
References:
228+
Stoeckius et al, 2017 (`doi:10.1038/nmeth.4380 <https://dx.doi.org/10.1038/nmeth.4380>`_)
210229
"""
211230

212231
if axis not in [0, 1]:
@@ -215,25 +234,33 @@ def clr(adata: AnnData, inplace: bool = True, axis: int = 0) -> Union[None, AnnD
215234
if not inplace:
216235
adata = adata.copy()
217236

218-
if issparse(adata.X) and axis == 0 and not isinstance(adata.X, csc_matrix):
219-
warn("adata.X is sparse but not in CSC format. Converting to CSC.")
220-
x = csc_matrix(adata.X)
221-
elif issparse(adata.X) and axis == 1 and not isinstance(adata.X, csr_matrix):
222-
warn("adata.X is sparse but not in CSR format. Converting to CSR.")
223-
x = csr_matrix(adata.X)
224-
else:
225-
x = adata.X
237+
x = adata.X
226238

227-
if issparse(x):
228-
x.data /= np.repeat(
229-
np.exp(np.log1p(x).sum(axis=axis).A / x.shape[axis]), x.getnnz(axis=axis)
230-
)
231-
np.log1p(x.data, out=x.data)
239+
if flavor == "seurat":
240+
if issparse(x) and axis == 0 and not isinstance(x, csc_matrix | csc_array):
241+
warn(
242+
"adata.X is sparse but not in CSC format. CSC format required for `axis=0`. Converting to CSC."
243+
)
244+
x = csc_matrix(x)
245+
elif issparse(x) and axis == 1 and not isinstance(x, csr_matrix | csr_array):
246+
warn(
247+
"adata.X is sparse but not in CSR format. CSR format required for `axis=1`. Converting to CSR."
248+
)
249+
x = csr_matrix(x)
250+
251+
if issparse(x):
252+
x.data /= np.repeat(np.exp(np.log1p(x).mean(axis=axis).toarray()), x.getnnz(axis=axis))
253+
np.log1p(x.data, out=x.data)
254+
else:
255+
np.log1p(x / np.exp(np.log1p(x).mean(axis=axis, keepdims=True)), out=x)
256+
elif flavor in ("stoeckius", "standard"):
257+
if issparse(x):
258+
x = x.toarray()
259+
if flavor == "stoeckius":
260+
x += 1
261+
np.log(x / gmean(x, axis=axis, keepdims=True), out=x)
232262
else:
233-
np.log1p(
234-
x / np.exp(np.log1p(x).sum(axis=axis, keepdims=True) / x.shape[axis]),
235-
out=x,
236-
)
263+
raise ValueError(f"Unknown flavor `{flavor}`.")
237264

238265
adata.X = x
239266

0 commit comments

Comments
 (0)