1- from typing import Optional , Iterable , Tuple , Union
1+ from typing import Optional , Iterable , Tuple , Union , Literal
22from numbers import Integral , Real
33from warnings import warn
44
55import numpy as np
66import pandas as pd
7- from scipy .sparse import issparse , csc_matrix , csr_matrix
7+ from scipy .sparse import issparse , csc_matrix , csr_matrix , csc_array , csr_array
8+ from scipy .stats import gmean
89from sklearn .mixture import GaussianMixture
910from sklearn .decomposition import PCA
1011from sklearn .linear_model import LinearRegression
@@ -198,7 +199,12 @@ def dsb(
198199 return toreturn
199200
200201
201- def clr (adata : AnnData , inplace : bool = True , axis : int = 0 ) -> Union [None , AnnData ]:
202+ def clr (
203+ adata : AnnData ,
204+ inplace : bool = True ,
205+ axis : int = 0 ,
206+ flavor : Literal ["seurat" , "stoeckius" , "standard" ] = "seurat" ,
207+ ) -> AnnData | None :
202208 """
203209 Apply the centered log ratio (CLR) transformation
204210 to normalize counts in adata.X.
@@ -207,6 +213,19 @@ def clr(adata: AnnData, inplace: bool = True, axis: int = 0) -> Union[None, AnnD
207213 data: AnnData object with protein expression counts.
208214 inplace: Whether to update adata.X inplace.
209215 axis: Axis across which CLR is performed.
216+ flavor: How to perform the CLR transformation.
217+
218+ - seurat: Uses log1p transformations throughout. This results in non-negative values and preserves
219+ sparse matrices.
220+ - stoeckius: Follows the original CITE-Seq paper by adding a pseudocount of 1 to the data before
221+ performing any transformations and using the standard log transform. This adheres more closely
222+ to the standard definition of the CLR transform, but can yield negative values and does not
223+ preserve sparse matrices (the result is always a dense matrix.)
224+ - standard: The standard CLR transform without any pseudocounts. Does not preserve sparse matrices
225+ and may yield infinite values if the input contains zeros.
226+
227+ References:
228+ Stoeckius et al, 2017 (`doi:10.1038/nmeth.4380 <https://dx.doi.org/10.1038/nmeth.4380>`_)
210229 """
211230
212231 if axis not in [0 , 1 ]:
@@ -215,25 +234,33 @@ def clr(adata: AnnData, inplace: bool = True, axis: int = 0) -> Union[None, AnnD
215234 if not inplace :
216235 adata = adata .copy ()
217236
218- if issparse (adata .X ) and axis == 0 and not isinstance (adata .X , csc_matrix ):
219- warn ("adata.X is sparse but not in CSC format. Converting to CSC." )
220- x = csc_matrix (adata .X )
221- elif issparse (adata .X ) and axis == 1 and not isinstance (adata .X , csr_matrix ):
222- warn ("adata.X is sparse but not in CSR format. Converting to CSR." )
223- x = csr_matrix (adata .X )
224- else :
225- x = adata .X
237+ x = adata .X
226238
227- if issparse (x ):
228- x .data /= np .repeat (
229- np .exp (np .log1p (x ).sum (axis = axis ).A / x .shape [axis ]), x .getnnz (axis = axis )
230- )
231- np .log1p (x .data , out = x .data )
239+ if flavor == "seurat" :
240+ if issparse (x ) and axis == 0 and not isinstance (x , csc_matrix | csc_array ):
241+ warn (
242+ "adata.X is sparse but not in CSC format. CSC format required for `axis=0`. Converting to CSC."
243+ )
244+ x = csc_matrix (x )
245+ elif issparse (x ) and axis == 1 and not isinstance (x , csr_matrix | csr_array ):
246+ warn (
247+ "adata.X is sparse but not in CSR format. CSR format required for `axis=1`. Converting to CSR."
248+ )
249+ x = csr_matrix (x )
250+
251+ if issparse (x ):
252+ x .data /= np .repeat (np .exp (np .log1p (x ).mean (axis = axis ).toarray ()), x .getnnz (axis = axis ))
253+ np .log1p (x .data , out = x .data )
254+ else :
255+ np .log1p (x / np .exp (np .log1p (x ).mean (axis = axis , keepdims = True )), out = x )
256+ elif flavor in ("stoeckius" , "standard" ):
257+ if issparse (x ):
258+ x = x .toarray ()
259+ if flavor == "stoeckius" :
260+ x += 1
261+ np .log (x / gmean (x , axis = axis , keepdims = True ), out = x )
232262 else :
233- np .log1p (
234- x / np .exp (np .log1p (x ).sum (axis = axis , keepdims = True ) / x .shape [axis ]),
235- out = x ,
236- )
263+ raise ValueError (f"Unknown flavor `{ flavor } `." )
237264
238265 adata .X = x
239266
0 commit comments