@@ -68,17 +68,7 @@ def __init__(self, value: int = 0) -> None:
6868def compute_empty_drops_bounds (
6969 chemistry_description : str | None = None , n_partitions : int | None = None
7070) -> tuple [int , int ]:
71- """
72- Determines the lower and upper bounds for empty drops background based on the provided chemistry description.
73-
74- Args:
75- chemistry_description (str | None): A string specifying the chemistry type.
76- n_partitions (int | None): Number of partitions specified in the input.
77-
78- Returns
79- -------
80- tuple[int, int]: A tuple containing the lower and upper bounds.
81- """
71+ """Determines the lower and upper bounds for empty drops background based on the provided chemistry description."""
8272 if n_partitions is not None :
8373 return (n_partitions // 2 , n_partitions )
8474
@@ -98,12 +88,15 @@ def get_fdr_threshold_by_chemistry(chemistry_name: str) -> float:
9888 """
9989 Return the maximum adjusted p-value (FDR threshold) for calling a barcode as non-ambient, based on the chemistry used.
10090
101- Args:
102- chemistry_name (str): Name of the 10X chemistry.
91+ Parameters
92+ ----------
93+ chemistry_name
94+ Name of the 10X chemistry.
10395
10496 Returns
10597 -------
106- float: FDR threshold (e.g., 0.001 or 0.01)
98+ float
99+ FDR threshold (e.g., 0.001 or 0.01)
107100 """
108101 high_gem_chemistries = {"10X_3p_v3" , "10X_3p_v4" , "10X_5p_v3" , "10X_HT" }
109102 return 0.001 if chemistry_name in high_gem_chemistries else 0.01
@@ -113,13 +106,17 @@ def find_within_ordmag(resampled_bc_counts: np.ndarray, quantile_point: int) ->
113106 """
114107 Find the number of barcodes above a cutoff determined by a quantile value.
115108
116- Args:
117- resampled_bc_counts (np.ndarray): Resampled barcode counts derived from non-zero_bc_counts.
118- quantile_point (int): Index of the cell at the quantile point.
109+ Parameters
110+ ----------
111+ resampled_bc_counts
112+ Resampled barcode counts derived from nonzero_bc_counts.
113+ quantile_point
114+ Index of the cell at the quantile point.
119115
120116 Returns
121117 -------
122- int: The number of barcodes above the cutoff (quantile_val / 10).
118+ int
119+ Number of barcodes above the cutoff (quantile_val / 10).
123120 """
124121 n = len (resampled_bc_counts )
125122 if n == 0 :
@@ -150,13 +147,17 @@ def ordMag_expected(sorted_bc_counts: np.ndarray, recovered_cells: np.ndarray) -
150147 """
151148 Estimate the expected number of cells by analyzing the distribution of barcode counts.
152149
153- Args:
154- sorted_bc_counts (np.ndarray): A sorted (descending) array of read counts per barcode.
155- recovered_cells (np.ndarray): A list of candidate cell indices (log2-spaced) to evaluate potential cell thresholds.
150+ Parameters
151+ ----------
152+ sorted_bc_counts
153+ A sorted (descending) array of read counts per barcode.
154+ recovered_cells
155+ An array of candidate cell indices (log2-spaced) to evaluate potential cell thresholds.
156156
157157 Returns
158158 -------
159- tuple[int, float]: The estimated number of expected cells and the associated loss value.
159+ tuple
160+ The estimated number of expected cells and the associated loss value.
160161 """
161162 # Initialize loss array
162163 loss = np .zeros (len (recovered_cells ))
@@ -201,17 +202,7 @@ def ordMag_expected(sorted_bc_counts: np.ndarray, recovered_cells: np.ndarray) -
201202
202203
203204def call_ordMag (nonzero_bc_counts : np .ndarray , max_expected_cells : int ) -> tuple [int , float ]:
204- """
205- Call the expected number of cells using the OrdMag method.
206-
207- Args:
208- nonzero_bc_counts (np.ndarray): Nonzero barcode counts.
209- max_expected_cells (int): Maximum number of expected cells.
210-
211- Returns
212- -------
213- tuple[int, float]: Estimated number of expected cells and associated loss.
214- """
205+ """Call the expected number of cells using the OrdMag method."""
215206 sorted_bc_counts = np .sort (nonzero_bc_counts )[::- 1 ]
216207 # Generate log2-spaced cell indices
217208 recovered_cells = np .linspace (1 , np .log2 (max_expected_cells ), 2000 )
@@ -221,17 +212,7 @@ def call_ordMag(nonzero_bc_counts: np.ndarray, max_expected_cells: int) -> tuple
221212
222213
223214def compute_bootstrapped_top_n (top_n_boot : np .ndarray , nonzero_counts : np .ndarray ) -> FilteredCellResults :
224- """
225- Compute the bootstrapped top N cells from bootstrap samples.
226-
227- Args:
228- top_n_boot (np.ndarray): Array of top N cell counts from bootstrap samples.
229- nonzero_counts (np.ndarray): Nonzero barcode counts.
230-
231- Returns
232- -------
233- FilteredCellResults: An object containing the estimated number of top cells to retain and the UMI count threshold.
234- """
215+ """Compute the bootstrapped top N cells from bootstrap samples."""
235216 top_n_bcs_mean = np .mean (top_n_boot )
236217 n_top_cells = int (np .round (top_n_bcs_mean ))
237218 logger .debug (f"INSIDE compute_bootstrapped_top_n(): n_top_cells: { n_top_cells } " )
@@ -261,19 +242,7 @@ def compute_bootstrapped_top_n(top_n_boot: np.ndarray, nonzero_counts: np.ndarra
261242def initial_filtering_OrdMag (
262243 matrix : csc_matrix , chemistry_description : str | None = None , n_partitions : int | None = None , verbose : bool = False
263244) -> tuple [np .ndarray , FilteredCellResults , str ] | np .ndarray :
264- """
265- Perform initial filtering of cells using the OrdMag method.
266-
267- Args:
268- matrix (csc_matrix): The count matrix.
269- chemistry_description (str | None): Chemistry description string.
270- n_partitions (int | None): Number of partitions.
271- verbose (bool): If True, enables verbose output.
272-
273- Returns
274- -------
275- tuple[np.ndarray, FilteredCellResults, str] or np.ndarray: Filtered barcodes, metrics, and warning message if applicable.
276- """
245+ """Perform initial filtering of cells using the OrdMag method."""
277246 metrics = FilteredCellResults (0 )
278247 bc_counts = matrix .get_counts_per_bc ()
279248 nonzero_bc_counts = bc_counts [bc_counts > 0 ]
@@ -339,16 +308,7 @@ def initial_filtering_OrdMag(
339308
340309
341310def adjust_pvalue_bh (p : np .ndarray ) -> np .ndarray :
342- """
343- Multiple testing correction of p-values using the Benjamini-Hochberg procedure.
344-
345- Args:
346- p (np.ndarray): Array of p-values.
347-
348- Returns
349- -------
350- np.ndarray: Array of adjusted p-values.
351- """
311+ """Multiple testing correction of p-values using the Benjamini-Hochberg procedure."""
352312 descending = np .argsort (p )[::- 1 ]
353313 # q = p * N / k where p = p-value, N = # tests, k = p-value rank
354314 scale = float (len (p )) / np .arange (len (p ), 0 , - 1 )
@@ -359,18 +319,7 @@ def adjust_pvalue_bh(p: np.ndarray) -> np.ndarray:
359319
360320
361321def eval_multinomial_loglikelihoods (matrix : csc_matrix , profile_p : np .ndarray , max_mem_gb : float = 0.1 ) -> np .ndarray :
362- """
363- Compute the multinomial log PMF for many barcodes.
364-
365- Args:
366- matrix (csc_matrix): Matrix of UMI counts (feature x barcode).
367- profile_p (np.ndarray): Multinomial probability vector.
368- max_mem_gb (float): Try to bound memory usage.
369-
370- Returns
371- -------
372- np.ndarray: Log-likelihood for each barcode.
373- """
322+ """Compute the multinomial log PMF for many barcodes."""
374323 gb_per_bc = float (matrix .shape [0 ] * matrix .dtype .itemsize ) / (1024 ** 3 )
375324 bcs_per_chunk = max (1 , int (round (max_mem_gb / gb_per_bc )))
376325 num_bcs = matrix .shape [1 ]
@@ -397,18 +346,6 @@ def simulate_multinomial_loglikelihoods(
397346 Simulate draws from a multinomial distribution for various values of N.
398347
399348 Uses the approximation from Lun et al. (https://www.biorxiv.org/content/biorxiv/early/2018/04/04/234872.full.pdf)
400-
401- Args:
402- profile_p (np.ndarray): Probability of observing each feature.
403- umis_per_bc (np.ndarray): UMI counts per barcode (multinomial N).
404- num_sims (int): Number of simulations per distinct N value.
405- jump (int): Vectorize the sampling if the gap between two distinct Ns exceeds this.
406- n_sample_feature_block (int): Vectorize this many feature samplings at a time.
407- verbose (bool): Enable verbose output.
408-
409- Returns
410- -------
411- tuple[np.ndarray, np.ndarray]: distinct_ns (simulated N values), log_likelihoods (simulated log likelihoods).
412349 """
413350 distinct_n = np .flatnonzero (np .bincount (umis_per_bc .astype (int )))
414351
@@ -463,19 +400,7 @@ def simulate_multinomial_loglikelihoods(
463400def compute_ambient_pvalues (
464401 umis_per_bc : np .ndarray , obs_loglk : np .ndarray , sim_n : np .ndarray , sim_loglk : np .ndarray
465402) -> np .ndarray :
466- """
467- Compute p-values for observed multinomial log-likelihoods.
468-
469- Args:
470- umis_per_bc (np.ndarray): UMI counts per barcode.
471- obs_loglk (np.ndarray): Observed log-likelihoods of each barcode deriving from an ambient profile.
472- sim_n (np.ndarray): Multinomial N for simulated log-likelihoods.
473- sim_loglk (np.ndarray): Simulated log-likelihoods of shape (len(sim_n), num_simulations).
474-
475- Returns
476- -------
477- np.ndarray: Array of p-values.
478- """
403+ """Compute p-values for observed multinomial log-likelihoods."""
479404 assert len (umis_per_bc ) == len (obs_loglk )
480405 assert sim_loglk .shape [0 ] == len (sim_n )
481406
@@ -494,18 +419,7 @@ def compute_ambient_pvalues(
494419
495420
496421def estimate_profile_sgt (matrix : csc_matrix , barcode_indices : np .ndarray , nz_feat : np .ndarray ) -> np .ndarray :
497- """
498- Estimate a gene expression profile by Simple Good Turing.
499-
500- Args:
501- matrix (csc_matrix): Sparse matrix of all counts.
502- barcode_indices (np.ndarray): Barcode indices to use.
503- nz_feat (np.ndarray): Indices of features that are non-zero at least once.
504-
505- Returns
506- -------
507- np.ndarray: Estimated probabilities of length len(nz_feat).
508- """
422+ """Estimate a gene expression profile by Simple Good Turing."""
509423 # Initial profile estimate
510424 prof_mat = matrix [:, barcode_indices ]
511425
@@ -527,17 +441,7 @@ def estimate_profile_sgt(matrix: csc_matrix, barcode_indices: np.ndarray, nz_fea
527441
528442# Construct a background expression profile from barcodes with <= T UMIs
529443def est_background_profile_sgt (matrix : csc_matrix , use_bcs : np .ndarray ) -> tuple [np .ndarray , np .ndarray ]:
530- """
531- Estimate a gene expression profile on a given subset of barcodes. Use Good-Turing to smooth the estimated profile.
532-
533- Args:
534- matrix (csc_matrix): Sparse matrix of all counts.
535- use_bcs (np.ndarray): Indices of barcodes to use (col indices into matrix).
536-
537- Returns
538- -------
539- tuple[np.ndarray, np.ndarray]: Tuple of (features used, estimated probabilities of length use_features).
540- """
444+ """Estimate a gene expression profile on a given subset of barcodes. Use Good-Turing to smooth the estimated profile."""
541445 # Use features that are nonzero anywhere in the data
542446 use_feats = np .flatnonzero (np .asarray (matrix .sum (1 )))
543447
@@ -560,20 +464,29 @@ def find_nonambient_barcodes(
560464 """
561465 Call barcodes as being sufficiently distinct from the ambient profile.
562466
563- Args:
564- matrix (csc_matrix): Full expression matrix.
565- orig_cell_bcs (list[str]): Strings of initially-called cell barcodes.
566- chemistry_description (str): Chemistry description.
567- n_partitions (int): Number of partitions.
568- max_mem_gb (float): Maximum memory to use in GB.
569- min_umi_frac_of_median (float): Minimum UMI fraction of median.
570- min_umis_nonambient (int): Minimum UMIs for nonambient barcodes.
571- max_adj_pvalue (float): Maximum adjusted p-value to call barcode as non-ambient.
572- verbose (bool): Enable verbose output.
467+ Parameters
468+ ----------
469+ matrix
470+ Full expression matrix.
471+ orig_cell_bcs
472+ Initially-called cell barcodes.
473+ chemistry_description
474+ Chemistry description.
475+ n_partitions
476+ Number of partitions used for ambient estimation.
477+ max_mem_gb
478+ Maximum memory to use in gigabytes.
479+ min_umi_frac_of_median
480+ Minimum UMI fraction (relative to median) required for candidate barcodes.
481+ min_umis_nonambient
482+ Minimum number of UMIs to consider a barcode for non-ambient testing.
483+ verbose
484+ Whether to enable verbose output.
573485
574486 Returns
575487 -------
576- NonAmbientBarcodeResult | None: A result object or None if no suitable barcodes found.
488+ NonAmbientBarcodeResult or None
489+ A result object containing evaluation metrics for non-ambient barcodes, or None if no suitable barcodes are found.
577490 """
578491 # Estimate an ambient RNA profile
579492 umis_per_bc = matrix .get_counts_per_bc ()
0 commit comments