Skip to content

Commit 72855c7

Browse files
committed
Modify docstring style. fix #7
1 parent 59a68e5 commit 72855c7

9 files changed

Lines changed: 229 additions & 253 deletions

File tree

.trigger

Whitespace-only changes.

src/qcatch/api.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,33 @@ def run_qcatch_api(
2323
"""
2424
Run QCatch as a Python API.
2525
26-
Args:
27-
input_path: Path to .h5ad file or quant folder
28-
chemistry: Chemistry type (same as CLI)
29-
gene_id2name_file: TSV with gene ID to name map
30-
valid_cell_list: Optional list of valid barcodes
31-
skip_umap_tsne: Skip clustering plots
32-
export_summary_table: Whether to compute and return summary table
33-
logger: Optional custom logger
26+
Parameters
27+
----------
28+
input_path
29+
Path to .h5ad file or quantification folder.
30+
output
31+
Output directory path. If None, uses the input directory.
32+
chemistry
33+
Chemistry type.
34+
gene_id2name_file
35+
TSV file mapping gene IDs to gene names.
36+
valid_cell_list
37+
Optional list of valid barcodes to retain.
38+
skip_umap_tsne
39+
If True, skip generating UMAP and t-SNE plots.
40+
export_summary_table
41+
If True, include a summary HTML table in the output.
42+
logger
43+
Optional custom logger instance.
3444
3545
Returns
3646
-------
3747
dict with:
3848
- 'anndata': updated AnnData object
3949
- 'valid_barcodes': list of barcodes
40-
- 'figures': list of plotly figures
41-
- 'summary_table_html': str (optional)
42-
- 'warning_html': str
50+
- 'figures': HTML strings or, optionally, plotly.Figure objects
51+
- 'summary_table_html': HTML string of summary table
52+
- 'warning_html': HTML string of warnings
4353
"""
4454
logger = logger or setup_logger("qcatch", verbose=False)
4555

src/qcatch/convert_to_html.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,22 @@ def modify_html_with_plots(
198198
199199
Parameters
200200
----------
201-
soup (BeautifulSoup): Parsed HTML document to modify.
202-
output_html_path (str): Path to save the modified HTML file.
203-
plot_text_elements (tuple): A tuple containing:
204-
- dict: Plotly plot HTML div strings keyed by plot identifiers.
205-
- str: HTML string for the summary table.
206-
table_htmls (tuple): A tuple containing HTML strings for quant JSON and permit list log info tables.
207-
warning_html (str): HTML string containing warning messages to insert.
208-
usa_mode (bool): Flag indicating whether SUA mode is enabled; if False, SUA tab is removed.
201+
soup
202+
Parsed HTML document as a BeautifulSoup object.
203+
output_html_path
204+
Path to save the modified HTML file.
205+
plot_text_elements
206+
Tuple containing:
207+
- A dictionary of Plotly plot HTML div strings keyed by plot identifiers.
208+
- An HTML string representing the summary table.
209+
table_htmls
210+
Tuple containing HTML strings for the quant log and permit list log info tables.
211+
code_texts
212+
Python code snippet to insert into the report.
213+
warning_html
214+
HTML string containing warning messages.
215+
usa_mode
216+
Flag indicating whether USA mode is enabled; if False, the SUA tab is removed.
209217
210218
Returns
211219
-------

src/qcatch/find_retained_cells/cell_calling.py

Lines changed: 51 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,7 @@ def __init__(self, value: int = 0) -> None:
6868
def compute_empty_drops_bounds(
6969
chemistry_description: str | None = None, n_partitions: int | None = None
7070
) -> tuple[int, int]:
71-
"""
72-
Determines the lower and upper bounds for empty drops background based on the provided chemistry description.
73-
74-
Args:
75-
chemistry_description (str | None): A string specifying the chemistry type.
76-
n_partitions (int | None): Number of partitions specified in the input.
77-
78-
Returns
79-
-------
80-
tuple[int, int]: A tuple containing the lower and upper bounds.
81-
"""
71+
"""Determines the lower and upper bounds for empty drops background based on the provided chemistry description."""
8272
if n_partitions is not None:
8373
return (n_partitions // 2, n_partitions)
8474

@@ -98,12 +88,15 @@ def get_fdr_threshold_by_chemistry(chemistry_name: str) -> float:
9888
"""
9989
Return the maximum adjusted p-value (FDR threshold) for calling a barcode as non-ambient, based on the chemistry used.
10090
101-
Args:
102-
chemistry_name (str): Name of the 10X chemistry.
91+
Parameters
92+
----------
93+
chemistry_name
94+
Name of the 10X chemistry.
10395
10496
Returns
10597
-------
106-
float: FDR threshold (e.g., 0.001 or 0.01)
98+
float
99+
FDR threshold (e.g., 0.001 or 0.01)
107100
"""
108101
high_gem_chemistries = {"10X_3p_v3", "10X_3p_v4", "10X_5p_v3", "10X_HT"}
109102
return 0.001 if chemistry_name in high_gem_chemistries else 0.01
@@ -113,13 +106,17 @@ def find_within_ordmag(resampled_bc_counts: np.ndarray, quantile_point: int) ->
113106
"""
114107
Find the number of barcodes above a cutoff determined by a quantile value.
115108
116-
Args:
117-
resampled_bc_counts (np.ndarray): Resampled barcode counts derived from non-zero_bc_counts.
118-
quantile_point (int): Index of the cell at the quantile point.
109+
Parameters
110+
----------
111+
resampled_bc_counts
112+
Resampled barcode counts derived from nonzero_bc_counts.
113+
quantile_point
114+
Index of the cell at the quantile point.
119115
120116
Returns
121117
-------
122-
int: The number of barcodes above the cutoff (quantile_val / 10).
118+
int
119+
Number of barcodes above the cutoff (quantile_val / 10).
123120
"""
124121
n = len(resampled_bc_counts)
125122
if n == 0:
@@ -150,13 +147,17 @@ def ordMag_expected(sorted_bc_counts: np.ndarray, recovered_cells: np.ndarray) -
150147
"""
151148
Estimate the expected number of cells by analyzing the distribution of barcode counts.
152149
153-
Args:
154-
sorted_bc_counts (np.ndarray): A sorted (descending) array of read counts per barcode.
155-
recovered_cells (np.ndarray): A list of candidate cell indices (log2-spaced) to evaluate potential cell thresholds.
150+
Parameters
151+
----------
152+
sorted_bc_counts
153+
A sorted (descending) array of read counts per barcode.
154+
recovered_cells
155+
An array of candidate cell indices (log2-spaced) to evaluate potential cell thresholds.
156156
157157
Returns
158158
-------
159-
tuple[int, float]: The estimated number of expected cells and the associated loss value.
159+
tuple
160+
The estimated number of expected cells and the associated loss value.
160161
"""
161162
# Initialize loss array
162163
loss = np.zeros(len(recovered_cells))
@@ -201,17 +202,7 @@ def ordMag_expected(sorted_bc_counts: np.ndarray, recovered_cells: np.ndarray) -
201202

202203

203204
def call_ordMag(nonzero_bc_counts: np.ndarray, max_expected_cells: int) -> tuple[int, float]:
204-
"""
205-
Call the expected number of cells using the OrdMag method.
206-
207-
Args:
208-
nonzero_bc_counts (np.ndarray): Nonzero barcode counts.
209-
max_expected_cells (int): Maximum number of expected cells.
210-
211-
Returns
212-
-------
213-
tuple[int, float]: Estimated number of expected cells and associated loss.
214-
"""
205+
"""Call the expected number of cells using the OrdMag method."""
215206
sorted_bc_counts = np.sort(nonzero_bc_counts)[::-1]
216207
# Generate log2-spaced cell indices
217208
recovered_cells = np.linspace(1, np.log2(max_expected_cells), 2000)
@@ -221,17 +212,7 @@ def call_ordMag(nonzero_bc_counts: np.ndarray, max_expected_cells: int) -> tuple
221212

222213

223214
def compute_bootstrapped_top_n(top_n_boot: np.ndarray, nonzero_counts: np.ndarray) -> FilteredCellResults:
224-
"""
225-
Compute the bootstrapped top N cells from bootstrap samples.
226-
227-
Args:
228-
top_n_boot (np.ndarray): Array of top N cell counts from bootstrap samples.
229-
nonzero_counts (np.ndarray): Nonzero barcode counts.
230-
231-
Returns
232-
-------
233-
FilteredCellResults: An object containing the estimated number of top cells to retain and the UMI count threshold.
234-
"""
215+
"""Compute the bootstrapped top N cells from bootstrap samples."""
235216
top_n_bcs_mean = np.mean(top_n_boot)
236217
n_top_cells = int(np.round(top_n_bcs_mean))
237218
logger.debug(f"INSIDE compute_bootstrapped_top_n(): n_top_cells: {n_top_cells}")
@@ -261,19 +242,7 @@ def compute_bootstrapped_top_n(top_n_boot: np.ndarray, nonzero_counts: np.ndarra
261242
def initial_filtering_OrdMag(
262243
matrix: csc_matrix, chemistry_description: str | None = None, n_partitions: int | None = None, verbose: bool = False
263244
) -> tuple[np.ndarray, FilteredCellResults, str] | np.ndarray:
264-
"""
265-
Perform initial filtering of cells using the OrdMag method.
266-
267-
Args:
268-
matrix (csc_matrix): The count matrix.
269-
chemistry_description (str | None): Chemistry description string.
270-
n_partitions (int | None): Number of partitions.
271-
verbose (bool): If True, enables verbose output.
272-
273-
Returns
274-
-------
275-
tuple[np.ndarray, FilteredCellResults, str] or np.ndarray: Filtered barcodes, metrics, and warning message if applicable.
276-
"""
245+
"""Perform initial filtering of cells using the OrdMag method."""
277246
metrics = FilteredCellResults(0)
278247
bc_counts = matrix.get_counts_per_bc()
279248
nonzero_bc_counts = bc_counts[bc_counts > 0]
@@ -339,16 +308,7 @@ def initial_filtering_OrdMag(
339308

340309

341310
def adjust_pvalue_bh(p: np.ndarray) -> np.ndarray:
342-
"""
343-
Multiple testing correction of p-values using the Benjamini-Hochberg procedure.
344-
345-
Args:
346-
p (np.ndarray): Array of p-values.
347-
348-
Returns
349-
-------
350-
np.ndarray: Array of adjusted p-values.
351-
"""
311+
"""Multiple testing correction of p-values using the Benjamini-Hochberg procedure."""
352312
descending = np.argsort(p)[::-1]
353313
# q = p * N / k where p = p-value, N = # tests, k = p-value rank
354314
scale = float(len(p)) / np.arange(len(p), 0, -1)
@@ -359,18 +319,7 @@ def adjust_pvalue_bh(p: np.ndarray) -> np.ndarray:
359319

360320

361321
def eval_multinomial_loglikelihoods(matrix: csc_matrix, profile_p: np.ndarray, max_mem_gb: float = 0.1) -> np.ndarray:
362-
"""
363-
Compute the multinomial log PMF for many barcodes.
364-
365-
Args:
366-
matrix (csc_matrix): Matrix of UMI counts (feature x barcode).
367-
profile_p (np.ndarray): Multinomial probability vector.
368-
max_mem_gb (float): Try to bound memory usage.
369-
370-
Returns
371-
-------
372-
np.ndarray: Log-likelihood for each barcode.
373-
"""
322+
"""Compute the multinomial log PMF for many barcodes."""
374323
gb_per_bc = float(matrix.shape[0] * matrix.dtype.itemsize) / (1024**3)
375324
bcs_per_chunk = max(1, int(round(max_mem_gb / gb_per_bc)))
376325
num_bcs = matrix.shape[1]
@@ -397,18 +346,6 @@ def simulate_multinomial_loglikelihoods(
397346
Simulate draws from a multinomial distribution for various values of N.
398347
399348
Uses the approximation from Lun et al. (https://www.biorxiv.org/content/biorxiv/early/2018/04/04/234872.full.pdf)
400-
401-
Args:
402-
profile_p (np.ndarray): Probability of observing each feature.
403-
umis_per_bc (np.ndarray): UMI counts per barcode (multinomial N).
404-
num_sims (int): Number of simulations per distinct N value.
405-
jump (int): Vectorize the sampling if the gap between two distinct Ns exceeds this.
406-
n_sample_feature_block (int): Vectorize this many feature samplings at a time.
407-
verbose (bool): Enable verbose output.
408-
409-
Returns
410-
-------
411-
tuple[np.ndarray, np.ndarray]: distinct_ns (simulated N values), log_likelihoods (simulated log likelihoods).
412349
"""
413350
distinct_n = np.flatnonzero(np.bincount(umis_per_bc.astype(int)))
414351

@@ -463,19 +400,7 @@ def simulate_multinomial_loglikelihoods(
463400
def compute_ambient_pvalues(
464401
umis_per_bc: np.ndarray, obs_loglk: np.ndarray, sim_n: np.ndarray, sim_loglk: np.ndarray
465402
) -> np.ndarray:
466-
"""
467-
Compute p-values for observed multinomial log-likelihoods.
468-
469-
Args:
470-
umis_per_bc (np.ndarray): UMI counts per barcode.
471-
obs_loglk (np.ndarray): Observed log-likelihoods of each barcode deriving from an ambient profile.
472-
sim_n (np.ndarray): Multinomial N for simulated log-likelihoods.
473-
sim_loglk (np.ndarray): Simulated log-likelihoods of shape (len(sim_n), num_simulations).
474-
475-
Returns
476-
-------
477-
np.ndarray: Array of p-values.
478-
"""
403+
"""Compute p-values for observed multinomial log-likelihoods."""
479404
assert len(umis_per_bc) == len(obs_loglk)
480405
assert sim_loglk.shape[0] == len(sim_n)
481406

@@ -494,18 +419,7 @@ def compute_ambient_pvalues(
494419

495420

496421
def estimate_profile_sgt(matrix: csc_matrix, barcode_indices: np.ndarray, nz_feat: np.ndarray) -> np.ndarray:
497-
"""
498-
Estimate a gene expression profile by Simple Good Turing.
499-
500-
Args:
501-
matrix (csc_matrix): Sparse matrix of all counts.
502-
barcode_indices (np.ndarray): Barcode indices to use.
503-
nz_feat (np.ndarray): Indices of features that are non-zero at least once.
504-
505-
Returns
506-
-------
507-
np.ndarray: Estimated probabilities of length len(nz_feat).
508-
"""
422+
"""Estimate a gene expression profile by Simple Good Turing."""
509423
# Initial profile estimate
510424
prof_mat = matrix[:, barcode_indices]
511425

@@ -527,17 +441,7 @@ def estimate_profile_sgt(matrix: csc_matrix, barcode_indices: np.ndarray, nz_fea
527441

528442
# Construct a background expression profile from barcodes with <= T UMIs
529443
def est_background_profile_sgt(matrix: csc_matrix, use_bcs: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
530-
"""
531-
Estimate a gene expression profile on a given subset of barcodes. Use Good-Turing to smooth the estimated profile.
532-
533-
Args:
534-
matrix (csc_matrix): Sparse matrix of all counts.
535-
use_bcs (np.ndarray): Indices of barcodes to use (col indices into matrix).
536-
537-
Returns
538-
-------
539-
tuple[np.ndarray, np.ndarray]: Tuple of (features used, estimated probabilities of length use_features).
540-
"""
444+
"""Estimate a gene expression profile on a given subset of barcodes. Use Good-Turing to smooth the estimated profile."""
541445
# Use features that are nonzero anywhere in the data
542446
use_feats = np.flatnonzero(np.asarray(matrix.sum(1)))
543447

@@ -560,20 +464,29 @@ def find_nonambient_barcodes(
560464
"""
561465
Call barcodes as being sufficiently distinct from the ambient profile.
562466
563-
Args:
564-
matrix (csc_matrix): Full expression matrix.
565-
orig_cell_bcs (list[str]): Strings of initially-called cell barcodes.
566-
chemistry_description (str): Chemistry description.
567-
n_partitions (int): Number of partitions.
568-
max_mem_gb (float): Maximum memory to use in GB.
569-
min_umi_frac_of_median (float): Minimum UMI fraction of median.
570-
min_umis_nonambient (int): Minimum UMIs for nonambient barcodes.
571-
max_adj_pvalue (float): Maximum adjusted p-value to call barcode as non-ambient.
572-
verbose (bool): Enable verbose output.
467+
Parameters
468+
----------
469+
matrix
470+
Full expression matrix.
471+
orig_cell_bcs
472+
Initially-called cell barcodes.
473+
chemistry_description
474+
Chemistry description.
475+
n_partitions
476+
Number of partitions used for ambient estimation.
477+
max_mem_gb
478+
Maximum memory to use in gigabytes.
479+
min_umi_frac_of_median
480+
Minimum UMI fraction (relative to median) required for candidate barcodes.
481+
min_umis_nonambient
482+
Minimum number of UMIs to consider a barcode for non-ambient testing.
483+
verbose
484+
Whether to enable verbose output.
573485
574486
Returns
575487
-------
576-
NonAmbientBarcodeResult | None: A result object or None if no suitable barcodes found.
488+
NonAmbientBarcodeResult or None
489+
A result object containing evaluation metrics for non-ambient barcodes, or None if no suitable barcodes are found.
577490
"""
578491
# Estimate an ambient RNA profile
579492
umis_per_bc = matrix.get_counts_per_bc()

0 commit comments

Comments
 (0)