|
3 | 3 | import scanpy as sc |
4 | 4 | import anndata |
5 | 5 | from anndata import AnnData |
6 | | -from typing import List |
7 | | -from typing import Dict |
| 6 | +from typing import List, Dict |
8 | 7 | from sklearn.ensemble import RandomForestClassifier |
9 | 8 | from scipy.sparse import csr_matrix |
10 | 9 | import warnings |
|
13 | 12 | #from ..utils import * |
14 | 13 | #from .tsp_rf import * |
15 | 14 | #from .scn_assess import create_classifier_report |
16 | | -from ..utils import build_knn_graph, rank_genes_subsets, get_unique_colors, split_adata_indices |
| 15 | +from ..utils import build_knn_graph, get_unique_colors, split_adata_indices |
17 | 16 | from sklearn.metrics import classification_report |
18 | 17 | from pySingleCellNet.config import SCN_DIFFEXP_KEY, SCN_CATEGORY_COLOR_DICT |
19 | 18 | import random as rand |
20 | 19 |
|
| 20 | +def _rank_genes_subsets( |
| 21 | + adata, |
| 22 | + groupby, |
| 23 | + grpA, |
| 24 | + grpB, |
| 25 | + pval = 0.01, |
| 26 | + layer=None |
| 27 | +): |
| 28 | + """ |
| 29 | + Subset an AnnData object to specified groups, create a new .obs column labeling cells |
| 30 | + as group A or B, and run rank_genes_groups for differential expression analysis. This helper is necessary because the scanpy reference does not seem to work. |
| 31 | + |
| 32 | + Parameters: |
| 33 | + adata (AnnData): The AnnData object. |
| 34 | + groupby (str): The .obs column to group cells by. |
| 35 | + grpA (list): Values used to subset cells into group A. |
| 36 | + grpB (list): Values used to subset cells into group B. |
| 37 | + layer (str, optional): Layer to use for expression values. |
| 38 | + |
| 39 | + Returns: |
| 40 | + pandas.DataFrame: rank_genes_groups results for group 'grpA' (group A vs. group B), as returned by sc.get.rank_genes_groups_df with pval_cutoff=pval. |
| 41 | + """ |
| 42 | + # Subset the data to cells in either grpA or grpB |
| 43 | + subset = adata[adata.obs[groupby].isin(grpA + grpB)].copy() |
| 44 | + # Create a new .obs column labeling cells as 'grpA' or 'grpB' |
| 45 | + subset.obs["comparison_group"] = subset.obs[groupby].apply( |
| 46 | + lambda x: "grpA" if x in grpA else "grpB" |
| 47 | + ) |
| 48 | + # Run rank_genes_groups |
| 49 | + sc.tl.rank_genes_groups( |
| 50 | + subset, |
| 51 | + groupby="comparison_group", |
| 52 | + layer=layer, |
| 53 | + pts = True, |
| 54 | + use_raw=False |
| 55 | + ) |
| 56 | + # return subset |
| 57 | + ans = sc.get.rank_genes_groups_df(subset, group='grpA', pval_cutoff=pval) |
| 58 | + return ans |
| 59 | + |
| 60 | + |
21 | 61 | def _query_transform(expMat, genePairs): |
22 | 62 | npairs = len(genePairs) |
23 | 63 | ans = pd.DataFrame(0, index = expMat.index, columns = np.arange(npairs)) |
@@ -254,7 +294,7 @@ def _get_classy_genes_3( |
254 | 294 | ]["name"] |
255 | 295 |
|
256 | 296 | xdata = adata.copy() |
257 | | - subsetDF = rank_genes_subsets( |
| 297 | + subsetDF = _rank_genes_subsets( |
258 | 298 | xdata, groupby=groupby, grpA=[g], grpB=other_groups, layer=layer, pval = pval |
259 | 299 | ) |
260 | 300 |
|
@@ -294,7 +334,7 @@ def _get_classy_genes_3( |
294 | 334 | all_others = [x for x in groups if x != g] |
295 | 335 |
|
296 | 336 | # E.g., run a fallback differential expression |
297 | | - fallbackDF = rank_genes_subsets( |
| 337 | + fallbackDF = _rank_genes_subsets( |
298 | 338 | xdata_fallback, groupby=groupby, grpA=[g], grpB=all_others, layer=layer, pval=1 |
299 | 339 | ) |
300 | 340 | fallback_genes = get_top_genes_from_df( |
@@ -684,7 +724,7 @@ def pick_different_gene(current_gene, gene_pool): |
684 | 724 | # Convert to a NumPy array of unique values |
685 | 725 | return np.unique(all_pairs) |
686 | 726 |
|
687 | | - |
| 727 | +# deprecated |
688 | 728 | def train_and_assess( |
689 | 729 | adata, |
690 | 730 | groupby, |
@@ -718,3 +758,5 @@ def train_and_assess( |
718 | 758 |
|
719 | 759 |
|
720 | 760 |
|
| 761 | + |
| 762 | + |
0 commit comments