Skip to content

Commit 90b827e

Browse files
committed
reorg src dir; bring back gene clustering; add new cell clustering functions
1 parent cfff154 commit 90b827e

18 files changed

Lines changed: 3197 additions & 2427 deletions

docs/CHANGELOG.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,28 @@ All notable changes to PySingleCellNet should be listed here. The definition of
44

55
## [Unreleased]
66

7+
### Changed
8+
9+
- replaced the `cl` (classify) subpackage alias with `tl` (tools)
10+
- moved functions to more fitting files, like unused ones to utils.misc.py
11+
- do not export unused functions
12+
13+
### Added
14+
15+
- `tl.discover_cell_cliques` labels cells by (approximate) consensus cluster labels
16+
- tl.clustering_quality_vs_nn_summary computes metrics of clustering quality
17+
- tl.cluster_alot
18+
- resurrected gene clustering functions
19+
20+
### Fixed
21+
22+
- `filter_anndata_slots` to handle .uns and dependencies across slots
23+
724
### Removed
825

9-
- lots o stuff that is old or has been moved to STUF
26+
- ut.mito_rib_heme
27+
-
28+
- lots of stuff that is old or has been moved to other packages like STUF
1029

1130
## [0.1.2] - 2025-08-05
1231

src/pySingleCellNet/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
from .config import SCN_DIFFEXP_KEY
55
from . import plotting as pl
66
from . import utils as ut
7-
from . import classify as cl
7+
from . import tools as tl
88

99
# Public API
1010
__all__ = [
1111
"__version__",
1212
"pl",
1313
"ut",
14-
"cl"
14+
"tl"
1515
]
1616

1717

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1+
from .cluster import (
2+
cluster_alot,
3+
cluster_subclusters,
4+
)
5+
6+
from .cluster_eval import (
7+
clustering_quality_vs_nn_summary
8+
)
9+
10+
from .cluster_cliques import (
11+
discover_cell_cliques
12+
)
13+
114
from .classifier import (
215
classify_anndata,
316
train_classifier,
4-
train_and_assess,
517
create_classifier_report
618
)
719

@@ -19,11 +31,23 @@
1931
deg
2032
)
2133

34+
from .gene import (
35+
build_gene_knn,
36+
find_gene_modules,
37+
whoare_genes_neighbors,
38+
score_gene_modules,
39+
what_module_has_gene,
40+
)
41+
42+
2243
# API
2344
__all__ = [
45+
"cluster_alot",
46+
"cluster_subclusters",
47+
"clustering_quality_vs_nn_summary",
48+
"discover_cell_cliques",
2449
"classify_anndata",
2550
"train_classifier",
26-
"train_and_assess",
2751
"create_classifier_report",
2852
"categorize_classification",
2953
"comp_ct_thresh",
@@ -32,6 +56,11 @@
3256
"gsea_on_deg",
3357
"collect_gsea_results_from_dict",
3458
"convert_diffExp_to_dict",
35-
"deg"
59+
"deg",
60+
"build_gene_knn",
61+
"find_gene_modules",
62+
"whoare_genes_neighbors",
63+
"score_gene_modules",
64+
"what_module_has_gene",
3665
]
3766

File renamed without changes.

src/pySingleCellNet/classify/classifier.py renamed to src/pySingleCellNet/tools/classifier.py

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
import scanpy as sc
44
import anndata
55
from anndata import AnnData
6-
from typing import List
7-
from typing import Dict
6+
from typing import List, Dict
87
from sklearn.ensemble import RandomForestClassifier
98
from scipy.sparse import csr_matrix
109
import warnings
@@ -13,11 +12,52 @@
1312
#from ..utils import *
1413
#from .tsp_rf import *
1514
#from .scn_assess import create_classifier_report
16-
from ..utils import build_knn_graph, rank_genes_subsets, get_unique_colors, split_adata_indices
15+
from ..utils import build_knn_graph, get_unique_colors, split_adata_indices
1716
from sklearn.metrics import classification_report
1817
from pySingleCellNet.config import SCN_DIFFEXP_KEY, SCN_CATEGORY_COLOR_DICT
1918
import random as rand
2019

20+
def _rank_genes_subsets(
    adata,
    groupby,
    grpA,
    grpB,
    pval = 0.01,
    layer=None
):
    """
    Run a two-group differential expression test (grpA vs. grpB).

    Subsets an AnnData object to the cells belonging to either group, adds a
    new .obs column ("comparison_group") labeling each cell as 'grpA' or
    'grpB', runs sc.tl.rank_genes_groups on that column, and returns the
    result table for 'grpA'. Necessary because the scanpy reference does not
    seem to work.

    Parameters:
        adata (AnnData): The AnnData object. It is not modified; the test is
            run on a copy of the selected cells.
        groupby (str): The .obs column to group cells by.
        grpA (list): Values of .obs[groupby] used to subset cells into group A.
        grpB (list): Values of .obs[groupby] used to subset cells into group B.
        pval (float, optional): Passed to sc.get.rank_genes_groups_df as
            pval_cutoff. Defaults to 0.01.
        layer (str, optional): Layer to use for expression values.

    Returns:
        pandas.DataFrame: Output of sc.get.rank_genes_groups_df for group
        'grpA', filtered by the pval cutoff.
    """
    # Subset the data to cells in either grpA or grpB (grpA and grpB must be
    # lists so that they can be concatenated)
    subset = adata[adata.obs[groupby].isin(grpA + grpB)].copy()
    # Create a new .obs column labeling cells as 'grpA' or 'grpB'
    subset.obs["comparison_group"] = subset.obs[groupby].apply(
        lambda x: "grpA" if x in grpA else "grpB"
    )
    # Run rank_genes_groups; pts=True also records the fraction of cells
    # expressing each gene, use_raw=False keeps the test on .X / the layer
    sc.tl.rank_genes_groups(
        subset,
        groupby="comparison_group",
        layer=layer,
        pts = True,
        use_raw=False
    )
    # Only the grpA side of the comparison is reported back to the caller
    ans = sc.get.rank_genes_groups_df(subset, group='grpA', pval_cutoff=pval)
    return ans
59+
60+
2161
def _query_transform(expMat, genePairs):
2262
npairs = len(genePairs)
2363
ans = pd.DataFrame(0, index = expMat.index, columns = np.arange(npairs))
@@ -254,7 +294,7 @@ def _get_classy_genes_3(
254294
]["name"]
255295

256296
xdata = adata.copy()
257-
subsetDF = rank_genes_subsets(
297+
subsetDF = _rank_genes_subsets(
258298
xdata, groupby=groupby, grpA=[g], grpB=other_groups, layer=layer, pval = pval
259299
)
260300

@@ -294,7 +334,7 @@ def _get_classy_genes_3(
294334
all_others = [x for x in groups if x != g]
295335

296336
# E.g., run a fallback differential expression
297-
fallbackDF = rank_genes_subsets(
337+
fallbackDF = _rank_genes_subsets(
298338
xdata_fallback, groupby=groupby, grpA=[g], grpB=all_others, layer=layer, pval=1
299339
)
300340
fallback_genes = get_top_genes_from_df(
@@ -684,7 +724,7 @@ def pick_different_gene(current_gene, gene_pool):
684724
# Convert to a NumPy array of unique values
685725
return np.unique(all_pairs)
686726

687-
727+
# deprecated
688728
def train_and_assess(
689729
adata,
690730
groupby,
@@ -718,3 +758,5 @@ def train_and_assess(
718758

719759

720760

761+
762+

0 commit comments

Comments
 (0)