Skip to content

Commit de2236d

Browse files
committed
Lazy load imports besides numpy/pandas
1 parent 277e1d3 commit de2236d

9 files changed

Lines changed: 23 additions & 16 deletions

File tree

maayanlab_bioinformatics/clustering/silhouette_analysis.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import pandas as pd
2-
from sklearn.cluster import KMeans
3-
from sklearn.metrics import silhouette_score
42

53
def silhouette_analysis(mat: pd.DataFrame, min_clusters=2, max_clusters=25, metric='cosine', random_state=None, **kwargs):
64
''' Compute KMeans repeatedly on the matrix with different cluster
75
values between min_clusters and max_clusters, compute the silhouette_score,
86
and return the best kmeans model/predictions.
97
'''
8+
from sklearn.cluster import KMeans
9+
from sklearn.metrics import silhouette_score
1010
silhouette_scores = {}
1111
best = None
1212
for n in range(min_clusters, max_clusters+1):

maayanlab_bioinformatics/dge/characteristic_direction.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
import numpy as np
22
import pandas as pd
3-
from scipy.stats import chi2
4-
from scipy.stats.mstats import zscore
5-
from sklearn.decomposition import PCA
63

74
# TODO: revamp _chdir
85
def _chdir(data, sampleclass, genes, gamma=1., sort=True, calculate_sig=False, nnull=10, sig_only=False, norm_vector=True):
@@ -25,6 +22,9 @@ def _chdir(data, sampleclass, genes, gamma=1., sort=True, calculate_sig=False, n
2522
A list of tuples sorted by the absolute value in descending order characteristic directions of genes.
2623
If calculate_sig is set to True, each tuple contains a third element which is the ratio of characteristic directions to null ChDir
2724
"""
25+
from scipy.stats import chi2
26+
from scipy.stats.mstats import zscore
27+
from sklearn.decomposition import PCA
2828

2929
## check input
3030
data.astype(float)

maayanlab_bioinformatics/dge/deseq2.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
import os
22
import contextlib
33
import pandas as pd
4-
from pydeseq2.dds import DeseqDataSet
5-
from pydeseq2.default_inference import DefaultInference
6-
from pydeseq2.ds import DeseqStats
74

85
class _DevNull:
96
def write(self, *args, **kwargs): pass
@@ -29,6 +26,9 @@ def deseq2_differential_expression(
2926
:param stdout: (writeable stream) direct deseq's output, e.g. sys.stdout (default: suppress)
3027
:return: A data frame with the results
3128
'''
29+
from pydeseq2.dds import DeseqDataSet
30+
from pydeseq2.default_inference import DefaultInference
31+
from pydeseq2.ds import DeseqStats
3232
# Check if controls_mat and cases_mat have the same number of rows
3333
if controls_mat.shape[0] != cases_mat.shape[0]:
3434
raise ValueError("controls_mat and cases_mat must have the same number of rows.")

maayanlab_bioinformatics/dge/ttest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import pandas as pd
2-
import scipy.stats
32
from maayanlab_bioinformatics.normalization import log2_normalize
43

54
def ttest_differential_expression(controls_mat: pd.DataFrame, cases_mat: pd.DataFrame, equal_var=False, alternative='two-sided', log2norm=True):
@@ -13,6 +12,7 @@ def ttest_differential_expression(controls_mat: pd.DataFrame, cases_mat: pd.Data
1312
:param log2norm: (bool) Apply log2norm, typically keep with raw counts but disable if you have normalized data (default: True)
1413
:return: A data frame with the results
1514
'''
15+
import scipy.stats
1616
assert (controls_mat.index == cases_mat.index).all(), 'Index between controls and cases must be the same'
1717
if log2norm:
1818
cases_mat = log2_normalize(cases_mat)

maayanlab_bioinformatics/enrichment/crisp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# import fisher
2-
import scipy.stats
32
from typing import Union, Dict, Set, Iterable, Tuple, Hashable, Any, TypeVar, Optional
43
from dataclasses import dataclass
54

@@ -68,6 +67,7 @@ def fisher_overlap(
6867
''' Given input and background set, compute the overlap, fisher significance, and odds ratio.
6968
In the case of no overlap, will return None.
7069
'''
70+
import scipy.stats
7171
overlap = input_signature & background_signature
7272
n_overlap = len(overlap)
7373
n_input_signature = len(input_signature)
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
1-
from qnorm import quantile_normalize
1+
import numpy as np
2+
import pandas as pd
3+
import typing as t
4+
5+
def quantile_normalize(data: pd.DataFrame, axis: int = 1, target: t.Union[None, np.ndarray] = None, ncpus: int = 1):
6+
from qnorm import quantile_normalize as qnorm
7+
return qnorm(data, axis=axis, target=target, ncpus=ncpus)

maayanlab_bioinformatics/normalization/zscore.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
import numpy as np
33
import pandas as pd
44
from functools import singledispatch
5-
from scipy.stats import zscore
6-
75

86
@singledispatch
97
def zscore_normalize(mat, ddof=0):
@@ -15,6 +13,7 @@ def zscore_normalize(mat, ddof=0):
1513

1614
@zscore_normalize.register
1715
def zscore_normalize_np(mat: np.ndarray, ddof=0):
16+
from scipy.stats import zscore
1817
return zscore(mat, axis=0, ddof=ddof)
1918

2019
@zscore_normalize.register

maayanlab_bioinformatics/parse/suerat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import os
22
import pandas as pd
3-
import scipy.sparse as sp_sparse
43
from maayanlab_bioinformatics.utils import merge
54

65
def suerat_load(base_dir):
76
''' Files prepared for suerat are quite common, this function will load them
87
given the directory that contains `barcodes.tsv.gz`, `features.tsv.gz`, and `matrix.tsv.gz`.
98
'''
9+
import scipy.sparse as sp_sparse
1010
df_barcodes = pd.read_csv(
1111
os.path.join(base_dir, 'barcodes.tsv.gz'),
1212
index_col=0,

maayanlab_bioinformatics/utils/sparse.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
import scipy.sparse as sp_sparse
2+
import pandas as pd
33

44
def sp_hdf_dump(hdf, sdf, **kwargs):
55
''' Dump Sparse Pandas DataFrame to h5py object.
@@ -40,7 +40,7 @@ def sp_hdf_load(hdf):
4040
f.close()
4141
```
4242
'''
43-
import pandas as pd
43+
import scipy.sparse as sp_sparse
4444
return pd.DataFrame.sparse.from_spmatrix(
4545
sp_sparse.coo_array((hdf['data'], (hdf['row'], hdf['col'])), shape=hdf.attrs['shape']),
4646
index=pd.Series(hdf['index']).str.decode('utf8'),
@@ -53,6 +53,7 @@ def sp_std(X_ij, ddof=1):
5353
5454
\sigma_j = \sqrt{\frac{\sum(x_{ij} - \mu_j)^2}{N_j - ddof}}
5555
'''
56+
import scipy.sparse as sp_sparse
5657
N_j = X_ij.shape[-1]
5758
mu_j = X_ij.sum(axis=0) / N_j
5859
num_j = ((X_ij - mu_j)**2).sum(axis=0)
@@ -65,6 +66,7 @@ def sp_std(X_ij, ddof=1):
6566
def sp_nanpercentile(sp, q, axis=None, method='linear'):
6667
''' nanpercentile for a sparse matrix, basically we use np.percentile on the underlying data.
6768
'''
69+
import scipy.sparse as sp_sparse
6870
coo = sp_sparse.coo_array(sp)
6971
if axis is None:
7072
return np.percentile(coo.data, q, method=method)

0 commit comments

Comments
 (0)