Skip to content

Commit 7a63899

Browse files
authored
Merge pull request #46 from GilbertLabUCSF/abe-dev
debug, new modules, and code improvements
2 parents 93b6196 + fbdfee2 commit 7a63899

22 files changed

Lines changed: 554 additions & 266 deletions

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
19+
python-version: ["3.11"]
2020

2121
steps:
2222
- uses: actions/checkout@v3

.github/workflows/python-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
fail-fast: false
1616
matrix:
1717
os-version: ["ubuntu-latest"]
18-
python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
18+
python-version: ["3.11"]
1919

2020
steps:
2121
- uses: actions/checkout@v3

.gitignore

8 Bytes
Binary file not shown.

CanDI/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version = "0.1.1"
1+
version = "0.2.0"

CanDI/candi/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
from . import load
12
from . import data
3+
24
data = data.Data() #Global object data instantiated on import required for access by GeneQuery Objects
3-
from . import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
45

6+
from .candi import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)

CanDI/candi/candi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Classes for handling data aggregations
22
import operator
3-
from collections import OrderedDict, MutableSequence
3+
from collections.abc import MutableSequence
44
import itertools as it
55
import pandas as pd
66
import numpy as np
77
from . import data, grabber
8-
from . import entity
8+
from ..structures import entity
99

1010
class SubsetHandler(object):
1111

CanDI/candi/data.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,27 @@ class Data(object):
1414
can be tuned to load specific datasets upon import by editing config.ini
1515
can call Data.load() to load any specific dataset
1616
"""
17-
def __init__(self, config_path='auto', verbose=False):
    """Parse the config file and initialise sources, paths and index tables.

    Parameters
    ----------
    config_path : str or path-like, optional
        ``'auto'`` (the default) selects ``data/config.ini`` inside the
        ``setup`` directory that sits beside this module's package
        directory. Any other value is treated as the path to an existing
        config file.
    verbose : bool, optional
        When true and a custom ``config_path`` is supplied, print the
        path that is being used.

    Raises
    ------
    FileNotFoundError
        If a custom ``config_path`` is supplied and does not exist.
    """
    # Always record the bundled setup directory, so the attribute exists
    # no matter which config file ends up being read.
    self._file_path = Path(os.path.dirname(os.path.realpath(__file__))).parent.absolute() / 'setup'

    if config_path == 'auto':
        config_path = self._file_path / 'data/config.ini'
    elif not os.path.exists(config_path):
        raise FileNotFoundError("Config file not found at {}".format(config_path))
    elif verbose:
        print("Using config file at {}".format(config_path))

    parser = configparser.ConfigParser()  # parses config for data sources
    parser.read(config_path)

    self._parser = parser
    self._verify_install()
    self._init_sources()
    self._init_depmap_paths()
    self._init_index_tables()
3035

3136
def _verify_install(self): #ensures data being loaded is present
37+
#TODO: add more checks for different data sources
3238
try:
3339
assert "depmap_urls" in self._parser.sections()
3440
except AssertionError:
@@ -91,6 +97,7 @@ def _handle_autoload(method, path):
9197
df = pd.read_csv(path,
9298
memory_map=True,
9399
low_memory=False,
100+
sep='\t',
94101
index_col="DepMap_ID")
95102

96103
elif method == "locations":

CanDI/pipelines/__init__.py

Whitespace-only changes.

CanDI/pipelines/coessentiality/__init__.py

Whitespace-only changes.

CanDI/pipelines/diffexp.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
import pandas as pd
3+
import anndata as ad
4+
5+
from pydeseq2.dds import DeseqDataSet
6+
from pydeseq2.default_inference import DefaultInference
7+
from pydeseq2.ds import DeseqStats
8+
from adpbulk import ADPBulk
9+
10+
11+
def pseudobulk_by_group(adt, groups, method="mean"):
    """Aggregate ``adt`` into pseudobulk samples and wrap them in an AnnData.

    Parameters
    ----------
    adt
        Data object handed to ``ADPBulk`` for aggregation.
    groups
        Forwarded to ``ADPBulk`` as its ``groupby`` argument.
    method : str, optional
        Forwarded to ``ADPBulk`` as its ``method`` argument
        (default ``"mean"``).

    Returns
    -------
    anndata.AnnData
        ``X`` is the result of ``ADPBulk.fit_transform()``; ``obs`` is the
        table from ``ADPBulk.get_meta()`` re-indexed by its ``SampleName``
        column.
    """
    aggregator = ADPBulk(adt, groupby=groups, method=method)

    # Run the aggregation first, then fetch the per-sample metadata.
    bulk_matrix = aggregator.fit_transform()
    bulk_obs = aggregator.get_meta().set_index('SampleName')

    return ad.AnnData(X=bulk_matrix, obs=bulk_obs)
27+
28+
29+
def run_deseq(adata, design, tested_level, ref_level, n_cpus=8):
    """Run a PyDESeq2 analysis on ``adata`` and return the results table.

    Parameters
    ----------
    adata
        Object providing ``to_df()`` (the count matrix) and ``obs``
        (the sample metadata).
    design
        Used both as ``design_factors`` for the dataset and as the first
        element of the contrast.
    tested_level
        Second element of the contrast.
    ref_level
        Third element of the contrast.
    n_cpus : int, optional
        Passed to ``DefaultInference`` (default 8).

    Returns
    -------
    The ``results_df`` attribute of the ``DeseqStats`` object after
    ``summary()`` has been called.
    """
    inference = DefaultInference(n_cpus=n_cpus)

    # Counts are cast with ``astype(int)`` before being handed to DeseqDataSet.
    count_table = adata.to_df().astype(int)
    dataset = DeseqDataSet(
        counts=count_table,
        metadata=adata.obs,
        design_factors=design,
        refit_cooks=True,
        inference=inference,
    )
    dataset.deseq2()

    contrast = [design, tested_level, ref_level]
    stats = DeseqStats(dataset, contrast=contrast, inference=inference)
    stats.summary()

    return stats.results_df

0 commit comments

Comments
 (0)