GilbertLabUCSF
diff --git a/‎.github/workflows/python-package.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/python-package.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/python-publish.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/python-publish.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
8 Bytes b/‎.gitignore‎
8 Bytes
diff --git a/‎CanDI/__version__.py‎
Lines changed: 1 addition & 1 deletion b/‎CanDI/__version__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CanDI/candi/__init__.py‎
Lines changed: 3 additions & 1 deletion b/‎CanDI/candi/__init__.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎CanDI/candi/candi.py‎
Lines changed: 2 additions & 2 deletions b/‎CanDI/candi/candi.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎CanDI/candi/data.py‎
Lines changed: 12 additions & 5 deletions b/‎CanDI/candi/data.py‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎CanDI/pipelines/__init__.py‎ b/‎CanDI/pipelines/__init__.py‎
diff --git a/‎CanDI/pipelines/coessentiality/__init__.py‎ b/‎CanDI/pipelines/coessentiality/__init__.py‎
diff --git a/‎CanDI/pipelines/diffexp.py‎
Lines changed: 52 additions & 0 deletions b/‎CanDI/pipelines/diffexp.py‎
Lines changed: 52 additions & 0 deletions
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
+        python-version: ["3.11"]
 
     steps:
     - uses: actions/checkout@v3
 
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         os-version: ["ubuntu-latest"]
-        python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
+        python-version: ["3.11"]
 
     steps:
     - uses: actions/checkout@v3
 
@@ -1 +1 @@
-version = "0.1.1"
+version = "0.2.0"
@@ -1,4 +1,6 @@
+from . import load
 from . import data
+
 data = data.Data() #Global object data instantiated on import required for access by GeneQuery Objects
-from . import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
 
+from .candi import (Gene, CellLine, Organelle, Cancer, CellLineCluster, GeneCluster)
@@ -1,11 +1,11 @@
 # Classes for handling data aggregations
 import operator
-from collections import OrderedDict, MutableSequence
+from collections.abc import MutableSequence
 import itertools as it
 import pandas as pd
 import numpy as np
 from . import data, grabber
-from . import entity
+from ..structures import entity
 
 class SubsetHandler(object):
 
 
@@ -14,21 +14,27 @@ class Data(object):
     can be tuned to load specific datasets upon import by editing config.ini
     can call Data.load() to load any specific dataset
     """
-    def __init__(self):
+    def __init__(self, config_path='auto', verbose=False):  # config_path: 'auto' = bundled setup/data/config.ini, otherwise path to an existing config file (FileNotFoundError if missing); verbose: print the custom config path in use
 
-        self._file_path = Path(os.path.dirname(os.path.realpath(__file__))).parent.absolute() / 'setup'
-        config_path = self._file_path / 'data/config.ini'
+        # set unconditionally (as before this change) so the attribute also exists when a custom config_path is given
+        self._file_path = Path(os.path.dirname(os.path.realpath(__file__))).parent.absolute() / 'setup'
+        if config_path == 'auto':
+            config_path = self._file_path / 'data/config.ini'
+        elif not os.path.exists(config_path):
+            raise FileNotFoundError("Config file not found at {}".format(config_path))
+        elif verbose: print("Using config file at {}".format(config_path))
 
         parser = configparser.ConfigParser() #parses config for data sources
         parser.read(config_path)
 
         self._parser = parser
-        #self._verify_install()
+        self._verify_install()
         self._init_sources()
         self._init_depmap_paths()
-        # self._init_index_tables()
+        self._init_index_tables()
 
     def _verify_install(self): #ensures data being loaded is present
+        #TODO: add more checks for different data sources
         try:
             assert "depmap_urls" in self._parser.sections()
         except AssertionError:
@@ -91,6 +97,7 @@ def _handle_autoload(method, path):
             df = pd.read_csv(path,
                              memory_map=True,
                              low_memory=False,
+                             sep='\t',
                              index_col="DepMap_ID")
 
         elif method == "locations":
 
@@ -0,0 +1,52 @@
+import numpy as np
+import pandas as pd
+import anndata as ad
+
+from pydeseq2.dds import DeseqDataSet
+from pydeseq2.default_inference import DefaultInference
+from pydeseq2.ds import DeseqStats
+from adpbulk import ADPBulk
+
+
+def pseudobulk_by_group(adt, groups, method="mean"):  # pseudobulk `adt` (presumably an AnnData -- confirm) per `groups` with ADPBulk and return the result wrapped in a new AnnData
+    # configure ADPBulk: `groups` is passed as its `groupby`, `method` as its aggregation method
+    adpb = ADPBulk(adt, groupby=groups, method=method)
+
+    # run the aggregation; presumably yields one row per pseudobulk sample -- TODO confirm orientation against the ADPBulk docs
+    pseudobulk_matrix = adpb.fit_transform()
+
+    # sample metadata table from ADPBulk; its 'SampleName' column becomes the obs index below
+    sample_meta = adpb.get_meta()
+
+    out = ad.AnnData(
+        X=pseudobulk_matrix,
+        obs=sample_meta.set_index('SampleName')  # NOTE(review): assumes these names line up with the rows of X -- confirm
+    )
+
+    return out
+
+
+def run_deseq(adata, design, tested_level, ref_level, n_cpus=8):  # fit PyDESeq2 on `adata` and return the results table for the contrast [design, tested_level, ref_level]
+
+    inference = DefaultInference(n_cpus=n_cpus)  # one inference object shared by the fit and the stats step
+    
+    dds = DeseqDataSet(
+        counts=adata.to_df().astype(int),  # astype(int) truncates fractional values -- NOTE(review): confirm X holds raw (summed) counts, not means
+        metadata=adata.obs,
+        design_factors=design,  # column(s) of adata.obs (passed as metadata above) to model; also reused as the contrast factor below
+        refit_cooks=True,
+        inference=inference,
+    )
+
+    dds.deseq2()
+
+    stat_res = DeseqStats(
+        dds, 
+        contrast=[design, tested_level, ref_level],  # NOTE(review): looks like this needs `design` to be a single column name, not a list -- confirm with callers
+        inference=inference
+    )
+    stat_res.summary()
+
+    df = stat_res.results_df  # read after summary(); presumably populated by that call -- confirm against the PyDESeq2 docs
+
+    return df