BayraktarLab
diff --git a/‎.github/ISSUE_TEMPLATE/config.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/config.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 5 additions & 5 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎.readthedocs.yml‎
Lines changed: 1 addition & 1 deletion b/‎.readthedocs.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 50 additions & 1 deletion b/‎README.md‎
Lines changed: 50 additions & 1 deletion
diff --git a/‎cell2location/__init__.py‎
Lines changed: 38 additions & 4 deletions b/‎cell2location/__init__.py‎
Lines changed: 38 additions & 4 deletions
diff --git a/‎cell2location/cell_comm/__init__.py‎ b/‎cell2location/cell_comm/__init__.py‎
diff --git a/‎cell2location/cell_comm/around_target.py‎
Lines changed: 215 additions & 0 deletions b/‎cell2location/cell_comm/around_target.py‎
Lines changed: 215 additions & 0 deletions
@@ -3,6 +3,10 @@ contact_links:
   - name: scverse Discourse
     url: https://discourse.scverse.org/c/ecosytem/cell2location/
     about: Ask usage questions, how to solve your problems using cell2location and other scvi-tools packages.
+    
+  - name: Frequently asked questions
+    url: https://github.com/BayraktarLab/cell2location/issues?q=is%3Aissue+is%3Aopen+label%3AFAQ
+    about: Before asking a question, please check this list (issues with the FAQ tag).
 
   - name: cell2location Community Discussions [deprecated]
     url: https://discourse.scverse.org/c/ecosytem/cell2location/
 
@@ -1,14 +1,14 @@
 repos:
-    - repo: https://github.com/python/black
-      rev: '22.3.0'
+    - repo: https://github.com/psf/black
+      rev: '23.3.0'
       hooks:
           - id: black
-    - repo: https://gitlab.com/pycqa/flake8
-      rev: 3.8.4
+    - repo: https://github.com/PyCQA/flake8
+      rev: 6.0.0
       hooks:
           - id: flake8
     - repo: https://github.com/pycqa/isort
-      rev: 5.7.0
+      rev: 5.12.0
       hooks:
           - id: isort
             name: isort (python)
 
@@ -13,7 +13,7 @@ conda:
   environment: docs/environment.yml
 
 python:
-  version: "3.7"
+  version: "3.8"
   install:
     - method: pip
       path: .
 
@@ -7,6 +7,7 @@
 [![Stars](https://img.shields.io/github/stars/BayraktarLab/cell2location?logo=GitHub&color=yellow)](https://github.com/BayraktarLab/cell2location/stargazers)
 ![Build Status](https://github.com/BayraktarLab/cell2location/actions/workflows/test.yml/badge.svg?event=push)
 [![Documentation Status](https://readthedocs.org/projects/cell2location/badge/?version=latest)](https://cell2location.readthedocs.io/en/stable/?badge=latest)
+[![Downloads](https://pepy.tech/badge/cell2location)](https://pepy.tech/project/cell2location)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/BayraktarLab/cell2location/blob/master/docs/notebooks/cell2location_tutorial.ipynb)
 [![Docker image on quay.io](https://img.shields.io/badge/container-quay.io/vitkl/cell2location-brightgreen "Docker image on quay.io")](https://quay.io/vitkl/cell2location) 
 
@@ -42,7 +43,7 @@ Create conda environment and install `cell2location` package
 conda create -y -n cell2loc_env python=3.9
 
 conda activate cell2loc_env
-pip install git+https://github.com/BayraktarLab/cell2location.git#egg=cell2location[tutorials]
+pip install cell2location[tutorials]
 ```
 
 Finally, to use this environment in jupyter notebook, add jupyter kernel for this environment:
@@ -223,3 +224,51 @@ adata_incl_nontissue = read_all_and_qc(
     count_file='raw_feature_bc_matrix.h5',
 )
 ```
+
+Since anndata version 0.9.0 (released on 2023-04-11), the method `AnnData.concatenate()` has been deprecated in favour of the function `anndata.concat()`, as per the official release notes ([Reference](https://anndata.readthedocs.io/en/latest/release-notes/index.html#id4)). Here is an updated version of `read_all_and_qc` that uses `anndata.concat()`:
+
+```python
+from anndata import concat
+
+def read_all_and_qc(
+    sample_annot, Sample_ID_col, file_col, sp_data_folder, 
+    count_file='filtered_feature_bc_matrix.h5',
+):
+    """
+    Read and concatenate all Visium files.
+    """
+
+    # read all samples and store them in a list
+    adatas = []
+    for i, s in enumerate(sample_annot[Sample_ID_col]):
+        adata_i = read_and_qc(s, Sample_ID_col[file_col][i], path=sp_data_folder) 
+        adatas.append(adata_i)
+    # combine individual samples
+    adata = concat(
+        adatas,
+        merge="unique",
+        uns_merge="unique",
+        label="batch",
+        keys=sample_annot[Sample_ID_col].tolist(), 
+        index_unique=None
+    )
+
+    sample_annot.index = sample_annot[Sample_ID_col]
+    for c in sample_annot.columns:
+        sample_annot.loc[:, c] = sample_annot[c].astype(str)
+    adata.obs[sample_annot.columns] = sample_annot.reindex(index=adata.obs['sample']).values
+
+    return adata
+
+adata = read_all_and_qc(
+    sample_annot=sample_annot, 
+    Sample_ID_col='Sample_ID', 
+    file_col='file', 
+    sp_data_folder=sp_data_folder, 
+    count_file='filtered_feature_bc_matrix.h5',
+)
+
+cell2location.models.Cell2location.setup_anndata(
+    adata=adata_vis,
+    batch_key="batch")
+```
@@ -1,17 +1,51 @@
+import logging
+
 from pyro.distributions import constraints
 from pyro.distributions.transforms import SoftplusTransform
+from rich.console import Console
+from rich.logging import RichHandler
 from torch.distributions import biject_to, transform_to
 
 from . import models
+from .cell_comm.around_target import compute_weighted_average_around_target
 from .run_colocation import run_colocation
 
-__all__ = [
-    "models",
-    "run_colocation",
-]
+# https://github.com/python-poetry/poetry/pull/2366#issuecomment-652418094
+# https://github.com/python-poetry/poetry/issues/144#issuecomment-623927302
+try:
+    import importlib.metadata as importlib_metadata
+except ModuleNotFoundError:
+    import importlib_metadata
 
 
+# define custom distribution constraints
 @biject_to.register(constraints.positive)
 @transform_to.register(constraints.positive)
 def _transform_to_positive(constraint):
     # Registered in both the `biject_to` and `transform_to` registries, so that
     # `constraints.positive` is mapped to a softplus transform when looked up there.
     # The `constraint` argument is required by the registry signature but is not used.
     return SoftplusTransform()
+
+
+package_name = "cell2location"
+# look up the version of the installed distribution from its packaging metadata
+__version__ = importlib_metadata.version(package_name)
+
+logger = logging.getLogger(__name__)
+# set the logging level
+logger.setLevel(logging.INFO)
+
+# nice logging outputs
+console = Console(force_terminal=True)
+# if rich flagged the console as running in Jupyter, clear that flag
+if console.is_jupyter is True:
+    console.is_jupyter = False
+ch = RichHandler(show_path=False, console=console, show_time=False)
+# prefix every log message with the package name
+formatter = logging.Formatter("cell2location: %(message)s")
+ch.setFormatter(formatter)
+logger.addHandler(ch)
+
+# this prevents double outputs
+logger.propagate = False
+
+__all__ = [
+    "models",
+    "run_colocation",
+    "compute_weighted_average_around_target",
+]
@@ -0,0 +1,215 @@
+import numpy as np
+import pandas as pd
+from scipy.sparse import csr_matrix
+
+
+def compute_weighted_average_around_target(
+    adata,
+    target_cell_type_quantile: float = 0.995,
+    source_cell_type_quantile: float = 0.95,
+    normalisation_quantile: float = 0.95,
+    distance_bin: list = None,
+    sample_key: str = "sample",
+    genes_to_use_as_source: list = None,
+    gene_symbols: str = None,
+    obsm_spatial_key: str = "X_spatial",
+    normalisation_key: str = None,
+    layer: str = None,
+    cell_abundance_key: str = "cell_abundance_w_sf",
+    cell_abundance_quantile_key: str = "q05",
+):
+    """
+    Compute average abundance of source cell types or genes around each target cell type.
+
+    Parameters
+    ----------
+    adata
+        AnnData object of spatial dataset with cell2location results
+    target_cell_type_quantile
+        Quantile of target cell type abundance to use for defining
+        a set locations with highest abundance of target cell types.
+        Cell abundance below this thereshold is set to 0.
+    source_cell_type_quantile
+        Quantile of source cell type abundance to use for defining
+        a set locations with highest abundance of source cell types.
+        Cell abundance or RNA abundance for genes below this thereshold is set to 0.
+    normalisation_quantile
+        Quantile of source cell type or source RNA abundance for genes to use as normalising constant.
+        This step can be seen as scaling that puts all source cell types or genes into the same scale.
+    distance_bin
+        If using concentric bins list with two elements specifying inner and outer edge of the bin.
+        Distances specified in coordinates of `obsm_spatial_key`.
+    sample_key
+        `adata.obs` column key specifying distinct sections across
+        which distance bin computation is invalid.
+    genes_to_use_as_source
+        To request RNA abundance of genes around target cells provide a list of
+        var_names or gene SYMBOLs.
+    gene_symbols
+        `adata.var` column key containing gene symbols
+    obsm_spatial_key
+        `adata.obsm` key containing spatial coordinates (can be 2D or 3D or N-D).
+    normalisation_key
+        RNA abundance must be normalised using y_s technical effect term
+        estimated by cell2location. Provide `adata.obsm` key containing this normalisation term.
+    layer
+        adata.layers to use for getting RNA abundance. Default: `adata.X`
+    cell_abundance_key
+        which cell2location variable to use as cell abundance
+    cell_abundance_quantile_key
+        which quantile of cell abundance to use
+
+    Returns
+    -------
+    pd.DataFrame of average abundance of source cell types or RNA abundance of requested genes
+    around target cell types.
+
+    """
+    # save initial names
+    if genes_to_use_as_source is None:
+        source_names = adata.uns["mod"]["factor_names"]
+    else:
+        source_names = genes_to_use_as_source
+
+    cell_abundance_key_ = cell_abundance_quantile_key + cell_abundance_key
+    cell_abundance_key = cell_abundance_quantile_key + "_" + cell_abundance_key
+
+    # create result data frame to be completed
+    weighted_avg = pd.DataFrame(
+        index=[f"target {ct}" for ct in adata.uns["mod"]["factor_names"]],
+        columns=source_names,
+    )
+    if genes_to_use_as_source is None:
+        # pick locations where source cell type abundance is above source_cell_type_quantile
+        source_cell_type_filter = adata.obsm[cell_abundance_key] > adata.obsm[cell_abundance_key].quantile(
+            source_cell_type_quantile
+        )
+        # zero-out source cell abundance below selected quantile
+        source_cell_type_data = adata.obsm[cell_abundance_key] * source_cell_type_filter
+        # get normalising quantile values
+        source_normalisation_quantile = adata.obsm[cell_abundance_key].quantile(normalisation_quantile, axis=0)
+        # compute average abundance above this quantile
+        source_normalisation_quantile = np.average(
+            adata.obsm[cell_abundance_key],
+            weights=adata.obsm[cell_abundance_key] > source_normalisation_quantile,
+            axis=0,
+        )
+    else:
+        # if using gene symbols get var names:
+        if gene_symbols is not None:
+            genes_to_use_as_source = adata.var_names[adata.var[gene_symbols].isin(genes_to_use_as_source)]
+        # get RNA abundance data
+        if layer is None:
+            source_cell_type_data = adata[:, genes_to_use_as_source].X.toarray()
+        else:
+            source_cell_type_data = adata[:, genes_to_use_as_source].layers[layer].toarray()
+        # apply technical across-location normalisation
+        if normalisation_key:
+            source_cell_type_data = source_cell_type_data / adata.obsm[normalisation_key]
+        # pick locations where source cell type abundance is above source_cell_type_quantile
+        source_cell_type_filter = source_cell_type_data > np.quantile(
+            source_cell_type_data, q=source_cell_type_quantile, axis=0
+        )
+        # zero-out source cell abundance below selected quantile
+        source_cell_type_data = source_cell_type_data * source_cell_type_filter
+        # create a dataframe with initial source RNA abundance
+        source_cell_type_data = pd.DataFrame(
+            source_cell_type_data,
+            index=adata.obs_names,
+            columns=source_names,
+        )
+        # get normalising quantile values
+        source_normalisation_quantile = source_cell_type_data.quantile(normalisation_quantile, axis=0)
+        # compute average abundance above this quantile
+        source_normalisation_quantile = np.average(
+            source_cell_type_data,
+            weights=source_cell_type_data > source_normalisation_quantile,
+            axis=0,
+        )
+
+    # [optional] compute average source_cell_type_data across closes locations (concentric circles)
+    if distance_bin is not None:
+        # iterate over samples of connected location from the same sections
+        # or independent chunks registered 3D data
+        for s in adata.obs[sample_key].unique():
+            # get sample observations
+            sample_ind = adata.obs[sample_key].isin([s])
+
+            # compute distances bewteen locations
+            from scipy.spatial.distance import cdist
+
+            distances = cdist(adata[sample_ind, :].obsm[obsm_spatial_key], adata[sample_ind, :].obsm[obsm_spatial_key])
+            # select locations in distance bin
+            binary_distance = csr_matrix((distances > distance_bin[0]) & (distances <= distance_bin[1]))
+            # compute average abundance across locations within a bin
+            data_ = (
+                (binary_distance @ csr_matrix(source_cell_type_data.loc[sample_ind, :].values))
+                .multiply(1 / binary_distance.sum(1))
+                .toarray()
+            )
+            # to account for locations with no neighbours within a bin (sum == 0)
+            data_[np.isnan(data_)] = 0
+            # complete the average for a given sample
+            source_cell_type_data.loc[sample_ind, :] = data_
+    # normalise data by normalising quantile (global value across distance bins)
+    source_cell_type_data = source_cell_type_data / source_normalisation_quantile
+    # account for cases of undetected signal
+    source_cell_type_data[source_cell_type_data.isna()] = 0
+
+    # compute average for each target cell type
+    for ct in adata.uns["mod"]["factor_names"]:
+        # find locations containing high abundance of target cell type
+        target_cell_type_filter = adata.obsm[cell_abundance_key][f"{cell_abundance_key_}_{ct}"] > adata.obsm[
+            cell_abundance_key
+        ][f"{cell_abundance_key_}_{ct}"].quantile(target_cell_type_quantile)
+        # use thresholded abundance of target cell type as a weight
+        weights = adata.obsm[cell_abundance_key][f"{cell_abundance_key_}_{ct}"] * target_cell_type_filter
+        # normalise for target cell type abundance
+        target_quantile = adata.obsm[cell_abundance_key][f"{cell_abundance_key_}_{ct}"].quantile(normalisation_quantile)
+        target_quantile = np.average(
+            adata.obsm[cell_abundance_key][f"{cell_abundance_key_}_{ct}"].values,
+            weights=adata.obsm[cell_abundance_key][f"{cell_abundance_key_}_{ct}"].values > target_quantile,
+        ).flatten()
+        assert target_quantile.shape == (1,), target_quantile.shape
+        weights = weights / target_quantile
+        # compute the final weighted average
+        weighted_avg_ = np.average(
+            source_cell_type_data,
+            weights=weights,
+            axis=0,
+        )
+        # weighted_avg_[weighted_avg_.isna()] = 0
+
+        weighted_avg_ = pd.Series(weighted_avg_, name=ct, index=source_names)
+
+        # hack to make self interactions less apparent
+        weighted_avg_[ct] = weighted_avg_[~weighted_avg_.index.isin([ct])].max() + 0.02
+        # complete the results dataframe
+        weighted_avg.loc[f"target {ct}", :] = weighted_avg_
+
+    return weighted_avg.astype("float32")
+
+
+def melt_data_frame_per_signal(weighted_avg_dict, source_var, distance_bins):
+    """
+    Reshape the values of one source variable across distance bins into long format.
+
+    Parameters
+    ----------
+    weighted_avg_dict
+        Mapping from `str(distance_bin)` to a data frame with targets in rows and
+        source variables in columns (presumably the output of
+        `compute_weighted_average_around_target` for that bin - confirm with caller).
+    source_var
+        Column (source variable) to extract from every data frame.
+    distance_bins
+        Iterable of distance bins; each bin is converted with `str()` to look up
+        `weighted_avg_dict` and averaged with `np.mean()` to label the bin.
+
+    Returns
+    -------
+    pd.DataFrame in long format with columns "Distance bin" (mean of the bin edges),
+    "Abundance", "Target" (copied from the index) and "Signal" (`source_var`).
+
+    """
+    # rows: distance bins (labelled by the mean of the bin), columns: targets; then transpose
+    # so that targets are in rows (target names are taken from the first bin's data frame)
+    source_var_1 = pd.DataFrame(
+        np.array([weighted_avg_dict[str(distance_bin)][source_var].values for distance_bin in distance_bins]),
+        columns=weighted_avg_dict[str(distance_bins[0])].index,
+        index=[np.mean(distance_bin) for distance_bin in distance_bins],
+    ).T
+
+    # wide -> long; `ignore_index=False` keeps the target names as the index
+    source_var_1 = source_var_1.melt(
+        value_name="Abundance",
+        var_name="Distance bin",
+        ignore_index=False,
+    )
+    source_var_1["Target"] = source_var_1.index
+    source_var_1["Signal"] = source_var
+    return source_var_1
+
+
+def melt_signal_target_data_frame(weighted_avg_dict, distance_bins):
+    """
+    Build one long-format data frame covering all source variables and distance bins.
+
+    Calls `melt_data_frame_per_signal` for every column of the data frame stored for
+    the first distance bin and concatenates the results row-wise.
+
+    Parameters
+    ----------
+    weighted_avg_dict
+        Mapping from `str(distance_bin)` to a data frame with source variables in columns.
+    distance_bins
+        Sequence of distance bins used as keys (via `str()`) of `weighted_avg_dict`.
+
+    Returns
+    -------
+    pd.DataFrame with the concatenated output of `melt_data_frame_per_signal`.
+
+    """
+    # source variables are taken from the first distance bin's data frame
+    source_vars = weighted_avg_dict[str(distance_bins[0])].columns
+
+    return pd.concat(
+        [melt_data_frame_per_signal(weighted_avg_dict, source_var, distance_bins) for source_var in source_vars]
+    )