Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: gemma
dependencies:
- r=4.4
- r-rserve
- r-rjava
- repeatmasker
- blast
- sra-tools
- hdf5=1.12
- python
- pip
- pip:
  - anndata
  - scipy
  - numpy
32 changes: 32 additions & 0 deletions gemma-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,38 @@
<version>${hdf5.version}</version>
</dependency>

<!-- rJava -->
<dependency>
<groupId>org.rosuda.REngine</groupId>
<artifactId>REngine</artifactId>
<version>${rJava.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.rosuda.REngine</groupId>
<artifactId>JRI</artifactId>
<version>${rJava.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.rosuda.REngine</groupId>
<artifactId>JRIEngine</artifactId>
<version>${rJava.version}</version>
<optional>true</optional>
</dependency>

<dependency>
<groupId>org.rosuda.REngine</groupId>
<artifactId>Rserve</artifactId>
<version>${rserve.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>com.kohlschutter.junixsocket</groupId>
<artifactId>junixsocket-core</artifactId>
<version>2.10.1</version>
</dependency>

<!-- Gemma Slack Bot -->
<dependency>
<groupId>com.slack.api</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ public class ExpressionDataFileUtils {
private static final String SC_DATA_SUFFIX = ".scdata";
public static final String MEX_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".mex";
public static final String TABULAR_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".tsv.gz";
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".cellbrowser.tsv.gz";
private static final String SC_METADATA_SUFFIX = ".scmetadata";
public static final String TABULAR_SC_METADATA_SUFFIX = SC_METADATA_SUFFIX + ".tsv.gz";
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_METADATA_SUFFIX + ".cellbrowser.tsv.gz";

// for single-cell metadata
private static final String SC_METADATA_SUFFIX = ".scmetadata";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;

/**
* Package-private strategy for correcting batch effects in a single-cell expression data matrix.
* <p>
* Implementations in this package are {@link ComBat} and {@link Harmony}.
*/
interface BatchCorrection {

/**
* Perform a batch correction on the provided data matrix using the specified design matrix.
* @param dataMatrix             the single-cell expression data to correct
* @param singleCellDesignMatrix the design matrix describing the samples of {@code dataMatrix}
* @return the matrix resulting from the correction; NOTE(review): whether this is always a new instance is not
* specified here, {@link ComBat} currently hands back {@code dataMatrix} itself
*/
SingleCellExpressionDataMatrix<?> perform( SingleCellExpressionDataMatrix<?> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;

/**
 * Placeholder for a ComBat-based batch correction.
 * <p>
 * No correction is applied yet: the input matrix is handed back untouched and the design matrix is ignored.
 */
class ComBat implements BatchCorrection {

    /**
     * Return the data matrix as-is.
     * @param dataMatrix             the single-cell expression data that would be corrected
     * @param singleCellDesignMatrix the design matrix, currently unused
     * @return the very same {@code dataMatrix} instance that was passed in
     */
    @Override
    public SingleCellExpressionDataMatrix<?> perform( SingleCellExpressionDataMatrix<?> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) {
        // TODO: reuse ComBat implementation
        final SingleCellExpressionDataMatrix<?> uncorrected = dataMatrix;
        return uncorrected;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPNull;
import org.springframework.util.Assert;
import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
import ubic.gemma.core.util.r.RClient;
import ubic.gemma.core.util.r.REngineFactory;

/**
 * Perform batch correction using the <a href="https://github.com/immunogenomics/harmony">Harmony algorithm</a>.
 * <p>
 * Requirements: an R engine with the Harmony R package installed.
 * <p>
 * This implementation is incomplete: the matrices are not yet transferred to the R session and the result of the
 * R call cannot be converted back, so {@link #perform} cannot currently produce a corrected matrix.
 * @author poirigui
 */
class Harmony implements BatchCorrection {

    private final REngineFactory rEngineFactory;

    /**
     * @param rEngineFactory factory handed to {@link RClient} to open an R session for each correction
     */
    Harmony( REngineFactory rEngineFactory ) {
        this.rEngineFactory = rEngineFactory;
    }

    /**
     * Run Harmony on the given data matrix.
     * @param dataMatrix             the single-cell expression data to correct
     * @param singleCellDesignMatrix the design matrix; its assays must be equal to those of {@code dataMatrix}
     * @return the corrected matrix
     * @throws IllegalArgumentException      if the assays of the two matrices differ
     * @throws UnsupportedOperationException until the conversion of the R result is implemented
     */
    @Override
    public SingleCellExpressionDataMatrix<?> perform( SingleCellExpressionDataMatrix<?> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) {
        Assert.isTrue( dataMatrix.getBioAssays().equals( singleCellDesignMatrix.getBioAssays() ),
                "Assays in the data matrix must match exactly those of the design matrix." );
        // the R client is closed when leaving the block, whether the evaluation succeeds or not
        try ( RClient rEngine = new RClient( rEngineFactory ) ) {
            // TODO: serialize both matrices to disk and call Harmony R package
            rEngine.parseAndEval( "library(harmony);" );
            // rEngine.assignDataFrame( "dataMatrix", toDataFrame( dataMatrix ) );
            // rEngine.assignDataFrame( "designMatrix", toDataFrame( singleCellDesignMatrix ) );
            //language=R
            return fromDataFrame( rEngine.parseAndEval( "harmony::HarmonyMatrix(dataMatrix, designMatrix);" ) );
        }
    }

    /**
     * Convert a data matrix to an R expression.
     * <p>
     * Not implemented yet: an R {@code NULL} is returned regardless of the input.
     */
    private REXP toDataFrame( SingleCellExpressionDataMatrix<?> dataMatrix ) {
        // Convert the SingleCellExpressionDataMatrix to an REXP object
        return new REXPNull();
    }

    /**
     * Convert a design matrix to an R expression.
     * <p>
     * Not implemented yet: an R {@code NULL} is returned regardless of the input.
     */
    private REXP toDataFrame( SingleCellDesignMatrix singleCellDesignMatrix ) {
        return new REXPNull();
    }

    /**
     * Convert the R result of Harmony back to a data matrix.
     * <p>
     * Not implemented yet. This fails explicitly rather than returning {@code null}, because the caller of
     * {@link #perform} dereferences the result and would otherwise fail with an uninformative
     * {@link NullPointerException}.
     * @throws UnsupportedOperationException always
     */
    private SingleCellExpressionDataMatrix<?> fromDataFrame( REXP rexp ) {
        throw new UnsupportedOperationException( "Converting the result of Harmony back to a single-cell expression data matrix is not implemented yet." );
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

/**
* Batch correction methods that can be requested from {@link SingleCellBatchCorrectionService}.
*/
public enum SingleCellBatchCorrectionMethod {
/**
* Correction with the Harmony R package; requires an R engine.
*/
HARMONY,
/**
* Correction with ComBat.
*/
COMBAT
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

/**
* Service for correcting batch effects in the single-cell data of an experiment.
* @author poirigui
*/
public interface SingleCellBatchCorrectionService {

/**
* Perform batch correction and save the results as a new {@link QuantitationType QuantitationType} sharing the same
* {@link ubic.gemma.model.expression.bioAssayData.SingleCellDimension}.
* @param ee     the experiment whose single-cell data is corrected
* @param qt     the quantitation type identifying the vectors of {@code ee} to correct
* @param method the batch correction method to apply
* @return the batch-corrected {@link QuantitationType}
*/
QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package ubic.gemma.core.analysis.singleCell.batcheffect;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.Assert;
import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrixUtils;
import ubic.gemma.core.util.r.REngineFactory;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Spring-managed implementation of {@link SingleCellBatchCorrectionService}.
 * <p>
 * Loads the vectors of a quantitation type, builds the data and design matrices, delegates the correction to the
 * {@link BatchCorrection} matching the requested method and stores the resulting vectors.
 */
@Service
public class SingleCellBatchCorrectionServiceImpl implements SingleCellBatchCorrectionService {

    @Autowired
    private SingleCellExpressionExperimentService singleCellExpressionExperimentService;

    @Autowired
    private REngineFactory rEngineFactory;

    /**
     * {@inheritDoc}
     * @throws IllegalArgumentException if the experiment has no experimental design, if the quantitation type has
     *                                  no single-cell dimension or if the method is not supported
     */
    @Override
    @Transactional
    public QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ) {
        Assert.notNull( ee.getExperimentalDesign(), ee + " does not have experimental design. It is required to perform batch correction." );
        BatchCorrection correction = createBatchCorrection( method );

        SingleCellDimension scd = singleCellExpressionExperimentService.getSingleCellDimension( ee, qt );
        if ( scd == null ) {
            throw new IllegalArgumentException( qt + " does not have single cell dimension." );
        }

        // inputs of the correction: expression data and the corresponding design
        SingleCellExpressionDataMatrix<?> uncorrected = loadDataMatrix( ee, qt );
        Collection<CellLevelCharacteristics> characteristics = collectCellLevelCharacteristics( scd );
        SingleCellDesignMatrix design = SingleCellDesignMatrix.from( scd, ee.getExperimentalDesign(), characteristics );

        SingleCellExpressionDataMatrix<?> corrected = correction.perform( uncorrected, design );

        // persist the outcome under the quantitation type carried by the corrected matrix
        QuantitationType resultQt = corrected.getQuantitationType();
        List<SingleCellExpressionDataVector> resultVectors = SingleCellExpressionDataMatrixUtils.toVectors( corrected );
        String details = "Batch correction using " + method + " for " + ee.getShortName() + " on quantitation type " + qt.getName();
        singleCellExpressionExperimentService.addSingleCellDataVectors( ee, resultQt, resultVectors, details );
        return resultQt;
    }

    /**
     * Load the vectors of the given quantitation type and assemble them into a data matrix.
     */
    private SingleCellExpressionDataMatrix<?> loadDataMatrix( ExpressionExperiment ee, QuantitationType qt ) {
        List<SingleCellExpressionDataVector> vectors = new ArrayList<>( singleCellExpressionExperimentService.getSingleCellDataVectors( ee, qt ) );
        return SingleCellExpressionDataMatrix.getMatrix( vectors );
    }

    /**
     * Gather the cell type assignments followed by the other cell-level characteristics of a dimension.
     */
    private Collection<CellLevelCharacteristics> collectCellLevelCharacteristics( SingleCellDimension scd ) {
        Collection<CellLevelCharacteristics> collected = new ArrayList<>();
        // TODO: select relevant CTAs and CLCs
        collected.addAll( scd.getCellTypeAssignments() );
        collected.addAll( scd.getCellLevelCharacteristics() );
        return collected;
    }

    /**
     * Instantiate the batch correction strategy for the requested method.
     * @throws IllegalArgumentException if the method has no corresponding strategy
     */
    private BatchCorrection createBatchCorrection( SingleCellBatchCorrectionMethod method ) {
        switch ( method ) {
            case COMBAT:
                return new ComBat();
            case HARMONY:
                return new Harmony( rEngineFactory );
            default:
                throw new IllegalArgumentException( "Unknown batch correction method: " + method );
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
* Correct batch effects in single-cell RNA-seq data.
* @author poirigui
*/
package ubic.gemma.core.analysis.singleCell.batcheffect;
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package ubic.gemma.core.datastructure.matrix;

import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.FactorValue;

import javax.annotation.Nullable;
import java.util.List;

/**
* Interface representing a design matrix for bulk data.
* <p>
* Adds lookups of rows by {@link BioAssay} and {@link BioMaterial} to the index-based access of {@link DesignMatrix}.
* @author poirigui
*/
public interface BulkDesignMatrix extends DesignMatrix {

/**
* Get the row of factor values corresponding to a specific bioassay.
* @return the row, or null if the bioassay is not present in the matrix.
*/
@Nullable
List<FactorValue> getRow( BioAssay bioAssay );

/**
* Get the row of factor values corresponding to a specific biomaterial.
* @return the row, or null; NOTE(review): presumably null when the biomaterial is not present in the matrix, as
* for {@link #getRow(BioAssay)} — confirm
*/
@Nullable
List<FactorValue> getRow( BioMaterial bioMaterial );

/**
* Get the index of the row corresponding to a specific bioassay.
* @return the row index; NOTE(review): the value returned for an absent bioassay is not specified here
* (presumably -1) — confirm with implementations
*/
int getRowIndex( BioAssay bioAssay );

/**
* Get the index of the row corresponding to a specific biomaterial.
* @return the row index; NOTE(review): the value returned for an absent biomaterial is not specified here
* (presumably -1) — confirm with implementations
*/
int getRowIndex( BioMaterial bioMaterial );
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
* Interface for bulk expression data matrices.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package ubic.gemma.core.datastructure.matrix;

import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.FactorValue;

import javax.annotation.Nullable;
import java.util.List;

/**
* Base interface for design matrices used in experiments.
* <p>
* Columns correspond to {@link ExperimentalFactor}s and rows to samples; cells hold {@link FactorValue}s.
* @author poirigui
*/
public interface DesignMatrix {

/**
* Return the number of columns (i.e. factors) in the design matrix.
*/
int columns();

/**
* Obtain a list of factors in the design matrix.
*/
List<ExperimentalFactor> getFactors();

/**
* Obtain the factor for a given column.
* @throws IndexOutOfBoundsException if the column index is out of bounds.
*/
ExperimentalFactor getFactorForColumn( int column );

/**
* Obtain the factor values for a given column.
* @throws IndexOutOfBoundsException if the column index is out of bounds.
*/
List<FactorValue> getColumn( int column );

/**
* Obtain the factor values for a given experimental factor.
* @return the factor values, or null if the factor is not present in the design matrix.
*/
@Nullable
List<FactorValue> getColumn( ExperimentalFactor factor );

/**
* Obtain the index of a given factor in the design matrix.
* @return the column index; NOTE(review): the value returned for a factor that is not in the matrix is not
* specified here (presumably -1) — confirm with implementations
*/
int getColumnIndex( ExperimentalFactor factor );

/**
* Return the number of rows (i.e. samples) in the design matrix.
*/
int rows();

/**
* Obtain the assays of the design matrix.
* <p>
* NOTE(review): presumably one assay per row, in row order — confirm with implementations.
*/
List<BioAssay> getBioAssays();

/**
* Obtain the factor values for a given row (sample).
* @throws IndexOutOfBoundsException if the row index is out of bounds.
*/
List<FactorValue> getRow( int row );

/**
* Obtain the assay for a given row.
* @throws IndexOutOfBoundsException if the row index is out of bounds.
*/
BioAssay getBioAssayForRow( int row );

/**
* Obtain the sample for a given row.
* @throws IndexOutOfBoundsException if the row index is out of bounds.
*/
BioMaterial getBioMaterialForRow( int row );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package ubic.gemma.core.datastructure.matrix;

import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;

import java.util.Collection;

/**
* Design matrix for multi-assay bulk expression data.
* <p>
* A multi-assay bulk design matrix may have more than one {@link BioAssay} per {@link BioMaterial}.
* @author poirigui
* @see MultiAssayBulkExpressionDataMatrix
*/
public interface MultiAssayBulkDesignMatrix extends BulkDesignMatrix {

/**
* Obtain all the bioassays for a given row in the design matrix.
* @param row the index of the row
* @return the bioassays of that row; NOTE(review): behaviour for an out-of-bounds index is not specified here,
* presumably an {@link IndexOutOfBoundsException} as for the other row accessors — confirm
*/
Collection<BioAssay> getBioAssaysForRow( int row );
}
Loading