diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000..c3271ee06c --- /dev/null +++ b/environment.yml @@ -0,0 +1,15 @@ +name: gemma +dependencies: + - r=4.4 + - r-rserve + - r-rjava + - repeatmasker + - blast + - sra-tools + - hdf5=1.12 + - python + - pip + - pip: + - anndata + - scipy + - numpy diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index d9a23e313d..7a249f11bc 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -251,6 +251,38 @@ ${hdf5.version} + + + org.rosuda.REngine + REngine + ${rJava.version} + true + + + org.rosuda.REngine + JRI + ${rJava.version} + true + + + org.rosuda.REngine + JRIEngine + ${rJava.version} + true + + + + org.rosuda.REngine + Rserve + ${rserve.version} + true + + + com.kohlschutter.junixsocket + junixsocket-core + 2.10.1 + + com.slack.api diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java index b29fb0b9bf..ed3b4cad08 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java @@ -25,7 +25,9 @@ public class ExpressionDataFileUtils { private static final String SC_DATA_SUFFIX = ".scdata"; public static final String MEX_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".mex"; public static final String TABULAR_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".tsv.gz"; - public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".cellbrowser.tsv.gz"; + private static final String SC_METADATA_SUFFIX = ".scmetadata"; + public static final String TABULAR_SC_METADATA_SUFFIX = SC_METADATA_SUFFIX + ".tsv.gz"; + public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_METADATA_SUFFIX + ".cellbrowser.tsv.gz"; // for single-cell metadata private static final String SC_METADATA_SUFFIX = ".scmetadata"; diff --git 
a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/BatchCorrection.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/BatchCorrection.java new file mode 100644 index 0000000000..444f0d4248 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/BatchCorrection.java @@ -0,0 +1,12 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; + +interface BatchCorrection { + + /** + * Perform a batch correction on the provided data matrix using the specified design matrix. + */ + SingleCellExpressionDataMatrix perform( SingleCellExpressionDataMatrix dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/ComBat.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/ComBat.java new file mode 100644 index 0000000000..c197fb2ce0 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/ComBat.java @@ -0,0 +1,12 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; + +class ComBat implements BatchCorrection { + @Override + public SingleCellExpressionDataMatrix perform( SingleCellExpressionDataMatrix dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) { + // TODO: reuse ComBat implementation + return dataMatrix; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/Harmony.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/Harmony.java new file mode 100644 index 0000000000..e76df3d5c5 --- /dev/null +++ 
b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/Harmony.java @@ -0,0 +1,51 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import org.rosuda.REngine.REXP; +import org.rosuda.REngine.REXPNull; +import org.springframework.util.Assert; +import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; +import ubic.gemma.core.util.r.RClient; +import ubic.gemma.core.util.r.REngineFactory; + +/** + * Perform batch correction using the Harmony algorithm. + *

+ * Requirements: an R engine with the Harmony R package installed. + * @author poirigui + */ +class Harmony implements BatchCorrection { + + private final REngineFactory rEngineFactory; + + Harmony( REngineFactory rEngineFactory ) { + this.rEngineFactory = rEngineFactory; + } + + @Override + public SingleCellExpressionDataMatrix perform( SingleCellExpressionDataMatrix dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) { + Assert.isTrue( dataMatrix.getBioAssays().equals( singleCellDesignMatrix.getBioAssays() ), + "Assays in the data matrix must match exactly those of the design matrix." ); + try ( RClient rEngine = new RClient( rEngineFactory ) ) { + // TODO: serialize both matrices to disk and call Harmony R package + rEngine.parseAndEval( "library(harmony);" ); + // rEngine.assignDataFrame( "dataMatrix", toDataFrame( dataMatrix ) ); + // rEngine.assignDataFrame( "designMatrix", toDataFrame( singleCellDesignMatrix ) ); + //language=R + return fromDataFrame( rEngine.parseAndEval( "harmony::HarmonyMatrix(dataMatrix, designMatrix);" ) ); + } + } + + private REXP toDataFrame( SingleCellExpressionDataMatrix dataMatrix ) { + // Convert the SingleCellExpressionDataMatrix to an REXP object + return new REXPNull(); + } + + private REXP toDataFrame( SingleCellDesignMatrix singleCellDesignMatrix ) { + return new REXPNull(); + } + + private SingleCellExpressionDataMatrix fromDataFrame( REXP rexp ) { + return null; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java new file mode 100644 index 0000000000..14544237fc --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java @@ -0,0 +1,6 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +public enum SingleCellBatchCorrectionMethod { + HARMONY, + 
COMBAT +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java new file mode 100644 index 0000000000..7bae48c241 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java @@ -0,0 +1,17 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +/** + * @author poirigui + */ +public interface SingleCellBatchCorrectionService { + + /** + * Perform batch correction and save the results as a new {@link QuantitationType QuantitationType} sharing the same + * {@link ubic.gemma.model.expression.bioAssayData.SingleCellDimension}. + * @return the batch-corrected {@link QuantitationType} + */ + QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java new file mode 100644 index 0000000000..0d0e284bfe --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java @@ -0,0 +1,65 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.Assert; +import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; +import 
ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrixUtils; +import ubic.gemma.core.util.r.REngineFactory; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +@Service +public class SingleCellBatchCorrectionServiceImpl implements SingleCellBatchCorrectionService { + + @Autowired + private SingleCellExpressionExperimentService singleCellExpressionExperimentService; + + @Autowired + private REngineFactory rEngineFactory; + + @Override + @Transactional + public QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ) { + Assert.notNull( ee.getExperimentalDesign(), ee + " does not have experimental design. It is required to perform batch correction." ); + BatchCorrection m = createBatchCorrection( method ); + SingleCellDimension dimension = singleCellExpressionExperimentService.getSingleCellDimension( ee, qt ); + if ( dimension == null ) { + throw new IllegalArgumentException( qt + " does not have single cell dimension." 
); + } + List vectors = new ArrayList<>( singleCellExpressionExperimentService.getSingleCellDataVectors( ee, qt ) ); + SingleCellExpressionDataMatrix dataMatrix = SingleCellExpressionDataMatrix.getMatrix( vectors ); + Collection clcs = new ArrayList<>(); + // TODO: select relevant CTAs and CLCs + clcs.addAll( dimension.getCellTypeAssignments() ); + clcs.addAll( dimension.getCellLevelCharacteristics() ); + SingleCellDesignMatrix designMatrix = SingleCellDesignMatrix.from( dimension, ee.getExperimentalDesign(), clcs ); + SingleCellExpressionDataMatrix correctedMatrix = m.perform( dataMatrix, designMatrix ); + QuantitationType correctedQt = correctedMatrix.getQuantitationType(); + List correctedVectors = SingleCellExpressionDataMatrixUtils.toVectors( correctedMatrix ); + String details = "Batch correction using " + method + " for " + ee.getShortName() + " on quantitation type " + qt.getName(); + singleCellExpressionExperimentService.addSingleCellDataVectors( ee, correctedQt, correctedVectors, details ); + return correctedQt; + } + + private BatchCorrection createBatchCorrection( SingleCellBatchCorrectionMethod method ) { + switch ( method ) { + case HARMONY: + return new Harmony( rEngineFactory ); + case COMBAT: + return new ComBat(); + default: + throw new IllegalArgumentException( "Unknown batch correction method: " + method ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java new file mode 100644 index 0000000000..90892fc688 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java @@ -0,0 +1,5 @@ +/** + * Correct batch effects in single-cell RNA-seq data. 
+ * @author poirigui + */ +package ubic.gemma.core.analysis.singleCell.batcheffect; \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java new file mode 100644 index 0000000000..ad228c05e4 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java @@ -0,0 +1,29 @@ +package ubic.gemma.core.datastructure.matrix; + +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.experiment.FactorValue; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Interface representing a design matrix for bulk data. + * @author poirigui + */ +public interface BulkDesignMatrix extends DesignMatrix { + + /** + * Get the row of factor values corresponding to a specific bioassay. + * @return the row, or null if the bioassay is not present in the matrix. + */ + @Nullable + List getRow( BioAssay bioAssay ); + + @Nullable + List getRow( BioMaterial bioMaterial ); + + int getRowIndex( BioAssay bioAssay ); + + int getRowIndex( BioMaterial bioMaterial ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java index 049aeda454..c0a301ea46 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java @@ -13,7 +13,6 @@ import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Collection; -import java.util.List; /** * Interface for bulk expression data matrices. 
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java new file mode 100644 index 0000000000..c5d9def647 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java @@ -0,0 +1,75 @@ +package ubic.gemma.core.datastructure.matrix; + +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorValue; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Base interface for design matrices used in experiments. + * @author poirigui + */ +public interface DesignMatrix { + + /** + * Return the number of columns (i.e. factors) in the design matrix. + */ + int columns(); + + /** + * Obtain a list of factors in the design matrix. + */ + List getFactors(); + + /** + * Obtain the factor for a given column. + * @throws IndexOutOfBoundsException if the column index is out of bounds. + */ + ExperimentalFactor getFactorForColumn( int column ); + + /** + * Obtain the factor values for a given column. + * @throws IndexOutOfBoundsException if the column index is out of bounds. + */ + List getColumn( int column ); + + /** + * Obtain the factor values for a given experimental factor. + * @return the factor values, or null if the factor is not present in the design matrix. + */ + @Nullable + List getColumn( ExperimentalFactor factor ); + + /** + * Obtain the index of a given factor in the design matrix. + */ + int getColumnIndex( ExperimentalFactor factor ); + + /** + * Return the number of rows (i.e. samples) in the design matrix. + */ + int rows(); + + List getBioAssays(); + + /** + * Obtain the factor values for a given row (sample). + * @throws IndexOutOfBoundsException if the row index is out of bounds. 
+ */ + List getRow( int row ); + + /** + * Obtain the assay for a given row. + * @throws IndexOutOfBoundsException if the row index is out of bounds. + */ + BioAssay getBioAssayForRow( int row ); + + /** + * Obtain the sample for a given row. + * @throws IndexOutOfBoundsException if the row index is out of bounds. + */ + BioMaterial getBioMaterialForRow( int row ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java new file mode 100644 index 0000000000..35f8f775d1 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java @@ -0,0 +1,21 @@ +package ubic.gemma.core.datastructure.matrix; + +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.biomaterial.BioMaterial; + +import java.util.Collection; + +/** + * Design matrix for multi-assay bulk expression data. + *

+ * A multi-assay bulk design matrix may have more than one {@link BioAssay} per {@link BioMaterial}. + * @author poirigui + * @see MultiAssayBulkExpressionDataMatrix + */ +public interface MultiAssayBulkDesignMatrix extends BulkDesignMatrix { + + /** + * Obtain all the bioassays for a given row in the design matrix. + */ + Collection getBioAssaysForRow( int row ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java new file mode 100644 index 0000000000..66c9bb3c1f --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java @@ -0,0 +1,47 @@ +package ubic.gemma.core.datastructure.matrix; + +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.experiment.ExperimentalDesign; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorValue; + +import javax.annotation.Nullable; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; + +/** + * @author poirigui + */ +public interface SingleCellDesignMatrix extends DesignMatrix { + + static SingleCellDesignMatrix from( SingleCellDimension dimension, ExperimentalDesign experimentalDesign, Collection clcs ) { + List factors = experimentalDesign.getExperimentalFactors().stream() + .sorted( ExperimentalFactor.COMPARATOR ) + .collect( Collectors.toList() ); + List clcsSorted = clcs.stream() + .sorted( CellLevelCharacteristics.COMPARATOR ) + .collect( Collectors.toList() ); + return new SingleCellDesignMatrixImpl( dimension, dimension.getBioAssays(), factors, clcsSorted ); + } + + /** + * Obtain the list of cell IDs in the design matrix. + *

+ * The list is not necessarily unique and has to be combined with {@link #getBioAssays()} to form a unique + * identifier. + */ + List getCellIds(); + + @Nullable + List getRow( BioAssay bioAssay, String cellId ); + + /** + * @throws IndexOutOfBoundsException if the row index is out of bounds. + */ + String getCellIdForRow( int row ); + + int getRowIndex( BioAssay bioAssay, String cellId ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java new file mode 100644 index 0000000000..fd8b614f64 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java @@ -0,0 +1,174 @@ +package ubic.gemma.core.datastructure.matrix; + +import ubic.gemma.core.util.ListUtils; +import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.experiment.ExperimentalFactor; +import ubic.gemma.model.expression.experiment.FactorType; +import ubic.gemma.model.expression.experiment.FactorValue; +import ubic.gemma.model.util.SparseRangeArrayList; + +import javax.annotation.Nullable; +import java.util.*; + +public class SingleCellDesignMatrixImpl implements SingleCellDesignMatrix { + + // rows + private final SparseRangeArrayList assays; + private final List cellIds; + private final Map> index; + + // columns + private final List factors; + private final Map factorsIndex; + + /** + * This is technically a matrix, but using {@link List} allows for sparse range array to be used for sample-level + * factors. + *

+ * Also, this is transposed w.r.t. to rows/columns that the interface requires. This is due to the fact that + * sparsity is better handled along factors + */ + private final List> factorValues; + + public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List assays, List factors, List cellLevelCharacteristics ) { + int[] bioAssayOffsets = new int[assays.size()]; + int k = 0; + List cellIdsL = new ArrayList<>( dimension.getNumberOfCells() ); + Map> index = new HashMap<>( assays.size() ); + for ( int i = 0; i < assays.size(); i++ ) { + BioAssay assay = assays.get( i ); + int sampleIndex = dimension.getBioAssays().indexOf( assay ); + if ( sampleIndex < 0 ) { + throw new IllegalArgumentException( assay + " is not part of " + dimension + "." ); + } + List sampleCellIds = dimension.getCellIdsBySample( sampleIndex ); + bioAssayOffsets[i] = k; + cellIdsL.addAll( sampleCellIds ); + Map cellid2pos = new HashMap<>(); + for ( int j = 0; j < sampleCellIds.size(); j++ ) { + cellid2pos.put( sampleCellIds.get( j ), k + j ); + } + index.put( assay, cellid2pos ); + k += sampleCellIds.size(); + } + this.assays = new SparseRangeArrayList<>( assays, bioAssayOffsets, k ); + this.cellIds = cellIdsL; + this.index = index; + ArrayList factorsL = new ArrayList<>( factors.size() + cellLevelCharacteristics.size() ); + factorsL.addAll( factors ); + for ( CellLevelCharacteristics clc : cellLevelCharacteristics ) { + ExperimentalFactor factor = createFactorFromCellLevelCharacteristics( clc ); + factorsL.add( factor ); + } + this.factors = Collections.unmodifiableList( factorsL ); + this.factorsIndex = Collections.unmodifiableMap( ListUtils.indexOfElements( factorsL ) ); + // TODO: fill the matrix + this.factorValues = new ArrayList<>( factors.size() ); + } + + @Override + public List getFactors() { + return factors; + } + + @Override + public List getBioAssays() { + return assays; + } + + @Override + public List getCellIds() { + return cellIds; + } + + @Override + public int 
columns() { + return factors.size(); + } + + @Override + public List getColumn( int column ) { + return factorValues.get( column ); + } + + @Nullable + @Override + public List getColumn( ExperimentalFactor factor ) { + int index = factors.indexOf( factor ); + if ( index == -1 ) { + return null; + } + return getColumn( index ); + } + + /** + * Obtain the column index of a factor, using the same lookup as {@link #getColumn(ExperimentalFactor)}. + * @return the position of the factor in {@link #getFactors()}, or -1 if the factor is not in this matrix + */ + @Override + public int getColumnIndex( ExperimentalFactor factor ) { + // previously hard-coded to 0, which reported the first column for every factor + return factors.indexOf( factor ); + } + + @Override + public ExperimentalFactor getFactorForColumn( int column ) { + return factors.get( column ); + } + + @Nullable + @Override + public List getRow( BioAssay bioAssay, String cellId ) { + int row = getRowIndex( bioAssay, cellId ); + if ( row == -1 ) { + return null; + } + return getRow( row ); + } + + @Override + public List getRow( int row ) { + List fvs = new ArrayList<>( factors.size() ); + for ( int i = 0; i < factors.size(); i++ ) { + fvs.add( factorValues.get( i ).get( row ) ); + } + return fvs; + } + + @Override + public int rows() { + return cellIds.size(); + } + + @Override + public BioAssay getBioAssayForRow( int row ) { + return assays.get( row ); + } + + @Override + public BioMaterial getBioMaterialForRow( int row ) { + return getBioAssayForRow( row ).getSampleUsed(); + } + + @Override + public String getCellIdForRow( int row ) { + return cellIds.get( row ); + } + + @Override + public int getRowIndex( BioAssay bioAssay, String cellId ) { + Map cell2pos = index.get( bioAssay ); + if ( cell2pos == null ) { + return -1; + } + return cell2pos.getOrDefault( cellId, -1 ); + } + + private ExperimentalFactor createFactorFromCellLevelCharacteristics( CellLevelCharacteristics characteristics ) { + ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance( characteristics.getName(), FactorType.CATEGORICAL ); + for ( Characteristic c : characteristics.getCharacteristics() ) { + FactorValue fv = FactorValue.Factory.newInstance( factor, c ); + factor.getFactorValues().add( fv ); + } + return factor; + } +} diff --git 
a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java new file mode 100644 index 0000000000..73e649a88d --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java @@ -0,0 +1,12 @@ +package ubic.gemma.core.datastructure.matrix.io; + +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +import java.io.IOException; +import java.io.Writer; + +public interface SingleCellMetadataWriter { + + void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException; +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java new file mode 100644 index 0000000000..189f46d2ed --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java @@ -0,0 +1,132 @@ +package ubic.gemma.core.datastructure.matrix.io; + +import lombok.Setter; +import lombok.extern.apachecommons.CommonsLog; +import ubic.basecode.util.StringUtil; +import ubic.gemma.core.util.TsvUtils; +import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.experiment.*; + +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import 
java.util.Map; +import java.util.stream.Collectors; + +@CommonsLog +public class TabularSingleCellMetadataWriter implements SingleCellMetadataWriter { + + @Setter + private boolean useBioAssayIds = false; + @Setter + private boolean useRawColumnNames = false; + @Setter + private boolean autoFlush = false; + + @Override + public void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException { + List factors; + if ( ee.getExperimentalDesign() != null ) { + factors = ee.getExperimentalDesign().getExperimentalFactors().stream() + .sorted( ExperimentalFactor.COMPARATOR ) + .collect( Collectors.toList() ); + } else { + log.warn( ee + " does not have an experimental design, no factors will be written." ); + factors = Collections.emptyList(); + } + List samples = singleCellDimension.getBioAssays().stream() + .map( BioAssay::getSampleUsed ) + .collect( Collectors.toList() ); + Map> factorValueMap = ExperimentalDesignUtils.getFactorValueMap( ee.getExperimentalDesign(), samples ); + List clcs = new ArrayList<>( singleCellDimension.getCellTypeAssignments().size() + singleCellDimension.getCellLevelCharacteristics().size() ); + singleCellDimension.getCellTypeAssignments().stream() + .sorted( CellTypeAssignment.COMPARATOR ) + .forEach( clcs::add ); + singleCellDimension.getCellLevelCharacteristics().stream() + .sorted( CellLevelCharacteristics.COMPARATOR ) + .forEach( clcs::add ); + writeHeader( factors, clcs, writer ); + int cellIndex = 0; + for ( int sampleIndex = 0; sampleIndex < singleCellDimension.getBioAssays().size(); sampleIndex++ ) { + BioAssay bioAssay = singleCellDimension.getBioAssays().get( sampleIndex ); + for ( String cellId : singleCellDimension.getCellIdsBySample( sampleIndex ) ) { + writeCell( bioAssay, cellId, cellIndex++, factors, factorValueMap, clcs, writer ); + } + } + } + + private void writeHeader( List factors, List clcs, Writer writer ) throws IOException { + String[] columnNames = new String[2 + 
factors.size() + clcs.size()]; + int i = 0; + columnNames[i++] = "sample_id"; + columnNames[i++] = "cell_id"; + for ( ExperimentalFactor factor : factors ) { + columnNames[i++] = factor.getName(); + } + for ( CellLevelCharacteristics clc : clcs ) { + if ( clc.getName() != null ) { + columnNames[i++] = clc.getName(); + } else if ( !clc.getCharacteristics().isEmpty() ) { + // If the name is null, we can use the first characteristic's category as a fallback + Characteristic c = clc.getCharacteristics().iterator().next(); + columnNames[i++] = c.getCategory(); + } else { + throw new IllegalStateException( clc + " has no name nor characteristics, cannot write header." ); + } + } + if ( useRawColumnNames ) { + columnNames = StringUtil.makeUnique( columnNames ); + } else { + columnNames = StringUtil.makeNames( columnNames, true ); + } + for ( int j = 0; j < columnNames.length; j++ ) { + String colName = columnNames[j]; + if ( j > 0 ) { + writer.append( "\t" ); + } + writer.append( TsvUtils.format( colName ) ); + } + writer.append( "\n" ); + if ( autoFlush ) { + writer.flush(); + } + } + + public void writeCell( BioAssay bioAssay, String cellId, int cellIndex, List factors, Map> factorValueMap, List clcs, Writer writer ) throws IOException { + if ( useBioAssayIds ) { + writer.append( String.valueOf( bioAssay.getId() ) ); + } else if ( useRawColumnNames ) { + writer.append( bioAssay.getShortName() != null ? 
bioAssay.getShortName() : bioAssay.getName() ); + } else { + writer.append( ExpressionDataWriterUtils.constructAssayName( bioAssay ) ); + } + writer.append( "\t" ).append( cellId ); + for ( ExperimentalFactor factor : factors ) { + FactorValue value = factorValueMap.get( factor ).get( bioAssay.getSampleUsed() ); + writer.append( "\t" ); + if ( value != null ) { + writer.append( TsvUtils.format( FactorValueUtils.getValue( value, String.valueOf( TsvUtils.SUB_DELIMITER ) ) ) ); + } else { + writer.append( TsvUtils.format( ( String ) null ) ); + } + } + for ( CellLevelCharacteristics clc : clcs ) { + writer.append( "\t" ); + Characteristic c = clc.getCharacteristic( cellIndex ); + if ( c != null ) { + writer.append( TsvUtils.format( c.getValue() ) ); + } + } + writer.append( "\n" ); + if ( autoFlush ) { + writer.flush(); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java new file mode 100644 index 0000000000..eec2d71a58 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java @@ -0,0 +1,92 @@ +package ubic.gemma.core.util.r; + +import org.rosuda.REngine.REXP; +import org.rosuda.REngine.REXPMismatchException; +import org.rosuda.REngine.REngine; +import org.rosuda.REngine.REngineException; +import org.springframework.util.Assert; +import ubic.basecode.util.StringUtil; + +import java.util.List; + +/** + * A high-level client for interacting with R. + * @author poirigui + */ +public class RClient implements AutoCloseable { + + /** + * The R engine used for executing R commands. 
+ */ + private final REngine rEngine; + + public RClient( REngineFactory rEngine ) { + try { + this.rEngine = rEngine.createREngine(); + } catch ( Exception e ) { + throw new RClientException( e ); + } + } + + /** + * Assign a data.frame to the given symbol in the R session. + * Each vector becomes one column. Column and row names are passed through {@link StringUtil#makeNames} before being sent to R. + * Temporary symbols (rows, cols0, cols1, ...) are created in the R session and removed after a successful assignment. + * @param symbol name the data frame is bound to + * @param columnNames one name per vector + * @param rowNames row names, must not be empty + * @param vectors column data; only double[] is accepted by {@link #assignVector(String, Object)} + * @throws RClientException if the R engine reports an error or a vector type is not supported + */ + public void assignDataFrame( String symbol, List columnNames, List rowNames, List vectors ) { + Assert.isTrue( !rowNames.isEmpty() ); + Assert.isTrue( columnNames.size() == vectors.size() ); + try { + // create valid and unique R identifiers for the column names + String[] cn = StringUtil.makeNames( columnNames.toArray( new String[0] ), true ); + String[] rn = StringUtil.makeNames( rowNames.toArray( new String[0] ), true ); + rEngine.assign( "rows", rn ); + StringBuilder data = new StringBuilder(); + for ( int i = 0; i < cn.length; i++ ) { + assignVector( "cols" + i, vectors.get( i ) ); + if ( i > 0 ) { + data.append( ", " ); + } + // label the column with its own sanitized name (was indexing rowNames by column position) + data.append( cn[i] ).append( "=" ).append( "cols" ).append( i ); + } + // parse() alone only builds the expression; it has to be evaluated to obtain the data frame + REXP dataFrame = rEngine.parseAndEval( "data.frame(" + data + ", row.names=rows)" ); + rEngine.assign( symbol, dataFrame ); + rEngine.parseAndEval( "rm(rows);" ); + for ( int i = 0; i < cn.length; i++ ) { + rEngine.parseAndEval( "rm(cols" + i + ")" ); + } + } catch ( REngineException | REXPMismatchException e ) { + throw new RClientException( e ); + } + } + + public void retrieveDataFrame( String symbol, List columnNames, List rowNames, List vectors ) { + + } + + public void assignVector( String symbol, Object vector ) { + try { + if ( vector instanceof double[] ) { + rEngine.assign( symbol, ( double[] ) vector ); + } else { + throw new RClientException( vector.getClass() + " is not a supported vector type." 
); + } + } catch ( REngineException e ) { + throw new RClientException( e ); + } + } + + public REXP parseAndEval( String cmd ) { + try { + return rEngine.parseAndEval( cmd ); + } catch ( REngineException | REXPMismatchException e ) { + throw new RClientException( e ); + } + } + + @Override + public void close() { + if ( !rEngine.close() ) { + throw new RClientException( "Failed to close the underlying R engine." ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/RClientException.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClientException.java new file mode 100644 index 0000000000..f7f7b1f39b --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClientException.java @@ -0,0 +1,12 @@ +package ubic.gemma.core.util.r; + +public class RClientException extends RuntimeException { + + public RClientException( String message ) { + super( message ); + } + + public RClientException( Throwable cause ) { + super( cause ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineConfig.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineConfig.java new file mode 100644 index 0000000000..a0f5804d29 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineConfig.java @@ -0,0 +1,31 @@ +package ubic.gemma.core.util.r; + +import org.rosuda.REngine.JRI.JRIEngine; +import org.rosuda.REngine.Rserve.RConnection; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.nio.file.Path; + +@Configuration +public class REngineConfig { + + @Bean + public REngineFactory rEngineFactory( + @Value("${r.exe}") Path rExe, + @Value("${r.backend}") String rBackend, + @Value("${r.rserve.host}") String host, + @Value("${r.rserve.port}") int port + ) { + if ( rBackend.equalsIgnoreCase( "JRIEngine" ) ) { + return JRIEngine::new; + } else if ( rBackend.equalsIgnoreCase( 
"RConnection" ) ) { + return () -> new RConnection( host, port ); + } else if ( rBackend.equalsIgnoreCase( "StandaloneRConnection" ) ) { + return () -> new StandaloneRConnection( rExe ); + } else { + throw new IllegalArgumentException( "Unsupported R backend '" + rBackend + "'. Choose one among: JRIEngine, RConnection or StandaloneRConnection." ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineFactory.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineFactory.java new file mode 100644 index 0000000000..3d8f490fdc --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/REngineFactory.java @@ -0,0 +1,8 @@ +package ubic.gemma.core.util.r; + +import org.rosuda.REngine.REngine; + +public interface REngineFactory { + + REngine createREngine() throws Exception; +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/RServe.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/RServe.java new file mode 100644 index 0000000000..8f99e07b74 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/RServe.java @@ -0,0 +1,78 @@ +package ubic.gemma.core.util.r; + +import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; + +import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.TimeUnit; + +@CommonsLog +class RServe implements AutoCloseable { + + @Nullable + private final Path socketPath; + + private final Process rserveProcess; + + public RServe( Path rExe, Path socketPath ) throws IOException { + this.socketPath = socketPath; + this.rserveProcess = createRServeProcess( rExe, "socket=" + quoteRString( socketPath.toString() ) ); + } + + /** + * Create an RServe instance that listens on the given port. 
+ */ + public RServe( Path rExe, int port ) throws IOException { + this.socketPath = null; + this.rserveProcess = createRServeProcess( rExe, "port=" + port ); + } + + private static Process createRServeProcess( Path rExe, String... args ) throws IOException { + Process proc = new ProcessBuilder( rExe.toString(), "-e", "library(Rserve); run.Rserve(" + String.join( ", ", args ) + ")" ) + .redirectOutput( ProcessBuilder.Redirect.appendTo( new File( "/dev/null" ) ) ) + .redirectError( ProcessBuilder.Redirect.PIPE ) + .start(); + try { + log.debug( "Waiting for RServe to start..." ); + if ( proc.waitFor( 2000, TimeUnit.MILLISECONDS ) ) { + throw new RuntimeException( String.format( "RServe process exited unexpectedly with code: %d. %s", + proc.exitValue(), + IOUtils.toString( proc.getErrorStream(), StandardCharsets.UTF_8 ) ) ); + } + log.debug( "RServe appears to be running." ); + } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); + throw new RuntimeException( e ); + } + return proc; + } + + @Override + public void close() throws IOException { + log.debug( "Closing RServe process..." ); + rserveProcess.destroy(); + try { + if ( rserveProcess.waitFor() == 0 ) { + log.debug( "RServe closed successfully." ); + } else { + log.error( String.format( "RServe process exited with code: %d. 
%s", rserveProcess.exitValue(), + IOUtils.toString( rserveProcess.getErrorStream(), StandardCharsets.UTF_8 ) ) ); + } + } catch ( InterruptedException e ) { + Thread.currentThread().interrupt(); + throw new RuntimeException( e ); + } + if ( socketPath != null ) { + Files.deleteIfExists( socketPath ); + } + } + + private String quoteRString( String s ) { + return "\"" + s.replace( "\"", "\\\"" ) + "\""; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/StandaloneRConnection.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/StandaloneRConnection.java new file mode 100644 index 0000000000..fad908ef04 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/StandaloneRConnection.java @@ -0,0 +1,103 @@ +package ubic.gemma.core.util.r; + +import lombok.extern.apachecommons.CommonsLog; +import org.newsclub.net.unix.AFUNIXSocket; +import org.newsclub.net.unix.AFUNIXSocketAddress; +import org.rosuda.REngine.REXP; +import org.rosuda.REngine.REXPMismatchException; +import org.rosuda.REngine.REngine; +import org.rosuda.REngine.REngineException; +import org.rosuda.REngine.Rserve.RConnection; + +import java.io.IOException; +import java.net.Socket; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * A {@link REngine} implementation that launches Rserve using a UNIX domain socket and connects to it with + * {@link RConnection}. + * @author poirigui + * @see RConnection + */ +@CommonsLog +public class StandaloneRConnection extends REngine { + + private final RServe rServe; + /** + * Socket through which the R connection communicates with Rserve. + */ + private final Socket rServeSocket; + private final RConnection rConnection; + + public StandaloneRConnection( Path rExe ) throws IOException, REngineException { + Path socketPath = Files.createTempFile( "rserve", ".sock" ); + log.info( "Launching Rserve with socket at " + socketPath + "..." 
); + this.rServe = new RServe( rExe, socketPath ); + this.rServeSocket = AFUNIXSocket.connectTo( AFUNIXSocketAddress.of( socketPath ) ); + this.rConnection = new RConnection( rServeSocket ); + } + + @Override + public REXP parse( String text, boolean resolve ) throws REngineException { + return rConnection.parse( text, resolve ); + } + + @Override + public REXP eval( REXP what, REXP where, boolean resolve ) throws REngineException, REXPMismatchException { + return rConnection.eval( what, where, resolve ); + } + + @Override + public void assign( String symbol, REXP value, REXP env ) throws REngineException, REXPMismatchException { + rConnection.assign( symbol, value, env ); + } + + @Override + public REXP get( String symbol, REXP env, boolean resolve ) throws REngineException, REXPMismatchException { + return rConnection.get( symbol, env, resolve ); + } + + @Override + public REXP resolveReference( REXP ref ) throws REngineException { + return rConnection.resolveReference( ref ); + } + + @Override + public REXP createReference( REXP value ) throws REngineException { + return rConnection.createReference( value ); + } + + @Override + public void finalizeReference( REXP ref ) throws REngineException { + rConnection.finalizeReference( ref ); + } + + @Override + public REXP getParentEnvironment( REXP env, boolean resolve ) throws REngineException { + return rConnection.getParentEnvironment( env, resolve ); + } + + @Override + public REXP newEnvironment( REXP parent, boolean resolve ) throws REngineException { + return rConnection.newEnvironment( parent, resolve ); + } + + @Override + public boolean close() { + boolean ret = rConnection.close(); + try { + rServeSocket.close(); + } catch ( IOException e ) { + log.error( "Failed to close Rserve socket.", e ); + ret = false; + } + try { + rServe.close(); + } catch ( Exception e ) { + log.error( "Failed to close RServe." 
, e ); + ret = false; + } + return ret; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/package-info.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/package-info.java new file mode 100644 index 0000000000..989264ef44 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/package-info.java @@ -0,0 +1,16 @@ +/** + * This package contains classes and interfaces related to R integration in the Gemma project. + *

+ *

    + *
  • {@link ubic.gemma.core.util.r.RClient}, a high-level client for interacting with R.
  • + *
  • {@link ubic.gemma.core.util.r.StandaloneRConnection}, an extension of {@link org.rosuda.REngine.REngine} that supports
  • + * communicating with R using a UNIX domain socket. + *
+ * This is intended to be a complete replacement for {@link ubic.basecode.util.r} that should eventually be moved + * there. + * @author poirigui + */ +@ParametersAreNonnullByDefault +package ubic.gemma.core.util.r; + +import javax.annotation.ParametersAreNonnullByDefault; \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/visualization/cellbrowser/CellBrowserMetadataWriter.java b/gemma-core/src/main/java/ubic/gemma/core/visualization/cellbrowser/CellBrowserMetadataWriter.java index 726c24bcba..2fd9646b5e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/visualization/cellbrowser/CellBrowserMetadataWriter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/visualization/cellbrowser/CellBrowserMetadataWriter.java @@ -3,6 +3,7 @@ import lombok.Setter; import lombok.extern.apachecommons.CommonsLog; import ubic.basecode.util.StringUtil; +import ubic.gemma.core.datastructure.matrix.io.SingleCellMetadataWriter; import ubic.gemma.core.util.TsvUtils; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.expression.bioAssay.BioAssay; @@ -25,7 +26,7 @@ * @author poirigui */ @CommonsLog -public class CellBrowserMetadataWriter { +public class CellBrowserMetadataWriter implements SingleCellMetadataWriter { @Setter private boolean useBioAssayIds = false; @@ -37,6 +38,7 @@ public class CellBrowserMetadataWriter { @Setter private boolean autoFlush = false; + @Override public void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException { List factors; if ( ee.getExperimentalDesign() != null ) { diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index ac60fc55db..0a698931f2 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -253,6 +253,15 @@ python.exe=python # JavaScript npm.exe=npm 
############################################################ +# R +r.exe=Rscript +# Available backends: JRIEngine, RConnection and StandaloneRConnection +r.backend=JRIEngine +# Configuration for RServe +# Host and port are only used by the RConnection backend; StandaloneRConnection launches its own Rserve on a UNIX domain socket +r.rserve.host=localhost +r.rserve.port=6311 +############################################################ # Static Asset Server # Enable the static server for serving assets externally gemma.staticAssetServer.enabled=false diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/HarmonyTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/HarmonyTest.java new file mode 100644 index 0000000000..145b12e5cb --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/HarmonyTest.java @@ -0,0 +1,75 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import org.junit.Test; +import org.rosuda.REngine.REngineException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.config.SettingsConfig; +import ubic.gemma.core.context.TestComponent; +import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; +import ubic.gemma.core.util.r.REngineFactory; +import ubic.gemma.core.util.r.StandaloneRConnection; +import ubic.gemma.core.util.test.BaseTest; +import ubic.gemma.core.visualization.RandomExperimentalDesignUtils; +import ubic.gemma.model.common.quantitationtype.*; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExperimentalDesign; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.service.expression.bioAssayData.RandomSingleCellDataUtils; + +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; + +@ContextConfiguration +public class HarmonyTest extends BaseTest { + + @Configuration + @TestComponent + @Import(SettingsConfig.class) + static class CC { + + @Bean + public REngineFactory rEngineFactory( @Value("${r.exe}") Path rExe ) { + return () -> new StandaloneRConnection( rExe ); + } + } + + @Autowired + private REngineFactory rEngineFactory; + + @Test + public void test() throws REngineException { + ArrayDesign ad = ArrayDesign.Factory.newInstance(); + for ( int i = 0; i < 100; i++ ) { + ad.getCompositeSequences().add( CompositeSequence.Factory.newInstance( "cs" + i, ad ) ); + } + ExpressionExperiment ee = ExpressionExperiment.Factory.newInstance(); + for ( int i = 0; i < 8; i++ ) { + BioMaterial sample = BioMaterial.Factory.newInstance( "bm" + i ); + BioAssay ba = BioAssay.Factory.newInstance( "ba" + i, ad, sample ); + sample.getBioAssaysUsedIn().add( ba ); + ee.getBioAssays().add( ba ); + } + QuantitationType qt = QuantitationType.Factory.newInstance(); + qt.setGeneralType( GeneralType.QUANTITATIVE ); + qt.setType( StandardQuantitationType.AMOUNT ); + qt.setScale( ScaleType.LOG2 ); + qt.setRepresentation( PrimitiveType.DOUBLE ); + List vectors = RandomSingleCellDataUtils.randomSingleCellVectors( ee, ad, qt ); + SingleCellExpressionDataMatrix dataMatrix = SingleCellExpressionDataMatrix.getMatrix( vectors ); + ExperimentalDesign design = RandomExperimentalDesignUtils.randomExperimentalDesign( ee, 4 ); + SingleCellDesignMatrix 
singleCellDesignMatrix = SingleCellDesignMatrix.from( dataMatrix.getSingleCellDimension(), design, Collections.emptyList() ); + Harmony h = new Harmony( rEngineFactory ); + h.perform( dataMatrix, singleCellDesignMatrix ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceTest.java new file mode 100644 index 0000000000..75811dd151 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceTest.java @@ -0,0 +1,10 @@ +package ubic.gemma.core.analysis.singleCell.batcheffect; + +import org.junit.Test; + +public class SingleCellBatchCorrectionServiceTest { + + @Test + public void test() { + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/r/RClientTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/r/RClientTest.java new file mode 100644 index 0000000000..de005e6d25 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/util/r/RClientTest.java @@ -0,0 +1,42 @@ +package ubic.gemma.core.util.r; + +import org.junit.Test; +import org.rosuda.REngine.JRI.JRIEngine; +import org.rosuda.REngine.REXPMismatchException; + +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class RClientTest { + + @Test + public void testStandaloneREngine() throws REXPMismatchException { + try ( RClient client = new RClient( () -> new StandaloneRConnection( Paths.get( "Rscript" ) ) ) ) { + assertEquals( "Hello!", client.parseAndEval( "'Hello!'" ).asString() ); + } + } + + @Test + public void testJRIEngine() throws REXPMismatchException { + try ( RClient client = new RClient( JRIEngine::new ) ) { + assertEquals( "Hello!", client.parseAndEval( "'Hello!'" ).asString() ); + } + } + + 
@Test + public void testAssignDataFrame() { + try ( RClient client = new RClient( JRIEngine::new ) ) { + List colNames = Arrays.asList( "a", "b", "c" ); + List rowNames = Arrays.asList( "row1", "row2", "row3" ); + List data = Arrays.asList( + new double[] { 1, 2, 3 }, + new double[] { 4, 5, 6 }, + new double[] { 7, 8, 9 } + ); + client.assignDataFrame( "foo", colNames, rowNames, data ); + } + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 9f02a15167..79881670b4 100644 --- a/pom.xml +++ b/pom.xml @@ -139,7 +139,7 @@ baseCode baseCode - 1.1.26 + 1.1.27-SNAPSHOT @@ -815,6 +815,11 @@ 1.12.1 /usr/lib64/hdf5 + + + 1.0-13 + /usr/lib64/R/library/rJava/jri + 1.8-14 8.4.0 9.0.107 4.13.2 @@ -840,7 +845,7 @@ ${jvmOptions} - -Djava.library.path=${hdf5.libDir} + -Djava.library.path=${hdf5.libDir}:${rJava.libDir}