+ * Requirements: an R engine with the Harmony R package installed.
+ * @author poirigui
+ */
+class Harmony implements BatchCorrection {
+
+ // Factory handed to RClient in perform() to obtain an R engine.
+ private final REngineFactory rEngineFactory;
+
+ Harmony( REngineFactory rEngineFactory ) {
+ this.rEngineFactory = rEngineFactory;
+ }
+
+ /**
+ * Run Harmony on the given data matrix using the given design matrix.
+ * <p>
+ * The assays of both matrices must be equal, otherwise the assertion below fails. An {@link RClient} is opened
+ * for the duration of the call and closed by the try-with-resources statement.
+ * <p>
+ * NOTE(review): this is still a stub. The two assignDataFrame calls are commented out, so this method never
+ * assigns the {@code dataMatrix} and {@code designMatrix} symbols referenced by the evaluated R expression, and
+ * {@code fromDataFrame} always returns null, so a successful evaluation makes this method return null.
+ */
+ @Override
+ public SingleCellExpressionDataMatrix> perform( SingleCellExpressionDataMatrix> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) {
+ Assert.isTrue( dataMatrix.getBioAssays().equals( singleCellDesignMatrix.getBioAssays() ),
+ "Assays in the data matrix must match exactly those of the design matrix." );
+ try ( RClient rEngine = new RClient( rEngineFactory ) ) {
+ // TODO: serialize both matrices to disk and call Harmony R package
+ // load the harmony package in the R session
+ rEngine.parseAndEval( "library(harmony);" );
+ // rEngine.assignDataFrame( "dataMatrix", toDataFrame( dataMatrix ) );
+ // rEngine.assignDataFrame( "designMatrix", toDataFrame( singleCellDesignMatrix ) );
+ //language=R
+ return fromDataFrame( rEngine.parseAndEval( "harmony::HarmonyMatrix(dataMatrix, designMatrix);" ) );
+ }
+ }
+
+ // Placeholder: always returns an REXPNull, the data matrix is not converted yet.
+ private REXP toDataFrame( SingleCellExpressionDataMatrix> dataMatrix ) {
+ // Convert the SingleCellExpressionDataMatrix to an REXP object
+ return new REXPNull();
+ }
+
+ // Placeholder: always returns an REXPNull, the design matrix is not converted yet.
+ private REXP toDataFrame( SingleCellDesignMatrix singleCellDesignMatrix ) {
+ return new REXPNull();
+ }
+
+ // Placeholder: ignores the R result and always returns null.
+ private SingleCellExpressionDataMatrix> fromDataFrame( REXP rexp ) {
+ return null;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java
new file mode 100644
index 0000000000..14544237fc
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionMethod.java
@@ -0,0 +1,6 @@
+package ubic.gemma.core.analysis.singleCell.batcheffect;
+
+/**
+ * Batch correction methods that can be requested from {@link SingleCellBatchCorrectionService}.
+ */
+public enum SingleCellBatchCorrectionMethod {
+ // mapped to the Harmony implementation by SingleCellBatchCorrectionServiceImpl
+ HARMONY,
+ // mapped to the ComBat implementation by SingleCellBatchCorrectionServiceImpl
+ COMBAT
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java
new file mode 100644
index 0000000000..7bae48c241
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionService.java
@@ -0,0 +1,17 @@
+package ubic.gemma.core.analysis.singleCell.batcheffect;
+
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+/**
+ * Service for applying a batch correction method to the single-cell data of an experiment.
+ * @author poirigui
+ */
+public interface SingleCellBatchCorrectionService {
+
+ /**
+ * Perform batch correction and save the results as a new {@link QuantitationType QuantitationType} sharing the same
+ * {@link ubic.gemma.model.expression.bioAssayData.SingleCellDimension}.
+ * @param ee     the experiment whose single-cell data is corrected
+ * @param qt     the quantitation type identifying the single-cell data to correct
+ * @param method the batch correction method to apply
+ * @return the batch-corrected {@link QuantitationType}
+ */
+ QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java
new file mode 100644
index 0000000000..0d0e284bfe
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/SingleCellBatchCorrectionServiceImpl.java
@@ -0,0 +1,65 @@
+package ubic.gemma.core.analysis.singleCell.batcheffect;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.Assert;
+import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
+import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
+import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrixUtils;
+import ubic.gemma.core.util.r.REngineFactory;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+@Service
+public class SingleCellBatchCorrectionServiceImpl implements SingleCellBatchCorrectionService {
+
+ @Autowired
+ private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ @Autowired
+ private REngineFactory rEngineFactory;
+
+ @Override
+ @Transactional
+ public QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ) {
+ Assert.notNull( ee.getExperimentalDesign(), ee + " does not have experimental design. It is required to perform batch correction." );
+ BatchCorrection m = createBatchCorrection( method );
+ SingleCellDimension dimension = singleCellExpressionExperimentService.getSingleCellDimension( ee, qt );
+ if ( dimension == null ) {
+ throw new IllegalArgumentException( qt + " does not have single cell dimension." );
+ }
+ List vectors = new ArrayList<>( singleCellExpressionExperimentService.getSingleCellDataVectors( ee, qt ) );
+ SingleCellExpressionDataMatrix> dataMatrix = SingleCellExpressionDataMatrix.getMatrix( vectors );
+ Collection clcs = new ArrayList<>();
+ // TODO: select relevant CTAs and CLCs
+ clcs.addAll( dimension.getCellTypeAssignments() );
+ clcs.addAll( dimension.getCellLevelCharacteristics() );
+ SingleCellDesignMatrix designMatrix = SingleCellDesignMatrix.from( dimension, ee.getExperimentalDesign(), clcs );
+ SingleCellExpressionDataMatrix> correctedMatrix = m.perform( dataMatrix, designMatrix );
+ QuantitationType correctedQt = correctedMatrix.getQuantitationType();
+ List correctedVectors = SingleCellExpressionDataMatrixUtils.toVectors( correctedMatrix );
+ String details = "Batch correction using " + method + " for " + ee.getShortName() + " on quantitation type " + qt.getName();
+ singleCellExpressionExperimentService.addSingleCellDataVectors( ee, correctedQt, correctedVectors, details );
+ return correctedQt;
+ }
+
+ /**
+ * Create the {@link BatchCorrection} implementation for the requested method.
+ * <p>
+ * A new instance is created on every call; the Harmony implementation receives the injected R engine factory.
+ * @throws IllegalArgumentException if the method has no corresponding implementation
+ */
+ private BatchCorrection createBatchCorrection( SingleCellBatchCorrectionMethod method ) {
+ switch ( method ) {
+ case HARMONY:
+ return new Harmony( rEngineFactory );
+ case COMBAT:
+ return new ComBat();
+ default:
+ throw new IllegalArgumentException( "Unknown batch correction method: " + method );
+ }
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java
new file mode 100644
index 0000000000..90892fc688
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/singleCell/batcheffect/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Correct batch effects in single-cell RNA-seq data.
+ * @author poirigui
+ */
+package ubic.gemma.core.analysis.singleCell.batcheffect;
\ No newline at end of file
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java
new file mode 100644
index 0000000000..ad228c05e4
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkDesignMatrix.java
@@ -0,0 +1,29 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+import ubic.gemma.model.expression.experiment.FactorValue;
+
+import javax.annotation.Nullable;
+import java.util.List;
+
+/**
+ * Interface representing a design matrix for bulk data.
+ * <p>
+ * Adds row lookups by {@link BioAssay} and {@link BioMaterial} on top of {@link DesignMatrix}.
+ * @author poirigui
+ */
+public interface BulkDesignMatrix extends DesignMatrix {
+
+ /**
+ * Get the row of factor values corresponding to a specific bioassay.
+ * @return the row, or null if the bioassay is not present in the matrix.
+ */
+ @Nullable
+ List getRow( BioAssay bioAssay );
+
+ /**
+ * Get the row of factor values corresponding to a specific biomaterial.
+ * @return the row; NOTE(review): presumably null if the biomaterial is not present in the matrix, as for the
+ * bioassay variant — confirm against the implementations.
+ */
+ @Nullable
+ List getRow( BioMaterial bioMaterial );
+
+ /**
+ * Get the index of the row corresponding to a specific bioassay.
+ * NOTE(review): the value returned for an absent bioassay is not specified here — presumably -1, confirm.
+ */
+ int getRowIndex( BioAssay bioAssay );
+
+ /**
+ * Get the index of the row corresponding to a specific biomaterial.
+ * NOTE(review): the value returned for an absent biomaterial is not specified here — presumably -1, confirm.
+ */
+ int getRowIndex( BioMaterial bioMaterial );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
index 049aeda454..c0a301ea46 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
@@ -13,7 +13,6 @@
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.List;
/**
* Interface for bulk expression data matrices.
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java
new file mode 100644
index 0000000000..c5d9def647
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DesignMatrix.java
@@ -0,0 +1,75 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+import ubic.gemma.model.expression.experiment.ExperimentalFactor;
+import ubic.gemma.model.expression.experiment.FactorValue;
+
+import javax.annotation.Nullable;
+import java.util.List;
+
+/**
+ * Base interface for design matrices used in experiments.
+ * @author poirigui
+ */
+public interface DesignMatrix {
+
+ /**
+ * Return the number of columns (i.e. factors) in the design matrix.
+ */
+ int columns();
+
+ /**
+ * Obtain a list of factors in the design matrix.
+ */
+ List getFactors();
+
+ /**
+ * Obtain the factor for a given column.
+ * @throws IndexOutOfBoundsException if the column index is out of bounds.
+ */
+ ExperimentalFactor getFactorForColumn( int column );
+
+ /**
+ * Obtain the factor values for a given column.
+ * @throws IndexOutOfBoundsException if the column index is out of bounds.
+ */
+ List getColumn( int column );
+
+ /**
+ * Obtain the factor values for a given experimental factor.
+ * @return the factor values, or null if the factor is not present in the design matrix.
+ */
+ @Nullable
+ List getColumn( ExperimentalFactor factor );
+
+ /**
+ * Obtain the index of a given factor in the design matrix.
+ * NOTE(review): the value returned when the factor is not present is not specified here — presumably -1, by
+ * analogy with the row lookup in SingleCellDesignMatrixImpl; confirm and document.
+ */
+ int getColumnIndex( ExperimentalFactor factor );
+
+ /**
+ * Return the number of rows (i.e. samples) in the design matrix.
+ */
+ int rows();
+
+ /**
+ * Obtain the assays of the design matrix.
+ * NOTE(review): presumably one entry per row, since SingleCellDesignMatrixImpl indexes the same list by row in
+ * getBioAssayForRow — confirm for other implementations.
+ */
+ List getBioAssays();
+
+ /**
+ * Obtain the factor values for a given row (sample).
+ * @throws IndexOutOfBoundsException if the row index is out of bounds.
+ */
+ List getRow( int row );
+
+ /**
+ * Obtain the assay for a given row.
+ * @throws IndexOutOfBoundsException if the row index is out of bounds.
+ */
+ BioAssay getBioAssayForRow( int row );
+
+ /**
+ * Obtain the sample for a given row.
+ * @throws IndexOutOfBoundsException if the row index is out of bounds.
+ */
+ BioMaterial getBioMaterialForRow( int row );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java
new file mode 100644
index 0000000000..35f8f775d1
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/MultiAssayBulkDesignMatrix.java
@@ -0,0 +1,21 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+
+import java.util.Collection;
+
+/**
+ * Design matrix for multi-assay bulk expression data.
+ *
+ * A multi-assay bulk design matrix may have more than one {@link BioAssay} per {@link BioMaterial}.
+ * @author poirigui
+ * @see MultiAssayBulkExpressionDataMatrix
+ */
+public interface MultiAssayBulkDesignMatrix extends BulkDesignMatrix {
+
+ /**
+ * Obtain all the bioassays for a given row in the design matrix.
+ * @param row the row index
+ * NOTE(review): presumably throws IndexOutOfBoundsException for an invalid row like the row accessors of
+ * {@link DesignMatrix} — confirm.
+ */
+ Collection getBioAssaysForRow( int row );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java
new file mode 100644
index 0000000000..66c9bb3c1f
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrix.java
@@ -0,0 +1,47 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.experiment.ExperimentalDesign;
+import ubic.gemma.model.expression.experiment.ExperimentalFactor;
+import ubic.gemma.model.expression.experiment.FactorValue;
+
+import javax.annotation.Nullable;
+import java.util.Collection;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Design matrix for single-cell data, whose rows are addressed by an assay and a cell ID.
+ * @author poirigui
+ */
+public interface SingleCellDesignMatrix extends DesignMatrix {
+
+ /**
+ * Create a design matrix for a single-cell dimension.
+ * <p>
+ * The factors of the experimental design are sorted with {@link ExperimentalFactor#COMPARATOR} and the cell-level
+ * characteristics with {@link CellLevelCharacteristics#COMPARATOR} before being handed to
+ * {@link SingleCellDesignMatrixImpl}, together with all the assays of the dimension.
+ * @param dimension          the single-cell dimension providing the assays and cells
+ * @param experimentalDesign the design providing the experimental factors
+ * @param clcs               the cell-level characteristics to include
+ */
+ static SingleCellDesignMatrix from( SingleCellDimension dimension, ExperimentalDesign experimentalDesign, Collection clcs ) {
+ List factors = experimentalDesign.getExperimentalFactors().stream()
+ .sorted( ExperimentalFactor.COMPARATOR )
+ .collect( Collectors.toList() );
+ List clcsSorted = clcs.stream()
+ .sorted( CellLevelCharacteristics.COMPARATOR )
+ .collect( Collectors.toList() );
+ return new SingleCellDesignMatrixImpl( dimension, dimension.getBioAssays(), factors, clcsSorted );
+ }
+
+ /**
+ * Obtain the list of cell IDs in the design matrix.
+ *
+ * The list is not necessarily unique and has to be combined with {@link #getBioAssays()} to form a unique
+ * identifier.
+ */
+ List getCellIds();
+
+ /**
+ * Obtain the factor values for the row identified by an assay and a cell ID.
+ * @return the row; SingleCellDesignMatrixImpl returns null when the assay/cell ID pair is not found
+ */
+ @Nullable
+ List getRow( BioAssay bioAssay, String cellId );
+
+ /**
+ * Obtain the cell ID for a given row.
+ * @throws IndexOutOfBoundsException if the row index is out of bounds.
+ */
+ String getCellIdForRow( int row );
+
+ /**
+ * Obtain the index of the row identified by an assay and a cell ID.
+ * @return the row index; SingleCellDesignMatrixImpl returns -1 when the assay/cell ID pair is not found
+ */
+ int getRowIndex( BioAssay bioAssay, String cellId );
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java
new file mode 100644
index 0000000000..fd8b614f64
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java
@@ -0,0 +1,174 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import ubic.gemma.core.util.ListUtils;
+import ubic.gemma.model.common.description.Characteristic;
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+import ubic.gemma.model.expression.experiment.ExperimentalFactor;
+import ubic.gemma.model.expression.experiment.FactorType;
+import ubic.gemma.model.expression.experiment.FactorValue;
+import ubic.gemma.model.util.SparseRangeArrayList;
+
+import javax.annotation.Nullable;
+import java.util.*;
+
+public class SingleCellDesignMatrixImpl implements SingleCellDesignMatrix {
+
+ // rows
+ private final SparseRangeArrayList assays;
+ private final List cellIds;
+ private final Map> index;
+
+ // columns
+ private final List factors;
+ private final Map factorsIndex;
+
+ /**
+ * This is technically a matrix, but using {@link List} allows for sparse range array to be used for sample-level
+ * factors.
+ *
+ * Also, this is transposed w.r.t. to rows/columns that the interface requires. This is due to the fact that
+ * sparsity is better handled along factors
+ */
+ private final List> factorValues;
+
+ /**
+ * Build the row and column indices of the design matrix.
+ * <p>
+ * Rows are laid out assay by assay, in the order of {@code assays}; within an assay, cells follow the order
+ * returned by {@link SingleCellDimension#getCellIdsBySample(int)}. Columns are the given factors followed by one
+ * factor created for each cell-level characteristics.
+ * @throws IllegalArgumentException if one of the assays is not part of the dimension
+ */
+ public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List assays, List factors, List cellLevelCharacteristics ) {
+ // starting row of each assay
+ int[] bioAssayOffsets = new int[assays.size()];
+ // running row count
+ int k = 0;
+ List cellIdsL = new ArrayList<>( dimension.getNumberOfCells() );
+ Map> index = new HashMap<>( assays.size() );
+ for ( int i = 0; i < assays.size(); i++ ) {
+ BioAssay assay = assays.get( i );
+ // position of the assay in the dimension, which may differ from its position in the assays argument
+ int sampleIndex = dimension.getBioAssays().indexOf( assay );
+ if ( sampleIndex < 0 ) {
+ throw new IllegalArgumentException( assay + " is not part of " + dimension + "." );
+ }
+ List sampleCellIds = dimension.getCellIdsBySample( sampleIndex );
+ bioAssayOffsets[i] = k;
+ cellIdsL.addAll( sampleCellIds );
+ // cell ID -> row index, for the cells of this assay
+ Map cellid2pos = new HashMap<>();
+ for ( int j = 0; j < sampleCellIds.size(); j++ ) {
+ cellid2pos.put( sampleCellIds.get( j ), k + j );
+ }
+ index.put( assay, cellid2pos );
+ k += sampleCellIds.size();
+ }
+ this.assays = new SparseRangeArrayList<>( assays, bioAssayOffsets, k );
+ this.cellIds = cellIdsL;
+ this.index = index;
+ ArrayList factorsL = new ArrayList<>( factors.size() + cellLevelCharacteristics.size() );
+ factorsL.addAll( factors );
+ for ( CellLevelCharacteristics clc : cellLevelCharacteristics ) {
+ ExperimentalFactor factor = createFactorFromCellLevelCharacteristics( clc );
+ factorsL.add( factor );
+ }
+ this.factors = Collections.unmodifiableList( factorsL );
+ this.factorsIndex = Collections.unmodifiableMap( ListUtils.indexOfElements( factorsL ) );
+ // TODO: fill the matrix
+ // NOTE(review): the list is left empty, so getColumn(int) and getRow(int) fail with IndexOutOfBoundsException
+ // until it is filled; the initial capacity also uses the factors parameter, which excludes the factors created
+ // from cell-level characteristics.
+ this.factorValues = new ArrayList<>( factors.size() );
+ }
+
+ @Override
+ public List getFactors() {
+ return factors;
+ }
+
+ @Override
+ public List getBioAssays() {
+ return assays;
+ }
+
+ @Override
+ public List getCellIds() {
+ return cellIds;
+ }
+
+ @Override
+ public int columns() {
+ return factors.size();
+ }
+
+ @Override
+ public List getColumn( int column ) {
+ return factorValues.get( column );
+ }
+
+ @Nullable
+ @Override
+ public List getColumn( ExperimentalFactor factor ) {
+ int index = factors.indexOf( factor );
+ if ( index == -1 ) {
+ return null;
+ }
+ return getColumn( index );
+ }
+
+    /**
+     * Obtain the index of the column holding the given factor.
+     *
+     * @param factor the factor to look up
+     * @return the column index, or -1 if the factor is not part of this design matrix
+     */
+    @Override
+    public int getColumnIndex( ExperimentalFactor factor ) {
+        // Same lookup as getColumn(ExperimentalFactor), which also treats -1 as "not present"; the previous stub
+        // returned 0 for every factor, including absent ones.
+        // NOTE(review): factorsIndex may offer a constant-time lookup; confirm what ListUtils.indexOfElements
+        // maps before switching to it.
+        return factors.indexOf( factor );
+    }
+
+ @Override
+ public ExperimentalFactor getFactorForColumn( int column ) {
+ return factors.get( column );
+ }
+
+ @Nullable
+ @Override
+ public List getRow( BioAssay bioAssay, String cellId ) {
+ int row = getRowIndex( bioAssay, cellId );
+ if ( row == -1 ) {
+ return null;
+ }
+ return getRow( row );
+ }
+
+ @Override
+ public List getRow( int row ) {
+ List fvs = new ArrayList<>( factors.size() );
+ for ( int i = 0; i < factors.size(); i++ ) {
+ fvs.add( factorValues.get( i ).get( row ) );
+ }
+ return fvs;
+ }
+
+ @Override
+ public int rows() {
+ return cellIds.size();
+ }
+
+ @Override
+ public BioAssay getBioAssayForRow( int row ) {
+ return assays.get( row );
+ }
+
+ @Override
+ public BioMaterial getBioMaterialForRow( int row ) {
+ return getBioAssayForRow( row ).getSampleUsed();
+ }
+
+ @Override
+ public String getCellIdForRow( int row ) {
+ return cellIds.get( row );
+ }
+
+ @Override
+ public int getRowIndex( BioAssay bioAssay, String cellId ) {
+ Map cell2pos = index.get( bioAssay );
+ if ( cell2pos == null ) {
+ return -1;
+ }
+ return cell2pos.getOrDefault( cellId, -1 );
+ }
+
+ /**
+ * Create a categorical factor named after the given cell-level characteristics, with one factor value per
+ * characteristic.
+ * <p>
+ * The factor is created in memory with the factory methods; nothing here saves it.
+ */
+ private ExperimentalFactor createFactorFromCellLevelCharacteristics( CellLevelCharacteristics characteristics ) {
+ ExperimentalFactor factor = ExperimentalFactor.Factory.newInstance( characteristics.getName(), FactorType.CATEGORICAL );
+ for ( Characteristic c : characteristics.getCharacteristics() ) {
+ FactorValue fv = FactorValue.Factory.newInstance( factor, c );
+ factor.getFactorValues().add( fv );
+ }
+ return factor;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java
new file mode 100644
index 0000000000..73e649a88d
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/SingleCellMetadataWriter.java
@@ -0,0 +1,12 @@
+package ubic.gemma.core.datastructure.matrix.io;
+
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * Writer for the metadata of a single-cell dimension of an experiment.
+ */
+public interface SingleCellMetadataWriter {
+
+ /**
+ * Write the metadata of the given single-cell dimension to the writer.
+ * @param ee                  the experiment the dimension belongs to
+ * @param singleCellDimension the single-cell dimension to write
+ * @param writer              the destination
+ * @throws IOException if writing fails
+ */
+ void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException;
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java
new file mode 100644
index 0000000000..189f46d2ed
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/io/TabularSingleCellMetadataWriter.java
@@ -0,0 +1,132 @@
+package ubic.gemma.core.datastructure.matrix.io;
+
+import lombok.Setter;
+import lombok.extern.apachecommons.CommonsLog;
+import ubic.basecode.util.StringUtil;
+import ubic.gemma.core.util.TsvUtils;
+import ubic.gemma.model.common.description.Characteristic;
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+import ubic.gemma.model.expression.experiment.*;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+@CommonsLog
+public class TabularSingleCellMetadataWriter implements SingleCellMetadataWriter {
+
+ @Setter
+ private boolean useBioAssayIds = false;
+ @Setter
+ private boolean useRawColumnNames = false;
+ @Setter
+ private boolean autoFlush = false;
+
+    /**
+     * Write one header line followed by one line per cell of the given single-cell dimension.
+     * <p>
+     * Columns are the sample and cell identifiers, the experimental factors sorted with
+     * {@link ExperimentalFactor#COMPARATOR}, the cell type assignments sorted with
+     * {@link CellTypeAssignment#COMPARATOR} and the other cell-level characteristics sorted with
+     * {@link CellLevelCharacteristics#COMPARATOR}. If the experiment has no experimental design, a warning is
+     * logged and no factor columns are written.
+     *
+     * @param ee                  the experiment the dimension belongs to
+     * @param singleCellDimension the single-cell dimension to write
+     * @param writer              the destination
+     * @throws IOException if writing fails
+     */
+    @Override
+    public void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException {
+        ExperimentalDesign design = ee.getExperimentalDesign();
+        List<BioMaterial> samples = singleCellDimension.getBioAssays().stream()
+                .map( BioAssay::getSampleUsed )
+                .collect( Collectors.toList() );
+        List<ExperimentalFactor> factors;
+        Map<ExperimentalFactor, Map<BioMaterial, FactorValue>> factorValueMap;
+        if ( design != null ) {
+            factors = design.getExperimentalFactors().stream()
+                    .sorted( ExperimentalFactor.COMPARATOR )
+                    .collect( Collectors.toList() );
+            factorValueMap = ExperimentalDesignUtils.getFactorValueMap( design, samples );
+        } else {
+            log.warn( ee + " does not have an experimental design, no factors will be written." );
+            factors = Collections.emptyList();
+            // Do not hand a null design to ExperimentalDesignUtils; with no factors the map is never consulted.
+            factorValueMap = Collections.emptyMap();
+        }
+        List<CellLevelCharacteristics> clcs = new ArrayList<>( singleCellDimension.getCellTypeAssignments().size() + singleCellDimension.getCellLevelCharacteristics().size() );
+        singleCellDimension.getCellTypeAssignments().stream()
+                .sorted( CellTypeAssignment.COMPARATOR )
+                .forEach( clcs::add );
+        singleCellDimension.getCellLevelCharacteristics().stream()
+                .sorted( CellLevelCharacteristics.COMPARATOR )
+                .forEach( clcs::add );
+        writeHeader( factors, clcs, writer );
+        // position of the cell across all samples, used to look up cell-level characteristics
+        int cellIndex = 0;
+        for ( int sampleIndex = 0; sampleIndex < singleCellDimension.getBioAssays().size(); sampleIndex++ ) {
+            BioAssay bioAssay = singleCellDimension.getBioAssays().get( sampleIndex );
+            for ( String cellId : singleCellDimension.getCellIdsBySample( sampleIndex ) ) {
+                writeCell( bioAssay, cellId, cellIndex++, factors, factorValueMap, clcs, writer );
+            }
+        }
+    }
+
+ /**
+ * Write the tab-separated header line: {@code sample_id}, {@code cell_id}, one column per factor and one column
+ * per cell-level characteristics.
+ * <p>
+ * A cell-level characteristics without a name is labelled with the category of its first characteristic.
+ * @throws IllegalStateException if a cell-level characteristics has neither a name nor any characteristic
+ */
+ private void writeHeader( List factors, List clcs, Writer writer ) throws IOException {
+ String[] columnNames = new String[2 + factors.size() + clcs.size()];
+ int i = 0;
+ columnNames[i++] = "sample_id";
+ columnNames[i++] = "cell_id";
+ for ( ExperimentalFactor factor : factors ) {
+ columnNames[i++] = factor.getName();
+ }
+ for ( CellLevelCharacteristics clc : clcs ) {
+ if ( clc.getName() != null ) {
+ columnNames[i++] = clc.getName();
+ } else if ( !clc.getCharacteristics().isEmpty() ) {
+ // If the name is null, we can use the first characteristic's category as a fallback
+ Characteristic c = clc.getCharacteristics().iterator().next();
+ columnNames[i++] = c.getCategory();
+ } else {
+ throw new IllegalStateException( clc + " has no name nor characteristics, cannot write header." );
+ }
+ }
+ // presumably makeUnique only de-duplicates the names while makeNames also sanitizes them — confirm against
+ // StringUtil
+ if ( useRawColumnNames ) {
+ columnNames = StringUtil.makeUnique( columnNames );
+ } else {
+ columnNames = StringUtil.makeNames( columnNames, true );
+ }
+ for ( int j = 0; j < columnNames.length; j++ ) {
+ String colName = columnNames[j];
+ if ( j > 0 ) {
+ writer.append( "\t" );
+ }
+ writer.append( TsvUtils.format( colName ) );
+ }
+ writer.append( "\n" );
+ if ( autoFlush ) {
+ writer.flush();
+ }
+ }
+
+ /**
+ * Write the tab-separated line of a single cell: sample identifier, cell ID, one value per factor and one value
+ * per cell-level characteristics.
+ * @param bioAssay       the assay the cell belongs to
+ * @param cellId         the cell ID, written as-is
+ * @param cellIndex      the index passed to {@link CellLevelCharacteristics#getCharacteristic(int)}
+ * @param factors        the factors, in column order
+ * @param factorValueMap factor values by factor, then by sample; must have an entry for every factor
+ * @param clcs           the cell-level characteristics, in column order
+ * @param writer         the destination
+ * @throws IOException if writing fails
+ */
+ public void writeCell( BioAssay bioAssay, String cellId, int cellIndex, List factors, Map> factorValueMap, List clcs, Writer writer ) throws IOException {
+ // sample identifier: the assay ID, its short name (or name), or a constructed assay name
+ if ( useBioAssayIds ) {
+ writer.append( String.valueOf( bioAssay.getId() ) );
+ } else if ( useRawColumnNames ) {
+ writer.append( bioAssay.getShortName() != null ? bioAssay.getShortName() : bioAssay.getName() );
+ } else {
+ writer.append( ExpressionDataWriterUtils.constructAssayName( bioAssay ) );
+ }
+ // NOTE(review): unlike the factor and characteristic values below, the sample identifier and the cell ID are
+ // not passed through TsvUtils.format — confirm they cannot contain tabs or newlines.
+ writer.append( "\t" ).append( cellId );
+ for ( ExperimentalFactor factor : factors ) {
+ FactorValue value = factorValueMap.get( factor ).get( bioAssay.getSampleUsed() );
+ writer.append( "\t" );
+ if ( value != null ) {
+ writer.append( TsvUtils.format( FactorValueUtils.getValue( value, String.valueOf( TsvUtils.SUB_DELIMITER ) ) ) );
+ } else {
+ writer.append( TsvUtils.format( ( String ) null ) );
+ }
+ }
+ for ( CellLevelCharacteristics clc : clcs ) {
+ writer.append( "\t" );
+ Characteristic c = clc.getCharacteristic( cellIndex );
+ // a cell without a characteristic gets an empty field
+ if ( c != null ) {
+ writer.append( TsvUtils.format( c.getValue() ) );
+ }
+ }
+ writer.append( "\n" );
+ if ( autoFlush ) {
+ writer.flush();
+ }
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java
new file mode 100644
index 0000000000..eec2d71a58
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/util/r/RClient.java
@@ -0,0 +1,92 @@
+package ubic.gemma.core.util.r;
+
+import org.rosuda.REngine.REXP;
+import org.rosuda.REngine.REXPMismatchException;
+import org.rosuda.REngine.REngine;
+import org.rosuda.REngine.REngineException;
+import org.springframework.util.Assert;
+import ubic.basecode.util.StringUtil;
+
+import java.util.List;
+
+/**
+ * A high-level client for interacting with R.
+ * @author poirigui
+ */
+public class RClient implements AutoCloseable {
+
+ /**
+ * The R engine used for executing R commands.
+ */
+ private final REngine rEngine;
+
+ public RClient( REngineFactory rEngine ) {
+ try {
+ this.rEngine = rEngine.createREngine();
+ } catch ( Exception e ) {
+ throw new RClientException( e );
+ }
+ }
+
+ /**
+ * data.frame
+ */
+ public void assignDataFrame( String symbol, List columnNames, List rowNames, List