Skip to content

Commit e130c03

Browse files
committed
Integrate Harmony
Use rJava instead of outdated packages from Maven Central.
1 parent 5afe886 commit e130c03

21 files changed

Lines changed: 832 additions & 17 deletions

environment.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: gemma
2+
dependencies:
3+
- r=4.4
4+
- r-rserve
5+
- r-rjava
6+
- repeatmasker
7+
- blast
8+
- sra-tools
9+
- hdf5=1.12
10+
- python
11+
- pip
12+
- pip:
13+
- anndata
14+
- scipy
15+
- numpy

gemma-core/pom.xml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,38 @@
251251
<version>${hdf5.version}</version>
252252
</dependency>
253253

254+
<!-- rJava -->
255+
<dependency>
256+
<groupId>org.rosuda.REngine</groupId>
257+
<artifactId>REngine</artifactId>
258+
<version>${rJava.version}</version>
259+
<optional>true</optional>
260+
</dependency>
261+
<dependency>
262+
<groupId>org.rosuda.REngine</groupId>
263+
<artifactId>JRI</artifactId>
264+
<version>${rJava.version}</version>
265+
<optional>true</optional>
266+
</dependency>
267+
<dependency>
268+
<groupId>org.rosuda.REngine</groupId>
269+
<artifactId>JRIEngine</artifactId>
270+
<version>${rJava.version}</version>
271+
<optional>true</optional>
272+
</dependency>
273+
274+
<dependency>
275+
<groupId>org.rosuda.REngine</groupId>
276+
<artifactId>Rserve</artifactId>
277+
<version>${rserve.version}</version>
278+
<optional>true</optional>
279+
</dependency>
280+
<dependency>
281+
<groupId>com.kohlschutter.junixsocket</groupId>
282+
<artifactId>junixsocket-core</artifactId>
283+
<version>2.10.1</version>
284+
</dependency>
285+
254286
<!-- Gemma Slack Bot -->
255287
<dependency>
256288
<groupId>com.slack.api</groupId>

gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ public class ExpressionDataFileUtils {
2525
private static final String SC_DATA_SUFFIX = ".scdata";
2626
public static final String MEX_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".mex";
2727
public static final String TABULAR_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".tsv.gz";
28-
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".cellbrowser.tsv.gz";
28+
private static final String SC_METADATA_SUFFIX = ".scmetadata";
29+
public static final String TABULAR_SC_METADATA_SUFFIX = SC_METADATA_SUFFIX + ".tsv.gz";
30+
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_METADATA_SUFFIX + ".cellbrowser.tsv.gz";
2931

3032
// for single-cell metadata
3133
private static final String SC_METADATA_SUFFIX = ".scmetadata";
Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,51 @@
11
package ubic.gemma.core.analysis.singleCell.batcheffect;
22

3+
import org.rosuda.REngine.REXP;
4+
import org.rosuda.REngine.REXPNull;
35
import org.springframework.util.Assert;
46
import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
57
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
8+
import ubic.gemma.core.util.r.RClient;
9+
import ubic.gemma.core.util.r.REngineFactory;
610

11+
/**
12+
* Perform batch correction using the <a href="">Harmony algorithm</a>.
13+
* <p>
14+
* Requirements: an R engine with the Harmony R package installed.
15+
* @author poirigui
16+
*/
717
class Harmony implements BatchCorrection {
818

19+
private final REngineFactory rEngineFactory;
20+
21+
Harmony( REngineFactory rEngineFactory ) {
22+
this.rEngineFactory = rEngineFactory;
23+
}
24+
925
@Override
1026
public SingleCellExpressionDataMatrix<?> perform( SingleCellExpressionDataMatrix<?> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) {
1127
Assert.isTrue( dataMatrix.getBioAssays().equals( singleCellDesignMatrix.getBioAssays() ),
1228
"Assays in the data matrix must match exactly those of the design matrix." );
13-
// TODO: serialize both matrices to disk and call Harmony R package
14-
return dataMatrix;
29+
try ( RClient rEngine = new RClient( rEngineFactory ) ) {
30+
// TODO: serialize both matrices to disk and call Harmony R package
31+
rEngine.parseAndEval( "library(harmony);" );
32+
// rEngine.assignDataFrame( "dataMatrix", toDataFrame( dataMatrix ) );
33+
// rEngine.assignDataFrame( "designMatrix", toDataFrame( singleCellDesignMatrix ) );
34+
//language=R
35+
return fromDataFrame( rEngine.parseAndEval( "harmony::HarmonyMatrix(dataMatrix, designMatrix);" ) );
36+
}
37+
}
38+
39+
private REXP toDataFrame( SingleCellExpressionDataMatrix<?> dataMatrix ) {
40+
// Convert the SingleCellExpressionDataMatrix to an REXP object
41+
return new REXPNull();
42+
}
43+
44+
private REXP toDataFrame( SingleCellDesignMatrix singleCellDesignMatrix ) {
45+
return new REXPNull();
46+
}
47+
48+
private SingleCellExpressionDataMatrix<?> fromDataFrame( REXP rexp ) {
49+
return null;
1550
}
1651
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package ubic.gemma.core.analysis.singleCell.batcheffect;
2+
3+
import org.springframework.beans.factory.annotation.Autowired;
4+
import org.springframework.stereotype.Service;
5+
import org.springframework.transaction.annotation.Transactional;
6+
import org.springframework.util.Assert;
7+
import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
8+
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
9+
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrixUtils;
10+
import ubic.gemma.core.util.r.REngineFactory;
11+
import ubic.gemma.model.common.quantitationtype.QuantitationType;
12+
import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
13+
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
14+
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
15+
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
16+
import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
17+
18+
import java.util.ArrayList;
19+
import java.util.Collection;
20+
import java.util.List;
21+
22+
@Service
23+
public class SingleCellBatchCorrectionServiceImpl implements SingleCellBatchCorrectionService {
24+
25+
@Autowired
26+
private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
27+
28+
@Autowired
29+
private REngineFactory rEngineFactory;
30+
31+
@Override
32+
@Transactional
33+
public QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ) {
34+
Assert.notNull( ee.getExperimentalDesign(), ee + " does not have experimental design. It is required to perform batch correction." );
35+
BatchCorrection m = createBatchCorrection( method );
36+
SingleCellDimension dimension = singleCellExpressionExperimentService.getSingleCellDimension( ee, qt );
37+
if ( dimension == null ) {
38+
throw new IllegalArgumentException( qt + " does not have single cell dimension." );
39+
}
40+
List<SingleCellExpressionDataVector> vectors = new ArrayList<>( singleCellExpressionExperimentService.getSingleCellDataVectors( ee, qt ) );
41+
SingleCellExpressionDataMatrix<?> dataMatrix = SingleCellExpressionDataMatrix.getMatrix( vectors );
42+
Collection<CellLevelCharacteristics> clcs = new ArrayList<>();
43+
// TODO: select relevant CTAs and CLCs
44+
clcs.addAll( dimension.getCellTypeAssignments() );
45+
clcs.addAll( dimension.getCellLevelCharacteristics() );
46+
SingleCellDesignMatrix designMatrix = SingleCellDesignMatrix.from( dimension, ee.getExperimentalDesign(), clcs );
47+
SingleCellExpressionDataMatrix<?> correctedMatrix = m.perform( dataMatrix, designMatrix );
48+
QuantitationType correctedQt = correctedMatrix.getQuantitationType();
49+
List<SingleCellExpressionDataVector> correctedVectors = SingleCellExpressionDataMatrixUtils.toVectors( correctedMatrix );
50+
String details = "Batch correction using " + method + " for " + ee.getShortName() + " on quantitation type " + qt.getName();
51+
singleCellExpressionExperimentService.addSingleCellDataVectors( ee, correctedQt, correctedVectors, details );
52+
return correctedQt;
53+
}
54+
55+
private BatchCorrection createBatchCorrection( SingleCellBatchCorrectionMethod method ) {
56+
switch ( method ) {
57+
case HARMONY:
58+
return new Harmony( rEngineFactory );
59+
case COMBAT:
60+
return new ComBat();
61+
default:
62+
throw new IllegalArgumentException( "Unknown batch correction method: " + method );
63+
}
64+
}
65+
}

gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package ubic.gemma.core.datastructure.matrix;
22

3+
import ubic.gemma.core.util.ListUtils;
34
import ubic.gemma.model.common.description.Characteristic;
45
import ubic.gemma.model.expression.bioAssay.BioAssay;
56
import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
@@ -11,20 +12,33 @@
1112
import ubic.gemma.model.util.SparseRangeArrayList;
1213

1314
import javax.annotation.Nullable;
14-
import java.util.ArrayList;
15-
import java.util.Collections;
16-
import java.util.List;
15+
import java.util.*;
1716

1817
public class SingleCellDesignMatrixImpl implements SingleCellDesignMatrix {
1918

19+
// rows
2020
private final SparseRangeArrayList<BioAssay> assays;
2121
private final List<String> cellIds;
22+
private final Map<BioAssay, Map<String, Integer>> index;
23+
24+
// columns
2225
private final List<ExperimentalFactor> factors;
26+
private final Map<ExperimentalFactor, Integer> factorsIndex;
27+
28+
/**
29+
* This is technically a matrix, but using {@link List} allows for sparse range array to be used for sample-level
30+
* factors.
31+
* <p>
32+
* Also, this is transposed w.r.t. to rows/columns that the interface requires. This is due to the fact that
33+
* sparsity is better handled along factors
34+
*/
35+
private final List<List<FactorValue>> factorValues;
2336

2437
public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List<BioAssay> assays, List<ExperimentalFactor> factors, List<CellLevelCharacteristics> cellLevelCharacteristics ) {
2538
int[] bioAssayOffsets = new int[assays.size()];
2639
int k = 0;
2740
List<String> cellIdsL = new ArrayList<>( dimension.getNumberOfCells() );
41+
Map<BioAssay, Map<String, Integer>> index = new HashMap<>( assays.size() );
2842
for ( int i = 0; i < assays.size(); i++ ) {
2943
BioAssay assay = assays.get( i );
3044
int sampleIndex = dimension.getBioAssays().indexOf( assay );
@@ -34,17 +48,26 @@ public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List<BioAssay>
3448
List<String> sampleCellIds = dimension.getCellIdsBySample( sampleIndex );
3549
bioAssayOffsets[i] = k;
3650
cellIdsL.addAll( sampleCellIds );
51+
Map<String, Integer> cellid2pos = new HashMap<>();
52+
for ( int j = 0; j < sampleCellIds.size(); j++ ) {
53+
cellid2pos.put( sampleCellIds.get( j ), k + j );
54+
}
55+
index.put( assay, cellid2pos );
3756
k += sampleCellIds.size();
3857
}
3958
this.assays = new SparseRangeArrayList<>( assays, bioAssayOffsets, k );
4059
this.cellIds = cellIdsL;
60+
this.index = index;
4161
ArrayList<ExperimentalFactor> factorsL = new ArrayList<>( factors.size() + cellLevelCharacteristics.size() );
4262
factorsL.addAll( factors );
4363
for ( CellLevelCharacteristics clc : cellLevelCharacteristics ) {
4464
ExperimentalFactor factor = createFactorFromCellLevelCharacteristics( clc );
4565
factorsL.add( factor );
4666
}
4767
this.factors = Collections.unmodifiableList( factorsL );
68+
this.factorsIndex = Collections.unmodifiableMap( ListUtils.indexOfElements( factorsL ) );
69+
// TODO: fill the matrix
70+
this.factorValues = new ArrayList<>( factors.size() );
4871
}
4972

5073
@Override
@@ -69,13 +92,17 @@ public int columns() {
6992

7093
@Override
7194
public List<FactorValue> getColumn( int column ) {
72-
return new FactorValue[0];
95+
return factorValues.get( column );
7396
}
7497

7598
@Nullable
7699
@Override
77100
public List<FactorValue> getColumn( ExperimentalFactor factor ) {
78-
return new FactorValue[0];
101+
int index = factors.indexOf( factor );
102+
if ( index == -1 ) {
103+
return null;
104+
}
105+
return getColumn( index );
79106
}
80107

81108
@Override
@@ -91,22 +118,30 @@ public ExperimentalFactor getFactorForColumn( int column ) {
91118
@Nullable
92119
@Override
93120
public List<FactorValue> getRow( BioAssay bioAssay, String cellId ) {
94-
return new FactorValue[0];
121+
int row = getRowIndex( bioAssay, cellId );
122+
if ( row == -1 ) {
123+
return null;
124+
}
125+
return getRow( row );
95126
}
96127

97128
@Override
98129
public List<FactorValue> getRow( int row ) {
99-
return new FactorValue[0];
130+
List<FactorValue> fvs = new ArrayList<>( factors.size() );
131+
for ( int i = 0; i < factors.size(); i++ ) {
132+
fvs.add( factorValues.get( i ).get( row ) );
133+
}
134+
return fvs;
100135
}
101136

102137
@Override
103138
public int rows() {
104-
return 0;
139+
return cellIds.size();
105140
}
106141

107142
@Override
108143
public BioAssay getBioAssayForRow( int row ) {
109-
return null;
144+
return assays.get( row );
110145
}
111146

112147
@Override
@@ -116,12 +151,16 @@ public BioMaterial getBioMaterialForRow( int row ) {
116151

117152
@Override
118153
public String getCellIdForRow( int row ) {
119-
return "";
154+
return cellIds.get( row );
120155
}
121156

122157
@Override
123158
public int getRowIndex( BioAssay bioAssay, String cellId ) {
124-
return 0;
159+
Map<String, Integer> cell2pos = index.get( bioAssay );
160+
if ( cell2pos == null ) {
161+
return -1;
162+
}
163+
return cell2pos.getOrDefault( cellId, -1 );
125164
}
126165

127166
private ExperimentalFactor createFactorFromCellLevelCharacteristics( CellLevelCharacteristics characteristics ) {
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package ubic.gemma.core.datastructure.matrix.io;
2+
3+
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
4+
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
5+
6+
import java.io.IOException;
7+
import java.io.Writer;
8+
9+
public interface SingleCellMetadataWriter {
10+
11+
void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException;
12+
}

0 commit comments

Comments
 (0)