Skip to content

Commit fd60f67

Browse files
committed
Integrate Harmony
1 parent 0ae879b commit fd60f67

17 files changed

Lines changed: 665 additions & 16 deletions

File tree

environment.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
name: gemma
2+
dependencies:
3+
- r=4.4
4+
- r-rserve
5+
- repeatmasker
6+
- blast
7+
- sra-tools
8+
- hdf5=1.12
9+
- python
10+
- pip
11+
- pip:
12+
- anndata
13+
- scipy
14+
- numpy

gemma-core/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,23 @@
253253
<systemPath>${hdf5.jarPath}</systemPath>
254254
</dependency>
255255

256+
<!-- REngine + Rserve -->
257+
<dependency>
258+
<groupId>org.rosuda.REngine</groupId>
259+
<artifactId>REngine</artifactId>
260+
<version>2.1.0</version>
261+
</dependency>
262+
<dependency>
263+
<groupId>org.rosuda.REngine</groupId>
264+
<artifactId>Rserve</artifactId>
265+
<version>1.8.1</version>
266+
</dependency>
267+
<dependency>
268+
<groupId>com.kohlschutter.junixsocket</groupId>
269+
<artifactId>junixsocket-core</artifactId>
270+
<version>2.10.1</version>
271+
</dependency>
272+
256273
<!-- Gemma Slack Bot -->
257274
<dependency>
258275
<groupId>com.slack.api</groupId>

gemma-core/src/main/java/ubic/gemma/core/analysis/service/ExpressionDataFileUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ public class ExpressionDataFileUtils {
2525
private static final String SC_DATA_SUFFIX = ".scdata";
2626
public static final String MEX_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".mex";
2727
public static final String TABULAR_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".tsv.gz";
28-
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_DATA_SUFFIX + ".cellbrowser.tsv.gz";
28+
private static final String SC_METADATA_SUFFIX = ".scmetadata";
29+
public static final String TABULAR_SC_METADATA_SUFFIX = SC_METADATA_SUFFIX + ".tsv.gz";
30+
public static final String CELL_BROWSER_SC_DATA_SUFFIX = SC_METADATA_SUFFIX + ".cellbrowser.tsv.gz";
2931

3032
// for bulk (raw or processed vectors)
3133
private static final String BULK_DATA_SUFFIX = ".data";
Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,52 @@
11
package ubic.gemma.core.analysis.singleCell.batcheffect;
22

3+
import org.rosuda.REngine.REXP;
4+
import org.rosuda.REngine.REXPNull;
35
import org.springframework.util.Assert;
46
import ubic.gemma.core.datastructure.matrix.SingleCellDesignMatrix;
57
import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
8+
import ubic.gemma.core.util.r.RClient;
69

10+
import java.nio.file.Path;
11+
12+
/**
13+
* Perform batch correction using the <a href="">Harmony algorithm</a>.
14+
* <p>
15+
* Requirements: an R engine with the Harmony R package installed.
16+
* @author poirigui
17+
*/
718
class Harmony implements BatchCorrection {
819

20+
private final Path rExe;
21+
22+
Harmony( Path rExe ) {
23+
this.rExe = rExe;
24+
}
25+
926
@Override
1027
public SingleCellExpressionDataMatrix<?> perform( SingleCellExpressionDataMatrix<?> dataMatrix, SingleCellDesignMatrix singleCellDesignMatrix ) {
1128
Assert.isTrue( dataMatrix.getBioAssays().equals( singleCellDesignMatrix.getBioAssays() ),
1229
"Assays in the data matrix must match exactly those of the design matrix." );
13-
// TODO: serialize both matrices to disk and call Harmony R package
14-
return dataMatrix;
30+
try ( RClient rEngine = new RClient( rExe ) ) {
31+
// TODO: serialize both matrices to disk and call Harmony R package
32+
rEngine.parseAndEval( "library(harmony);" );
33+
rEngine.assignDataFrame( "dataMatrix", toDataFrame( dataMatrix ) );
34+
rEngine.assignDataFrame( "designMatrix", toDataFrame( singleCellDesignMatrix ) );
35+
//language=R
36+
return fromDataFrame( rEngine.parseAndEval( "harmony::HarmonyMatrix(dataMatrix, designMatrix);" ) );
37+
}
38+
}
39+
40+
private REXP toDataFrame( SingleCellExpressionDataMatrix<?> dataMatrix ) {
41+
// Convert the SingleCellExpressionDataMatrix to an REXP object
42+
return new REXPNull();
43+
}
44+
45+
private REXP toDataFrame( SingleCellDesignMatrix singleCellDesignMatrix ) {
46+
return new REXPNull();
47+
}
48+
49+
private SingleCellExpressionDataMatrix<?> fromDataFrame( REXP rexp ) {
50+
return null;
1551
}
1652
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package ubic.gemma.core.analysis.singleCell.batcheffect;
2+
3+
import org.springframework.beans.factory.annotation.Autowired;
4+
import org.springframework.beans.factory.annotation.Value;
5+
import org.springframework.stereotype.Service;
6+
import org.springframework.transaction.annotation.Transactional;
7+
import ubic.gemma.model.common.quantitationtype.QuantitationType;
8+
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
9+
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
10+
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
11+
import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
12+
13+
import java.nio.file.Path;
14+
import java.util.Collection;
15+
16+
@Service
17+
public class SingleCellBatchCorrectionServiceImpl implements SingleCellBatchCorrectionService {
18+
19+
@Autowired
20+
private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
21+
22+
@Value("${r.exe}")
23+
private Path rExe;
24+
25+
@Override
26+
@Transactional
27+
public QuantitationType batchCorrect( ExpressionExperiment ee, QuantitationType qt, SingleCellBatchCorrectionMethod method ) {
28+
SingleCellDimension dimension = singleCellExpressionExperimentService.getSingleCellDimension( ee, qt );
29+
if ( dimension == null ) {
30+
throw new IllegalArgumentException( qt + " does not have single cell dimension." );
31+
}
32+
Collection<SingleCellExpressionDataVector> vectors = singleCellExpressionExperimentService.getSingleCellDataVectors( ee, qt );
33+
return null;
34+
}
35+
36+
private BatchCorrection createBatchCorrection( SingleCellBatchCorrectionMethod method ) {
37+
switch ( method ) {
38+
case HARMONY:
39+
return new Harmony( rExe );
40+
case COMBAT:
41+
return new ComBat();
42+
default:
43+
throw new IllegalArgumentException( "Unknown batch correction method: " + method );
44+
}
45+
}
46+
}

gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellDesignMatrixImpl.java

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package ubic.gemma.core.datastructure.matrix;
22

3+
import ubic.gemma.core.util.ListUtils;
34
import ubic.gemma.model.common.description.Characteristic;
45
import ubic.gemma.model.expression.bioAssay.BioAssay;
56
import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
@@ -11,20 +12,33 @@
1112
import ubic.gemma.model.util.SparseRangeArrayList;
1213

1314
import javax.annotation.Nullable;
14-
import java.util.ArrayList;
15-
import java.util.Collections;
16-
import java.util.List;
15+
import java.util.*;
1716

1817
public class SingleCellDesignMatrixImpl implements SingleCellDesignMatrix {
1918

19+
// rows
2020
private final SparseRangeArrayList<BioAssay> assays;
2121
private final List<String> cellIds;
22+
private final Map<BioAssay, Map<String, Integer>> index;
23+
24+
// columns
2225
private final List<ExperimentalFactor> factors;
26+
private final Map<ExperimentalFactor, Integer> factorsIndex;
27+
28+
/**
29+
* This is technically a matrix, but using {@link List} allows for sparse range array to be used for sample-level
30+
* factors.
31+
* <p>
32+
* Also, this is transposed w.r.t. to rows/columns that the interface requires. This is due to the fact that
33+
* sparsity is better handled along factors
34+
*/
35+
private final List<List<FactorValue>> factorValues;
2336

2437
public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List<BioAssay> assays, List<ExperimentalFactor> factors, List<CellLevelCharacteristics> cellLevelCharacteristics ) {
2538
int[] bioAssayOffsets = new int[assays.size()];
2639
int k = 0;
2740
List<String> cellIdsL = new ArrayList<>( dimension.getNumberOfCells() );
41+
Map<BioAssay, Map<String, Integer>> index = new HashMap<>( assays.size() );
2842
for ( int i = 0; i < assays.size(); i++ ) {
2943
BioAssay assay = assays.get( i );
3044
int sampleIndex = dimension.getBioAssays().indexOf( assay );
@@ -34,17 +48,26 @@ public SingleCellDesignMatrixImpl( SingleCellDimension dimension, List<BioAssay>
3448
List<String> sampleCellIds = dimension.getCellIdsBySample( sampleIndex );
3549
bioAssayOffsets[i] = k;
3650
cellIdsL.addAll( sampleCellIds );
51+
Map<String, Integer> cellid2pos = new HashMap<>();
52+
for ( int j = 0; j < sampleCellIds.size(); j++ ) {
53+
cellid2pos.put( sampleCellIds.get( j ), k + j );
54+
}
55+
index.put( assay, cellid2pos );
3756
k += sampleCellIds.size();
3857
}
3958
this.assays = new SparseRangeArrayList<>( assays, bioAssayOffsets, k );
4059
this.cellIds = cellIdsL;
60+
this.index = index;
4161
ArrayList<ExperimentalFactor> factorsL = new ArrayList<>( factors.size() + cellLevelCharacteristics.size() );
4262
factorsL.addAll( factors );
4363
for ( CellLevelCharacteristics clc : cellLevelCharacteristics ) {
4464
ExperimentalFactor factor = createFactorFromCellLevelCharacteristics( clc );
4565
factorsL.add( factor );
4666
}
4767
this.factors = Collections.unmodifiableList( factorsL );
68+
this.factorsIndex = Collections.unmodifiableMap( ListUtils.indexOfElements( factorsL ) );
69+
// TODO: fill the matrix
70+
this.factorValues = new ArrayList<>( factors.size() );
4871
}
4972

5073
@Override
@@ -69,13 +92,17 @@ public int columns() {
6992

7093
@Override
7194
public List<FactorValue> getColumn( int column ) {
72-
return new FactorValue[0];
95+
return factorValues.get( column );
7396
}
7497

7598
@Nullable
7699
@Override
77100
public List<FactorValue> getColumn( ExperimentalFactor factor ) {
78-
return new FactorValue[0];
101+
int index = factors.indexOf( factor );
102+
if ( index == -1 ) {
103+
return null;
104+
}
105+
return getColumn( index );
79106
}
80107

81108
@Override
@@ -91,22 +118,30 @@ public ExperimentalFactor getFactorForColumn( int column ) {
91118
@Nullable
92119
@Override
93120
public List<FactorValue> getRow( BioAssay bioAssay, String cellId ) {
94-
return new FactorValue[0];
121+
int row = getRowIndex( bioAssay, cellId );
122+
if ( row == -1 ) {
123+
return null;
124+
}
125+
return getRow( row );
95126
}
96127

97128
@Override
98129
public List<FactorValue> getRow( int row ) {
99-
return new FactorValue[0];
130+
List<FactorValue> fvs = new ArrayList<>( factors.size() );
131+
for ( int i = 0; i < factors.size(); i++ ) {
132+
fvs.add( factorValues.get( i ).get( row ) );
133+
}
134+
return fvs;
100135
}
101136

102137
@Override
103138
public int rows() {
104-
return 0;
139+
return cellIds.size();
105140
}
106141

107142
@Override
108143
public BioAssay getBioAssayForRow( int row ) {
109-
return null;
144+
return assays.get( row );
110145
}
111146

112147
@Override
@@ -116,12 +151,16 @@ public BioMaterial getBioMaterialForRow( int row ) {
116151

117152
@Override
118153
public String getCellIdForRow( int row ) {
119-
return "";
154+
return cellIds.get( row );
120155
}
121156

122157
@Override
123158
public int getRowIndex( BioAssay bioAssay, String cellId ) {
124-
return 0;
159+
Map<String, Integer> cell2pos = index.get( bioAssay );
160+
if ( cell2pos == null ) {
161+
return -1;
162+
}
163+
return cell2pos.getOrDefault( cellId, -1 );
125164
}
126165

127166
private ExperimentalFactor createFactorFromCellLevelCharacteristics( CellLevelCharacteristics characteristics ) {
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package ubic.gemma.core.datastructure.matrix.io;
2+
3+
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
4+
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
5+
6+
import java.io.IOException;
7+
import java.io.Writer;
8+
9+
public interface SingleCellMetadataWriter {
10+
11+
void write( ExpressionExperiment ee, SingleCellDimension singleCellDimension, Writer writer ) throws IOException;
12+
}

0 commit comments

Comments
 (0)