Skip to content

Commit 9824fd1

Browse files
committed
Handle more cases for splitting datasets
Make splitting the experiment transactional by isolating the relevant operations in a helper service. Use the generic BulkDataSlicerUtils and SingleCellSlicerUtils to slice samples into splits; these utilities handle all the possible fixed-size encoding cases and single-cell data slicing. Add options to the splitExperiment CLI to skip post-processing and to delete the original experiment automatically.
1 parent 2498268 commit 9824fd1

12 files changed

Lines changed: 828 additions & 603 deletions

File tree

gemma-cli/src/main/java/ubic/gemma/apps/SplitExperimentCli.java

Lines changed: 16 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,14 @@
2020
package ubic.gemma.apps;
2121

2222
import org.apache.commons.cli.CommandLine;
23-
import org.apache.commons.cli.Option;
2423
import org.apache.commons.cli.Options;
2524
import org.apache.commons.cli.ParseException;
2625
import org.springframework.beans.factory.annotation.Autowired;
26+
import ubic.gemma.cli.util.EntityLocator;
2727
import ubic.gemma.core.analysis.preprocess.SplitExperimentService;
2828
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
29-
import ubic.gemma.model.expression.experiment.ExperimentFactorUtils;
3029
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
31-
import ubic.gemma.model.expression.experiment.ExperimentalFactorValueObject;
3230
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
33-
import ubic.gemma.persistence.service.expression.experiment.ExperimentalFactorService;
34-
35-
import java.util.Collection;
3631

3732
/**
3833
* Split an experiment into parts based on an experimental factor
@@ -41,18 +36,18 @@
4136
*/
4237
public class SplitExperimentCli extends ExpressionExperimentManipulatingCLI {
4338

44-
/**
45-
*
46-
*/
47-
private static final String FACTOR_OPTION = "factor";
39+
private static final String FACTOR_OPTION = "factor",
40+
SKIP_POST_PROCESSING_OPTION = "nopost",
41+
DELETE_ORIGINAL_EXPERIMENT_OPTION = "deleteOriginalExperiment";
4842

4943
@Autowired
5044
private SplitExperimentService serv;
5145
@Autowired
52-
private ExperimentalFactorService efs;
46+
private EntityLocator entityLocator;
5347

54-
private Long factorId;
55-
private String factorName;
48+
private String factorIdentifier;
49+
private boolean skipPostProcessing;
50+
private boolean deleteOriginalExperiment;
5651

5752
public SplitExperimentCli() {
5853
super();
@@ -71,80 +66,23 @@ public String getShortDesc() {
7166

7267
@Override
7368
protected void buildExperimentOptions( Options options ) {
74-
options.addOption( Option.builder( FACTOR_OPTION ).hasArg()
75-
.desc( "ID numbers, categories or names of the factor to use, with spaces replaced by underscores (must not be 'batch')" )
76-
.build() );
69+
options.addRequiredOption( FACTOR_OPTION, "factor", true, "ID numbers, categories or names of the factor to use, with spaces replaced by underscores (must not be 'batch')" );
70+
options.addOption( SKIP_POST_PROCESSING_OPTION, "no-post-processing", false, "Skip post-processing of resulting splits if applicable." );
71+
options.addOption( DELETE_ORIGINAL_EXPERIMENT_OPTION, "delete-original-experiment", false, "Delete the original experiment once the split succeeds." );
7772
}
7873

7974
@Override
8075
protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
81-
if ( !commandLine.hasOption( FACTOR_OPTION ) ) {
82-
throw new IllegalArgumentException( "Please specify the factor" );
83-
}
84-
String rawFactor = commandLine.getOptionValue( FACTOR_OPTION );
85-
try {
86-
this.factorId = Long.parseLong( rawFactor );
87-
} catch ( NumberFormatException e ) {
88-
this.factorName = rawFactor;
89-
}
76+
factorIdentifier = commandLine.getOptionValue( FACTOR_OPTION );
77+
skipPostProcessing = commandLine.hasOption( SKIP_POST_PROCESSING_OPTION );
78+
deleteOriginalExperiment = commandLine.hasOption( DELETE_ORIGINAL_EXPERIMENT_OPTION );
9079
}
9180

9281
@Override
9382
protected void processExpressionExperiment( ExpressionExperiment ee ) {
9483
ee = this.eeService.thawLite( ee );
95-
ExperimentalFactor splitOn = this.guessFactor( ee );
96-
ExpressionExperimentSet eeSet = serv.split( ee, splitOn, true );
84+
ExperimentalFactor splitOn = entityLocator.locateExperimentalFactor( ee, factorIdentifier );
85+
ExpressionExperimentSet eeSet = serv.split( ee, splitOn, !skipPostProcessing, deleteOriginalExperiment );
9786
addSuccessObject( ee, "Experiment was split on " + splitOn + " into " + eeSet.getExperiments().size() + " parts." );
9887
}
99-
100-
/**
101-
* Adapted from code in DifferentialExpressionAnalysisCli
102-
*/
103-
private ExperimentalFactor guessFactor( ExpressionExperiment ee ) {
104-
if ( ee.getExperimentalDesign() == null ) {
105-
throw new IllegalStateException( ee + " does not have an experimental design, it cannot be split on a factor." );
106-
}
107-
108-
if ( this.factorName != null ) {
109-
110-
Collection<ExperimentalFactor> experimentalFactors = ee.getExperimentalDesign().getExperimentalFactors();
111-
for ( ExperimentalFactor experimentalFactor : experimentalFactors ) {
112-
113-
// has already implemented way of figuring out human-friendly name of factor value.
114-
ExperimentalFactorValueObject fvo = new ExperimentalFactorValueObject( experimentalFactor );
115-
116-
// do not attempt to switch on 'batch'
117-
if ( ExperimentFactorUtils.isBatchFactor( experimentalFactor ) ) {
118-
continue;
119-
}
120-
121-
if ( factorName.contains( experimentalFactor.getName().replaceAll( " ", "_" ) ) ) {
122-
return experimentalFactor;
123-
} else if ( fvo.getCategory() != null && factorName
124-
.contains( fvo.getCategory().replaceAll( " ", "_" ) ) ) {
125-
return experimentalFactor;
126-
}
127-
}
128-
129-
throw new IllegalArgumentException( "Didn't find factor the provided factor name " );
130-
131-
}
132-
133-
ExperimentalFactor factor = efs.loadOrFail( factorId );
134-
factor = efs.thaw( factor );
135-
if ( factor == null ) {
136-
throw new IllegalArgumentException( "No factor for id=" + factorId );
137-
}
138-
if ( !factor.getExperimentalDesign().equals( ee.getExperimentalDesign() ) ) {
139-
throw new IllegalArgumentException( "Factor with id=" + factorId + " does not belong to " + ee );
140-
}
141-
142-
if ( ExperimentFactorUtils.isBatchFactor( factor ) ) {
143-
throw new IllegalArgumentException( "Selected factor looks like batch, split not allowed, choose another factor instead" );
144-
}
145-
146-
return factor;
147-
148-
}
149-
15088
}

0 commit comments

Comments
 (0)