From 2085baf05431e737fedb6f22ba9bcadcc67028d4 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 30 Oct 2025 08:14:59 -0700 Subject: [PATCH] Improve detection of experiment type WIP --- .../ExpressionExperimentService.java | 18 +++++- .../ExpressionExperimentServiceImpl.java | 60 ++++++++++++++++++- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 618b4c2f22..a8c58340fb 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -853,7 +853,7 @@ Map getSubSetsByFactorValue( Taxon getTaxon( ExpressionExperiment expressionExperiment ); /** - * Indicate if the given experiment is a single-cell experiment. + * Indicate if the given experiment is a single-cell RNA-Seq experiment. *

* Gemma does not treat single-cell experiments differently from other experiments, so we need to rely on various * aspect of the dataset to determine if it is a single-cell experiment. @@ -861,11 +861,23 @@ Map getSubSetsByFactorValue( boolean isSingleCell( ExpressionExperiment ee ); /** - * @param expressionExperiment ee - * @return true if this experiment was run on a sequencing-based platform. + * Indicate if the given experiment is a bulk RNA-Seq experiment. + */ + boolean isBulkRNASeq( ExpressionExperiment ee ); + + /** + * Indicate if the given experiment is an RNA-Seq experiment. + *

+ * This includes single-cell, bulk and potentially other kinds of RNA-Seq experiments. */ boolean isRNASeq( ExpressionExperiment expressionExperiment ); + /** + * Indicate if the given experiment is a microarray experiment. + * @return true if the experiment is a microarray experiment, false otherwise + */ + boolean isMicroarray( ExpressionExperiment expressionExperiment ); + /** * Check if the dataset is either troubled or uses a troubled platform. */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 9f1fb52d1e..b8815317cf 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -1593,13 +1593,59 @@ public boolean isSingleCell( ExpressionExperiment ee ) { && ee.getCharacteristics().stream() .noneMatch( c -> hasCategory( c, Categories.ASSAY ) && hasValue( c, Values.FLUORESCENCE_ACTIVATED_CELL_SORTING ) ) ) - || expressionExperimentDao.hasSingleCellQuantitationTypes( ee ); + // more expensive, check the presence of SC vectors + || hasSingleCellData( ee ); + } + + @Override + public boolean isBulkRNASeq( ExpressionExperiment ee ) { + return ee.getCharacteristics().stream() + .anyMatch( c -> hasCategory( c, Categories.ASSAY ) + && hasValue( c, Values.BULK_RNA_SEQ ) ) + || + // include FAC-sorted single-cell datasets + ( ee.getCharacteristics().stream() + .anyMatch( c -> hasCategory( c, Categories.ASSAY ) && hasAnyValue( c, + Values.SINGLE_NUCLEUS_RNA_SEQUENCING_ASSAY, + Values.SINGLE_CELL_RNA_SEQUENCING_ASSAY, + Values.RNASEQ_OF_CODING_RNA_FROM_SINGLE_CELLS, + Values.SINGLE_NUCLEUS_RNA_SEQUENCING, + Values.SINGLE_CELL_RNA_SEQUENCING + ) ) + && ee.getCharacteristics().stream() + .anyMatch( c -> hasCategory( c, Categories.ASSAY ) + && hasValue( c,
Values.FLUORESCENCE_ACTIVATED_CELL_SORTING ) ) ) + || hasBulkRnaSeqData( ee ); + // TODO: check the presence of vectors from the RNA-Seq pipeline } @Override @Transactional(readOnly = true) - public boolean isRNASeq( ExpressionExperiment expressionExperiment ) { - Collection ads = this.expressionExperimentDao.getArrayDesignsUsed( expressionExperiment ); + public boolean isRNASeq( ExpressionExperiment ee ) { + return ee.getCharacteristics().stream() + .anyMatch( c -> hasCategory( c, Categories.ASSAY ) && hasAnyValue( c, + Values.SINGLE_NUCLEUS_RNA_SEQUENCING_ASSAY, + Values.SINGLE_CELL_RNA_SEQUENCING_ASSAY, + Values.RNASEQ_OF_CODING_RNA_FROM_SINGLE_CELLS, + Values.SINGLE_NUCLEUS_RNA_SEQUENCING, + Values.SINGLE_CELL_RNA_SEQUENCING, + Values.BULK_RNA_SEQ + ) ) + || ( !isMicroarray( ee ) && hasSequencingPlatform( ee ) ) + || hasSingleCellData( ee ) + || hasBulkRnaSeqData( ee ) + ; + } + + @Override + @Transactional(readOnly = true) + public boolean isMicroarray( ExpressionExperiment ee ) { + return ee.getCharacteristics().stream() + .anyMatch( c -> hasCategory( c, Categories.ASSAY ) && hasValue( c, Values.MICROARRAY ) ); + } + + private boolean hasSequencingPlatform( ExpressionExperiment ee ) { + Collection ads = this.expressionExperimentDao.getArrayDesignsUsed( ee ); /* * This isn't completely bulletproof. We are simply assuming that if any of the platforms isn't a microarray (or * 'OTHER'), it's RNA-seq. @@ -1613,7 +1659,15 @@ public boolean isRNASeq( ExpressionExperiment expressionExperiment ) { } } return false; + } + + private boolean hasBulkRnaSeqData( ExpressionExperiment ee ) { + // TODO: check for the presence of vectors from the RNA-Seq pipeline; this stub always returns false until then + return false; + } + private boolean hasSingleCellData( ExpressionExperiment ee ) { + return expressionExperimentDao.hasSingleCellQuantitationTypes( ee ); } /**