Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public void setExpressionExperiment( ExpressionExperiment expressionExperiment )
*/
public Set<Gene> getGenesTested() {
Set<Gene> genes = new HashSet<>();
for ( CompositeSequence cs : dataMatrix.getRowNames() ) {
for ( CompositeSequence cs : dataMatrix.getDesignElements() ) {
Set<Gene> geneClusters = this.probeToGeneMap.get( cs );
if ( geneClusters == null ) {
if ( numWarnings <= LinkAnalysis.MAX_WARNINGS ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
throw new FilteringRelatedAnalysisException( config, filterResult, e );
}

DoubleMatrix<CompositeSequence, BioMaterial> bareFilteredDataMatrix = expressionData.getMatrix();
DoubleMatrix<CompositeSequence, BioMaterial> bareFilteredDataMatrix = expressionData.asDoubleMatrix();

DoubleMatrix1D librarySizes = getLibrarySizes( config, expressionData );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import org.springframework.transaction.annotation.Transactional;
import ubic.basecode.math.distribution.Histogram;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement;
import ubic.gemma.core.datastructure.matrix.TwoChannelExpressionDataMatrixBuilder;
import ubic.gemma.model.common.auditAndSecurity.eventType.MissingValueAnalysisEvent;
import ubic.gemma.model.common.quantitationtype.*;
Expand All @@ -46,6 +45,7 @@
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.HashSet;
import java.util.Objects;

/**
* Computes a missing value matrix for ratiometric data sets.
Expand Down Expand Up @@ -239,14 +239,14 @@ private Collection<RawExpressionDataVector> computeMissingValues( ExpressionExpe

Double signalThreshold = Double.NaN;
if ( bkgChannelA == null && bkgChannelB == null ) {
signalThreshold = this.computeSignalThreshold( preferred, signalChannelA, signalChannelB, baseChannel );
signalThreshold = this.computeSignalThreshold( baseChannel, signalChannelA, signalChannelB );
}
QuantitationType present = this.getMissingDataQuantitationType( signalToNoiseThreshold, signalThreshold );
source.getQuantitationTypes().add( present );
for ( ExpressionDataMatrixRowElement element : baseChannel.getRowElements() ) {
for ( CompositeSequence designElement : baseChannel.getDesignElements() ) {
count = this.examineVector( source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB,
signalToNoiseThreshold, extraMissingValueIndicators, results, count, baseChannel,
signalThreshold, present, element );
signalThreshold, present, designElement );

}
TwoChannelMissingValuesImpl.log.info( "Finished: " + count + " vectors examined for missing values" );
Expand All @@ -268,16 +268,14 @@ private int examineVector( ExpressionExperiment source, ExpressionDataDoubleMatr
double signalToNoiseThreshold, @Nullable Collection<Double> extraMissingValueIndicators,
Collection<RawExpressionDataVector> results, int count,
ExpressionDataDoubleMatrix baseChannel, Double signalThreshold, QuantitationType present,
ExpressionDataMatrixRowElement element ) {
CompositeSequence designElement = element.getDesignElement();

CompositeSequence designElement ) {
RawExpressionDataVector vect = RawExpressionDataVector.Factory.newInstance();
vect.setQuantitationType( present );
vect.setExpressionExperiment( source );
vect.setDesignElement( designElement );
vect.setBioAssayDimension( baseChannel.getBioAssayDimension( designElement ) );

int numCols = preferred.columns( designElement );
int numCols = vect.getBioAssayDimension().getBioAssays().size();

Boolean[] detectionCalls = new Boolean[numCols];
double[] prefRow = preferred.getRowAsDoubles( designElement );
Expand Down Expand Up @@ -357,17 +355,14 @@ private boolean checkMissingValue( @Nullable Collection<Double> extraMissingValu
/**
* Determine a threshold based on the data.
*/
private Double computeSignalThreshold( ExpressionDataDoubleMatrix preferred,
@Nullable ExpressionDataDoubleMatrix signalChannelA, @Nullable ExpressionDataDoubleMatrix signalChannelB,
ExpressionDataDoubleMatrix baseChannel ) {
private Double computeSignalThreshold( ExpressionDataDoubleMatrix baseChannel,
@Nullable ExpressionDataDoubleMatrix signalChannelA, @Nullable ExpressionDataDoubleMatrix signalChannelB ) {

double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;

for ( ExpressionDataMatrixRowElement element : baseChannel.getRowElements() ) {
CompositeSequence designElement = element.getDesignElement();

int numCols = preferred.columns( designElement );
for ( CompositeSequence designElement : baseChannel.getDesignElements() ) {
int numCols = Objects.requireNonNull( baseChannel.getBioAssayDimension( designElement ) ).getBioAssays().size();
for ( int col = 0; col < numCols; col++ ) {

double[] signalA = null;
Expand Down Expand Up @@ -397,10 +392,8 @@ private Double computeSignalThreshold( ExpressionDataDoubleMatrix preferred,
}

Histogram h = new Histogram( "range", 100, min, max );
for ( ExpressionDataMatrixRowElement element : baseChannel.getRowElements() ) {
CompositeSequence designElement = element.getDesignElement();

int numCols = preferred.columns( designElement );
for ( CompositeSequence designElement : baseChannel.getDesignElements() ) {
int numCols = Objects.requireNonNull( baseChannel.getBioAssayDimension( designElement ) ).getBioAssays().size();
for ( int col = 0; col < numCols; col++ ) {

double[] signalA = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,18 +223,19 @@ private ExpressionDataDoubleMatrix restoreOutliers( ExpressionDataDoubleMatrix o
Iterate over the rows and columns of the original matrix and copy the values from the corrected matrix.
If the column is an outlier in the original matrix, just skip it.
*/
for ( int i = 0; i < originalDataMatrix.rows(); i++ ) {
DoubleMatrix<CompositeSequence, BioMaterial> dmatrix = originalDataMatrix.asDoubleMatrix();
for ( int i = 0; i < dmatrix.rows(); i++ ) {
int skip = 0;
for ( int j = 0; j < originalDataMatrix.columns(); j++ ) {
for ( int j = 0; j < dmatrix.columns(); j++ ) {
if ( outlierColumns.contains( j ) ) {
skip++;
continue; // leave it alone; normally this will be an NaN.
}
originalDataMatrix.set( i, j, correctedMatrix.getAsDouble( i, j - skip ) );
dmatrix.set( i, j, correctedMatrix.getAsDouble( i, j - skip ) );
}
}

return originalDataMatrix;
return originalDataMatrix.withMatrix( dmatrix );
}

/**
Expand Down Expand Up @@ -312,7 +313,7 @@ private ExpressionDataDoubleMatrix doComBat( ExpressionExperiment ee, Expression
originalDataMatrix,
ObjectMatrix<BioMaterial, ExperimentalFactor, Object> design ) {
ObjectMatrix<BioMaterial, String, Object> designU = this.convertFactorValuesToStrings( design );
DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.getMatrix();
DoubleMatrix<CompositeSequence, BioMaterial> matrix = originalDataMatrix.asDoubleMatrix();

designU = this.orderMatrix( matrix, designU );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static ExpressionDataDoubleMatrix ensureLog2Scale( ExpressionDataDoubleMa
}

StandardQuantitationType type = quantitationType.getType();
DoubleMatrix<CompositeSequence, BioMaterial> transformedMatrix = dmatrix.getMatrix().copy();
DoubleMatrix<CompositeSequence, BioMaterial> transformedMatrix = dmatrix.asDoubleMatrix();
switch ( quantitationType.getScale() ) {
case LOG2:
log.warn( String.format( "Data was detected on a log2-scale, but the quantitation type indicate %s. No transformation is necessary.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public static QuantitationType inferQuantitationType( ExpressionDataMatrix<?> ex
private static InferredQuantitationType infer( ExpressionDataMatrix<?> expressionData, @Nullable QuantitationType qt ) {
Object matrix;
if ( expressionData instanceof ExpressionDataDoubleMatrix ) {
matrix = new DenseDoubleMatrix2D( ( ( ExpressionDataDoubleMatrix ) expressionData ).getMatrix().asArray() );
matrix = new DenseDoubleMatrix2D( ( ( ExpressionDataDoubleMatrix ) expressionData ).asDoubleMatrix().asArray() );
} else if ( expressionData instanceof SingleCellExpressionDataDoubleMatrix ) {
matrix = ( ( SingleCellExpressionDataDoubleMatrix ) expressionData ).getMatrix();
} else {
Expand Down Expand Up @@ -453,6 +453,7 @@ private static boolean isPercent100( DoubleMatrix2D matrix, double maximum ) {

/**
* Check if any of the rows of a given matrix are normalized.
*
* @see #isZScore(DoubleMatrix1D)
*/
private static boolean isZScore( Object matrix ) {
Expand Down Expand Up @@ -535,10 +536,11 @@ private static boolean isClose( double a, double b ) {

/**
* Detect suspicious values for a given quantitation type.
*
* @throws SuspiciousValuesForQuantitationException if there are any suspicious values
*/
public static void detectSuspiciousValues( ExpressionDataDoubleMatrix a, QuantitationType qt ) throws SuspiciousValuesForQuantitationException {
DoubleMatrix2D matrix = new DenseDoubleMatrix2D( a.getMatrix().asArray() );
DoubleMatrix2D matrix = new DenseDoubleMatrix2D( a.getMatrixAsDoubles() );

List<SuspiciousValuesForQuantitationException.SuspiciousValueResult> flaggingResults = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public static Set<BioMaterial> getSamplesWithData( ExpressionDataDoubleMatrix da
Set<BioMaterial> samplesWithData = new HashSet<>( dataMatrix.columns() );
for ( int j = 0; j < dataMatrix.columns(); j++ ) {
for ( int i = 0; i < dataMatrix.rows(); i++ ) {
if ( !Double.isNaN( dataMatrix.getMatrix().get( i, j ) ) ) {
if ( !Double.isNaN( dataMatrix.getAsDouble( i, j ) ) ) {
samplesWithData.add( dataMatrix.getBioMaterialForColumn( j ) );
break;
}
Expand All @@ -35,7 +35,7 @@ public static int countSamplesWithData( ExpressionDataDoubleMatrix dataMatrix )
int samplesWithData = 0;
for ( int j = 0; j < dataMatrix.columns(); j++ ) {
for ( int i = 0; i < dataMatrix.rows(); i++ ) {
if ( !Double.isNaN( dataMatrix.getMatrix().get( i, j ) ) ) {
if ( !Double.isNaN( dataMatrix.getAsDouble( i, j ) ) ) {
samplesWithData++;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@
import ubic.gemma.model.expression.designElement.CompositeSequence;

import javax.annotation.Nullable;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.*;

/**
* Default filter used for various analyses of expression experiments.
Expand Down Expand Up @@ -155,7 +152,11 @@ public ExpressionDataDoubleMatrix filter( ExpressionDataDoubleMatrix dataMatrix,
// Filtering lowly expressed genes.
if ( config.getLowExpressionCut() > 0.0 ) {
ExpressionExperimentFilter.log.debug( "Filtering for low or too high expression" );
Map<CompositeSequence, Double> ranks = dataMatrix.getRanksByMean();
Map<CompositeSequence, Double> ranks = new HashMap<>();
double[] rbm = dataMatrix.getRanksByMean();
for ( int i = 0; i < dataMatrix.rows(); i++ ) {
ranks.put( dataMatrix.getDesignElementForRow( i ), rbm[i] );
}
dataMatrix = this.filterLowExpression( dataMatrix, ranks );
result.setLowExpressionFilterApplied( true );
result.setAfterLowExpressionFilter( dataMatrix.rows() );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ else if ( dataMatrix.getQuantitationType().isPreferred( RawExpressionDataVector.
if ( allowSlicingColumns ) {
return dataMatrix.sliceColumns( keptSamples );
} else {
DoubleMatrix<CompositeSequence, BioMaterial> maskedMatrix = dataMatrix.getMatrix().copy();
DoubleMatrix<CompositeSequence, BioMaterial> maskedMatrix = dataMatrix.asDoubleMatrix();
for ( int j = 0; j < dataMatrix.columns(); j++ ) {
BioMaterial sample = dataMatrix.getBioMaterialForColumn( j );
if ( !keptSamples.contains( sample ) ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public ExpressionDataDoubleMatrix filter( ExpressionDataDoubleMatrix dataMatrix
}
log.info( "There are " + outlierAssays.size() + " outlier assays; masking them out..." );

DoubleMatrix<CompositeSequence, BioMaterial> maskedMatrix = dataMatrix.getMatrix().copy();
DoubleMatrix<CompositeSequence, BioMaterial> maskedMatrix = dataMatrix.asDoubleMatrix();

Set<BioAssayDimension> dimensionWithOutliers = dataMatrix.getBioAssayDimensions().stream()
.filter( bad -> CollectionUtils.containsAny( bad.getBioAssays(), outlierAssays ) )
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
package ubic.gemma.core.analysis.preprocess.filter;

import cern.colt.matrix.DoubleMatrix2D;
import lombok.extern.apachecommons.CommonsLog;
import org.springframework.util.Assert;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionException;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.core.util.MatrixStats;
Expand All @@ -13,7 +12,6 @@
import ubic.gemma.model.common.quantitationtype.StandardQuantitationType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.designElement.CompositeSequence;

import java.util.ArrayList;
Expand Down Expand Up @@ -191,15 +189,15 @@ private Optional<long[]> getLibrarySize( BioAssayDimension bioAssayDimension ) {

private ExpressionDataDoubleMatrix filterLog2cpm( ExpressionDataDoubleMatrix dmatrix, long[] librarySize ) throws
NoDesignElementsException {
DoubleMatrix<CompositeSequence, BioMaterial> unnormalizedMatrix = dmatrix.getMatrix().copy();
double[][] unnormalizedMatrix = dmatrix.getMatrixAsDoubles();
double[] log2LibrarySize = new double[librarySize.length];
for ( int j = 0; j < librarySize.length; j++ ) {
log2LibrarySize[j] = Math.log( librarySize[j] + 1.0 ) / Math.log( 2 );
}
// undo the log2cpm transformation, but keep values in the log2 scale
for ( int i = 0; i < unnormalizedMatrix.rows(); i++ ) {
for ( int j = 0; j < unnormalizedMatrix.columns(); j++ ) {
unnormalizedMatrix.set( i, j, unnormalizedMatrix.get( i, j ) + log2LibrarySize[j] );
for ( int i = 0; i < unnormalizedMatrix.length; i++ ) {
for ( int j = 0; j < unnormalizedMatrix[i].length; j++ ) {
unnormalizedMatrix[i][j] = unnormalizedMatrix[i][j] + log2LibrarySize[j];
}
}
Map<QuantitationType, QuantitationType> unnormalizedQts = dmatrix.getQuantitationTypes().stream()
Expand Down Expand Up @@ -239,9 +237,7 @@ private ExpressionDataDoubleMatrix filterDistinctValuesByRanks( ExpressionDataDo
}

private ExpressionDataDoubleMatrix rank( ExpressionDataDoubleMatrix dmatrix ) {
DenseDoubleMatrix<CompositeSequence, BioMaterial> rankMatrix = new DenseDoubleMatrix<>( MatrixStats.ranksByColumn( dmatrix.getMatrix() ).toArray() );
rankMatrix.setRowNames( dmatrix.getMatrix().getRowNames() );
rankMatrix.setColumnNames( dmatrix.getMatrix().getColNames() );
DoubleMatrix2D rankMatrix = MatrixStats.ranksByColumn( dmatrix.asDoubleMatrix() );
Map<QuantitationType, QuantitationType> rankQts = dmatrix.getQuantitationTypes().stream()
.collect( Collectors.toMap( qt -> qt, qt -> {
qt = QuantitationType.Factory.newInstance( qt );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
import cern.colt.matrix.linalg.Algebra;
import org.apache.commons.lang3.StringUtils;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.math.DescriptiveWithMissing;
import ubic.basecode.math.MatrixStats;
Expand Down Expand Up @@ -122,7 +121,7 @@ public ExpressionDataSVD( ExpressionDataDoubleMatrix expressionData, boolean nor
// }

this.normalized = normalizeMatrix;
DoubleMatrix<CompositeSequence, BioMaterial> matrix = this.expressionData.getMatrix();
DoubleMatrix<CompositeSequence, BioMaterial> matrix = this.expressionData.asDoubleMatrix();

assert matrix.getRowNames().size() > 0;
assert matrix.getColNames().size() > 0;
Expand Down Expand Up @@ -158,17 +157,13 @@ public ExpressionDataDoubleMatrix equalize() {
DoubleMatrix2D v = new DenseDoubleMatrix2D( rawV );

Algebra a = new Algebra();
DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(
a.mult( a.mult( u, s ), a.transpose( v ) ).toArray() );

reconstructed.setRowNames( this.expressionData.getMatrix().getRowNames() );
reconstructed.setColumnNames( this.expressionData.getMatrix().getColNames() );
DoubleMatrix2D reconstructed = a.mult( a.mult( u, s ), a.transpose( v ) );

// re-mask the missing values.
for ( int i = 0; i < reconstructed.rows(); i++ ) {
for ( int j = 0; j < reconstructed.columns(); j++ ) {
if ( Double.isNaN( this.missingValueInfo.get( i, j ) ) ) {
reconstructed.set( i, j, Double.NaN );
reconstructed.setQuick( i, j, Double.NaN );
}
}
}
Expand Down Expand Up @@ -286,11 +281,7 @@ public ExpressionDataDoubleMatrix removeHighestComponents( int numComponentsToRe
DoubleMatrix2D v = new DenseDoubleMatrix2D( rawV );

Algebra a = new Algebra();
DoubleMatrix<CompositeSequence, BioMaterial> reconstructed = new DenseDoubleMatrix<>(
a.mult( a.mult( u, s ), a.transpose( v ) ).toArray() );

reconstructed.setRowNames( this.expressionData.getMatrix().getRowNames() );
reconstructed.setColumnNames( this.expressionData.getMatrix().getColNames() );
DoubleMatrix2D reconstructed = a.mult( a.mult( u, s ), a.transpose( v ) );

// re-mask the missing values.
for ( int i = 0; i < reconstructed.rows(); i++ ) {
Expand Down Expand Up @@ -322,19 +313,14 @@ public ExpressionDataDoubleMatrix uMatrixAsExpressionData() {

DoubleMatrix<CompositeSequence, Integer> rawUMatrix = svd.getU();

DoubleMatrix<CompositeSequence, BioMaterial> result = new DenseDoubleMatrix<>( rawUMatrix.rows(),
rawUMatrix.columns() );
DenseDoubleMatrix2D result = new DenseDoubleMatrix2D( rawUMatrix.rows(), rawUMatrix.columns() );

// take the absolute value of the U matrix.
for ( int i = 0; i < rawUMatrix.rows(); i++ ) {
for ( int j = 0; j < rawUMatrix.columns(); j++ ) {
result.set( i, j, Math.abs( rawUMatrix.get( i, j ) ) );
result.setQuick( i, j, Math.abs( rawUMatrix.get( i, j ) ) );
}
}
List<BioMaterial> colNames = svd.getV().getColNames();

result.setColumnNames( colNames );
result.setRowNames( rawUMatrix.getRowNames() );

// use that as the 'expression data'
return this.expressionData.withMatrix( result );
Expand Down
Loading