diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index 6d2caaa3ae..2a7f5d4d56 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -316,6 +316,14 @@ 4.2.2.GA + + + + com.googlecode.matrix-toolkits-java + mtj + 1.0.4 + + diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java index b3fd1e6350..1a655dd7e0 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java @@ -49,6 +49,18 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ Collection getQuantitationTypes(); + /** + * @return a {@link BioAssayDimension} that covers all the biomaterials in this matrix. + * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials + * used in the experiment. + */ + BioAssayDimension getBestBioAssayDimension(); + + /** + * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing. + */ + boolean hasMissingValues(); + /** * Access a single value of the matrix. Note that because there can be multiple bioassays per column and multiple * designelements per row, it is possible for this method to retrieve a data that does not come from the bioassay @@ -69,6 +81,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ T[][] get( List designElements, List bioAssays ); + /** + * Access the entire matrix. + * + * @return T[][] + */ + T[][] getRawMatrix(); + /** * Access a single column of the matrix. * @@ -85,6 +104,21 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ T[][] getColumns( List bioAssays ); + + /** + * @return list of elements representing the row 'labels'. 
+ */ + List getRowElements(); + + /** + * Number of columns that use the given design element. Useful if the matrix includes data from more than one array + * design. + * + * @param el el + * @return int + */ + int columns( CompositeSequence el ); + /** * @param index i * @return BioMaterial. Note that if this represents a subsetted data set, the BioMaterial may be a lightweight @@ -98,13 +132,6 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { */ int getColumnIndex( BioMaterial bioMaterial ); - /** - * @return The bioassaydimension that covers all the biomaterials in this matrix. - * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials - * used in the experiment. - */ - BioAssayDimension getBestBioAssayDimension(); - /** * Produce a BioAssayDimension representing the matrix columns for a specific row. The designelement argument is * needed because a matrix can combine data from multiple array designs, each of which will generate its own @@ -122,4 +149,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix { * used in the study. 
*/ Collection getBioAssaysForColumn( int index ); + + /** + * Set a value in the matrix, by index + * + * @param row row + * @param column col + * @param value val + */ + void set( int row, int column, T value ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java new file mode 100644 index 0000000000..1beae14df6 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java @@ -0,0 +1,135 @@ +package ubic.gemma.core.datastructure.matrix; + +import no.uib.cipr.matrix.sparse.CompRowMatrix; +import org.springframework.util.Assert; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.persistence.util.ByteArrayUtils; + +import java.util.*; + +/** + * @author poirigui + */ +public class DoubleSingleCellExpressionDataMatrix implements SingleCellExpressionDataMatrix { + + private static final Comparator designElementComparator = Comparator.comparing( CompositeSequence::getName ) + .thenComparing( CompositeSequence::getId ); + + private final ExpressionExperiment expressionExperiment; + private final QuantitationType quantitationType; + private final SingleCellDimension singleCellDimension; + private final CompRowMatrix matrix; + private final List designElements; + + public DoubleSingleCellExpressionDataMatrix( Collection vectors ) { + Assert.isTrue( !vectors.isEmpty(), "At least one vector must be supplied. Use EmptyExpressionDataMatrix for empty data matrices instead." 
); + Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getQuantitationType ).distinct().count() == 1, + "All vectors must have the same quantitation type." ); + Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getSingleCellDimension ).distinct().count() == 1, + "All vectors must have the same single-cell dimension." ); + SingleCellExpressionDataVector vector = vectors.iterator().next(); + expressionExperiment = vector.getExpressionExperiment(); + quantitationType = vector.getQuantitationType(); + singleCellDimension = vector.getSingleCellDimension(); + // sort vectors by CS + List sortedVectors = new ArrayList<>( vectors ); + sortedVectors.sort( Comparator.comparing( SingleCellExpressionDataVector::getDesignElement, designElementComparator ) ); + int rows = sortedVectors.size(); + int i = 0; + int[][] nz = new int[rows][]; + for ( SingleCellExpressionDataVector v : sortedVectors ) { + nz[i++] = v.getDataIndices(); + } + matrix = new CompRowMatrix( rows, singleCellDimension.getNumberOfCells(), nz ); + designElements = new ArrayList<>( sortedVectors.size() ); + i = 0; + for ( SingleCellExpressionDataVector v : sortedVectors ) { + designElements.add( v.getDesignElement() ); + double[] row = ByteArrayUtils.byteArrayToDoubles( v.getData() ); + int[] indices = v.getDataIndices(); + for ( int j = 0; j < row.length; j++ ) { + matrix.set( i, indices[j], row[j] ); + } + i++; + } + } + + @Override + public ExpressionExperiment getExpressionExperiment() { + return expressionExperiment; + } + + @Override + public int columns() { + return matrix.numColumns(); + } + + @Override + public Double get( int row, int column ) { + return matrix.get( row, column ); + } + + @Override + public Double[] getColumn( int column ) { + Double[] vec = new Double[matrix.numRows()]; + for ( int j = 0; j < matrix.numRows(); j++ ) { + vec[j] = matrix.get( j, column ); + } + return vec; + } + + @Override + public List getDesignElements() { + return 
designElements; + } + + @Override + public CompositeSequence getDesignElementForRow( int index ) { + return designElements.get( index ); + } + + @Override + public Double[] getRow( CompositeSequence designElement ) { + int ix = getRowIndex( designElement ); + if ( ix == -1 ) { + return null; + } + return getRow( ix ); + } + + @Override + public Double[] getRow( int index ) { + Double[] vec = new Double[matrix.numColumns()]; + int[] rowptr = matrix.getRowPointers(); + int[] colind = matrix.getColumnIndices(); + double[] data = matrix.getData(); + for ( int i = rowptr[index]; i < rowptr[index + 1]; i++ ) { + vec[colind[i]] = data[i]; + } + return vec; + } + + @Override + public int getRowIndex( CompositeSequence designElement ) { + return Math.max( Collections.binarySearch( designElements, designElement, designElementComparator ), -1 ); + } + + @Override + public int rows() { + return matrix.numRows(); + } + + @Override + public QuantitationType getQuantitationType() { + return quantitationType; + } + + @Override + public SingleCellDimension getSingleCellDimension() { + return singleCellDimension; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java index 24325c7b28..1c6c012809 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java @@ -84,7 +84,7 @@ public Object[] getColumn( BioAssay bioAssay ) { } @Override - public Object[] getColumn( Integer column ) { + public Object[] getColumn( int column ) { throw new UnsupportedOperationException(); } @@ -104,12 +104,7 @@ public Object[] getRow( CompositeSequence designElement ) { } @Override - public Object[] getRow( Integer index ) { - throw new UnsupportedOperationException(); - } - - @Override - public Object[][] getRows( List 
designElements ) { + public Object[] getRow( int index ) { throw new UnsupportedOperationException(); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java index 17b5e8e762..967785b019 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java @@ -91,7 +91,7 @@ public Boolean[] getColumn( BioAssay bioAssay ) { } @Override - public Boolean[] getColumn( Integer index ) { + public Boolean[] getColumn( int index ) { ObjectMatrix1D rawResult = this.matrix.viewColumn( index ); Boolean[] res = new Boolean[rawResult.size()]; int i = 0; @@ -135,26 +135,10 @@ public Boolean[] getRow( CompositeSequence designElement ) { } @Override - public Boolean[] getRow( Integer index ) { + public Boolean[] getRow( int index ) { return matrix.getRow( index ); } - @Override - public Boolean[][] getRows( List designElements ) { - if ( designElements == null ) { - return null; - } - - Boolean[][] result = new Boolean[designElements.size()][]; - int i = 0; - for ( CompositeSequence element : designElements ) { - Boolean[] rowResult = this.getRow( element ); - result[i] = rowResult; - i++; - } - return result; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java index 2f7615a815..8762372430 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java @@ -314,7 +314,7 @@ public Double[] getColumn( BioAssay 
bioAssay ) { } @Override - public Double[] getColumn( Integer index ) { + public Double[] getColumn( int index ) { double[] rawResult = this.matrix.getColumn( index ); assert rawResult != null; Double[] result = new Double[rawResult.length]; @@ -350,27 +350,11 @@ public Double[] getRow( CompositeSequence designElement ) { } @Override - public Double[] getRow( Integer index ) { + public Double[] getRow( int index ) { double[] rawRow = matrix.getRow( index ); return ArrayUtils.toObject( rawRow ); } - @Override - public Double[][] getRows( List designElements ) { - if ( designElements == null ) { - return null; - } - - Double[][] result = new Double[designElements.size()][]; - int i = 0; - for ( CompositeSequence element : designElements ) { - Double[] rowResult = this.getRow( element ); - result[i] = rowResult; - i++; - } - return result; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java index 258ef2fc27..a6dc073b9e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java @@ -70,7 +70,7 @@ public Integer[] getColumn( BioAssay bioAssay ) { } @Override - public Integer[] getColumn( Integer index ) { + public Integer[] getColumn( int index ) { return this.matrix.getColumn( index ); } @@ -98,19 +98,10 @@ public Integer[] getRow( CompositeSequence designElement ) { } @Override - public Integer[] getRow( Integer index ) { + public Integer[] getRow( int index ) { return this.matrix.getRow( index ); } - @Override - public Integer[][] getRows( List designElements ) { - Integer[][] res = new Integer[this.rows()][]; - for ( int i = 0; i < designElements.size(); i++ ) { - res[i] = 
this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) ); - } - return res; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java index 3f9715f2f3..17c61bf95b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java @@ -21,13 +21,18 @@ import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import javax.annotation.Nullable; import java.util.List; /** * Represents a matrix of data from an {@link ExpressionExperiment}. + *

+ * The rows of this matrix represent design elements. * * @author pavlidis * @author keshav + * @see BulkExpressionDataMatrix + * @see SingleCellExpressionDataMatrix */ public interface ExpressionDataMatrix { @@ -37,103 +42,63 @@ public interface ExpressionDataMatrix { ExpressionExperiment getExpressionExperiment(); /** - * Total number of columns. - * - * @return int + * Obtain all the design elements in this data matrix. */ - int columns(); + List getDesignElements(); /** - * Number of columns that use the given design element. Useful if the matrix includes data from more than one array - * design. + * Return a design element for a given index. * - * @param el el - * @return int + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #rows()} */ - int columns( CompositeSequence el ); + CompositeSequence getDesignElementForRow( int index ); /** - * Access a single value of the matrix. This is generally the easiest way to do it. - * - * @param row row - * @param column col - * @return t + * Obtain the total number of columns. */ - T get( int row, int column ); + int columns(); /** * Access a single column of the matrix. * * @param column index * @return T[] + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #columns()} */ - T[] getColumn( Integer column ); - - /** - * Obtain all the design elements in this data matrix. - */ - List getDesignElements(); + T[] getColumn( int column ); /** - * @param index i - * @return cs + * @return int */ - CompositeSequence getDesignElementForRow( int index ); + int rows(); /** - * Access the entire matrix. + * Access a single row of the matrix, by index. A complete row is returned. * - * @return T[][] + * @param index i + * @return t[] + * @throws IndexOutOfBoundsException if the supplied index is not within zero and {@link #rows()} */ - T[][] getRawMatrix(); + T[] getRow( int index ); /** * Return a row that 'came from' the given design element. 
* * @param designElement de - * @return t + * @return the corresponding row or null if the design element is not found in the matrix */ + @Nullable T[] getRow( CompositeSequence designElement ); /** - * Access a single row of the matrix, by index. A complete row is returned. - * - * @param index i - * @return t[] + * @return the index for the given design element, or -1 if not found */ - T[] getRow( Integer index ); - - /** - * @return list of elements representing the row 'labels'. - */ - List getRowElements(); - int getRowIndex( CompositeSequence designElement ); /** - * Access a submatrix + * Access a single value of the matrix by row and column. * - * @param designElements de - * @return T[][] - */ - T[][] getRows( List designElements ); - - /** - * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing. + * @throws IndexOutOfBoundsException if either the row or column is outside the matrix bounds */ - boolean hasMissingValues(); - - /** - * @return int - */ - int rows(); - - /** - * Set a value in the matrix, by index - * - * @param row row - * @param column col - * @param value val - */ - void set( int row, int column, T value ); + T get( int row, int column ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java index 2333fad2f5..7c0ff3ecbf 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java @@ -204,7 +204,7 @@ public static DoubleMatrix orderByExperimentalDesign( DoubleMat * @param mat matrix * @return bio materials */ - public static List orderByExperimentalDesign( ExpressionDataMatrix mat ) { + public static List orderByExperimentalDesign( BulkExpressionDataMatrix mat ) { List start = 
ExpressionDataMatrixColumnSort.getBms( mat ); List ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign( start, null ); @@ -466,7 +466,7 @@ private static LinkedHashMap> chunkOnFactor( Expe /** * Get all biomaterials for a matrix. */ - private static List getBms( ExpressionDataMatrix mat ) { + private static List getBms( BulkExpressionDataMatrix mat ) { List result = new ArrayList<>(); for ( int i = 0; i < mat.columns(); i++ ) { result.add( mat.getBioMaterialForColumn( i ) ); @@ -476,6 +476,7 @@ private static List getBms( ExpressionDataMatrix mat ) { /** * Get all (non-constant) factors used by the passed biomaterials + * * @param bms biomaterials * @return factors relevant to these biomaterials, ignoring those which have constant values. */ @@ -574,6 +575,7 @@ private static List orderByFactor( ExperimentalFactor ef, Map

* Any batch factor is used last (we sort by batch only within the most granular factor's levels) *

+ * * @param start biomaterials to sort * @param factors sorted list of factors to define sort order for biomaterials, cannot be null */ diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java index 111409dc2e..5bbadb48d9 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java @@ -94,7 +94,7 @@ public String[] getColumn( BioAssay bioAssay ) { } @Override - public String[] getColumn( Integer index ) { + public String[] getColumn( int index ) { return this.matrix.getColumn( index ); } @@ -122,19 +122,10 @@ public String[] getRow( CompositeSequence designElement ) { } @Override - public String[] getRow( Integer index ) { + public String[] getRow( int index ) { return matrix.getRow( index ); } - @Override - public String[][] getRows( List designElements ) { - String[][] res = new String[this.rows()][]; - for ( int i = 0; i < designElements.size(); i++ ) { - res[i] = this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) ); - } - return res; - } - @Override public boolean hasMissingValues() { for ( int i = 0; i < matrix.rows(); i++ ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java index 2821fa3329..c9118be112 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java +++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java @@ -24,4 +24,13 @@ public interface SingleCellExpressionDataMatrix extends ExpressionDataMatrix< * Return the single-cell dimension for this matrix. 
*/ SingleCellDimension getSingleCellDimension(); + + /** + * {@inheritDoc} + *

+ * Important note: Retrieving a column is a {@code O(n log m)} operation where {@code n} is the number of + * vectors and {@code m} is the number of cells. Always favour row-oriented operations when possible. + */ + @Override + T[] getColumn( int column ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java index eb59585463..327385f10e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java @@ -8,9 +8,8 @@ import org.springframework.util.Assert; import ubic.basecode.io.ByteArrayConverter; import ubic.gemma.model.common.quantitationtype.*; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.designElement.CompositeSequence; @@ -25,8 +24,6 @@ import java.util.stream.Stream; import java.util.zip.GZIPInputStream; -import static java.util.function.Function.identity; - /** * Load single cell data from 10X Genomics MEX format. * @@ -59,13 +56,17 @@ public MexSingleCellDataLoader( List sampleNames, List barcodeFile && barcodeFiles.size() == genesFiles.size() && genesFiles.size() == matrixFiles.size(), "There must be exactly the same number of each type of files." 
); - this.sampleNames = sampleNames; + this.sampleNames = Collections.unmodifiableList( sampleNames ); this.barcodeFiles = barcodeFiles; this.genesFiles = genesFiles; this.matrixFiles = matrixFiles; this.numberOfSamples = barcodeFiles.size(); } + public List getSampleNames() { + return sampleNames; + } + @Override public SingleCellDimension getSingleCellDimension( Collection bioAssays ) throws IOException { SingleCellDimension scd = new SingleCellDimension(); @@ -106,15 +107,12 @@ public Set getQuantitationTypes() { * MEX does not provide cell type labels. */ @Override - public Optional getCellTypeLabelling() { + public Optional getCellTypeLabelling() { return Optional.empty(); } @Override - public Stream loadVectors( ArrayDesign platform, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException { - Map probeByName = platform.getCompositeSequences().stream() - .collect( Collectors.toMap( CompositeSequence::getName, identity() ) ); - + public Stream loadVectors( Map elementsMapping, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException { // location of a given element in individual matrices Map elementsToSampleMatrixRow = new HashMap<>(); ArrayList matrices = new ArrayList<>( numberOfSamples ); @@ -129,9 +127,9 @@ public Stream loadVectors( ArrayDesign platform, String[] pieces = s.split( "\t", 3 ); String geneId = pieces[0]; String geneSymbol = pieces[1]; - CompositeSequence probe = probeByName.get( geneId ); + CompositeSequence probe = elementsMapping.get( geneId ); if ( probe == null && allowMappingProbeNamesToGeneSymbols ) { - probe = probeByName.get( geneSymbol ); + probe = elementsMapping.get( geneSymbol ); } if ( probe == null ) { missingElements.add( geneId ); @@ -149,11 +147,11 @@ public Stream loadVectors( ArrayDesign platform, } if ( missingElements.size() == elements.size() ) { - throw new IllegalArgumentException( "None of the elements of " + platform + " match genes from " + genesFile + "." 
); + throw new IllegalArgumentException( "None of the elements matched genes from " + genesFile + "." ); } else if ( missingElements.size() > 10 ) { - log.warn( String.format( "%s does not have elements for %d/%d genes from %s.", platform, missingElements.size(), elements.size(), genesFile ) ); + log.warn( String.format( "The supplied mapping does not have elements for %d/%d genes from %s.", missingElements.size(), elements.size(), genesFile ) ); } else if ( !missingElements.isEmpty() ) { - log.warn( String.format( "%s does not have elements for the following genes: %s from %s.", platform, + log.warn( String.format( "The supplied mapping does not have elements for the following genes: %s from %s.", missingElements.stream().sorted().collect( Collectors.joining( ", " ) ), genesFile ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java index 61591ecb9d..cc32470874 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java +++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java @@ -1,14 +1,15 @@ package ubic.gemma.core.loader.expression.singleCell; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.designElement.CompositeSequence; import java.io.IOException; import java.util.Collection; +import java.util.Map; import java.util.Optional; import java.util.Set; import 
java.util.stream.Stream; @@ -40,18 +41,18 @@ public interface SingleCellDataLoader { /** * Load single-cell type labelling present in the data. */ - Optional getCellTypeLabelling() throws IOException; + Optional getCellTypeLabelling() throws IOException; /** * Produces a stream of single-cell expression data vectors for the given {@link QuantitationType}. - *

- * Make sure to close the stream when done, preferably using a try-with-resource block. * - * @param platform a platform to use when mapping vectors to probes/genes + * @param elementsMapping a mapping of element names used in the dataset to {@link CompositeSequence} * @param dimension a dimension to use for creating vectors, may be loaded from the single-cell data with * {@link #getSingleCellDimension(Collection)} * @param quantitationType a quantitation type to extract from the data for, may be loaded from the single-cell data * with {@link #getQuantitationTypes()} + * @return a stream of single-cell expression data vectors that must be closed when done, preferably using a + * try-with-resource block. */ - Stream loadVectors( ArrayDesign platform, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException; + Stream loadVectors( Map elementsMapping, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java index 8caf21bf72..27fb81e20f 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java @@ -11,6 +11,7 @@ /** * Utilities and algorithms for {@link List}. + * * @author poirigui */ public class ListUtils { @@ -31,6 +32,7 @@ public static Map indexOfElements( List list ) { /** * Get a case-insensitive mapping of string elements to their first occurrence in a {@link List}. + * * @see #indexOfElements(List) */ public static Map indexOfCaseInsensitiveStringElements( List list ) { @@ -49,25 +51,57 @@ private static void fillMap( Map element2position, List list } } + /** + * Get an element of a sparse array. 
+ * + * @param array + * @param indices + * @param index + * @param defaultValue + * @param + * @return + */ + public static T getSparseArrayElement( T[] array, int[] indices, int numberOfElements, int index, T defaultValue ) { + Assert.isTrue( array.length == indices.length, + String.format( "Invalid size for sparse array, it must contain %d indices.", array.length ) ); + // special case for dense array + if ( indices.length == numberOfElements ) { + return array[index]; + } + if ( index < 0 ) { + // FIXME: add support for negative indexing + throw new IndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); + } + if ( index >= numberOfElements ) { + throw new IndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); + } + int offset = binarySearch( indices, index ); + if ( offset < 0 ) { + return defaultValue; + } + return array[offset]; + } + /** * Get an element of a sparse range array. + * * @param array collection of elements applying for the ranges * @param offsets starting offsets of the ranges * @param numberOfElements the size of the original array * @param index a position to retrieve - * @throws ArrayIndexOutOfBoundsException if the index is out of bounds - * @throws IllegalArgumentException if the array and offsets do not have the same size + * @throws IndexOutOfBoundsException if the requested index is out of bounds + * @throws IllegalArgumentException if the array is empty or its size differs from offsets * @see #validateSparseRangeArray(List, int[], int) */ - public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) { + public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) throws IllegalArgumentException, IndexOutOfBoundsException { Assert.isTrue( array.size() == offsets.length, - String.format( "Invalid size for offsets array, it must contain %d indices.", array.size() ) ); + String.format( "Invalid 
size for sparse range array, it must contain %d indices.", array.size() ) ); if ( index < 0 ) { // FIXME: add support for negative indexing - throw new ArrayIndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); + throw new IndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." ); } if ( index >= numberOfElements ) { - throw new ArrayIndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); + throw new IndexOutOfBoundsException( "The index exceeds the upper bound of the array." ); } int offset = binarySearch( offsets, index ); if ( offset < 0 ) { @@ -78,12 +112,15 @@ public static T getSparseRangeArrayElement( List array, int[] offsets, in /** * Validate a sparse range array. + * * @param array collection of elements applying for the ranges * @param offsets starting offsets of the ranges * @param numberOfElements the size of the original array * @throws IllegalArgumentException if the sparse range array is invalid */ public static void validateSparseRangeArray( List array, int[] offsets, int numberOfElements ) throws IllegalArgumentException { + Assert.isTrue( numberOfElements == 0 || !array.isEmpty(), + "A non-empty sparse range array must have at least one element." ); Assert.isTrue( array.size() == offsets.length, "There must be as many offsets as entries in the corresponding array." 
); int k = 0; diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java index 41dc9bac31..b6de5faf67 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java @@ -4,11 +4,24 @@ public abstract class AnalysisValueObject extends IdentifiableValueObject { + private ProtocolValueObject protocol; + protected AnalysisValueObject() { super(); } protected AnalysisValueObject( T analysis ) { super( analysis ); + if ( analysis.getProtocol() != null ) { + this.protocol = new ProtocolValueObject( analysis.getProtocol() ); + } + } + + public ProtocolValueObject getProtocol() { + return protocol; + } + + public void setProtocol( ProtocolValueObject protocol ) { + this.protocol = protocol; } } diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java new file mode 100644 index 0000000000..6e8e887dd3 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java @@ -0,0 +1,50 @@ +package ubic.gemma.model.analysis; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.extern.apachecommons.CommonsLog; +import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * @author poirigui + */ +@Data +@EqualsAndHashCode(callSuper = true) +@CommonsLog +public class CellTypeAssignmentValueObject extends AnalysisValueObject { + + /** + * A list of IDs, one-per-cell, that refers to one of the cell type labels in {@link #cellTypes}. + *

+ * {@code null} is used to indicate an unknown cell type. + */ + private List cellTypeIds; + + /** + * A set of cell types that are assigned to individual cells. + */ + private Set cellTypes; + + public CellTypeAssignmentValueObject( CellTypeAssignment cellTypeAssignment ) { + super( cellTypeAssignment ); + try { + cellTypeIds = Arrays.stream( cellTypeAssignment.getCellTypeIndices() ) + .mapToObj( i -> i == CellTypeAssignment.UNKNOWN_CELL_TYPE ? null : cellTypeAssignment.getCellTypes().get( i ) ) + .map( characteristic -> characteristic != null ? characteristic.getId() : null ) + .collect( Collectors.toList() ); + } catch ( IndexOutOfBoundsException e ) { + // this may happen if a cell type index refers to a non-existent cell type (i.e. the data we have is incorrect), + // but we don't want to break the VO serialization which would break the REST API. + log.warn( "Cell type IDs is invalid for " + cellTypeAssignment + ".", e ); + } + cellTypes = cellTypeAssignment.getCellTypes().stream() + .map( CharacteristicValueObject::new ) + .collect( Collectors.toSet() ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java new file mode 100644 index 0000000000..a821658ef7 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java @@ -0,0 +1,26 @@ +package ubic.gemma.model.analysis; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import ubic.gemma.model.IdentifiableValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.common.protocol.Protocol; + +import java.util.Set; + +@Data +@EqualsAndHashCode(callSuper = true) +public class ProtocolValueObject extends IdentifiableValueObject { + + private String name; + + private String description; + + private Set characteristics; + + public ProtocolValueObject( Protocol protocol ) { + super( protocol ); + this.name = protocol.getName(); + this.description = protocol.getDescription(); + } +} diff --git 
a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java index 781665b209..12ef59c785 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java @@ -1,8 +1,8 @@ /* * The Gemma project. - * + * * Copyright (c) 2006-2012 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -20,13 +20,28 @@ import gemma.gsec.model.Securable; import ubic.gemma.model.common.AbstractDescribable; +import ubic.gemma.model.common.description.Characteristic; import java.io.Serializable; +import java.util.Set; public class Protocol extends AbstractDescribable implements Securable, Serializable { private static final long serialVersionUID = -1902891452989019766L; + /** + * Characteristics describing the protocol. 
+ */ + private Set characteristics; + + public Set getCharacteristics() { + return characteristics; + } + + public void setCharacteristics( Set characteristics ) { + this.characteristics = characteristics; + } + public static final class Factory { public static Protocol newInstance() { diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java index ab1b243ed5..7b130d5b66 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java @@ -214,84 +214,27 @@ public boolean equals( Object object ) { return false; } final QuantitationType that = ( QuantitationType ) object; - if ( that.getId() != null && this.getId() != null ) { - return Objects.equals( that.getId(), this.getId() ); - } - - if ( that.getName() != null && this.getName() != null && !this.getName().equals( that.getName() ) ) { - return false; - } - - if ( this.getScale() != null && that.getScale() != null && !this.getScale().equals( that.getScale() ) ) { - return false; - } - - if ( this.getIsPreferred() != that.getIsPreferred() ) { - return false; - } - - if ( this.getIsRatio() != that.getIsRatio() ) { - return false; - } - - if ( this.getIsNormalized() != that.getIsNormalized() ) { - return false; - } - - if ( this.getIsBackground() != that.getIsBackground() ) { - return false; + return getId().equals( that.getId() ); } - - if ( this.getIsBackgroundSubtracted() != that.getIsBackgroundSubtracted() ) { - return false; - } - - if ( this.getGeneralType() != null && that.getGeneralType() != null && !this.getGeneralType() - .equals( that.getGeneralType() ) ) { - return false; - } - - //noinspection SimplifiableIfStatement // Better readability - if ( this.getRepresentation() != null && that.getRepresentation() != null && !this.getRepresentation() - 
.equals( that.getRepresentation() ) ) { - return false; - } - - return this.getType() == null || that.getRepresentation() == null || this.getType().equals( that.getType() ); + return Objects.equals( getName(), that.getName() ) + && Objects.equals( scale, that.scale ) + && Objects.equals( isPreferred, that.isPreferred ) + && Objects.equals( isRatio, that.isRatio ) + && Objects.equals( isNormalized, that.isNormalized ) + && Objects.equals( isBackground, that.isBackground ) + && Objects.equals( isBackgroundSubtracted, that.isBackgroundSubtracted ) + && Objects.equals( isBatchCorrected, that.isBatchCorrected ) + && Objects.equals( type, that.type ) + && Objects.equals( generalType, that.generalType ) + && Objects.equals( representation, that.representation ) + && Objects.equals( isRecomputedFromRawData, that.isRecomputedFromRawData ); } @Override public int hashCode() { - int hashCode = 0; - hashCode = 29 * hashCode + ( this.getId() == null ? this.computeHashCode() : this.getId().hashCode() ); - return hashCode; - } - - private int computeHashCode() { - int hashCode = 0; - if ( this.getName() != null ) { - hashCode = hashCode + this.getName().hashCode(); - } - if ( this.getType() != null ) { - hashCode = hashCode + this.getType().hashCode(); - } - if ( this.getRepresentation() != null ) { - hashCode = hashCode + this.getRepresentation().hashCode(); - } - if ( this.getGeneralType() != null ) { - hashCode = hashCode + this.getGeneralType().hashCode(); - } - if ( this.getScale() != null ) { - hashCode = hashCode + this.getScale().hashCode(); - } - hashCode += Boolean.hashCode( this.getIsBackground() ); - hashCode += Boolean.hashCode( this.getIsBackgroundSubtracted() ); - hashCode += Boolean.hashCode( this.getIsNormalized() ); - hashCode += Boolean.hashCode( this.getIsPreferred() ); - hashCode += Boolean.hashCode( this.getIsRatio() ); - - return hashCode; + return Objects.hash( getName(), type, representation, generalType, scale, isBackground, isBackgroundSubtracted, + 
isNormalized, isPreferred, isBatchCorrected, isRatio, isRecomputedFromRawData ); } @Override @@ -350,6 +293,7 @@ public static QuantitationType newInstance( QuantitationType quantitationType ) result.isBackground = quantitationType.isBackground; result.isBackgroundSubtracted = quantitationType.isBackgroundSubtracted; result.isBatchCorrected = quantitationType.isBatchCorrected; + result.isRecomputedFromRawData = quantitationType.isRecomputedFromRawData; return result; } diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java new file mode 100644 index 0000000000..14e6d9f3f4 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java @@ -0,0 +1,75 @@ +package ubic.gemma.model.expression.bioAssayData; + +import lombok.Getter; +import lombok.Setter; +import ubic.gemma.model.analysis.Analysis; +import ubic.gemma.model.common.description.Characteristic; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +/** + * Represents the labelling of cell types. + */ +@Getter +@Setter +public class CellTypeAssignment extends Analysis { + + /** + * A special indicator for {@link #cellTypeIndices} when the cell type is unknown. + */ + public static final int UNKNOWN_CELL_TYPE = -1; + + /** + * Indicate if this labelling is the preferred one. + */ + private boolean preferred; + + /** + * Cell types assignment to individual cells from the {@link #cellTypes} collections. + *

+ * The value {@code -1} is used to indicate an unknown cell type. + */ + private int[] cellTypeIndices; + + /** + * List of cell types. + */ + private List cellTypes = new ArrayList<>(); + + /** + * Number of cell types. + *

+ * This must always be equal to number of elements of {@link #cellTypes}. + */ + private Integer numberOfCellTypes; + + /** + * Obtain the type assignment of a given cell. + * + * @return the type assignment of a given cell, or null if the type was assigned to {@link #UNKNOWN_CELL_TYPE}. + * @throws IndexOutOfBoundsException if the cell index is out of range or if the value is outside the range of {@link #cellTypes} + */ + @Nullable + public Characteristic getCellType( int cellIndex ) throws IndexOutOfBoundsException { + int i = cellTypeIndices[cellIndex]; + if ( i == UNKNOWN_CELL_TYPE ) { + return null; + } else { + return cellTypes.get( i ); + } + } + + @Override + public int hashCode() { + return Objects.hash( Arrays.hashCode( cellTypeIndices ), cellTypes ); + } + + @Override + public boolean equals( Object object ) { + return super.equals( object ); + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java deleted file mode 100644 index b67bf182e0..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java +++ /dev/null @@ -1,57 +0,0 @@ -package ubic.gemma.model.expression.bioAssayData; - -import lombok.Getter; -import lombok.Setter; -import org.springframework.util.Assert; -import ubic.gemma.model.analysis.Analysis; -import ubic.gemma.model.common.description.Characteristic; - -import java.util.Arrays; -import java.util.List; -import java.util.Objects; - -/** - * Represents the labelling of cell types. - */ -@Getter -@Setter -public class CellTypeLabelling extends Analysis { - - /** - * Indicate if this labelling is the preferred one. - */ - private boolean preferred; - - /** - * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections. - */ - private int[] cellTypes; - - /** - * Cell type labels. 
- */ - private List cellTypeLabels; - - /** - * Number of distinct cell types. - *

- * This must always be equal to number of distinct elements of {@link #cellTypeLabels}. - */ - private Integer numberOfCellTypeLabels; - - public Characteristic getCellTypeLabel( int index ) { - Assert.notNull( cellTypes, "No cell types have been assigned." ); - Assert.notNull( cellTypeLabels, "No cell labels exist." ); - return cellTypeLabels.get( cellTypes[index] ); - } - - @Override - public int hashCode() { - return Objects.hash( Arrays.hashCode( cellTypes ), cellTypeLabels ); - } - - @Override - public boolean equals( Object object ) { - return super.equals( object ); - } -} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java index 5e3dba7915..3c172c0c4c 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java @@ -11,6 +11,12 @@ import static ubic.gemma.core.util.ListUtils.getSparseRangeArrayElement; +/** + * Represents a single-cell dimension, holding shared information for a set of {@link SingleCellExpressionDataVector}. + * + * @author poirigui + * @see SingleCellExpressionDataVector + */ @Getter @Setter public class SingleCellDimension implements Identifiable { @@ -29,28 +35,28 @@ public class SingleCellDimension implements Identifiable { /** * Number of cells. *

- * This should always be equal to the size of {@link #cellIds}. + * This must always be equal to the size of {@link #cellIds}. */ private int numberOfCells = 0; /** * Set of cell types assignment to individual cells. This is empty if no cell types have been assigned and should - * always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty. + * always contain a preferred labelling as per {@link CellTypeAssignment#isPreferred()} if non-empty. */ - private Set cellTypeLabellings = new HashSet<>(); + private Set cellTypeAssignments = new HashSet<>(); /** - * List of bioassays that each cell belongs to. + * List of {@link BioAssay}s applicable to the cells. *

- * The {@link BioAssay} {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}. - * To find the bioassay type of a given cell, use {@link #getBioAssay(int)}. + * The {@link BioAssay} in {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}. + * To find the bioassay of a given cell, use {@link #getBioAssay(int)}. */ private List bioAssays = new ArrayList<>(); /** * Offsets of the bioassays. *

- * This always contain {@code bioAssays.size()} elements. + * This must always contain {@code bioAssays.size()} elements. *

* This is stored in the database using {@link ByteArrayType}. */ @@ -60,8 +66,10 @@ public class SingleCellDimension implements Identifiable { * Obtain the {@link BioAssay} for a given cell position. * * @param cellIndex the cell position in {@link #cellIds} + * @throws IllegalArgumentException if the sparse range array is invalid as per {@link ubic.gemma.core.util.ListUtils#getSparseRangeArrayElement(List, int[], int, int)} + * @throws IndexOutOfBoundsException if the index is out of bounds */ - public BioAssay getBioAssay( int cellIndex ) { + public BioAssay getBioAssay( int cellIndex ) throws IndexOutOfBoundsException { return getSparseRangeArrayElement( bioAssays, bioAssaysOffset, cellIds.size(), cellIndex ); } @@ -71,11 +79,13 @@ public BioAssay getBioAssay( int cellIndex ) { * @param sampleIndex the sample position in {@link #bioAssays} */ public List getCellIdsBySample( int sampleIndex ) { - return cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) ); + return Collections.unmodifiableList( cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) ) ); } /** * Obtain the number for cells for the given sample. + *

+ * This is more efficient than looking up the size of {@link #getCellIdsBySample(int)}. * * @param sampleIndex the sample position in {@link #bioAssays} */ diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java new file mode 100644 index 0000000000..d5277ecdac --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java @@ -0,0 +1,63 @@ +package ubic.gemma.model.expression.bioAssayData; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import lombok.extern.apachecommons.CommonsLog; +import ubic.gemma.model.IdentifiableValueObject; +import ubic.gemma.model.analysis.CellTypeAssignmentValueObject; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssay.BioAssayValueObject; +import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.List; + +/** + * Value object for a single-cell dimension. + *

+ * {@link BioAssay}s are unpacked into a list of IDs. This is suitable because this object is displayed in the context + * of an {@link ExpressionExperimentValueObject} and its associated {@link BioAssayValueObject}. + * + * @author poirigui + */ +@Data +@EqualsAndHashCode(callSuper = true) +@CommonsLog +public class SingleCellDimensionValueObject extends IdentifiableValueObject { + + /** + * Cell identifiers. + */ + private List cellIds; + + /** + * A list of {@link ubic.gemma.model.expression.bioAssay.BioAssay} IDs that are applicable to the cells. + */ + private List bioAssayIds; + + /** + * The preferred cell type assignment. + */ + @Nullable + private CellTypeAssignmentValueObject cellTypeAssignment; + + /** + * @param cellTypeAssignment a featured cell type assignment from {@link SingleCellDimension#getCellTypeAssignments()} + */ + public SingleCellDimensionValueObject( SingleCellDimension singleCellDimension, @Nullable CellTypeAssignment cellTypeAssignment ) { + super( singleCellDimension ); + this.cellIds = singleCellDimension.getCellIds(); + this.bioAssayIds = new ArrayList<>( singleCellDimension.getCellIds().size() ); + try { + for ( int i = 0; i < singleCellDimension.getCellIds().size(); i++ ) { + this.bioAssayIds.add( singleCellDimension.getBioAssay( i ).getId() ); + } + } catch ( IllegalArgumentException | IndexOutOfBoundsException e ) { + log.warn( "The bioassays sparse range array is invalid for " + singleCellDimension, e ); + } + if ( cellTypeAssignment != null ) { + this.cellTypeAssignment = new CellTypeAssignmentValueObject( cellTypeAssignment ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java index 8278286721..a704f16122 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java +++ 
b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java @@ -8,11 +8,12 @@ import java.util.Objects; /** - * An expression data vector that contains data at the resolution of a single cell. + * An expression data vector that contains data at the resolution of individual cells. *

* This is achieved by storing cell metadata such as IDs and cell types in a {@link SingleCellDimension} that is shared * among all vectors of a given {@link ubic.gemma.model.expression.experiment.ExpressionExperiment} and individual * non-zero cell expression in a sparse data structure similar to the rows of a CSR matrix. + * * @author poirigui */ @Getter diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java index a09ddcd534..5c091ee13d 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java @@ -133,6 +133,14 @@ public static CompositeSequence newInstance( String name, ArrayDesign ad ) { cs.setArrayDesign( ad ); return cs; } + + public static CompositeSequence newInstance( String name, ArrayDesign ad, BioSequence bioSequence ) { + CompositeSequence cs = new CompositeSequence(); + cs.setName( name ); + cs.setArrayDesign( ad ); + cs.setBiologicalCharacteristic( bioSequence ); + return cs; + } } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java index 046b57a557..0a4077b4cb 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java @@ -13,6 +13,9 @@ import org.hibernate.Hibernate; import ubic.gemma.model.annotations.GemmaWebOnly; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; +import 
ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimensionValueObject; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.util.EntityUtils; @@ -74,6 +77,12 @@ public class ExpressionExperimentValueObject extends AbstractCuratableValueObjec private String technologyType; + /** + * The single-cell dimension of the preferred single-cell vectors. + */ + @Nullable + private SingleCellDimensionValueObject singleCellDimension; + /** * Required when using the class as a spring bean. */ @@ -142,6 +151,11 @@ public ExpressionExperimentValueObject( ExpressionExperiment ee ) { } } + public ExpressionExperimentValueObject( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment ) { + this( ee ); + this.singleCellDimension = new SingleCellDimensionValueObject( singleCellDimension, cellTypeAssignment ); + } + /** * Creates a new {@link ExpressionExperiment} value object with additional information about ownership. 
*/ @@ -169,6 +183,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.accession = vo.getAccession(); this.batchConfound = vo.getBatchConfound(); this.batchEffect = vo.getBatchEffect(); + this.batchEffectStatistics = vo.getBatchEffectStatistics(); this.externalDatabase = vo.getExternalDatabase(); this.externalUri = vo.getExternalUri(); this.metadata = vo.getMetadata(); @@ -186,6 +201,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.isShared = vo.getIsShared(); this.geeq = vo.getGeeq(); this.suitableForDEA = vo.getSuitableForDEA(); + this.singleCellDimension = vo.getSingleCellDimension(); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index bf253e1820..fb5dc6f9f5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -10,10 +10,7 @@ import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; -import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; -import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -105,6 +102,7 @@ public interface ExpressionExperimentDao * Obtain the dataset usage frequency by technology type for the given dataset IDs. *

* Note: No ACL filtering is performed. + * * @see #getTechnologyTypeUsageFrequency() */ Map getTechnologyTypeUsageFrequency( Collection eeIds ); @@ -123,6 +121,7 @@ public interface ExpressionExperimentDao * Obtain dataset usage frequency by platform currently for the given dataset IDs. *

* Note: no ACL filtering is performed. Only administrator can see troubled platforms. + * * @see #getArrayDesignsUsageFrequency(int) */ Map getArrayDesignsUsageFrequency( Collection eeIds, int maxResults ); @@ -142,6 +141,7 @@ public interface ExpressionExperimentDao * Obtain dataset usage frequency by platform currently for the given dataset IDs. *

* Note: no ACL filtering is performed. Only administrators can see troubled platforms. + * * @see #getOriginalPlatformsUsageFrequency(int) */ Map getOriginalPlatformsUsageFrequency( Collection eeIds, int maxResults ); @@ -214,11 +214,11 @@ Map> getSampleRemovalEvents( * Special method for front-end access. This is partly redundant with {@link #loadValueObjects(Filters, Sort, int, int)}; * however, it fills in more information, returns ExpressionExperimentDetailsValueObject * - * @param ids only list specific ids, or null to ignore - * @param taxon only list EEs in the specified taxon, or null to ignore - * @param sort the field to order the results by. - * @param offset offset - * @param limit maximum number of results to return + * @param ids only list specific ids, or null to ignore + * @param taxon only list EEs in the specified taxon, or null to ignore + * @param sort the field to order the results by. + * @param offset offset + * @param limit maximum number of results to return * @return a list of EE details VOs representing experiments matching the given arguments. */ Slice loadDetailsValueObjects( @Nullable Collection ids, @Nullable Taxon taxon, @Nullable Sort sort, int offset, int limit ); @@ -313,22 +313,28 @@ Map> getSampleRemovalEvents( void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension ); - List getCellTypeLabellings( ExpressionExperiment ee ); + List getCellTypeLabellings( ExpressionExperiment ee ); /** * Obtain the preferred labelling of the preferred single-cell vectors. + * * @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type - * labellings + * labellings */ @Nullable - CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ); /** * Add the given cell type labelling to the single-cell dimension. *

* If the new labelling is preferred, any existing one is marked as non-preferred. */ - void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling ); + void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment ); List getCellTypes( ExpressionExperiment ee ); + + /** + * Obtain a set of single-cell data vectors for the given quantitation type. + */ + List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index ecc8156b9b..a1703e4669 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -44,10 +44,7 @@ import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; import ubic.gemma.model.expression.arrayDesign.TechnologyType; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; -import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; -import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; import ubic.gemma.model.expression.experiment.*; import ubic.gemma.model.genome.Gene; @@ -841,6 +838,7 @@ public Map getAnnotationsUsageFrequency( @Nullable Collect *

* FIXME: There's a bug in Hibernate that that prevents it from producing proper tuples the excluded URIs and * retained term URIs + * * @param column column holding the URI to be excluded * @param labelColumn column holding the label (only used if excludeFreeText or excludeUncategorized is true, * then we will check if the label is non-null to cover some edge cases) @@ -1749,6 +1747,7 @@ protected ExpressionExperimentValueObject doLoadValueObject( ExpressionExperimen @Override protected void postProcessValueObjects( List results ) { populateArrayDesignCount( results ); + populateSingleCellMetadata( results ); } @Override @@ -1967,7 +1966,7 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens } @Override - public List getCellTypeLabellings( ExpressionExperiment ee ) { + public List getCellTypeLabellings( ExpressionExperiment ee ) { //noinspection unchecked return getSessionFactory().getCurrentSession() .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " @@ -1980,8 +1979,8 @@ public List getCellTypeLabellings( ExpressionExperiment ee ) @Nullable @Override - public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { - return ( CellTypeLabelling ) getSessionFactory().getCurrentSession() + public CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + return ( CellTypeAssignment ) getSessionFactory().getCurrentSession() .createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv " + "join scedv.singleCellDimension scd " + "join scd.cellTypeLabellings ctl " @@ -1991,9 +1990,9 @@ public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee } @Override - public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) { + public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeAssignment labelling ) { if ( labelling.isPreferred() ) { - 
for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) { + for ( CellTypeAssignment l : dimension.getCellTypeAssignments() ) { if ( l.isPreferred() ) { log.info( "Marking existing cell type labelling as non-preferred, a new preferred labelling will be added." ); l.setPreferred( false ); @@ -2002,7 +2001,7 @@ public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension d } } getSessionFactory().getCurrentSession().persist( labelling ); - dimension.getCellTypeLabellings().add( labelling ); + dimension.getCellTypeAssignments().add( labelling ); } @Override @@ -2018,6 +2017,17 @@ public List getCellTypes( ExpressionExperiment ee ) { .list(); } + @Override + public List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ) { + //noinspection unchecked + return getSessionFactory().getCurrentSession() + .createQuery( "select scedv from SingleCellExpressionDataVector scedv " + + "where scedv.expressionExperiment = :ee and scedv.quantitationType = :qt" ) + .setParameter( "ee", expressionExperiment ) + .setParameter( "qt", quantitationType ) + .list(); + } + @Override protected Query getFilteringQuery( @Nullable Filters filters, @Nullable Sort sort ) { // the constants for aliases are messing with the inspector @@ -2272,4 +2282,28 @@ private void populateArrayDesignCount( Collection eevos ) { + //noinspection unchecked + List results = getSessionFactory().getCurrentSession() + .createQuery( "select scedv.expressionExperiment.id, scd, cta from ExpressionExperiment ee " + + "join ee.singleCellExpressionDataVectors scedv " + + "join scedv.quantitationType qt " + + "join scedv.singleCellDimension scd " + + "left join scd.cellTypeAssignments cta " + + "where scedv.expressionExperiment.id in :ees " + + "and qt.isPreferred = true and (cta is null or cta.preferred = true) " // parenthesized: 'and' binds tighter than 'or' + + "group by scedv.expressionExperiment" ) + .setParameterList( "ees", EntityUtils.getIds( eevos ) ) + .list(); + if ( 
!results.isEmpty() ) { + Map voById = EntityUtils.getIdMap( eevos ); + for ( Object[] row : results ) { + Long id = ( Long ) row[0]; + SingleCellDimension scd = ( SingleCellDimension ) row[1]; + CellTypeAssignment cta = ( CellTypeAssignment ) row[2]; + voById.get( id ).setSingleCellDimension( new SingleCellDimensionValueObject( scd, cta ) ); + } + } + } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java index 0388b076ed..8d91cdc2c9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentService.java @@ -1,10 +1,11 @@ package ubic.gemma.persistence.service.expression.experiment; import org.springframework.security.access.annotation.Secured; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.protocol.Protocol; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.experiment.ExperimentalFactor; @@ -16,6 +17,12 @@ public interface SingleCellExpressionExperimentService { + /** + * Obtain a single-cell expression data matrix for the given quantitation type. 
+ */ + @Secured({ "GROUP_USER", "ACL_SECURABLE_EDIT" }) + SingleCellExpressionDataMatrix getSingleCellExpressionDataMatrix( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ); + /** * Add single-cell data vectors. */ @@ -44,12 +51,13 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua /** * Relabel the cell types of an existing set of single-cell vectors. + * * @param newCellTypeLabels the new cell types labels, must match the number of cells * @param labellingProtocol the protocol used to generate the new labelling, or null if unknown * @return a new, preferred cell type labelling */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description ); + CellTypeAssignment relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol labellingProtocol, @Nullable String description ); /** * Remove the given cell type labelling. @@ -57,20 +65,20 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua * If the cell type labelling is preferred and applies the the preferred vectors as per {@link #getPreferredCellTypeLabelling(ExpressionExperiment)}, the cell type factor will be removed. */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeLabelling cellTypeLabelling ); + void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension scd, CellTypeAssignment cellTypeAssignment ); /** * Obtain all the cell type labellings from all single-cell vectors. 
*/ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - List getCellTypeLabellings( ExpressionExperiment ee ); + List getCellTypeLabellings( ExpressionExperiment ee ); /** * Obtain the preferred cell type labelling from the preferred single-cell vectors. */ @Nullable @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) - CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ); + CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ); /** * Obtain the cell types of a given single-cell dataset. @@ -85,9 +93,10 @@ void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationType qua *

* Analyses involving the factor are removed and samples mentioning the factor values are updated as per * {@link ExperimentalFactorService#remove(ExperimentalFactor)}. + * * @return the created cell type factor * @throws IllegalStateException if the dataset does not have a preferred cell type labelling for its preferred set - * of single-cell vectors + * of single-cell vectors */ @Secured({ "GROUP_USER", "ACL_SECURABLE_READ" }) ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java index 55def3d502..8cbb99fa50 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceImpl.java @@ -6,6 +6,8 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.Assert; +import ubic.gemma.core.datastructure.matrix.DoubleSingleCellExpressionDataMatrix; +import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataReplacedEvent; @@ -16,7 +18,7 @@ import ubic.gemma.model.common.quantitationtype.PrimitiveType; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import 
ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.designElement.CompositeSequence; @@ -52,6 +54,12 @@ public class SingleCellExpressionExperimentServiceImpl implements SingleCellExpr @Deprecated private SessionFactory sessionFactory; + @Override + @Transactional(readOnly = true) + public SingleCellExpressionDataMatrix getSingleCellExpressionDataMatrix( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ) { + return new DoubleSingleCellExpressionDataMatrix( expressionExperimentDao.getSingleCellDataVectors( expressionExperiment, quantitationType ) ); + } + @Override @Transactional public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType quantitationType, Collection vectors ) { @@ -88,7 +96,7 @@ public void addSingleCellDataVectors( ExpressionExperiment ee, QuantitationType ee.getQuantitationTypes().add( quantitationType ); expressionExperimentDao.update( ee ); // will take care of creating vectors if ( quantitationType.getIsPreferred() && scdCreated ) { - CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + CellTypeAssignment preferredLabelling = scd.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).findFirst().orElse( null ); if ( preferredLabelling != null ) { log.info( "New single-cell preferred vectors were added, recreating the cell type factor." 
); recreateCellTypeFactor( ee, preferredLabelling ); @@ -135,7 +143,7 @@ public void replaceSingleCellDataVectors( ExpressionExperiment ee, QuantitationT int numVectorsAdded = ee.getSingleCellExpressionDataVectors().size() - ( previousSize - numVectorsRemoved ); expressionExperimentDao.update( ee ); if ( quantitationType.getIsPreferred() && scdCreated ) { - CellTypeLabelling preferredLabelling = scd.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).findFirst().orElse( null ); + CellTypeAssignment preferredLabelling = scd.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).findFirst().orElse( null ); if ( preferredLabelling != null ) { log.info( "Preferred single-cell vectors were replaced, recreating the cell type factor." ); recreateCellTypeFactor( ee, preferredLabelling ); @@ -213,7 +221,8 @@ public void removeSingleCellDataVectors( ExpressionExperiment ee, QuantitationTy /** * Remove the given single-cell vectors and their corresponding single-cell dimension if necessary. - * @param ee the experiment to remove the vectors from. + * + * @param ee the experiment to remove the vectors from. * @param additionalVectors additional vectors to check if the single-cell dimension is still in use (i.e. vectors that are in the process of being added). */ private void removeSingleCellVectorsAndDimensionIfNecessary( ExpressionExperiment ee, @@ -254,11 +263,11 @@ public List getSingleCellDimensions( ExpressionExperiment e @Override @Transactional - public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, Protocol protocol, String description ) { + public CellTypeAssignment relabelCellTypes( ExpressionExperiment ee, SingleCellDimension dimension, List newCellTypeLabels, @Nullable Protocol protocol, @Nullable String description ) { Assert.notNull( ee.getId(), "Dataset must be persistent." 
); Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." ); Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." ); - CellTypeLabelling labelling = new CellTypeLabelling(); + CellTypeAssignment labelling = new CellTypeAssignment(); labelling.setPreferred( true ); labelling.setProtocol( protocol ); labelling.setDescription( description ); @@ -267,11 +276,11 @@ public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDi for ( int i = 0; i < ct.length; i++ ) { ct[i] = Collections.binarySearch( labels, newCellTypeLabels.get( i ) ); } - labelling.setCellTypes( ct ); - labelling.setCellTypeLabels( labels.stream() + labelling.setCellTypeIndices( ct ); + labelling.setCellTypes( labels.stream() .map( l -> Characteristic.Factory.newInstance( Categories.CELL_TYPE, l, null ) ) .collect( Collectors.toList() ) ); - labelling.setNumberOfCellTypeLabels( labels.size() ); + labelling.setNumberOfCellTypes( labels.size() ); expressionExperimentDao.addCellTypeLabelling( ee, dimension, labelling ); validateSingleCellDimension( ee, dimension ); log.info( "Relabelled single-cell vectors for " + ee + " with: " + labelling ); @@ -287,14 +296,14 @@ public CellTypeLabelling relabelCellTypes( ExpressionExperiment ee, SingleCellDi @Override @Transactional - public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling cellTypeLabelling ) { + public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeAssignment cellTypeAssignment ) { Assert.notNull( ee.getId(), "Dataset must be persistent." ); Assert.notNull( dimension.getId(), "Single-cell dimension must be persistent." ); Assert.isTrue( ee.getBioAssays().containsAll( dimension.getBioAssays() ), "Single-cell dimension does not belong to the dataset." 
); - Assert.isTrue( dimension.getCellTypeLabellings().contains( cellTypeLabelling ), + Assert.isTrue( dimension.getCellTypeAssignments().contains( cellTypeAssignment ), "The supplied labelling does not belong to the dimension." ); - boolean alsoRemoveFactor = cellTypeLabelling.equals( getPreferredCellTypeLabelling( ee ) ); - dimension.getCellTypeLabellings().remove( cellTypeLabelling ); + boolean alsoRemoveFactor = cellTypeAssignment.equals( getPreferredCellTypeLabelling( ee ) ); + dimension.getCellTypeAssignments().remove( cellTypeAssignment ); if ( alsoRemoveFactor ) { log.info( "The preferred cell type labels have been removed, removing the cell type factor..." ); removeCellTypeFactorIfExists( ee ); @@ -303,13 +312,13 @@ public void removeCellTypeLabels( ExpressionExperiment ee, SingleCellDimension d @Override @Transactional(readOnly = true) - public List getCellTypeLabellings( ExpressionExperiment ee ) { + public List getCellTypeLabellings( ExpressionExperiment ee ) { return expressionExperimentDao.getCellTypeLabellings( ee ); } @Override @Transactional(readOnly = true) - public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) { + public CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ) { return expressionExperimentDao.getPreferredCellTypeLabelling( ee ); } @@ -331,21 +340,21 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim } Assert.isTrue( scbad.getCellIds().size() == scbad.getNumberOfCells(), "The number of cell IDs must match the number of cells." ); - Assert.isTrue( scbad.getCellTypeLabellings().stream().filter( CellTypeLabelling::isPreferred ).count() <= 1, + Assert.isTrue( scbad.getCellTypeAssignments().stream().filter( CellTypeAssignment::isPreferred ).count() <= 1, "There must be at most one preferred cell type labelling." 
); - for ( CellTypeLabelling labelling : scbad.getCellTypeLabellings() ) { - Assert.notNull( labelling.getNumberOfCellTypeLabels() ); - Assert.notNull( labelling.getCellTypeLabels() ); - Assert.isTrue( labelling.getCellTypes().length == scbad.getCellIds().size(), + for ( CellTypeAssignment labelling : scbad.getCellTypeAssignments() ) { + Assert.notNull( labelling.getNumberOfCellTypes() ); + Assert.notNull( labelling.getCellTypes() ); + Assert.isTrue( labelling.getCellTypeIndices().length == scbad.getCellIds().size(), "The number of cell types must match the number of cell IDs." ); - int numberOfCellTypeLabels = labelling.getCellTypeLabels().size(); + int numberOfCellTypeLabels = labelling.getCellTypes().size(); Assert.isTrue( numberOfCellTypeLabels > 0, "There must be at least one cell type label declared in the cellTypeLabels collection." ); - Assert.isTrue( labelling.getCellTypeLabels().stream().distinct().count() == labelling.getCellTypeLabels().size(), + Assert.isTrue( labelling.getCellTypes().stream().distinct().count() == labelling.getCellTypes().size(), "Cell type labels must be unique." ); - Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypeLabels(), + Assert.isTrue( numberOfCellTypeLabels == labelling.getNumberOfCellTypes(), "The number of cell types must match the number of values the cellTypeLabels collection." 
); - for ( int k : labelling.getCellTypes() ) { + for ( int k : labelling.getCellTypeIndices() ) { Assert.isTrue( k >= 0 && k < numberOfCellTypeLabels, String.format( "Cell type vector values must be within the [%d, %d[ range.", 0, numberOfCellTypeLabels ) ); } @@ -359,12 +368,12 @@ private void validateSingleCellDimension( ExpressionExperiment ee, SingleCellDim @Override @Transactional public ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee ) { - CellTypeLabelling ctl = getPreferredCellTypeLabelling( ee ); + CellTypeAssignment ctl = getPreferredCellTypeLabelling( ee ); Assert.notNull( ctl, "There must be a preferred cell type labelling for " + ee + " to update the cell type factor." ); return recreateCellTypeFactor( ee, ctl ); } - private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeLabelling ctl ) { + private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, CellTypeAssignment ctl ) { removeCellTypeFactorIfExists( ee ); // create a new cell type factor ExperimentalFactor cellTypeFactor = ExperimentalFactor.Factory.newInstance(); @@ -372,7 +381,7 @@ private ExperimentalFactor recreateCellTypeFactor( ExpressionExperiment ee, Cell cellTypeFactor.setCategory( Characteristic.Factory.newInstance( Categories.CELL_TYPE ) ); cellTypeFactor.setExperimentalDesign( ee.getExperimentalDesign() ); ee.getExperimentalDesign().getExperimentalFactors().add( cellTypeFactor ); - for ( Characteristic ct : ctl.getCellTypeLabels() ) { + for ( Characteristic ct : ctl.getCellTypes() ) { FactorValue fv = new FactorValue(); Statement s = new Statement(); s.setCategory( ct.getCategory() ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java new file mode 100644 index 0000000000..f405986ded --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/ByteArrayUtils.java @@ -0,0 +1,26 @@ +package 
ubic.gemma.persistence.util; + +import ubic.basecode.io.ByteArrayConverter; + +/** + * Utilities for working with byte arrays. + * + * @author poirigui + * @see ByteArrayConverter + */ +public class ByteArrayUtils { + + private static final ByteArrayConverter byteArrayConverter = new ByteArrayConverter(); + + public static byte[] doubleArrayToBytes( Double[] data ) { + return byteArrayConverter.doubleArrayToBytes( data ); + } + + public static byte[] doubleArrayToBytes( double[] data ) { + return byteArrayConverter.doubleArrayToBytes( data ); + } + + public static double[] byteArrayToDoubles( byte[] bytes ) { + return byteArrayConverter.byteArrayToDoubles( bytes ); + } +} diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml index 51f001613f..d403b35bb9 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml @@ -142,27 +142,27 @@ - - - + + int - - - + + + + - + - - + + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml index 8073ab7bc5..e77178bfdd 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Investigation.hbm.xml @@ -89,7 +89,7 @@ fetch="select"> - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml index f673b92e86..be7fd31712 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.hbm.xml @@ -20,11 +20,11 @@ - + - + 
diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java new file mode 100644 index 0000000000..1aa3395a55 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderPersistenceTest.java @@ -0,0 +1,104 @@ +package ubic.gemma.core.loader.expression.singleCell; + +import org.hibernate.SessionFactory; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.core.io.ClassPathResource; +import org.springframework.test.context.ContextConfiguration; +import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.common.quantitationtype.QuantitationType; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; +import ubic.gemma.model.expression.bioAssay.BioAssay; +import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; +import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; +import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.designElement.CompositeSequence; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.genome.Taxon; +import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; +import ubic.gemma.persistence.service.expression.experiment.*; +import ubic.gemma.persistence.util.TestComponent; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; + +import static org.mockito.Mockito.mock; +import static 
ubic.gemma.core.loader.expression.singleCell.MexTestUtils.createLoaderForResourceDir; + +/** + * Load and persist single-cell data stored in the MEX format. + */ +@ContextConfiguration +public class MexSingleCellDataLoaderPersistenceTest extends BaseDatabaseTest { + + @Configuration + @TestComponent + static class MexSingleCellDataLoaderPersistenceTestContextConfiguration extends BaseDatabaseTestContextConfiguration { + @Bean + public SingleCellExpressionExperimentService singleCellExpressionExperimentService() { + return new SingleCellExpressionExperimentServiceImpl(); + } + + @Bean + public ExpressionExperimentDao expressionExperimentDao( SessionFactory sessionFactory ) { + return new ExpressionExperimentDaoImpl( sessionFactory ); + } + + @Bean + public ExperimentalFactorService experimentalFactorService() { + return mock(); + } + + @Bean + public AuditTrailService auditTrailService() { + return mock(); + } + } + + @Autowired + private SingleCellExpressionExperimentService singleCellExpressionExperimentService; + + @Test + public void test() throws IOException { + MexSingleCellDataLoader loader = createLoaderForResourceDir( "/data/loader/expression/singleCell/GSE224438" ); + + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + ArrayDesign platform = new ArrayDesign(); + platform.setPrimaryTaxon( taxon ); + Map elementsMapping; + ClassPathResource cpr = new ClassPathResource( "data/loader/expression/singleCell/GSE224438/GSM7022367_1_features.tsv.gz" ); + try ( BufferedReader br = new BufferedReader( new InputStreamReader( new GZIPInputStream( cpr.getInputStream() ) ) ) ) { + elementsMapping = br.lines() + .map( line -> line.split( "\t", 2 )[0] ) + .collect( Collectors.toMap( s -> s, name -> CompositeSequence.Factory.newInstance( name, platform ) ) ); + } + platform.getCompositeSequences().addAll( elementsMapping.values() ); + sessionFactory.getCurrentSession().persist( platform ); + ExpressionExperiment ee = new 
ExpressionExperiment(); + + for ( String sampleName : loader.getSampleNames() ) { + BioMaterial bm = BioMaterial.Factory.newInstance( sampleName, taxon ); + sessionFactory.getCurrentSession().persist( bm ); + BioAssay ba = BioAssay.Factory.newInstance( sampleName, platform, bm ); + bm.getBioAssaysUsedIn().add( ba ); + ee.getBioAssays().add( ba ); + } + + sessionFactory.getCurrentSession().persist( ee ); + sessionFactory.getCurrentSession().flush(); + SingleCellDimension dimension = loader.getSingleCellDimension( ee.getBioAssays() ); + QuantitationType qt = loader.getQuantitationTypes().iterator().next(); + sessionFactory.getCurrentSession().persist( qt ); + try ( Stream stream = loader.loadVectors( elementsMapping, dimension, qt ) ) { + singleCellExpressionExperimentService.addSingleCellDataVectors( ee, qt, stream.collect( Collectors.toList() ) ); + } + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java index 832a3699bb..3453f0dbd6 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoaderTest.java @@ -2,12 +2,9 @@ import org.junit.Test; import org.springframework.core.io.ClassPathResource; -import org.springframework.core.io.Resource; -import org.springframework.core.io.support.PathMatchingResourcePatternResolver; import ubic.basecode.io.ByteArrayConverter; import ubic.gemma.model.common.quantitationtype.PrimitiveType; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import 
ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; @@ -17,54 +14,33 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; -import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.loader.expression.singleCell.MexTestUtils.createLoaderForResourceDir; -public class MexCellDataLoaderTest { +public class MexSingleCellDataLoaderTest { private static final ByteArrayConverter byteArrayConverter = new ByteArrayConverter(); @Test public void test() throws IOException { - ArrayDesign platform = ArrayDesign.Factory.newInstance( "GPL12311", null ); - - // consider the first file as a platform! + // consider the first file for mapping to elements + Map elementsMapping; ClassPathResource cpr = new ClassPathResource( "data/loader/expression/singleCell/GSE224438/GSM7022367_1_features.tsv.gz" ); try ( BufferedReader br = new BufferedReader( new InputStreamReader( new GZIPInputStream( cpr.getInputStream() ) ) ) ) { - br.lines().forEach( line -> platform.getCompositeSequences().add( CompositeSequence.Factory.newInstance( line.split( "\t", 2 )[0] ) ) ); + elementsMapping = br.lines() + .map( line -> line.split( "\t", 2 )[0] ) + .collect( Collectors.toMap( s -> s, CompositeSequence.Factory::newInstance ) ); } - List sampleNames = new ArrayList<>(); - List barcodeFiles = new ArrayList<>(); - List geneFiles = new ArrayList<>(); - List matrixFiles = new ArrayList<>(); - PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); - Resource[] resources = resolver.getResources( "data/loader/expression/singleCell/GSE224438/*" ); - Map> f = Arrays.stream( resources ) - .collect( Collectors.groupingBy( r -> r.getFilename().split( "_", 2 )[0], Collectors.toList() ) ); - f = 
new TreeMap<>( f ); - for ( Map.Entry> entry : f.entrySet() ) { - String sampleName = entry.getKey(); - Resource barcodeFile = entry.getValue().stream() - .filter( p -> p.getFilename().endsWith( "barcodes.tsv.gz" ) ) - .findFirst() - .orElse( null ); - Resource geneFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "features.tsv.gz" ) ).findFirst().orElse( null ); - Resource matrixFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "matrix.mtx.gz" ) ).findFirst().orElse( null ); - if ( barcodeFile != null && geneFile != null && matrixFile != null ) { - sampleNames.add( sampleName ); - barcodeFiles.add( barcodeFile.getFile().toPath() ); - geneFiles.add( geneFile.getFile().toPath() ); - matrixFiles.add( matrixFile.getFile().toPath() ); - } - } - MexCellDataLoader loader = new MexCellDataLoader( sampleNames, barcodeFiles, geneFiles, matrixFiles ); + MexSingleCellDataLoader loader = createLoaderForResourceDir( "data/loader/expression/singleCell/GSE224438" ); ArrayList bas = new ArrayList<>(); - for ( String sampleName : sampleNames ) { + for ( String sampleName : loader.getSampleNames() ) { bas.add( BioAssay.Factory.newInstance( sampleName, null, BioMaterial.Factory.newInstance( sampleName ) ) ); } assertThat( loader.getCellTypeLabelling() ).isEmpty(); @@ -79,7 +55,7 @@ public void test() throws IOException { assertThat( dimension.getNumberOfCellsBySample( 9 ) ).isEqualTo( 1000 ); assertThat( dimension.getBioAssaysOffset() ) .containsExactly( 0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000 ); - List vectors = loader.loadVectors( platform, dimension, qt ).collect( Collectors.toList() ); + List vectors = loader.loadVectors( elementsMapping, dimension, qt ).collect( Collectors.toList() ); assertThat( vectors ) .hasSize( 1000 ) .allSatisfy( v -> { diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java 
b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java new file mode 100644 index 0000000000..828536f389 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/singleCell/MexTestUtils.java @@ -0,0 +1,40 @@ +package ubic.gemma.core.loader.expression.singleCell; + +import org.springframework.core.io.Resource; +import org.springframework.core.io.support.PathMatchingResourcePatternResolver; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +public class MexTestUtils { + + public static MexSingleCellDataLoader createLoaderForResourceDir( String resourceDir ) throws IOException { + List sampleNames = new ArrayList<>(); + List barcodeFiles = new ArrayList<>(); + List geneFiles = new ArrayList<>(); + List matrixFiles = new ArrayList<>(); + PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(); + Resource[] resources = resolver.getResources( resourceDir + "/*" ); + Map> f = Arrays.stream( resources ) + .collect( Collectors.groupingBy( r -> r.getFilename().split( "_", 2 )[0], Collectors.toList() ) ); + f = new TreeMap<>( f ); + for ( Map.Entry> entry : f.entrySet() ) { + String sampleName = entry.getKey(); + Resource barcodeFile = entry.getValue().stream() + .filter( p -> p.getFilename().endsWith( "barcodes.tsv.gz" ) ) + .findFirst() + .orElse( null ); + Resource geneFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "features.tsv.gz" ) ).findFirst().orElse( null ); + Resource matrixFile = entry.getValue().stream().filter( p -> p.getFilename().endsWith( "matrix.mtx.gz" ) ).findFirst().orElse( null ); + if ( barcodeFile != null && geneFile != null && matrixFile != null ) { + sampleNames.add( sampleName ); + barcodeFiles.add( barcodeFile.getFile().toPath() ); + geneFiles.add( geneFile.getFile().toPath() ); + matrixFiles.add( matrixFile.getFile().toPath() ); + } + } + return new 
MexSingleCellDataLoader( sampleNames, barcodeFiles, geneFiles, matrixFiles ); + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index 38ba47506b..6fd0e86bce 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -15,14 +15,17 @@ import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.TestExecutionListeners; import ubic.gemma.core.util.test.BaseDatabaseTest; +import ubic.gemma.model.common.description.Categories; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector; -import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; +import ubic.gemma.model.expression.bioAssayData.*; import ubic.gemma.model.expression.biomaterial.BioMaterial; +import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExperimentalDesign; import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.util.*; @@ -386,6 +389,62 @@ public void testRemoveExperimentWithSharedBioMaterial() { assertTrue( bm.getBioAssaysUsedIn().contains( ba2 ) ); } + @Test + @WithMockUser + public void testLoadValueObjectWithSingleCellData() { + Taxon taxon = new Taxon(); + sessionFactory.getCurrentSession().persist( taxon ); + 
ArrayDesign ad = new ArrayDesign(); + ad.setPrimaryTaxon( taxon ); + sessionFactory.getCurrentSession().persist( ad ); + CompositeSequence cs = new CompositeSequence(); + cs.setArrayDesign( ad ); + sessionFactory.getCurrentSession().persist( cs ); + BioMaterial bm = new BioMaterial(); + bm.setSourceTaxon( taxon ); + sessionFactory.getCurrentSession().persist( bm ); + ExpressionExperiment ee = new ExpressionExperiment(); + BioAssay ba = new BioAssay(); + ba.setArrayDesignUsed( ad ); + ba.setSampleUsed( bm ); + ee.getBioAssays().add( ba ); + SingleCellDimension scd = new SingleCellDimension(); + scd.setCellIds( Arrays.asList( "A", "B", "C" ) ); + scd.getBioAssays().add( ba ); + scd.setBioAssaysOffset( new int[] { 0 } ); + CellTypeAssignment cta = new CellTypeAssignment(); + cta.setCellTypeIndices( new int[] { 0, 1, 1, 0 } ); + cta.setCellTypes( Arrays.asList( Characteristic.Factory.newInstance( Categories.CELL_TYPE, "X", null ), + Characteristic.Factory.newInstance( Categories.CELL_TYPE, "Y", null ) ) ); + cta.setPreferred( true ); + cta.setNumberOfCellTypes( 0 ); + scd.getCellTypeAssignments().add( cta ); + sessionFactory.getCurrentSession().persist( scd ); + QuantitationType qt = new QuantitationType(); + qt.setGeneralType( GeneralType.QUANTITATIVE ); + qt.setType( StandardQuantitationType.COUNT ); + qt.setRepresentation( PrimitiveType.DOUBLE ); + qt.setScale( ScaleType.COUNT ); + qt.setIsPreferred( true ); + ee.getQuantitationTypes().add( qt ); + SingleCellExpressionDataVector vector = new SingleCellExpressionDataVector(); + vector.setData( ByteArrayUtils.doubleArrayToBytes( new double[] { 1.0, 2.0, 1.0, 2.0 } ) ); + vector.setDataIndices( new int[] { 0, 1, 2, 4 } ); + vector.setExpressionExperiment( ee ); + vector.setDesignElement( cs ); + vector.setQuantitationType( qt ); + vector.setSingleCellDimension( scd ); + ee.getSingleCellExpressionDataVectors().add( vector ); + sessionFactory.getCurrentSession().persist( ee ); + 
sessionFactory.getCurrentSession().flush(); + ExpressionExperimentValueObject eevo = expressionExperimentDao.loadValueObject( ee ); + assertNotNull( eevo ); + assertNotNull( eevo.getSingleCellDimension() ); + assertEquals( Arrays.asList( "A", "B", "C" ), eevo.getSingleCellDimension().getCellIds() ); + assertNotNull( eevo.getSingleCellDimension().getCellTypeAssignment() ); + assertEquals( Arrays.asList( 1L, 2L, 2L, 1L ), eevo.getSingleCellDimension().getCellTypeAssignment().getCellTypeIds() ); + } + private ExpressionExperiment reload( ExpressionExperiment e ) { sessionFactory.getCurrentSession().flush(); sessionFactory.getCurrentSession().evict( e ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java index 79728e4a46..ea0b27727b 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/SingleCellExpressionExperimentServiceTest.java @@ -1,6 +1,5 @@ package ubic.gemma.persistence.service.expression.experiment; -import gemma.gsec.SecurityService; import org.apache.commons.lang3.RandomStringUtils; import org.hibernate.NonUniqueResultException; import org.hibernate.SessionFactory; @@ -10,11 +9,8 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.security.access.AccessDecisionManager; import org.springframework.test.context.ContextConfiguration; -import ubic.gemma.core.analysis.preprocess.svd.SVDService; -import ubic.gemma.core.ontology.OntologyService; -import ubic.gemma.core.search.SearchService; +import 
ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix; import ubic.gemma.core.util.test.BaseDatabaseTest; import ubic.gemma.model.common.auditAndSecurity.eventType.DataAddedEvent; import ubic.gemma.model.common.auditAndSecurity.eventType.DataRemovedEvent; @@ -24,7 +20,7 @@ import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; -import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling; +import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment; import ubic.gemma.model.expression.bioAssayData.SingleCellDimension; import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -33,14 +29,7 @@ import ubic.gemma.model.expression.experiment.ExperimentalFactor; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.persistence.service.analysis.expression.coexpression.CoexpressionAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.pca.PrincipalComponentAnalysisService; -import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService; -import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService; import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService; -import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService; -import ubic.gemma.persistence.service.expression.bioAssayData.BioAssayDimensionService; import ubic.gemma.persistence.util.TestComponent; import java.util.Arrays; @@ -76,94 +65,14 @@ public ExpressionExperimentDao expressionExperimentDao( SessionFactory sessionFa return new ExpressionExperimentDaoImpl( sessionFactory ); } - @Bean - public 
AuditEventService auditEventService() { - return mock( AuditEventService.class ); - } - - @Bean - public AuditTrailService auditTrailService() { - return mock( AuditTrailService.class ); - } - - @Bean - public BioAssayDimensionService bioAssayDimensionService() { - return mock( BioAssayDimensionService.class ); - } - - @Bean - public DifferentialExpressionAnalysisService differentialExpressionAnalysisService() { - return mock( DifferentialExpressionAnalysisService.class ); - } - - @Bean - public ExpressionExperimentSetService expressionExperimentSetService() { - return mock( ExpressionExperimentSetService.class ); - } - - @Bean - public ExpressionExperimentSubSetService expressionExperimentSubSetService() { - return mock( ExpressionExperimentSubSetService.class ); - } - @Bean public ExperimentalFactorService experimentalFactorService() { - return mock( ExperimentalFactorService.class ); - } - - @Bean - public FactorValueService factorValueService() { - return mock( FactorValueService.class ); - } - - @Bean - public OntologyService ontologyService() { - return mock( OntologyService.class ); - } - - @Bean - public PrincipalComponentAnalysisService principalComponentAnalysisService() { - return mock( PrincipalComponentAnalysisService.class ); + return mock(); } @Bean - public QuantitationTypeService quantitationTypeService() { - return mock( QuantitationTypeService.class ); - } - - @Bean - public SearchService searchService() { - return mock( SearchService.class ); - } - - @Bean - public SecurityService securityService() { - return mock( SecurityService.class ); - } - - @Bean - public SVDService svdService() { - return mock( SVDService.class ); - } - - @Bean - public CoexpressionAnalysisService coexpressionAnalysisService() { - return mock( CoexpressionAnalysisService.class ); - } - - @Bean - public SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService() { - return mock( SampleCoexpressionAnalysisService.class ); - } - - @Bean - public 
BlacklistedEntityService blacklistedEntityService() { - return mock( BlacklistedEntityService.class ); - } - - @Bean - public AccessDecisionManager accessDecisionManager() { - return mock( AccessDecisionManager.class ); + public AuditTrailService auditTrailService() { + return mock(); } } @@ -176,6 +85,9 @@ public AccessDecisionManager accessDecisionManager() { @Autowired private ExpressionExperimentDao expressionExperimentDao; + @Autowired + private ExperimentalFactorService experimentalFactorService; + private ArrayDesign ad; private ExpressionExperiment ee; @@ -210,6 +122,19 @@ public void resetMocks() { reset( auditTrailService ); } + @Test + public void testGetSingleCellDataMatrix() { + Collection vectors = createSingleCellVectors( true ); + QuantitationType qt = vectors.iterator().next().getQuantitationType(); + SingleCellDimension scd = vectors.iterator().next().getSingleCellDimension(); + scExpressionExperimentService.addSingleCellDataVectors( ee, qt, vectors ); + SingleCellExpressionDataMatrix matrix = scExpressionExperimentService.getSingleCellExpressionDataMatrix( ee, qt ); + assertThat( matrix.getQuantitationType() ).isEqualTo( qt ); + assertThat( matrix.getSingleCellDimension() ).isEqualTo( scd ); + assertThat( matrix.columns() ).isEqualTo( 100 ); + assertThat( matrix.rows() ).isEqualTo( 100 ); + } + @Test public void testAddSingleCellDataVectors() { Collection vectors = createSingleCellVectors( true ); @@ -225,8 +150,8 @@ public void testAddSingleCellDataVectors() { assertThat( scExpressionExperimentService.getSingleCellDimensions( ee ) ) .hasSize( 1 ) .allSatisfy( scd -> { - assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 0 ).getValue() ).isEqualTo( "A" ); - assertThat( scd.getCellTypeLabellings().iterator().next().getCellTypeLabel( 50 ).getValue() ).isEqualTo( "B" ); + assertThat( scd.getCellTypeAssignments().iterator().next().getCellType( 0 ).getValue() ).isEqualTo( "A" ); + assertThat( 
scd.getCellTypeAssignments().iterator().next().getCellType( 50 ).getValue() ).isEqualTo( "B" ); } ); Collection vectors2 = createSingleCellVectors( true ); @@ -356,14 +281,14 @@ public void testRelabelCellTypes() { for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 75 ? "A" : "B"; } - CellTypeLabelling newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null ); + CellTypeAssignment newLabelling = scExpressionExperimentService.relabelCellTypes( ee, scd, Arrays.asList( ct ), null, null ); assertThat( newLabelling ).satisfies( l -> { assertThat( l.getId() ).isNotNull(); assertThat( l.isPreferred() ).isTrue(); } ); assertThat( ee.getSingleCellExpressionDataVectors() ) .hasSize( 10 ) - .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeLabellings() ).contains( newLabelling ) ); + .allSatisfy( v -> assertThat( v.getSingleCellDimension().getCellTypeAssignments() ).contains( newLabelling ) ); assertThat( scExpressionExperimentService.getCellTypeLabellings( ee ) ) .hasSize( 1 ) .contains( newLabelling ); @@ -403,9 +328,6 @@ public void testGetPreferredCellTypeLabellingWhenNonUnique() { .isInstanceOf( NonUniqueResultException.class ); } - @Autowired - private ExperimentalFactorService experimentalFactorService; - @Test public void testRecreateCellTypeFactor() { when( experimentalFactorService.create( any( ExperimentalFactor.class ) ) ).thenAnswer( a -> a.getArgument( 0 ) ); @@ -426,14 +348,14 @@ private SingleCellDimension createSingleCellDimension() { for ( int i = 0; i < ct.length; i++ ) { ct[i] = i < 50 ? 
0 : 1; } - CellTypeLabelling labelling = new CellTypeLabelling(); + CellTypeAssignment labelling = new CellTypeAssignment(); labelling.setPreferred( true ); - labelling.setCellTypes( ct ); - labelling.setCellTypeLabels( Arrays.asList( + labelling.setCellTypeIndices( ct ); + labelling.setCellTypes( Arrays.asList( Characteristic.Factory.newInstance( Categories.CELL_TYPE, "A", null ), Characteristic.Factory.newInstance( Categories.CELL_TYPE, "B", null ) ) ); - labelling.setNumberOfCellTypeLabels( 2 ); - scd.getCellTypeLabellings().add( labelling ); + labelling.setNumberOfCellTypes( 2 ); + scd.getCellTypeAssignments().add( labelling ); scd.getBioAssays().addAll( ee.getBioAssays() ); scd.setBioAssaysOffset( new int[] { 0, 25, 50, 75 } ); return scd; @@ -473,8 +395,8 @@ private Collection createSingleCellVectors( Sing v.setSingleCellDimension( scd ); v.setQuantitationType( qt ); v.setData( new byte[8 * 100] ); - int[] ix = new int[8 * 100]; - for ( int i = 0; i < 800; i++ ) { + int[] ix = new int[100]; + for ( int i = 0; i < 100; i++ ) { ix[i] = i; } v.setDataIndices( ix );