diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml
index 6d2caaa3ae..2a7f5d4d56 100644
--- a/gemma-core/pom.xml
+++ b/gemma-core/pom.xml
@@ -316,6 +316,14 @@
4.2.2.GA
+
+
+        <dependency>
+            <groupId>com.googlecode.matrix-toolkits-java</groupId>
+            <artifactId>mtj</artifactId>
+            <version>1.0.4</version>
+        </dependency>
+
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
index b3fd1e6350..1a655dd7e0 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/BulkExpressionDataMatrix.java
@@ -49,6 +49,18 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix {
*/
Collection getQuantitationTypes();
+ /**
+ * @return a {@link BioAssayDimension} that covers all the biomaterials in this matrix.
+ * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials
+ * used in the experiment.
+ */
+ BioAssayDimension getBestBioAssayDimension();
+
+ /**
+ * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing.
+ */
+ boolean hasMissingValues();
+
/**
* Access a single value of the matrix. Note that because there can be multiple bioassays per column and multiple
* designelements per row, it is possible for this method to retrieve a data that does not come from the bioassay
@@ -69,6 +81,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix {
*/
T[][] get( List designElements, List bioAssays );
+ /**
+ * Access the entire matrix.
+ *
+ * @return T[][]
+ */
+ T[][] getRawMatrix();
+
/**
* Access a single column of the matrix.
*
@@ -85,6 +104,21 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix {
*/
T[][] getColumns( List bioAssays );
+
+ /**
+ * @return list of elements representing the row 'labels'.
+ */
+ List getRowElements();
+
+ /**
+ * Number of columns that use the given design element. Useful if the matrix includes data from more than one array
+ * design.
+ *
+ * @param el el
+ * @return int
+ */
+ int columns( CompositeSequence el );
+
/**
* @param index i
* @return BioMaterial. Note that if this represents a subsetted data set, the BioMaterial may be a lightweight
@@ -98,13 +132,6 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix {
*/
int getColumnIndex( BioMaterial bioMaterial );
- /**
- * @return The bioassaydimension that covers all the biomaterials in this matrix.
- * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials
- * used in the experiment.
- */
- BioAssayDimension getBestBioAssayDimension();
-
/**
* Produce a BioAssayDimension representing the matrix columns for a specific row. The designelement argument is
* needed because a matrix can combine data from multiple array designs, each of which will generate its own
@@ -122,4 +149,13 @@ public interface BulkExpressionDataMatrix extends ExpressionDataMatrix {
* used in the study.
*/
Collection getBioAssaysForColumn( int index );
+
+ /**
+ * Set a value in the matrix, by index
+ *
+ * @param row row
+ * @param column col
+ * @param value val
+ */
+ void set( int row, int column, T value );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java
new file mode 100644
index 0000000000..1beae14df6
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/DoubleSingleCellExpressionDataMatrix.java
@@ -0,0 +1,160 @@
+package ubic.gemma.core.datastructure.matrix;
+
+import no.uib.cipr.matrix.sparse.CompRowMatrix;
+import org.springframework.util.Assert;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.designElement.CompositeSequence;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.util.ByteArrayUtils;
+
+import java.util.*;
+
+/**
+ * Single-cell expression data matrix of doubles backed by a sparse {@link CompRowMatrix}.
+ * <p>
+ * Rows are design elements sorted by name and then by ID, and there is one column per cell of the
+ * {@link SingleCellDimension}. Entries that were not supplied by the vectors read as {@code 0.0}.
+ *
+ * @author poirigui
+ */
+public class DoubleSingleCellExpressionDataMatrix implements SingleCellExpressionDataMatrix<Double> {
+
+    /**
+     * Row ordering (name, then ID); {@link #getRowIndex(CompositeSequence)} relies on it for binary search.
+     */
+    private static final Comparator<CompositeSequence> designElementComparator = Comparator.comparing( CompositeSequence::getName )
+            .thenComparing( CompositeSequence::getId );
+
+    private final ExpressionExperiment expressionExperiment;
+    private final QuantitationType quantitationType;
+    private final SingleCellDimension singleCellDimension;
+    private final CompRowMatrix matrix;
+    private final List<CompositeSequence> designElements;
+
+    /**
+     * Build a matrix from single-cell vectors, one row per vector.
+     *
+     * @param vectors vectors to load, which must all share the same quantitation type and single-cell dimension
+     * @throws IllegalArgumentException if no vector is supplied, or if the vectors do not all share the same
+     *                                  quantitation type and single-cell dimension
+     */
+    public DoubleSingleCellExpressionDataMatrix( Collection<SingleCellExpressionDataVector> vectors ) {
+        Assert.isTrue( !vectors.isEmpty(), "At least one vector must be supplied. Use EmptyExpressionMatrix for empty data matrices instead." );
+        Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getQuantitationType ).distinct().count() == 1,
+                "All vectors must have the same quantitation type." );
+        Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getSingleCellDimension ).distinct().count() == 1,
+                "All vectors must have the same single-cell dimension." );
+        SingleCellExpressionDataVector vector = vectors.iterator().next();
+        expressionExperiment = vector.getExpressionExperiment();
+        quantitationType = vector.getQuantitationType();
+        singleCellDimension = vector.getSingleCellDimension();
+        // sort vectors by design element so that rows can later be located by binary search
+        List<SingleCellExpressionDataVector> sortedVectors = new ArrayList<>( vectors );
+        sortedVectors.sort( Comparator.comparing( SingleCellExpressionDataVector::getDesignElement, designElementComparator ) );
+        int rows = sortedVectors.size();
+        // the column indices of each row are needed upfront to allocate the sparse structure
+        int[][] nz = new int[rows][];
+        List<CompositeSequence> elements = new ArrayList<>( rows );
+        for ( int i = 0; i < rows; i++ ) {
+            SingleCellExpressionDataVector v = sortedVectors.get( i );
+            nz[i] = v.getDataIndices();
+            elements.add( v.getDesignElement() );
+        }
+        matrix = new CompRowMatrix( rows, singleCellDimension.getNumberOfCells(), nz );
+        for ( int i = 0; i < rows; i++ ) {
+            SingleCellExpressionDataVector v = sortedVectors.get( i );
+            double[] row = ByteArrayUtils.byteArrayToDoubles( v.getData() );
+            int[] indices = v.getDataIndices();
+            for ( int j = 0; j < row.length; j++ ) {
+                matrix.set( i, indices[j], row[j] );
+            }
+        }
+        // the list must stay sorted for getRowIndex() to work, so it is never exposed in a mutable form
+        designElements = Collections.unmodifiableList( elements );
+    }
+
+    @Override
+    public ExpressionExperiment getExpressionExperiment() {
+        return expressionExperiment;
+    }
+
+    @Override
+    public int columns() {
+        return matrix.numColumns();
+    }
+
+    @Override
+    public Double get( int row, int column ) {
+        return matrix.get( row, column );
+    }
+
+    @Override
+    public Double[] getColumn( int column ) {
+        // the matrix is stored row by row, so every row has to be searched for the requested column
+        Double[] vec = new Double[matrix.numRows()];
+        for ( int j = 0; j < matrix.numRows(); j++ ) {
+            vec[j] = matrix.get( j, column );
+        }
+        return vec;
+    }
+
+    /**
+     * @return the design elements in row order, as an unmodifiable list
+     */
+    @Override
+    public List<CompositeSequence> getDesignElements() {
+        return designElements;
+    }
+
+    @Override
+    public CompositeSequence getDesignElementForRow( int index ) {
+        return designElements.get( index );
+    }
+
+    @Override
+    public Double[] getRow( CompositeSequence designElement ) {
+        int ix = getRowIndex( designElement );
+        if ( ix == -1 ) {
+            return null;
+        }
+        return getRow( ix );
+    }
+
+    @Override
+    public Double[] getRow( int index ) {
+        Double[] vec = new Double[matrix.numColumns()];
+        // entries that are not stored must read as zero, as they do in get() and getColumn(), not as null
+        Arrays.fill( vec, 0.0 );
+        // copy the stored entries of this row straight from the compressed row storage
+        int[] rowptr = matrix.getRowPointers();
+        int[] colind = matrix.getColumnIndices();
+        double[] data = matrix.getData();
+        for ( int i = rowptr[index]; i < rowptr[index + 1]; i++ ) {
+            vec[colind[i]] = data[i];
+        }
+        return vec;
+    }
+
+    @Override
+    public int getRowIndex( CompositeSequence designElement ) {
+        // binarySearch() returns a negative insertion point when the element is absent; normalize it to -1
+        return Math.max( Collections.binarySearch( designElements, designElement, designElementComparator ), -1 );
+    }
+
+    @Override
+    public int rows() {
+        return matrix.numRows();
+    }
+
+    @Override
+    public QuantitationType getQuantitationType() {
+        return quantitationType;
+    }
+
+    @Override
+    public SingleCellDimension getSingleCellDimension() {
+        return singleCellDimension;
+    }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java
index 24325c7b28..1c6c012809 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/EmptyExpressionMatrix.java
@@ -84,7 +84,7 @@ public Object[] getColumn( BioAssay bioAssay ) {
}
@Override
- public Object[] getColumn( Integer column ) {
+ public Object[] getColumn( int column ) {
throw new UnsupportedOperationException();
}
@@ -104,12 +104,7 @@ public Object[] getRow( CompositeSequence designElement ) {
}
@Override
- public Object[] getRow( Integer index ) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Object[][] getRows( List designElements ) {
+ public Object[] getRow( int index ) {
throw new UnsupportedOperationException();
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java
index 17b5e8e762..967785b019 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataBooleanMatrix.java
@@ -91,7 +91,7 @@ public Boolean[] getColumn( BioAssay bioAssay ) {
}
@Override
- public Boolean[] getColumn( Integer index ) {
+ public Boolean[] getColumn( int index ) {
ObjectMatrix1D rawResult = this.matrix.viewColumn( index );
Boolean[] res = new Boolean[rawResult.size()];
int i = 0;
@@ -135,26 +135,10 @@ public Boolean[] getRow( CompositeSequence designElement ) {
}
@Override
- public Boolean[] getRow( Integer index ) {
+ public Boolean[] getRow( int index ) {
return matrix.getRow( index );
}
- @Override
- public Boolean[][] getRows( List designElements ) {
- if ( designElements == null ) {
- return null;
- }
-
- Boolean[][] result = new Boolean[designElements.size()][];
- int i = 0;
- for ( CompositeSequence element : designElements ) {
- Boolean[] rowResult = this.getRow( element );
- result[i] = rowResult;
- i++;
- }
- return result;
- }
-
@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java
index 2f7615a815..8762372430 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataDoubleMatrix.java
@@ -314,7 +314,7 @@ public Double[] getColumn( BioAssay bioAssay ) {
}
@Override
- public Double[] getColumn( Integer index ) {
+ public Double[] getColumn( int index ) {
double[] rawResult = this.matrix.getColumn( index );
assert rawResult != null;
Double[] result = new Double[rawResult.length];
@@ -350,27 +350,11 @@ public Double[] getRow( CompositeSequence designElement ) {
}
@Override
- public Double[] getRow( Integer index ) {
+ public Double[] getRow( int index ) {
double[] rawRow = matrix.getRow( index );
return ArrayUtils.toObject( rawRow );
}
- @Override
- public Double[][] getRows( List designElements ) {
- if ( designElements == null ) {
- return null;
- }
-
- Double[][] result = new Double[designElements.size()][];
- int i = 0;
- for ( CompositeSequence element : designElements ) {
- Double[] rowResult = this.getRow( element );
- result[i] = rowResult;
- i++;
- }
- return result;
- }
-
@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java
index 258ef2fc27..a6dc073b9e 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataIntegerMatrix.java
@@ -70,7 +70,7 @@ public Integer[] getColumn( BioAssay bioAssay ) {
}
@Override
- public Integer[] getColumn( Integer index ) {
+ public Integer[] getColumn( int index ) {
return this.matrix.getColumn( index );
}
@@ -98,19 +98,10 @@ public Integer[] getRow( CompositeSequence designElement ) {
}
@Override
- public Integer[] getRow( Integer index ) {
+ public Integer[] getRow( int index ) {
return this.matrix.getRow( index );
}
- @Override
- public Integer[][] getRows( List designElements ) {
- Integer[][] res = new Integer[this.rows()][];
- for ( int i = 0; i < designElements.size(); i++ ) {
- res[i] = this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) );
- }
- return res;
- }
-
@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java
index 3f9715f2f3..17c61bf95b 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrix.java
@@ -21,13 +21,18 @@
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import javax.annotation.Nullable;
import java.util.List;
/**
* Represents a matrix of data from an {@link ExpressionExperiment}.
+ *
+ * The rows of this matrix represent design elements.
*
* @author pavlidis
* @author keshav
+ * @see BulkExpressionDataMatrix
+ * @see SingleCellExpressionDataMatrix
*/
public interface ExpressionDataMatrix {
@@ -37,103 +42,63 @@ public interface ExpressionDataMatrix {
ExpressionExperiment getExpressionExperiment();
/**
- * Total number of columns.
- *
- * @return int
+ * Obtain all the design elements in this data matrix.
*/
- int columns();
+ List getDesignElements();
/**
- * Number of columns that use the given design element. Useful if the matrix includes data from more than one array
- * design.
+ * Return a design element for a given index.
*
- * @param el el
- * @return int
+ * @throws IndexOutOfBoundsException if the supplied index is not between zero (inclusive) and {@link #rows()} (exclusive)
*/
- int columns( CompositeSequence el );
+ CompositeSequence getDesignElementForRow( int index );
/**
- * Access a single value of the matrix. This is generally the easiest way to do it.
- *
- * @param row row
- * @param column col
- * @return t
+ * Obtain the total number of columns.
*/
- T get( int row, int column );
+ int columns();
/**
* Access a single column of the matrix.
*
* @param column index
* @return T[]
+ * @throws IndexOutOfBoundsException if the supplied index is not between zero (inclusive) and {@link #columns()} (exclusive)
*/
- T[] getColumn( Integer column );
-
- /**
- * Obtain all the design elements in this data matrix.
- */
- List getDesignElements();
+ T[] getColumn( int column );
/**
- * @param index i
- * @return cs
+ * @return int
*/
- CompositeSequence getDesignElementForRow( int index );
+ int rows();
/**
- * Access the entire matrix.
+ * Access a single row of the matrix, by index. A complete row is returned.
*
- * @return T[][]
+ * @param index i
+ * @return t[]
+ * @throws IndexOutOfBoundsException if the supplied index is not between zero (inclusive) and {@link #rows()} (exclusive)
*/
- T[][] getRawMatrix();
+ T[] getRow( int index );
/**
* Return a row that 'came from' the given design element.
*
* @param designElement de
- * @return t
+ * @return the corresponding row or null if the design element is not found in the matrix
*/
+ @Nullable
T[] getRow( CompositeSequence designElement );
/**
- * Access a single row of the matrix, by index. A complete row is returned.
- *
- * @param index i
- * @return t[]
+ * @return the index for the given design element, or -1 if not found
*/
- T[] getRow( Integer index );
-
- /**
- * @return list of elements representing the row 'labels'.
- */
- List getRowElements();
-
int getRowIndex( CompositeSequence designElement );
/**
- * Access a submatrix
+ * Access a single value of the matrix by row and column.
*
- * @param designElements de
- * @return T[][]
- */
- T[][] getRows( List designElements );
-
- /**
- * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing.
+ * @throws IndexOutOfBoundsException if either the row or column is outside the matrix bounds
*/
- boolean hasMissingValues();
-
- /**
- * @return int
- */
- int rows();
-
- /**
- * Set a value in the matrix, by index
- *
- * @param row row
- * @param column col
- * @param value val
- */
- void set( int row, int column, T value );
+ T get( int row, int column );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java
index 2333fad2f5..7c0ff3ecbf 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataMatrixColumnSort.java
@@ -204,7 +204,7 @@ public static DoubleMatrix orderByExperimentalDesign( DoubleMat
* @param mat matrix
* @return bio materials
*/
- public static List orderByExperimentalDesign( ExpressionDataMatrix> mat ) {
+ public static List orderByExperimentalDesign( BulkExpressionDataMatrix> mat ) {
List start = ExpressionDataMatrixColumnSort.getBms( mat );
List ordered = ExpressionDataMatrixColumnSort.orderByExperimentalDesign( start, null );
@@ -466,7 +466,7 @@ private static LinkedHashMap> chunkOnFactor( Expe
/**
* Get all biomaterials for a matrix.
*/
- private static List getBms( ExpressionDataMatrix> mat ) {
+ private static List getBms( BulkExpressionDataMatrix> mat ) {
List result = new ArrayList<>();
for ( int i = 0; i < mat.columns(); i++ ) {
result.add( mat.getBioMaterialForColumn( i ) );
@@ -476,6 +476,7 @@ private static List getBms( ExpressionDataMatrix> mat ) {
/**
* Get all (non-constant) factors used by the passed biomaterials
+ *
* @param bms biomaterials
* @return factors relevant to these biomaterials, ignoring those which have constant values.
*/
@@ -574,6 +575,7 @@ private static List orderByFactor( ExperimentalFactor ef, Map
* Any batch factor is used last (we sort by batch only within the most granular factor's levels)
*
+ *
* @param start biomaterials to sort
* @param factors sorted list of factors to define sort order for biomaterials, cannot be null
*/
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java
index 111409dc2e..5bbadb48d9 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/ExpressionDataStringMatrix.java
@@ -94,7 +94,7 @@ public String[] getColumn( BioAssay bioAssay ) {
}
@Override
- public String[] getColumn( Integer index ) {
+ public String[] getColumn( int index ) {
return this.matrix.getColumn( index );
}
@@ -122,19 +122,10 @@ public String[] getRow( CompositeSequence designElement ) {
}
@Override
- public String[] getRow( Integer index ) {
+ public String[] getRow( int index ) {
return matrix.getRow( index );
}
- @Override
- public String[][] getRows( List designElements ) {
- String[][] res = new String[this.rows()][];
- for ( int i = 0; i < designElements.size(); i++ ) {
- res[i] = this.matrix.getRow( this.getRowIndex( designElements.get( i ) ) );
- }
- return res;
- }
-
@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java
index 2821fa3329..c9118be112 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/datastructure/matrix/SingleCellExpressionDataMatrix.java
@@ -24,4 +24,13 @@ public interface SingleCellExpressionDataMatrix extends ExpressionDataMatrix<
* Return the single-cell dimension for this matrix.
*/
SingleCellDimension getSingleCellDimension();
+
+ /**
+ * {@inheritDoc}
+ *
+ * Important note: Retrieving a column is an {@code O(n log m)} operation where {@code n} is the number of
+ * vectors and {@code m} is the number of cells. Always favour row-oriented operations when possible.
+ */
+ @Override
+ T[] getColumn( int column );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java
index eb59585463..327385f10e 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/MexSingleCellDataLoader.java
@@ -8,9 +8,8 @@
import org.springframework.util.Assert;
import ubic.basecode.io.ByteArrayConverter;
import ubic.gemma.model.common.quantitationtype.*;
-import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
-import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
@@ -25,8 +24,6 @@
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
-import static java.util.function.Function.identity;
-
/**
* Load single cell data from 10X Genomics MEX format.
*
@@ -59,13 +56,17 @@ public MexSingleCellDataLoader( List sampleNames, List barcodeFile
&& barcodeFiles.size() == genesFiles.size()
&& genesFiles.size() == matrixFiles.size(),
"There must be exactly the same number of each type of files." );
- this.sampleNames = sampleNames;
+ this.sampleNames = Collections.unmodifiableList( sampleNames );
this.barcodeFiles = barcodeFiles;
this.genesFiles = genesFiles;
this.matrixFiles = matrixFiles;
this.numberOfSamples = barcodeFiles.size();
}
+ public List getSampleNames() {
+ return sampleNames; // unmodifiable view wrapped around the list supplied to the constructor
+ }
+
@Override
public SingleCellDimension getSingleCellDimension( Collection bioAssays ) throws IOException {
SingleCellDimension scd = new SingleCellDimension();
@@ -106,15 +107,12 @@ public Set getQuantitationTypes() {
* MEX does not provide cell type labels.
*/
@Override
- public Optional getCellTypeLabelling() {
+ public Optional getCellTypeLabelling() {
return Optional.empty();
}
@Override
- public Stream loadVectors( ArrayDesign platform, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException {
- Map probeByName = platform.getCompositeSequences().stream()
- .collect( Collectors.toMap( CompositeSequence::getName, identity() ) );
-
+ public Stream loadVectors( Map elementsMapping, SingleCellDimension scd, QuantitationType quantitationType ) throws IOException {
// location of a given element in individual matrices
Map elementsToSampleMatrixRow = new HashMap<>();
ArrayList matrices = new ArrayList<>( numberOfSamples );
@@ -129,9 +127,9 @@ public Stream loadVectors( ArrayDesign platform,
String[] pieces = s.split( "\t", 3 );
String geneId = pieces[0];
String geneSymbol = pieces[1];
- CompositeSequence probe = probeByName.get( geneId );
+ CompositeSequence probe = elementsMapping.get( geneId );
if ( probe == null && allowMappingProbeNamesToGeneSymbols ) {
- probe = probeByName.get( geneSymbol );
+ probe = elementsMapping.get( geneSymbol );
}
if ( probe == null ) {
missingElements.add( geneId );
@@ -149,11 +147,11 @@ public Stream loadVectors( ArrayDesign platform,
}
if ( missingElements.size() == elements.size() ) {
- throw new IllegalArgumentException( "None of the elements of " + platform + " match genes from " + genesFile + "." );
+ throw new IllegalArgumentException( "None of the elements matched genes from " + genesFile + "." );
} else if ( missingElements.size() > 10 ) {
- log.warn( String.format( "%s does not have elements for %d/%d genes from %s.", platform, missingElements.size(), elements.size(), genesFile ) );
+ log.warn( String.format( "The supplied mapping does not have elements for %d/%d genes from %s.", missingElements.size(), elements.size(), genesFile ) );
} else if ( !missingElements.isEmpty() ) {
- log.warn( String.format( "%s does not have elements for the following genes: %s from %s.", platform,
+ log.warn( String.format( "The supplied mapping does not have elements for the following genes: %s from %s.",
missingElements.stream().sorted().collect( Collectors.joining( ", " ) ), genesFile ) );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java
index 61591ecb9d..cc32470874 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/loader/expression/singleCell/SingleCellDataLoader.java
@@ -1,14 +1,15 @@
package ubic.gemma.core.loader.expression.singleCell;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
-import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssay.BioAssay;
-import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.designElement.CompositeSequence;
import java.io.IOException;
import java.util.Collection;
+import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Stream;
@@ -40,18 +41,18 @@ public interface SingleCellDataLoader {
/**
* Load single-cell type labelling present in the data.
*/
- Optional getCellTypeLabelling() throws IOException;
+ Optional getCellTypeLabelling() throws IOException;
/**
* Produces a stream of single-cell expression data vectors for the given {@link QuantitationType}.
- *
- * Make sure to close the stream when done, preferably using a try-with-resource block.
*
- * @param platform a platform to use when mapping vectors to probes/genes
+ * @param elementsMapping a mapping of element names used in the dataset to {@link CompositeSequence}
* @param dimension a dimension to use for creating vectors, may be loaded from the single-cell data with
* {@link #getSingleCellDimension(Collection)}
* @param quantitationType a quantitation type to extract from the data for, may be loaded from the single-cell data
* with {@link #getQuantitationTypes()}
+ * @return a stream of single-cell expression data vectors that must be closed when done, preferably using a
+ * try-with-resources block.
*/
- Stream loadVectors( ArrayDesign platform, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException;
+ Stream loadVectors( Map elementsMapping, SingleCellDimension dimension, QuantitationType quantitationType ) throws IOException;
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java
index 8caf21bf72..27fb81e20f 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java
@@ -11,6 +11,7 @@
/**
* Utilities and algorithms for {@link List}.
+ *
* @author poirigui
*/
public class ListUtils {
@@ -31,6 +32,7 @@ public static Map indexOfElements( List list ) {
/**
* Get a case-insensitive mapping of string elements to their first occurrence in a {@link List}.
+ *
* @see #indexOfElements(List)
*/
public static Map indexOfCaseInsensitiveStringElements( List list ) {
@@ -49,25 +51,57 @@ private static void fillMap( Map element2position, List list
}
}
+    /**
+     * Get an element of a sparse array, which only stores the elements located at the given indices.
+     *
+     * @param array            the stored elements, one for each entry of {@code indices}
+     * @param indices          the positions of the stored elements; must be sorted as they are binary-searched
+     * @param numberOfElements the size of the original array
+     * @param index            a position to retrieve (negative indexing is not supported)
+     * @param defaultValue     the value to return if no element is stored at the requested position
+     * @param <T>              the type of elements in the array
+     * @return the element stored at the requested position, or the default value if there is none
+     * @throws IllegalArgumentException  if the array and indices do not have the same size
+     * @throws IndexOutOfBoundsException if the requested index is out of bounds
+     */
+    public static <T> T getSparseArrayElement( T[] array, int[] indices, int numberOfElements, int index, T defaultValue ) {
+        Assert.isTrue( array.length == indices.length,
+                String.format( "Invalid size for sparse array, it must contain %d indices.", array.length ) );
+        if ( index < 0 || index >= numberOfElements ) {
+            // FIXME: add support for negative indexing
+            throw new IndexOutOfBoundsException( String.format( "Index %d is out of bounds for a sparse array of %d elements.", index, numberOfElements ) );
+        }
+        // special case for dense array: every position is stored, so it can be addressed directly
+        if ( indices.length == numberOfElements ) {
+            return array[index];
+        }
+        int offset = binarySearch( indices, index );
+        if ( offset < 0 ) {
+            return defaultValue;
+        }
+        return array[offset];
+    }
+
/**
* Get an element of a sparse range array.
+ *
* @param array collection of elements applying for the ranges
* @param offsets starting offsets of the ranges
* @param numberOfElements the size of the original array
* @param index a position to retrieve
- * @throws ArrayIndexOutOfBoundsException if the index is out of bounds
- * @throws IllegalArgumentException if the array and offsets do not have the same size
+ * @throws IndexOutOfBoundsException if the requested index is out of bounds
+ * @throws IllegalArgumentException if the array is empty or its size differs from offsets
* @see #validateSparseRangeArray(List, int[], int)
*/
- public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) {
+ public static T getSparseRangeArrayElement( List array, int[] offsets, int numberOfElements, int index ) throws IllegalArgumentException, IndexOutOfBoundsException {
Assert.isTrue( array.size() == offsets.length,
- String.format( "Invalid size for offsets array, it must contain %d indices.", array.size() ) );
+ String.format( "Invalid size for sparse range array, it must contain %d indices.", array.size() ) );
if ( index < 0 ) {
// FIXME: add support for negative indexing
- throw new ArrayIndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." );
+ throw new IndexOutOfBoundsException( "Negative indexing of sparse range arrays is not allowed." );
}
if ( index >= numberOfElements ) {
- throw new ArrayIndexOutOfBoundsException( "The index exceeds the upper bound of the array." );
+ throw new IndexOutOfBoundsException( "The index exceeds the upper bound of the array." );
}
int offset = binarySearch( offsets, index );
if ( offset < 0 ) {
@@ -78,12 +112,15 @@ public static T getSparseRangeArrayElement( List array, int[] offsets, in
/**
* Validate a sparse range array.
+ *
* @param array collection of elements applying for the ranges
* @param offsets starting offsets of the ranges
* @param numberOfElements the size of the original array
* @throws IllegalArgumentException if the sparse range array is invalid
*/
public static void validateSparseRangeArray( List> array, int[] offsets, int numberOfElements ) throws IllegalArgumentException {
+ Assert.isTrue( numberOfElements == 0 || !array.isEmpty(),
+ "A non-empty sparse range array must have at least one element." );
Assert.isTrue( array.size() == offsets.length,
"There must be as many offsets as entries in the corresponding array." );
int k = 0;
diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java
index 41dc9bac31..b6de5faf67 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/AnalysisValueObject.java
@@ -4,11 +4,24 @@
public abstract class AnalysisValueObject extends IdentifiableValueObject {
+ private ProtocolValueObject protocol;
+
protected AnalysisValueObject() {
super();
}
protected AnalysisValueObject( T analysis ) {
super( analysis );
+ if ( analysis.getProtocol() != null ) {
+ this.protocol = new ProtocolValueObject( analysis.getProtocol() );
+ }
+ }
+
+ public ProtocolValueObject getProtocol() {
+ return protocol;
+ }
+
+ public void setProtocol( ProtocolValueObject protocol ) {
+ this.protocol = protocol;
}
}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java
new file mode 100644
index 0000000000..6e8e887dd3
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/CellTypeAssignmentValueObject.java
@@ -0,0 +1,50 @@
+package ubic.gemma.model.analysis;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.extern.apachecommons.CommonsLog;
+import ubic.gemma.model.common.description.CharacteristicValueObject;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * @author poirigui
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+@CommonsLog
+public class CellTypeAssignmentValueObject extends AnalysisValueObject {
+
+ /**
+ * A list of IDs, one per cell, each referring to one of the cell types in {@link #cellTypes}.
+ *
+ * {@code null} is used to indicate an unknown cell type.
+ */
+ private List cellTypeIds;
+
+ /**
+ * A set of cell types that are assigned to individual cells.
+ */
+ private Set cellTypes;
+
+ public CellTypeAssignmentValueObject( CellTypeAssignment cellTypeAssignment ) {
+ super( cellTypeAssignment );
+ try {
+ cellTypeIds = Arrays.stream( cellTypeAssignment.getCellTypeIndices() )
+ .mapToObj( cellTypeAssignment::getCellType )
+ .map( characteristic -> characteristic != null ? characteristic.getId() : null )
+ .collect( Collectors.toList() );
+ } catch ( IndexOutOfBoundsException e ) {
+ // getCellType() fails when the stored indices are inconsistent with the cell types; leave cellTypeIds unset
+ // rather than break VO serialization (and thus the REST API), but log the cause so the bad data can be traced.
+ log.warn( "Cell type IDs is invalid for " + cellTypeAssignment + ".", e );
+ }
+ cellTypes = cellTypeAssignment.getCellTypes().stream()
+ .map( CharacteristicValueObject::new )
+ .collect( Collectors.toSet() );
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java
new file mode 100644
index 0000000000..a821658ef7
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/analysis/ProtocolValueObject.java
@@ -0,0 +1,26 @@
+package ubic.gemma.model.analysis;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import ubic.gemma.model.IdentifiableValueObject;
+import ubic.gemma.model.common.description.CharacteristicValueObject;
+import ubic.gemma.model.common.protocol.Protocol;
+
+import java.util.Set;
+
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class ProtocolValueObject extends IdentifiableValueObject {
+
+ private String name;
+
+ private String description;
+
+ private Set characteristics;
+
+ public ProtocolValueObject( Protocol protocol ) {
+ super( protocol );
+ this.name = protocol.getName();
+ this.description = protocol.getDescription();
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java
index 781665b209..12ef59c785 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/protocol/Protocol.java
@@ -1,8 +1,8 @@
/*
* The Gemma project.
- *
+ *
* Copyright (c) 2006-2012 University of British Columbia
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -20,13 +20,28 @@
import gemma.gsec.model.Securable;
import ubic.gemma.model.common.AbstractDescribable;
+import ubic.gemma.model.common.description.Characteristic;
import java.io.Serializable;
+import java.util.Set;
public class Protocol extends AbstractDescribable implements Securable, Serializable {
private static final long serialVersionUID = -1902891452989019766L;
+ /**
+ * Characteristics describing the protocol.
+ */
+ private Set characteristics;
+
+ public Set getCharacteristics() {
+ return characteristics;
+ }
+
+ public void setCharacteristics( Set characteristics ) {
+ this.characteristics = characteristics;
+ }
+
public static final class Factory {
public static Protocol newInstance() {
diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java
index ab1b243ed5..7b130d5b66 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/quantitationtype/QuantitationType.java
@@ -214,84 +214,27 @@ public boolean equals( Object object ) {
return false;
}
final QuantitationType that = ( QuantitationType ) object;
-
if ( that.getId() != null && this.getId() != null ) {
- return Objects.equals( that.getId(), this.getId() );
- }
-
- if ( that.getName() != null && this.getName() != null && !this.getName().equals( that.getName() ) ) {
- return false;
- }
-
- if ( this.getScale() != null && that.getScale() != null && !this.getScale().equals( that.getScale() ) ) {
- return false;
- }
-
- if ( this.getIsPreferred() != that.getIsPreferred() ) {
- return false;
- }
-
- if ( this.getIsRatio() != that.getIsRatio() ) {
- return false;
- }
-
- if ( this.getIsNormalized() != that.getIsNormalized() ) {
- return false;
- }
-
- if ( this.getIsBackground() != that.getIsBackground() ) {
- return false;
+ return getId().equals( that.getId() );
}
-
- if ( this.getIsBackgroundSubtracted() != that.getIsBackgroundSubtracted() ) {
- return false;
- }
-
- if ( this.getGeneralType() != null && that.getGeneralType() != null && !this.getGeneralType()
- .equals( that.getGeneralType() ) ) {
- return false;
- }
-
- //noinspection SimplifiableIfStatement // Better readability
- if ( this.getRepresentation() != null && that.getRepresentation() != null && !this.getRepresentation()
- .equals( that.getRepresentation() ) ) {
- return false;
- }
-
- return this.getType() == null || that.getRepresentation() == null || this.getType().equals( that.getType() );
+ return Objects.equals( getName(), that.getName() )
+ && Objects.equals( scale, that.scale )
+ && Objects.equals( isPreferred, that.isPreferred )
+ && Objects.equals( isRatio, that.isRatio )
+ && Objects.equals( isNormalized, that.isNormalized )
+ && Objects.equals( isBackground, that.isBackground )
+ && Objects.equals( isBackgroundSubtracted, that.isBackgroundSubtracted )
+ && Objects.equals( isBatchCorrected, that.isBatchCorrected )
+ && Objects.equals( type, that.type )
+ && Objects.equals( generalType, that.generalType )
+ && Objects.equals( representation, that.representation )
+ && Objects.equals( isRecomputedFromRawData, that.isRecomputedFromRawData );
}
@Override
public int hashCode() {
- int hashCode = 0;
- hashCode = 29 * hashCode + ( this.getId() == null ? this.computeHashCode() : this.getId().hashCode() );
- return hashCode;
- }
-
- private int computeHashCode() {
- int hashCode = 0;
- if ( this.getName() != null ) {
- hashCode = hashCode + this.getName().hashCode();
- }
- if ( this.getType() != null ) {
- hashCode = hashCode + this.getType().hashCode();
- }
- if ( this.getRepresentation() != null ) {
- hashCode = hashCode + this.getRepresentation().hashCode();
- }
- if ( this.getGeneralType() != null ) {
- hashCode = hashCode + this.getGeneralType().hashCode();
- }
- if ( this.getScale() != null ) {
- hashCode = hashCode + this.getScale().hashCode();
- }
- hashCode += Boolean.hashCode( this.getIsBackground() );
- hashCode += Boolean.hashCode( this.getIsBackgroundSubtracted() );
- hashCode += Boolean.hashCode( this.getIsNormalized() );
- hashCode += Boolean.hashCode( this.getIsPreferred() );
- hashCode += Boolean.hashCode( this.getIsRatio() );
-
- return hashCode;
+ return Objects.hash( getName(), type, representation, generalType, scale, isBackground, isBackgroundSubtracted,
+ isNormalized, isPreferred, isBatchCorrected, isRatio, isRecomputedFromRawData );
}
@Override
@@ -350,6 +293,7 @@ public static QuantitationType newInstance( QuantitationType quantitationType )
result.isBackground = quantitationType.isBackground;
result.isBackgroundSubtracted = quantitationType.isBackgroundSubtracted;
result.isBatchCorrected = quantitationType.isBatchCorrected;
+ result.isRecomputedFromRawData = quantitationType.isRecomputedFromRawData;
return result;
}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java
new file mode 100644
index 0000000000..14e6d9f3f4
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeAssignment.java
@@ -0,0 +1,75 @@
+package ubic.gemma.model.expression.bioAssayData;
+
+import lombok.Getter;
+import lombok.Setter;
+import ubic.gemma.model.analysis.Analysis;
+import ubic.gemma.model.common.description.Characteristic;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Represents the labelling of cell types.
+ */
+@Getter
+@Setter
+public class CellTypeAssignment extends Analysis {
+
+ /**
+ * A special indicator for {@link #cellTypeIndices} when the cell type is unknown.
+ */
+ public static final int UNKNOWN_CELL_TYPE = -1;
+
+ /**
+ * Indicates whether this cell type assignment is the preferred one.
+ */
+ private boolean preferred;
+
+ /**
+ * Cell type assignments for individual cells, as indices into the {@link #cellTypes} list.
+ *
+ * The value {@code -1} is used to indicate an unknown cell type.
+ */
+ private int[] cellTypeIndices;
+
+ /**
+ * List of cell types.
+ */
+ private List cellTypes = new ArrayList<>();
+
+ /**
+ * Number of cell types.
+ *
+ * This must always be equal to number of elements of {@link #cellTypes}.
+ */
+ private Integer numberOfCellTypes;
+
+ /**
+ * Obtain the type assignment of a given cell.
+ *
+ * @return the type assignment of a given cell, or null if the type was assigned to {@link #UNKNOWN_CELL_TYPE}.
+ * @throws IndexOutOfBoundsException if the cell index is out of range or if the stored value is outside the range of {@link #cellTypes}
+ */
+ @Nullable
+ public Characteristic getCellType( int cellIndex ) throws IndexOutOfBoundsException {
+ int i = cellTypeIndices[cellIndex];
+ if ( i == UNKNOWN_CELL_TYPE ) {
+ return null;
+ } else {
+ return cellTypes.get( i );
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash( Arrays.hashCode( cellTypeIndices ), cellTypes );
+ }
+
+ @Override
+ public boolean equals( Object object ) {
+ return super.equals( object );
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java
deleted file mode 100644
index b67bf182e0..0000000000
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/CellTypeLabelling.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package ubic.gemma.model.expression.bioAssayData;
-
-import lombok.Getter;
-import lombok.Setter;
-import org.springframework.util.Assert;
-import ubic.gemma.model.analysis.Analysis;
-import ubic.gemma.model.common.description.Characteristic;
-
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
-
-/**
- * Represents the labelling of cell types.
- */
-@Getter
-@Setter
-public class CellTypeLabelling extends Analysis {
-
- /**
- * Indicate if this labelling is the preferred one.
- */
- private boolean preferred;
-
- /**
- * Cell types assignment to individual cells from the {@link #cellTypeLabels} collections.
- */
- private int[] cellTypes;
-
- /**
- * Cell type labels.
- */
- private List cellTypeLabels;
-
- /**
- * Number of distinct cell types.
- *
- * This must always be equal to number of distinct elements of {@link #cellTypeLabels}.
- */
- private Integer numberOfCellTypeLabels;
-
- public Characteristic getCellTypeLabel( int index ) {
- Assert.notNull( cellTypes, "No cell types have been assigned." );
- Assert.notNull( cellTypeLabels, "No cell labels exist." );
- return cellTypeLabels.get( cellTypes[index] );
- }
-
- @Override
- public int hashCode() {
- return Objects.hash( Arrays.hashCode( cellTypes ), cellTypeLabels );
- }
-
- @Override
- public boolean equals( Object object ) {
- return super.equals( object );
- }
-}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
index 5e3dba7915..3c172c0c4c 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimension.java
@@ -11,6 +11,12 @@
import static ubic.gemma.core.util.ListUtils.getSparseRangeArrayElement;
+/**
+ * Represents a single-cell dimension, holding shared information for a set of {@link SingleCellExpressionDataVector}.
+ *
+ * @author poirigui
+ * @see SingleCellExpressionDataVector
+ */
@Getter
@Setter
public class SingleCellDimension implements Identifiable {
@@ -29,28 +35,28 @@ public class SingleCellDimension implements Identifiable {
/**
* Number of cells.
*
- * This should always be equal to the size of {@link #cellIds}.
+ * This must always be equal to the size of {@link #cellIds}.
*/
private int numberOfCells = 0;
/**
* Set of cell types assignment to individual cells. This is empty if no cell types have been assigned and should
- * always contain a preferred labelling as per {@link CellTypeLabelling#preferred} if non-empty.
+ * always contain a preferred labelling as per {@link CellTypeAssignment#isPreferred()} if non-empty.
*/
- private Set cellTypeLabellings = new HashSet<>();
+ private Set cellTypeAssignments = new HashSet<>();
/**
- * List of bioassays that each cell belongs to.
+ * List of {@link BioAssay}s applicable to the cells.
*
- * The {@link BioAssay} {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}.
- * To find the bioassay type of a given cell, use {@link #getBioAssay(int)}.
+ * The {@link BioAssay} in {@code bioAssays[sampleIndex]} applies to all the cells in the interval {@code [bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex+1][}.
+ * To find the bioassay of a given cell, use {@link #getBioAssay(int)}.
*/
private List bioAssays = new ArrayList<>();
/**
* Offsets of the bioassays.
*
- * This always contain {@code bioAssays.size()} elements.
+ * This must always contain {@code bioAssays.size()} elements.
*
* This is stored in the database using {@link ByteArrayType}.
*/
@@ -60,8 +66,10 @@ public class SingleCellDimension implements Identifiable {
* Obtain the {@link BioAssay} for a given cell position.
*
* @param cellIndex the cell position in {@link #cellIds}
+ * @throws IllegalArgumentException if the sparse range array is invalid as per {@link ubic.gemma.core.util.ListUtils#getSparseRangeArrayElement(List, int[], int, int)}
+ * @throws IndexOutOfBoundsException if the index is out of bounds
*/
- public BioAssay getBioAssay( int cellIndex ) {
+ public BioAssay getBioAssay( int cellIndex ) throws IndexOutOfBoundsException {
return getSparseRangeArrayElement( bioAssays, bioAssaysOffset, cellIds.size(), cellIndex );
}
@@ -71,11 +79,13 @@ public BioAssay getBioAssay( int cellIndex ) {
* @param sampleIndex the sample position in {@link #bioAssays}
*/
public List getCellIdsBySample( int sampleIndex ) {
- return cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) );
+ return Collections.unmodifiableList( cellIds.subList( bioAssaysOffset[sampleIndex], bioAssaysOffset[sampleIndex] + getNumberOfCellsBySample( sampleIndex ) ) );
}
/**
* Obtain the number for cells for the given sample.
+ *
+ * This is more efficient than looking up the size of {@link #getCellIdsBySample(int)}.
*
* @param sampleIndex the sample position in {@link #bioAssays}
*/
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java
new file mode 100644
index 0000000000..d5277ecdac
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellDimensionValueObject.java
@@ -0,0 +1,63 @@
+package ubic.gemma.model.expression.bioAssayData;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.extern.apachecommons.CommonsLog;
+import ubic.gemma.model.IdentifiableValueObject;
+import ubic.gemma.model.analysis.CellTypeAssignmentValueObject;
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssay.BioAssayValueObject;
+import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject;
+
+import javax.annotation.Nullable;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Value object for a single-cell dimension.
+ *
+ * {@link BioAssay}s are unpacked into a list of IDs. This is suitable because this object is displayed in the context
+ * of an {@link ExpressionExperimentValueObject} and its associated {@link BioAssayValueObject}.
+ *
+ * @author poirigui
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+@CommonsLog
+public class SingleCellDimensionValueObject extends IdentifiableValueObject {
+
+ /**
+ * Cell identifiers.
+ */
+ private List cellIds;
+
+ /**
+ * A list of {@link ubic.gemma.model.expression.bioAssay.BioAssay} IDs that are applicable to the cells.
+ */
+ private List bioAssayIds;
+
+ /**
+ * The preferred cell type assignment.
+ */
+ @Nullable
+ private CellTypeAssignmentValueObject cellTypeAssignment;
+
+ /**
+ * @param cellTypeAssignment a featured cell type assignment from {@link SingleCellDimension#getCellTypeAssignments()}
+ */
+ public SingleCellDimensionValueObject( SingleCellDimension singleCellDimension, @Nullable CellTypeAssignment cellTypeAssignment ) {
+ super( singleCellDimension );
+ this.cellIds = singleCellDimension.getCellIds();
+ this.bioAssayIds = new ArrayList<>( singleCellDimension.getCellIds().size() );
+ try {
+ for ( int i = 0; i < singleCellDimension.getCellIds().size(); i++ ) {
+ this.bioAssayIds.add( singleCellDimension.getBioAssay( i ).getId() );
+ }
+ } catch ( IllegalArgumentException | IndexOutOfBoundsException e ) {
+ log.warn( "The bioassays sparse range array is invalid for " + singleCellDimension, e );
+ }
+ if ( cellTypeAssignment != null ) {
+ this.cellTypeAssignment = new CellTypeAssignmentValueObject( cellTypeAssignment );
+ }
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java
index 8278286721..a704f16122 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/SingleCellExpressionDataVector.java
@@ -8,11 +8,12 @@
import java.util.Objects;
/**
- * An expression data vector that contains data at the resolution of a single cell.
+ * An expression data vector that contains data at the resolution of individual cells.
*
* This is achieved by storing cell metadata such as IDs and cell types in a {@link SingleCellDimension} that is shared
* among all vectors of a given {@link ubic.gemma.model.expression.experiment.ExpressionExperiment} and individual
* non-zero cell expression in a sparse data structure similar to the rows of a CSR matrix.
+ *
* @author poirigui
*/
@Getter
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java
index a09ddcd534..5c091ee13d 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/designElement/CompositeSequence.java
@@ -133,6 +133,14 @@ public static CompositeSequence newInstance( String name, ArrayDesign ad ) {
cs.setArrayDesign( ad );
return cs;
}
+
+ public static CompositeSequence newInstance( String name, ArrayDesign ad, BioSequence bioSequence ) {
+ CompositeSequence cs = new CompositeSequence();
+ cs.setName( name );
+ cs.setArrayDesign( ad );
+ cs.setBiologicalCharacteristic( bioSequence );
+ return cs;
+ }
}
}
\ No newline at end of file
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
index 046b57a557..0a4077b4cb 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
@@ -13,6 +13,9 @@
import org.hibernate.Hibernate;
import ubic.gemma.model.annotations.GemmaWebOnly;
import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimensionValueObject;
import ubic.gemma.model.genome.TaxonValueObject;
import ubic.gemma.persistence.util.EntityUtils;
@@ -74,6 +77,12 @@ public class ExpressionExperimentValueObject extends AbstractCuratableValueObjec
private String technologyType;
+ /**
+ * The single-cell dimension of the preferred single-cell vectors.
+ */
+ @Nullable
+ private SingleCellDimensionValueObject singleCellDimension;
+
/**
* Required when using the class as a spring bean.
*/
@@ -142,6 +151,11 @@ public ExpressionExperimentValueObject( ExpressionExperiment ee ) {
}
}
+ public ExpressionExperimentValueObject( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment ) {
+ this( ee );
+ this.singleCellDimension = new SingleCellDimensionValueObject( singleCellDimension, cellTypeAssignment );
+ }
+
/**
* Creates a new {@link ExpressionExperiment} value object with additional information about ownership.
*/
@@ -169,6 +183,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo )
this.accession = vo.getAccession();
this.batchConfound = vo.getBatchConfound();
this.batchEffect = vo.getBatchEffect();
+ this.batchEffectStatistics = vo.getBatchEffectStatistics();
this.externalDatabase = vo.getExternalDatabase();
this.externalUri = vo.getExternalUri();
this.metadata = vo.getMetadata();
@@ -186,6 +201,7 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo )
this.isShared = vo.getIsShared();
this.geeq = vo.getGeeq();
this.suitableForDEA = vo.getSuitableForDEA();
+ this.singleCellDimension = vo.getSingleCellDimension();
}
/**
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
index bf253e1820..fb5dc6f9f5 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java
@@ -10,10 +10,7 @@
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
-import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
-import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
-import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
-import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.bioAssayData.*;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;
@@ -105,6 +102,7 @@ public interface ExpressionExperimentDao
* Obtain the dataset usage frequency by technology type for the given dataset IDs.
*
* Note: No ACL filtering is performed.
+ *
* @see #getTechnologyTypeUsageFrequency()
*/
Map getTechnologyTypeUsageFrequency( Collection eeIds );
@@ -123,6 +121,7 @@ public interface ExpressionExperimentDao
* Obtain dataset usage frequency by platform currently for the given dataset IDs.
*
* Note: no ACL filtering is performed. Only administrator can see troubled platforms.
+ *
* @see #getArrayDesignsUsageFrequency(int)
*/
Map getArrayDesignsUsageFrequency( Collection eeIds, int maxResults );
@@ -142,6 +141,7 @@ public interface ExpressionExperimentDao
* Obtain dataset usage frequency by platform currently for the given dataset IDs.
*
* Note: no ACL filtering is performed. Only administrators can see troubled platforms.
+ *
* @see #getOriginalPlatformsUsageFrequency(int)
*/
Map getOriginalPlatformsUsageFrequency( Collection eeIds, int maxResults );
@@ -214,11 +214,11 @@ Map> getSampleRemovalEvents(
* Special method for front-end access. This is partly redundant with {@link #loadValueObjects(Filters, Sort, int, int)};
* however, it fills in more information, returns ExpressionExperimentDetailsValueObject
*
- * @param ids only list specific ids, or null to ignore
- * @param taxon only list EEs in the specified taxon, or null to ignore
- * @param sort the field to order the results by.
- * @param offset offset
- * @param limit maximum number of results to return
+ * @param ids only list specific ids, or null to ignore
+ * @param taxon only list EEs in the specified taxon, or null to ignore
+ * @param sort the field to order the results by.
+ * @param offset offset
+ * @param limit maximum number of results to return
* @return a list of EE details VOs representing experiments matching the given arguments.
*/
Slice loadDetailsValueObjects( @Nullable Collection ids, @Nullable Taxon taxon, @Nullable Sort sort, int offset, int limit );
@@ -313,22 +313,28 @@ Map> getSampleRemovalEvents(
void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimension singleCellDimension );
- List getCellTypeLabellings( ExpressionExperiment ee );
+ List getCellTypeLabellings( ExpressionExperiment ee );
/**
* Obtain the preferred labelling of the preferred single-cell vectors.
+ *
* @throws org.springframework.dao.IncorrectResultSizeDataAccessException if there are multiple preferred cell-type
- * labellings
+ * labellings
*/
@Nullable
- CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee );
+ CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee );
/**
* Add the given cell type labelling to the single-cell dimension.
*
* If the new labelling is preferred, any existing one is marked as non-preferred.
*/
- void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeLabelling cellTypeLabelling );
+ void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension singleCellDimension, CellTypeAssignment cellTypeAssignment );
List getCellTypes( ExpressionExperiment ee );
+
+ /**
+ * Obtain the list of single-cell data vectors for the given experiment and quantitation type.
+ */
+ List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType );
}
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
index ecc8156b9b..a1703e4669 100644
--- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java
@@ -44,10 +44,7 @@
import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject;
import ubic.gemma.model.expression.arrayDesign.TechnologyType;
import ubic.gemma.model.expression.bioAssay.BioAssay;
-import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
-import ubic.gemma.model.expression.bioAssayData.CellTypeLabelling;
-import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
-import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.bioAssayData.*;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;
@@ -841,6 +838,7 @@ public Map getAnnotationsUsageFrequency( @Nullable Collect
*
* FIXME: There's a bug in Hibernate that that prevents it from producing proper tuples the excluded URIs and
* retained term URIs
+ *
* @param column column holding the URI to be excluded
* @param labelColumn column holding the label (only used if excludeFreeText or excludeUncategorized is true,
* then we will check if the label is non-null to cover some edge cases)
@@ -1749,6 +1747,7 @@ protected ExpressionExperimentValueObject doLoadValueObject( ExpressionExperimen
@Override
protected void postProcessValueObjects( List results ) {
populateArrayDesignCount( results );
+ populateSingleCellMetadata( results );
}
@Override
@@ -1967,7 +1966,7 @@ public void deleteSingleCellDimension( ExpressionExperiment ee, SingleCellDimens
}
@Override
- public List getCellTypeLabellings( ExpressionExperiment ee ) {
+ public List getCellTypeLabellings( ExpressionExperiment ee ) {
//noinspection unchecked
return getSessionFactory().getCurrentSession()
.createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv "
@@ -1980,8 +1979,8 @@ public List getCellTypeLabellings( ExpressionExperiment ee )
@Nullable
@Override
- public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee ) {
- return ( CellTypeLabelling ) getSessionFactory().getCurrentSession()
+ public CellTypeAssignment getPreferredCellTypeLabelling( ExpressionExperiment ee ) {
+ return ( CellTypeAssignment ) getSessionFactory().getCurrentSession()
.createQuery( "select distinct ctl from SingleCellExpressionDataVector scedv "
+ "join scedv.singleCellDimension scd "
+ "join scd.cellTypeLabellings ctl "
@@ -1991,9 +1990,9 @@ public CellTypeLabelling getPreferredCellTypeLabelling( ExpressionExperiment ee
}
@Override
- public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeLabelling labelling ) {
+ public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension dimension, CellTypeAssignment labelling ) {
if ( labelling.isPreferred() ) {
- for ( CellTypeLabelling l : dimension.getCellTypeLabellings() ) {
+ for ( CellTypeAssignment l : dimension.getCellTypeAssignments() ) {
if ( l.isPreferred() ) {
log.info( "Marking existing cell type labelling as non-preferred, a new preferred labelling will be added." );
l.setPreferred( false );
@@ -2002,7 +2001,7 @@ public void addCellTypeLabelling( ExpressionExperiment ee, SingleCellDimension d
}
}
getSessionFactory().getCurrentSession().persist( labelling );
- dimension.getCellTypeLabellings().add( labelling );
+ dimension.getCellTypeAssignments().add( labelling );
}
@Override
@@ -2018,6 +2017,17 @@ public List getCellTypes( ExpressionExperiment ee ) {
.list();
}
+ @Override
+ public List getSingleCellDataVectors( ExpressionExperiment expressionExperiment, QuantitationType quantitationType ) {
+ //noinspection unchecked
+ return getSessionFactory().getCurrentSession()
+ .createQuery( "select scedv from SingleCellExpressionDataVector scedv "
+ + "where scedv.expressionExperiment = :ee and scedv.quantitationType = :qt" )
+ .setParameter( "ee", expressionExperiment )
+ .setParameter( "qt", quantitationType )
+ .list();
+ }
+
@Override
protected Query getFilteringQuery( @Nullable Filters filters, @Nullable Sort sort ) {
// the constants for aliases are messing with the inspector
@@ -2272,4 +2282,28 @@ private void populateArrayDesignCount( Collection eevos ) {
+ //noinspection unchecked
+ List