Skip to content

Commit

Permalink
Lots of changes regarding single-cell data
Browse files Browse the repository at this point in the history
Rename CellTypeLabelling to CellTypeAssignment and perform some related
renaming.

Add basic VOs for cell type assignment, protocol, etc.

Add an explicit dependency on mtj since we use it for parsing
MatrixMarket formats.
  • Loading branch information
arteymix committed Feb 19, 2024
1 parent 18e8c77 commit 6a993b7
Show file tree
Hide file tree
Showing 39 changed files with 1,008 additions and 523 deletions.
8 changes: 8 additions & 0 deletions gemma-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@
<version>4.2.2.GA</version>
</dependency>

<!-- MTJ, for reading MatrixMarket formats -->
<!-- this is also declared in baseCode, but we use it explicitly for reading single-cell data -->
<dependency>
<groupId>com.googlecode.matrix-toolkits-java</groupId>
<artifactId>mtj</artifactId>
<version>1.0.4</version>
</dependency>

<!-- Testing -->
<!-- specifically for spring-security-test which refers to definitions from javax.servlet-api, spring-web and spring-security-web -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ public interface BulkExpressionDataMatrix<T> extends ExpressionDataMatrix<T> {
*/
Collection<QuantitationType> getQuantitationTypes();

/**
 * Obtain the bioassay dimension that covers every biomaterial of this matrix.
 *
 * @return a {@link BioAssayDimension} that covers all the biomaterials in this matrix.
 * @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials
 *                               used in the experiment.
 */
BioAssayDimension getBestBioAssayDimension();

/**
 * Check whether this matrix contains any missing values.
 *
 * @return true if any values are null or NaN (for Doubles); all other values are considered non-missing.
 */
boolean hasMissingValues();

/**
* Access a single value of the matrix. Note that because there can be multiple bioassays per column and multiple
* designelements per row, it is possible for this method to retrieve a data that does not come from the bioassay
Expand All @@ -69,6 +81,13 @@ public interface BulkExpressionDataMatrix<T> extends ExpressionDataMatrix<T> {
*/
T[][] get( List<CompositeSequence> designElements, List<BioAssay> bioAssays );

/**
 * Access the entire matrix.
 *
 * @return the full contents of the matrix as a two-dimensional array
 */
T[][] getRawMatrix();

/**
* Access a single column of the matrix.
*
Expand All @@ -85,6 +104,21 @@ public interface BulkExpressionDataMatrix<T> extends ExpressionDataMatrix<T> {
*/
T[][] getColumns( List<BioAssay> bioAssays );


/**
 * Obtain the elements that label the rows of this matrix.
 *
 * @return list of elements representing the row 'labels'.
 */
List<ExpressionDataMatrixRowElement> getRowElements();

/**
 * Number of columns that use the given design element. Useful if the matrix includes data from more than one array
 * design.
 *
 * @param el the design element whose columns are counted
 * @return the number of columns that use the given design element
 */
int columns( CompositeSequence el );

/**
* @param index i
* @return BioMaterial. Note that if this represents a subsetted data set, the BioMaterial may be a lightweight
Expand All @@ -98,13 +132,6 @@ public interface BulkExpressionDataMatrix<T> extends ExpressionDataMatrix<T> {
*/
int getColumnIndex( BioMaterial bioMaterial );

/**
* @return The bioassaydimension that covers all the biomaterials in this matrix.
* @throws IllegalStateException if there isn't a single bioassaydimension that encapsulates all the biomaterials
* used in the experiment.
*/
BioAssayDimension getBestBioAssayDimension();

/**
* Produce a BioAssayDimension representing the matrix columns for a specific row. The designelement argument is
* needed because a matrix can combine data from multiple array designs, each of which will generate its own
Expand All @@ -122,4 +149,13 @@ public interface BulkExpressionDataMatrix<T> extends ExpressionDataMatrix<T> {
* used in the study.
*/
Collection<BioAssay> getBioAssaysForColumn( int index );

/**
 * Set a value in the matrix, by index.
 *
 * @param row    index of the row to modify
 * @param column index of the column to modify
 * @param value  the value to store at the given row and column
 */
void set( int row, int column, T value );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package ubic.gemma.core.datastructure.matrix;

import no.uib.cipr.matrix.sparse.CompRowMatrix;
import org.springframework.util.Assert;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.util.ByteArrayUtils;

import java.util.*;

/**
 * A single-cell expression data matrix of doubles backed by a sparse, compressed-row matrix.
 * <p>
 * Rows correspond to design elements, sorted by name and then by ID; columns correspond to the cells of the
 * {@link SingleCellDimension} shared by all the supplied vectors. Entries that are not explicitly stored in the
 * sparse matrix are reported as {@code 0.0} by every accessor.
 *
 * @author poirigui
 */
public class DoubleSingleCellExpressionDataMatrix implements SingleCellExpressionDataMatrix<Double> {

    /**
     * Ordering used to sort the rows; {@link #getRowIndex(CompositeSequence)} relies on it for its binary search.
     */
    private static final Comparator<CompositeSequence> designElementComparator = Comparator.comparing( CompositeSequence::getName )
            .thenComparing( CompositeSequence::getId );

    private final ExpressionExperiment expressionExperiment;
    private final QuantitationType quantitationType;
    private final SingleCellDimension singleCellDimension;
    private final CompRowMatrix matrix;
    /**
     * Design elements in row order; unmodifiable so that the ordering required by the binary search is preserved.
     */
    private final List<CompositeSequence> designElements;

    /**
     * Create a matrix from a collection of single-cell vectors.
     *
     * @param vectors the vectors to assemble, one per row; must be non-empty and all share the same quantitation
     *                type and single-cell dimension
     * @throws IllegalArgumentException if the collection is empty, if the vectors do not share a quantitation type
     *                                  or single-cell dimension, or if a vector has a different number of values
     *                                  and indices
     */
    public DoubleSingleCellExpressionDataMatrix( Collection<SingleCellExpressionDataVector> vectors ) {
        Assert.isTrue( !vectors.isEmpty(), "At least one vector must be supplied. Use EmptyExpressionDataMatrix for empty data matrices instead." );
        Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getQuantitationType ).distinct().count() == 1,
                "All vectors must have the same quantitation type." );
        Assert.isTrue( vectors.stream().map( SingleCellExpressionDataVector::getSingleCellDimension ).distinct().count() == 1,
                "All vectors must have the same single-cell dimension." );
        SingleCellExpressionDataVector vector = vectors.iterator().next();
        expressionExperiment = vector.getExpressionExperiment();
        quantitationType = vector.getQuantitationType();
        singleCellDimension = vector.getSingleCellDimension();

        // sort vectors by design element so that rows can later be located with a binary search
        List<SingleCellExpressionDataVector> sortedVectors = new ArrayList<>( vectors );
        sortedVectors.sort( Comparator.comparing( SingleCellExpressionDataVector::getDesignElement, designElementComparator ) );

        // first pass: collect the row labels and the sparsity pattern (column indices of stored entries per row)
        int rows = sortedVectors.size();
        int[][] nz = new int[rows][];
        List<CompositeSequence> elements = new ArrayList<>( rows );
        for ( int i = 0; i < rows; i++ ) {
            SingleCellExpressionDataVector v = sortedVectors.get( i );
            nz[i] = v.getDataIndices();
            elements.add( v.getDesignElement() );
        }
        matrix = new CompRowMatrix( rows, singleCellDimension.getNumberOfCells(), nz );

        // second pass: fill in the stored values
        for ( int i = 0; i < rows; i++ ) {
            double[] row = ByteArrayUtils.byteArrayToDoubles( sortedVectors.get( i ).getData() );
            int[] indices = nz[i];
            Assert.isTrue( row.length == indices.length,
                    "The number of values and the number of indices of a vector must match." );
            for ( int j = 0; j < row.length; j++ ) {
                matrix.set( i, indices[j], row[j] );
            }
        }
        designElements = Collections.unmodifiableList( elements );
    }

    @Override
    public ExpressionExperiment getExpressionExperiment() {
        return expressionExperiment;
    }

    /**
     * @return the number of columns (cells) in the matrix
     */
    @Override
    public int columns() {
        return matrix.numColumns();
    }

    /**
     * @return the value at the given position; entries not stored in the sparse matrix are reported as zero
     */
    @Override
    public Double get( int row, int column ) {
        return matrix.get( row, column );
    }

    /**
     * @return a dense copy of the given column, with one entry per row
     */
    @Override
    public Double[] getColumn( int column ) {
        int numRows = matrix.numRows();
        Double[] vec = new Double[numRows];
        for ( int j = 0; j < numRows; j++ ) {
            vec[j] = matrix.get( j, column );
        }
        return vec;
    }

    /**
     * @return the design elements in row order, as an unmodifiable list
     */
    @Override
    public List<CompositeSequence> getDesignElements() {
        return designElements;
    }

    @Override
    public CompositeSequence getDesignElementForRow( int index ) {
        return designElements.get( index );
    }

    /**
     * @return a dense copy of the row for the given design element, or null if it is not part of this matrix
     */
    @Override
    public Double[] getRow( CompositeSequence designElement ) {
        int ix = getRowIndex( designElement );
        if ( ix == -1 ) {
            return null;
        }
        return getRow( ix );
    }

    /**
     * @return a dense copy of the given row, with one entry per column; entries not stored in the sparse matrix are
     * zero, consistently with {@link #get(int, int)} and {@link #getColumn(int)}
     */
    @Override
    public Double[] getRow( int index ) {
        Double[] vec = new Double[matrix.numColumns()];
        // unstored entries of a sparse matrix are zeros, not missing values
        Arrays.fill( vec, Double.valueOf( 0.0 ) );
        int[] rowptr = matrix.getRowPointers();
        int[] colind = matrix.getColumnIndices();
        double[] data = matrix.getData();
        // stored entries of the row occupy the [rowptr[index], rowptr[index + 1]) slice
        for ( int i = rowptr[index]; i < rowptr[index + 1]; i++ ) {
            vec[colind[i]] = data[i];
        }
        return vec;
    }

    /**
     * @return the index of the row for the given design element, or -1 if it is not part of this matrix
     */
    @Override
    public int getRowIndex( CompositeSequence designElement ) {
        // binarySearch() reports a miss as (-(insertion point) - 1); collapse all misses to -1
        int ix = Collections.binarySearch( designElements, designElement, designElementComparator );
        return ix >= 0 ? ix : -1;
    }

    /**
     * @return the number of rows (design elements) in the matrix
     */
    @Override
    public int rows() {
        return matrix.numRows();
    }

    @Override
    public QuantitationType getQuantitationType() {
        return quantitationType;
    }

    @Override
    public SingleCellDimension getSingleCellDimension() {
        return singleCellDimension;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public Object[] getColumn( BioAssay bioAssay ) {
}

@Override
public Object[] getColumn( Integer column ) {
public Object[] getColumn( int column ) {
throw new UnsupportedOperationException();
}

Expand All @@ -104,12 +104,7 @@ public Object[] getRow( CompositeSequence designElement ) {
}

@Override
public Object[] getRow( Integer index ) {
throw new UnsupportedOperationException();
}

@Override
public Object[][] getRows( List<CompositeSequence> designElements ) {
public Object[] getRow( int index ) {
throw new UnsupportedOperationException();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public Boolean[] getColumn( BioAssay bioAssay ) {
}

@Override
public Boolean[] getColumn( Integer index ) {
public Boolean[] getColumn( int index ) {
ObjectMatrix1D rawResult = this.matrix.viewColumn( index );
Boolean[] res = new Boolean[rawResult.size()];
int i = 0;
Expand Down Expand Up @@ -135,26 +135,10 @@ public Boolean[] getRow( CompositeSequence designElement ) {
}

@Override
public Boolean[] getRow( Integer index ) {
public Boolean[] getRow( int index ) {
return matrix.getRow( index );
}

/**
 * Retrieve the rows for the given design elements, in the order they are supplied.
 *
 * @param designElements the design elements to look up, or null
 * @return one row per supplied design element, as returned by the single-element row lookup, or null if the list
 * itself is null
 */
@Override
public Boolean[][] getRows( List<CompositeSequence> designElements ) {
    // a null request yields a null result
    return designElements == null
            ? null
            : designElements.stream()
                    .map( element -> this.getRow( element ) )
                    .toArray( Boolean[][]::new );
}

@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ public Double[] getColumn( BioAssay bioAssay ) {
}

@Override
public Double[] getColumn( Integer index ) {
public Double[] getColumn( int index ) {
double[] rawResult = this.matrix.getColumn( index );
assert rawResult != null;
Double[] result = new Double[rawResult.length];
Expand Down Expand Up @@ -350,27 +350,11 @@ public Double[] getRow( CompositeSequence designElement ) {
}

@Override
public Double[] getRow( Integer index ) {
public Double[] getRow( int index ) {
double[] rawRow = matrix.getRow( index );
return ArrayUtils.toObject( rawRow );
}

/**
 * Retrieve the rows for the given design elements, in the order they are supplied.
 *
 * @param designElements the design elements to look up, or null
 * @return one row per supplied design element, as returned by the single-element row lookup, or null if the list
 * itself is null
 */
@Override
public Double[][] getRows( List<CompositeSequence> designElements ) {
    // a null request yields a null result
    return designElements == null
            ? null
            : designElements.stream()
                    .map( element -> this.getRow( element ) )
                    .toArray( Double[][]::new );
}

@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public Integer[] getColumn( BioAssay bioAssay ) {
}

@Override
public Integer[] getColumn( Integer index ) {
public Integer[] getColumn( int index ) {
return this.matrix.getColumn( index );
}

Expand Down Expand Up @@ -98,19 +98,10 @@ public Integer[] getRow( CompositeSequence designElement ) {
}

@Override
public Integer[] getRow( Integer index ) {
public Integer[] getRow( int index ) {
return this.matrix.getRow( index );
}

/**
 * Retrieve the rows for the given design elements, in the order they are supplied.
 * <p>
 * The result has exactly one entry per requested design element. It is sized by the request rather than by the
 * number of rows in the matrix, so a shorter request no longer yields trailing null rows and a longer one no
 * longer overflows the result array.
 *
 * @param designElements the design elements to look up, or null
 * @return one row per supplied design element, or null if the list itself is null
 */
@Override
public Integer[][] getRows( List<CompositeSequence> designElements ) {
    if ( designElements == null ) {
        return null;
    }
    Integer[][] res = new Integer[designElements.size()][];
    int i = 0;
    for ( CompositeSequence element : designElements ) {
        res[i++] = this.matrix.getRow( this.getRowIndex( element ) );
    }
    return res;
}

@Override
public boolean hasMissingValues() {
for ( int i = 0; i < matrix.rows(); i++ ) {
Expand Down
Loading

0 comments on commit 6a993b7

Please sign in to comment.