bibrefIds = new ArrayList<>();
if ( this.pmids != null ) {
for ( String s : pmids ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/BioSequenceCleanupCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/BioSequenceCleanupCli.java
index 77f58d23a6..3b361bda47 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/BioSequenceCleanupCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/BioSequenceCleanupCli.java
@@ -23,6 +23,7 @@
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.genome.biosequence.BioSequence;
@@ -42,33 +43,35 @@
/**
* Goes through the biosequences for array designs in the database and removes duplicates.
- *
+ *
* Moved from GemmaAnalysis as this is a database maintenance tool
*
* @author pavlidis
*/
public class BioSequenceCleanupCli extends ArrayDesignSequenceManipulatingCli {
+ @Autowired
private BlatAssociationService blatAssociationService;
-
+ @Autowired
private BlatResultService blatResultService;
-
+ @Autowired
private BioSequenceService bss;
+ @Autowired
private CompositeSequenceService css;
+
private String file = null;
private boolean justTesting = false;
- /*
- * (non-Javadoc)
- *
- * @see ubic.gemma.util.AbstractCLI#getCommandName()
- */
@Override
public String getCommandName() {
return "seqCleanup";
}
- @SuppressWarnings("static-access")
+ @Override
+ public String getShortDesc() {
+ return "Examines biosequences for array designs in the database and removes duplicates.";
+ }
+
@Override
protected void buildOptions( Options options ) {
super.buildOptions( options );
@@ -86,11 +89,24 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() {
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
+ super.processOptions( commandLine );
+ if ( commandLine.hasOption( "dryrun" ) ) {
+ this.justTesting = true;
+ log.info( "TEST MODE: NO DATABASE UPDATES WILL BE PERFORMED" );
+ }
+
+ if ( commandLine.hasOption( 'b' ) ) {
+ this.file = commandLine.getOptionValue( 'b' );
+ }
+ }
+
+ @Override
+ protected void doAuthenticatedWork() {
- Collection<ArrayDesign> ads = new HashSet<>();
+ Collection<ArrayDesign> ads;
if ( !this.getArrayDesignsToProcess().isEmpty() ) {
- ads.addAll( this.getArrayDesignsToProcess() );
+ ads = new HashSet<>( this.getArrayDesignsToProcess() );
} else if ( file != null ) {
try ( InputStream is = new FileInputStream( file );
BufferedReader br = new BufferedReader( new InputStreamReader( is ) ); ) {
@@ -184,32 +200,6 @@ protected void doWork() {
}
}
}
-
- return;
-
- }
-
- @Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
- if ( commandLine.hasOption( "dryrun" ) ) {
- this.justTesting = true;
- log.info( "TEST MODE: NO DATABASE UPDATES WILL BE PERFORMED" );
- }
-
- if ( commandLine.hasOption( 'b' ) ) {
- this.file = commandLine.getOptionValue( 'b' );
- }
-
- bss = this.getBean( BioSequenceService.class );
- css = this.getBean( CompositeSequenceService.class );
- blatResultService = this.getBean( BlatResultService.class );
- blatAssociationService = this.getBean( BlatAssociationService.class );
- }
-
- @Override
- public String getShortDesc() {
- return "Examines biosequences for array designs in the database and removes duplicates.";
}
/**
@@ -364,5 +354,4 @@ private void switchAndDeleteExtra( BioSequence keeper, BioSequence toRemove ) {
bss.remove( toRemove );
}
}
-
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java
index 11cbf74d25..f0bc7c7788 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/BlacklistCli.java
@@ -23,6 +23,7 @@
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import ubic.gemma.core.loader.expression.geo.model.GeoRecord;
import ubic.gemma.core.loader.expression.geo.service.GeoBrowser;
@@ -54,6 +55,12 @@ public class BlacklistCli extends AbstractAuthenticatedCLI {
private static final int MAX_RETRIES = 3;
+ @Autowired
+ private BlacklistedEntityService blacklistedEntityService;
+ @Autowired
+ private ExternalDatabaseService externalDatabaseService;
+ @Autowired
+ ExpressionExperimentService expressionExperimentService;
@Value("${entrez.efetch.apikey}")
private String ncbiApiKey;
@@ -72,7 +79,7 @@ public class BlacklistCli extends AbstractAuthenticatedCLI {
private Collection platformsToScreen;
public BlacklistCli() {
- setRequireLogin( true );
+ setRequireLogin();
}
@Override
@@ -112,10 +119,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
- BlacklistedEntityService blacklistedEntityService = this.getBean( BlacklistedEntityService.class );
- ExternalDatabaseService externalDatabaseService = this.getBean( ExternalDatabaseService.class );
-
+ protected void doAuthenticatedWork() throws Exception {
ExternalDatabase geo = externalDatabaseService.findByName( "GEO" );
if ( geo == null )
@@ -137,7 +141,7 @@ protected void doWork() throws Exception {
throw new IllegalArgumentException( "A reason for blacklisting must be provided for " + accession );
}
- BlacklistedEntity b = null;
+ BlacklistedEntity b;
if ( accession.startsWith( "GPL" ) ) {
b = new BlacklistedPlatform();
} else if ( accession.startsWith( "GSE" ) ) {
@@ -239,8 +243,6 @@ protected void doWork() throws Exception {
log.info( "Blacklisted " + accession );
}
- } catch ( Exception e ) {
- throw e;
}
}
@@ -249,12 +251,11 @@ protected void doWork() throws Exception {
*/
private void proactivelyBlacklistExperiments( ExternalDatabase geo ) throws Exception {
GeoBrowser gbs = new GeoBrowser( ncbiApiKey );
- BlacklistedEntityService blacklistedEntityDao = this.getBean( BlacklistedEntityService.class );
Collection candidates = new ArrayList<>();
int numChecked = 0;
int numBlacklisted = 0;
- for ( BlacklistedEntity be : blacklistedEntityDao.loadAll() ) {
+ for ( BlacklistedEntity be : blacklistedEntityService.loadAll() ) {
if ( be instanceof BlacklistedPlatform ) {
if ( platformsToScreen == null || !platformsToScreen.isEmpty()
@@ -266,13 +267,13 @@ private void proactivelyBlacklistExperiments( ExternalDatabase geo ) throws Exce
if ( candidates.size() == 5 ) { // too many will break eutils query
log.info( "Looking for batch of candidates using: " + StringUtils.join( candidates, "," ) );
- numBlacklisted += fetchAndBlacklist( geo, gbs, blacklistedEntityDao, candidates );
+ numBlacklisted += fetchAndBlacklist( geo, gbs, candidates );
candidates.clear();
}
}
// finish the last batch
- fetchAndBlacklist( geo, gbs, blacklistedEntityDao, candidates );
+ fetchAndBlacklist( geo, gbs, candidates );
log.info( "Checked " + numChecked + " blacklisted platforms for experiment in GEO, blacklisted " + numBlacklisted + " GSEs" );
@@ -281,19 +282,16 @@ private void proactivelyBlacklistExperiments( ExternalDatabase geo ) throws Exce
/**
* @return number of actually blacklisted experiments in this batch.
*/
- private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, BlacklistedEntityService blacklistedEntityDao, Collection candidates )
+ private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, Collection candidates )
throws InterruptedException {
int start = 0;
- ExpressionExperimentService expressionExperimentService = this.getBean( ExpressionExperimentService.class );
-
- boolean keepGoing = true;
int numBlacklisted = 0;
int retries = 0;
- while ( keepGoing ) {
+ while ( true ) {
// code copied from GeoGrabberCli
- List<GeoRecord> recs = null;
+ List<GeoRecord> recs;
try {
recs = gbs.searchGeoRecords( null, null, null, candidates, null, start, 100, false /* details */ );
@@ -307,18 +305,22 @@ private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, Blacklisted
continue; // try again
}
log.info( "Too many failures, giving up" );
- keepGoing = false;
+ break;
}
if ( recs == null || recs.isEmpty() ) {
- keepGoing = false;
break;
}
for ( GeoRecord geoRecord : recs ) {
boolean skip = false;
String eeAcc = geoRecord.getGeoAccession();
- if ( null != blacklistedEntityDao.findByAccession( eeAcc ) ) {
+ if ( eeAcc == null ) {
+ log.warn( "Found GEO record with null accession, skipping." );
+ continue;
+ }
+
+ if ( blacklistedEntityService.findByAccession( eeAcc ) != null ) {
log.debug( "Already blacklisted: " + eeAcc );
continue;
}
@@ -326,7 +328,7 @@ private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, Blacklisted
String[] platforms = geoRecord.getPlatform().split( ";" );
for ( String p : platforms ) {
- BlacklistedEntity bli = blacklistedEntityDao.findByAccession( p );
+ BlacklistedEntity bli = blacklistedEntityService.findByAccession( p );
if ( bli == null ) {
// then at least one platform it uses isn't blacklisted, we won't blacklist the experiment
@@ -352,7 +354,7 @@ private int fetchAndBlacklist( ExternalDatabase geo, GeoBrowser gbs, Blacklisted
b.setDescription( geoRecord.getTitle() );
b.setReason( "Unsupported platform" );
- blacklistedEntityDao.create( b );
+ blacklistedEntityService.create( b );
numBlacklisted++;
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/DatabaseViewGeneratorCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/DatabaseViewGeneratorCLI.java
index a9fcda6cc0..084e6e69f8 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/DatabaseViewGeneratorCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/DatabaseViewGeneratorCLI.java
@@ -21,6 +21,7 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.report.DatabaseViewGenerator;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
@@ -32,6 +33,9 @@
@SuppressWarnings({ "FieldCanBeLocal", "unused" }) // Possible external use
public class DatabaseViewGeneratorCLI extends AbstractAuthenticatedCLI {
+ @Autowired
+ private DatabaseViewGenerator v;
+
private boolean generateDatasetSummary = false;
private boolean generateDiffExpressionSummary = false;
private boolean generateTissueSummary = false;
@@ -78,8 +82,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
- DatabaseViewGenerator v = this.getBean( DatabaseViewGenerator.class );
+ protected void doAuthenticatedWork() throws Exception {
v.runAll();
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/DeleteExperimentsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/DeleteExperimentsCli.java
index 5f7dbde4b5..8b59eb3bf7 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/DeleteExperimentsCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/DeleteExperimentsCli.java
@@ -21,12 +21,10 @@
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
-import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import java.util.Arrays;
-import java.util.Collection;
import java.util.List;
/**
@@ -41,6 +39,11 @@ public class DeleteExperimentsCli extends ExpressionExperimentManipulatingCLI {
private List<String> platformAccs = null;
+ public DeleteExperimentsCli() {
+ // we delete troubled / unusable items, has to be set prior to processOptions()
+ setForce();
+ }
+
@Override
public String getCommandName() {
return "deleteExperiments";
@@ -52,8 +55,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
options.addOption(
Option.builder( "a" ).longOpt( "array" )
.desc( "Delete platform(s) instead; you must delete associated experiments first; other options are ignored" )
@@ -61,9 +63,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- this.force = true; // we delete troubled / unusuable items, has to be set prior to processOptions.
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( 'a' ) ) {
this.platformAccs = Arrays.asList( StringUtils.split( commandLine.getOptionValue( 'a' ), "," ) );
@@ -73,8 +73,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
}
- @Override
- protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ protected void doAuthenticatedWork() throws Exception {
if ( platformAccs != null ) {
log.info( "Deleting " + platformAccs.size() + " platform(s)" );
@@ -109,18 +108,20 @@ protected void processBioAssaySets( Collection expressionExperiment
addErrorObject( a, e );
}
}
- return;
+ } else {
+ super.doAuthenticatedWork();
}
+ }
- for ( BioAssaySet bas : expressionExperiments ) {
- try {
- log.info( "--------- Deleting " + bas + " --------" );
- this.eeService.remove( ( ExpressionExperiment ) bas );
- addSuccessObject( bas );
- log.info( "--------- Finished Deleting " + bas + " -------" );
- } catch ( Exception ex ) {
- addErrorObject( bas, ex );
- }
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
+ try {
+ log.info( "--------- Deleting " + ee + " --------" );
+ this.eeService.remove( ee );
+ addSuccessObject( ee );
+ log.info( "--------- Finished Deleting " + ee + " -------" );
+ } catch ( Exception ex ) {
+ addErrorObject( ee, ex );
}
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/DetectQuantitationTypeCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/DetectQuantitationTypeCli.java
new file mode 100644
index 0000000000..85e9884f83
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/DetectQuantitationTypeCli.java
@@ -0,0 +1,107 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.analysis.service.ExpressionDataMatrixService;
+import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
+import ubic.gemma.core.datastructure.matrix.SingleCellExpressionDataMatrix;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.service.common.quantitationtype.NonUniqueQuantitationTypeByNameException;
+import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+
+import javax.annotation.Nullable;
+
+import static ubic.gemma.core.analysis.preprocess.detect.QuantitationTypeDetectionUtils.inferQuantitationType;
+
+public class DetectQuantitationTypeCli extends ExpressionExperimentManipulatingCLI {
+
+ @Autowired
+ public QuantitationTypeService quantitationTypeService;
+
+ @Autowired
+ private ExpressionDataMatrixService expressionDataMatrixService;
+
+ @Autowired
+ public SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ @Nullable
+ private String quantitationTypeName;
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "detectQuantitationType";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Detect quantitation type from data";
+ }
+
+ @Override
+ protected void buildExperimentOptions( Options options ) {
+ options.addOption( "qtName", "quantitation-type-name", true, "Name of the quantitation type to process." );
+ }
+
+ @Override
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
+ quantitationTypeName = commandLine.getOptionValue( "qtName" );
+ }
+
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) {
+ if ( quantitationTypeName != null ) {
+ QuantitationType qt;
+ try {
+ qt = quantitationTypeService.findByNameAndVectorType( expressionExperiment, quantitationTypeName, RawExpressionDataVector.class );
+ } catch ( NonUniqueQuantitationTypeByNameException e ) {
+ throw new RuntimeException( e );
+ }
+ if ( qt != null ) {
+ detectRawQuantitationType( expressionExperiment, qt );
+ }
+ } else {
+ quantitationTypeService.findByExpressionExperiment( expressionExperiment, RawExpressionDataVector.class )
+ .forEach( qt -> detectRawQuantitationType( expressionExperiment, qt ) );
+ }
+ if ( quantitationTypeName != null ) {
+ QuantitationType qt;
+ try {
+ qt = quantitationTypeService.findByNameAndVectorType( expressionExperiment, quantitationTypeName, SingleCellExpressionDataVector.class );
+ } catch ( NonUniqueQuantitationTypeByNameException e ) {
+ throw new RuntimeException( e );
+ }
+ if ( qt != null ) {
+ detectSingleCellQuantitationType( expressionExperiment, qt );
+ }
+ } else {
+ quantitationTypeService.findByExpressionExperiment( expressionExperiment, SingleCellExpressionDataVector.class )
+ .forEach( qt -> detectSingleCellQuantitationType( expressionExperiment, qt ) );
+ }
+ }
+
+ private void detectRawQuantitationType( ExpressionExperiment ee, QuantitationType quantitationType ) {
+ log.info( "Loading data for " + quantitationType + "..." );
+ ExpressionDataDoubleMatrix matrix = expressionDataMatrixService.getRawExpressionDataMatrix( ee, quantitationType );
+ log.info( "Got data!" );
+ if ( matrix != null ) {
+ QuantitationType qt = inferQuantitationType( matrix );
+ log.info( "Detected quantitation type: " + qt );
+ }
+ }
+
+ private void detectSingleCellQuantitationType( ExpressionExperiment ee, QuantitationType quantitationType ) {
+ log.info( "Loading data for " + quantitationType + "..." );
+ SingleCellExpressionDataMatrix matrix = singleCellExpressionExperimentService.getSingleCellExpressionDataMatrix( ee, quantitationType );
+ log.info( "Got data!" );
+ QuantitationType qt = inferQuantitationType( matrix );
+ log.info( "Detected quantitation type: " + qt );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/DifferentialExpressionAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/DifferentialExpressionAnalysisCli.java
index a89d77fcda..2fae5d361b 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/DifferentialExpressionAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/DifferentialExpressionAnalysisCli.java
@@ -92,14 +92,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
-
- /*
- * These options from the super class support: running on one or more data sets from the command line, running
- * on list of data sets from a file, running on all data sets.
- */
- super.buildOptions( options );
-
+ protected void buildExperimentOptions( Options options ) {
/* Supports: running on all data sets that have not been run since a given date. */
addLimitingDateOption( options );
@@ -146,8 +139,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( "type" ) ) {
if ( !commandLine.hasOption( "factors" ) ) {
throw new IllegalArgumentException( "Please specify the factor(s) when specifying the analysis type." );
@@ -216,14 +208,13 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
@Override
protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
- if ( type != null && expressionExperiments.size() > 1 ) {
+ if ( type != null ) {
throw new IllegalArgumentException( "You can only specify the analysis type when analyzing a single experiment" );
}
- if ( subsetFactorId != null && expressionExperiments.size() > 1 ) {
+ if ( subsetFactorId != null ) {
throw new IllegalArgumentException( "You can only specify the subset factor when analyzing a single experiment" );
}
-
- if ( !factorIds.isEmpty() && expressionExperiments.size() > 1 ) {
+ if ( !factorIds.isEmpty() ) {
throw new IllegalArgumentException( "You can only specify the factors when analyzing a single experiment" );
}
super.processBioAssaySets( expressionExperiments );
@@ -360,8 +351,8 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
} else {
log.info( "Writing results to disk" );
for ( DifferentialExpressionAnalysis r : results ) {
- try {
- expressionDataFileService.writeDiffExArchiveFile( ee, r, config );
+ try ( ExpressionDataFileService.LockedPath lockedPath = expressionDataFileService.writeDiffExAnalysisArchiveFile( r, config ) ) {
+ log.info( "Wrote to " + lockedPath.getPath() );
} catch ( IOException e ) {
throw new RuntimeException( e );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignImportCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignImportCli.java
index 01767cc5c7..46dbeca75e 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignImportCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignImportCli.java
@@ -71,16 +71,14 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
Option designFileOption = Option.builder( "f" ).required().hasArg().type( Path.class ).argName( "Design file" )
.desc( "Experimental design description file" ).longOpt( "designFile" ).build();
options.addOption( designFileOption );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
experimentalDesignFile = commandLine.getParsedOptionValue( 'f' );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignViewCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignViewCli.java
index ff5d2d0e17..20a889b142 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignViewCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignViewCli.java
@@ -2,13 +2,14 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
import ubic.gemma.core.util.CLI;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.persistence.service.expression.experiment.ExperimentalDesignService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
-import ubic.gemma.persistence.util.EntityUtils;
+import ubic.gemma.persistence.util.IdentifiableUtils;
import java.util.*;
@@ -17,10 +18,10 @@
*/
public class ExperimentalDesignViewCli extends AbstractAuthenticatedCLI {
- @Override
- public CommandGroup getCommandGroup() {
- return CLI.CommandGroup.ANALYSIS;
- }
+ @Autowired
+ private ExperimentalDesignService eds;
+ @Autowired
+ private ExpressionExperimentService ees;
@Override
public String getCommandName() {
@@ -32,6 +33,11 @@ public String getShortDesc() {
return "Dump a view of experimental design(s)";
}
+ @Override
+ public CommandGroup getCommandGroup() {
+ return CLI.CommandGroup.ANALYSIS;
+ }
+
@Override
protected void buildOptions( Options options ) {
}
@@ -42,12 +48,9 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- protected void doWork() throws Exception {
- ExperimentalDesignService eds = getBean( ExperimentalDesignService.class );
-
- ExpressionExperimentService ees = getBean( ExpressionExperimentService.class );
+ protected void doAuthenticatedWork() throws Exception {
Collection experiments = ees.loadValueObjectsByIds(
- EntityUtils.getIds( ees.loadAll() ) );
+ IdentifiableUtils.getIds( ees.loadAll() ) );
Map ed2ee = new HashMap<>();
@@ -82,7 +85,7 @@ protected void doWork() throws Exception {
for ( FactorValue f : factor.getFactorValues() ) {
if ( f.getMeasurement() != null ) continue; // don't list individual quantitative values.
- if ( f.getCharacteristics().size() > 0 ) {
+ if ( !f.getCharacteristics().isEmpty() ) {
for ( Characteristic c : f.getCharacteristics() ) {
if ( Objects.equals( c.getCategory(), category ) ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignWriterCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignWriterCLI.java
index 5a7563058b..a95eee1f56 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignWriterCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExperimentalDesignWriterCLI.java
@@ -22,9 +22,12 @@
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.basecode.util.FileTools;
-import ubic.gemma.core.datastructure.matrix.ExperimentalDesignWriter;
+import ubic.gemma.core.datastructure.matrix.io.ExperimentalDesignWriter;
+import ubic.gemma.core.util.BuildInfo;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.util.EntityUrlBuilder;
import java.io.IOException;
import java.io.PrintWriter;
@@ -36,6 +39,12 @@
*/
public class ExperimentalDesignWriterCLI extends ExpressionExperimentManipulatingCLI {
+ @Autowired
+ private EntityUrlBuilder entityUrlBuilder;
+
+ @Autowired
+ private BuildInfo buildInfo;
+
private String outFileName;
@Override
@@ -49,8 +58,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
Option outputFileOption = Option.builder( "o" ).hasArg().required().argName( "outFilePrefix" )
.desc( "File prefix for saving the output (short name will be appended)" )
.longOpt( "outFilePrefix" ).build();
@@ -58,14 +66,13 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
outFileName = commandLine.getOptionValue( 'o' );
}
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- ExperimentalDesignWriter edWriter = new ExperimentalDesignWriter();
+ ExperimentalDesignWriter edWriter = new ExperimentalDesignWriter( entityUrlBuilder, buildInfo );
try ( PrintWriter writer = new PrintWriter( outFileName + "_" + FileTools.cleanForFileName( ee.getShortName() ) + ".txt" ) ) {
edWriter.write( writer, ee, true );
writer.flush();
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataCorrMatCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataCorrMatCli.java
index ad9e3417c3..31d0a4d69b 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataCorrMatCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataCorrMatCli.java
@@ -20,6 +20,7 @@
import org.apache.commons.cli.Options;
import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.model.common.auditAndSecurity.eventType.FailedSampleCorrelationAnalysisEvent;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
@@ -45,26 +46,27 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
- super.addForceOption( options );
+ protected void buildExperimentOptions( Options options ) {
+ addForceOption( options );
}
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- if ( !force && this.noNeedToRun( ee, null ) ) {
+ if ( this.noNeedToRun( ee, null ) ) {
return;
}
-
ee = eeService.thawLiter( ee );
try {
- if ( force ) {
+ if ( isForce() ) {
sampleCoexpressionAnalysisService.compute( ee, sampleCoexpressionAnalysisService.prepare( ee ) );
} else {
if ( sampleCoexpressionAnalysisService.retrieveExisting( ee ) == null ) {
sampleCoexpressionAnalysisService.compute( ee, sampleCoexpressionAnalysisService.prepare( ee ) );
}
}
+ } catch ( FilteringException e ) {
+ auditTrailService.addUpdateEvent( ee, FailedSampleCorrelationAnalysisEvent.class, null, e );
+ throw new RuntimeException( e );
} catch ( Exception e ) {
auditTrailService.addUpdateEvent( ee, FailedSampleCorrelationAnalysisEvent.class, null, e );
throw e;
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataMatrixWriterCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataMatrixWriterCLI.java
index af3e3caee0..29ca468f23 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataMatrixWriterCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionDataMatrixWriterCLI.java
@@ -22,15 +22,23 @@
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
-import ubic.basecode.util.FileTools;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.core.analysis.service.ExpressionDataFileUtils;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import javax.annotation.Nullable;
import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Collection;
+import java.util.zip.GZIPOutputStream;
/**
* Prints preferred data matrix to a file.
@@ -42,8 +50,9 @@ public class ExpressionDataMatrixWriterCLI extends ExpressionExperimentManipulat
@Autowired
private ExpressionDataFileService fs;
- private boolean filter = false;
- private String outFileName = null;
+ @Nullable
+ private Path outputFile;
+ private boolean filter;
@Override
public String getCommandName() {
@@ -52,28 +61,25 @@ public String getCommandName() {
@Override
public String getShortDesc() {
- return "Prints preferred data matrix to a file; gene information is included if available.";
+ return "Write processed data matrix to a file; gene information is included if available.";
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
- options.addOption( Option.builder( "o" ).longOpt( "outputFileName" ).desc( "File name. If omitted, the file name will be based on the short name of the experiment." ).argName( "filename" ).hasArg().build() );
+ protected void buildExperimentOptions( Options options ) {
+ options.addOption( Option.builder( "o" ).longOpt( "outputFileName" ).desc( "File name. If omitted, the file name will be based on the short name of the experiment." ).argName( "filename" ).hasArg().type( Path.class ).build() );
options.addOption( "filter", "Filter expression matrix under default parameters" );
+ addForceOption( options );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
- outFileName = commandLine.getOptionValue( 'o' );
- if ( commandLine.hasOption( "filter" ) ) {
- filter = true;
- }
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
+ outputFile = commandLine.getParsedOptionValue( 'o' );
+ filter = commandLine.hasOption( "filter" );
}
@Override
protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
- if ( expressionExperiments.size() > 1 && StringUtils.isNotBlank( outFileName ) ) {
+ if ( outputFile != null ) {
throw new IllegalArgumentException( "Output file name can only be used for single experiment output" );
}
super.processBioAssaySets( expressionExperiments );
@@ -81,16 +87,19 @@ protected void processBioAssaySets( Collection expressionExperiment
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- String fileName;
- if ( StringUtils.isNotBlank( outFileName ) ) {
- fileName = outFileName;
+ Path fileName;
+ if ( outputFile != null ) {
+ fileName = outputFile;
} else {
- fileName = FileTools.cleanForFileName( ee.getShortName() ) + ".txt";
+ fileName = Paths.get( ExpressionDataFileUtils.getDataOutputFilename( ee, filter, ExpressionDataFileUtils.TABULAR_BULK_DATA_FILE_SUFFIX ) );
+ }
+ if ( !isForce() && Files.exists( fileName ) ) {
+ throw new RuntimeException( "Output file " + fileName + " already exists, use -force to overwrite." );
}
- try {
- fs.writeProcessedExpressionDataFile( ee, filter, fileName, false )
- .orElseThrow( () -> new RuntimeException( "No processed expression data vectors to write." ) );
- } catch ( IOException e ) {
+ try ( Writer writer = new OutputStreamWriter( new GZIPOutputStream( Files.newOutputStream( fileName ) ), StandardCharsets.UTF_8 ) ) {
+ int written = fs.writeProcessedExpressionData( ee, filter, writer );
+ addSuccessObject( ee, "Wrote " + written + " vectors to " + fileName + "." );
+ } catch ( IOException | FilteringException e ) {
throw new RuntimeException( e );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentDataFileGeneratorCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentDataFileGeneratorCli.java
index 6373b5e6c0..a711bbabf7 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentDataFileGeneratorCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentDataFileGeneratorCli.java
@@ -55,19 +55,15 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
-
+ protected void buildExperimentOptions( Options options ) {
Option forceWriteOption = Option.builder( "w" )
.desc( "Overwrites existing files if this option is set" ).longOpt( "forceWrite" )
.build();
-
options.addOption( forceWriteOption );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( 'w' ) ) {
this.forceWrite = true;
}
@@ -78,9 +74,11 @@ protected void processExpressionExperiment( ExpressionExperiment ee1 ) {
getBatchTaskExecutor().submit( () -> {
log.info( "Processing Experiment: " + ee1.getName() );
ExpressionExperiment ee = this.eeService.thawLite( ee1 );
- expressionDataFileService.writeOrLocateDiffExpressionDataFiles( ee, forceWrite );
+ expressionDataFileService.writeOrLocateDiffExpressionDataFiles( ee, forceWrite )
+ .forEach( ExpressionDataFileService.LockedPath::close );
ats.addUpdateEvent( ee, CommentedEvent.class, "Generated Flat data files for downloading" );
addSuccessObject( ee, "Success: generated data file for " + ee.getShortName() + " ID=" + ee.getId() );
+ return null;
} );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentManipulatingCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentManipulatingCLI.java
index 27b7ae8441..4d101af003 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentManipulatingCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentManipulatingCLI.java
@@ -30,13 +30,11 @@
import ubic.gemma.core.search.SearchException;
import ubic.gemma.core.search.SearchResult;
import ubic.gemma.core.search.SearchService;
-import ubic.gemma.core.util.AbstractAuthenticatedCLI;
+import ubic.gemma.core.util.AbstractAutoSeekingCLI;
+import ubic.gemma.core.util.EntityLocator;
import ubic.gemma.core.util.FileUtils;
import ubic.gemma.core.util.GemmaRestApiClient;
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
-import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
-import ubic.gemma.model.common.auditAndSecurity.Auditable;
-import ubic.gemma.model.common.auditAndSecurity.curation.Curatable;
import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType;
import ubic.gemma.model.common.search.SearchSettings;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
@@ -49,18 +47,15 @@
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService;
-import ubic.gemma.persistence.service.genome.taxon.TaxonService;
-import ubic.gemma.persistence.util.Filter;
-import ubic.gemma.persistence.util.Filters;
+import ubic.gemma.persistence.util.EntityUrlBuilder;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
-import static ubic.gemma.persistence.util.IdentifiableUtils.toIdentifiableSet;
-
/**
* Base class for CLIs that needs one or more expression experiment as an input. It offers the following ways of reading
* them in:
@@ -82,15 +77,13 @@
*
* @author Paul
*/
-public abstract class ExpressionExperimentManipulatingCLI extends AbstractAuthenticatedCLI {
+public abstract class ExpressionExperimentManipulatingCLI extends AbstractAutoSeekingCLI<ExpressionExperiment> {
@Autowired
protected ExpressionExperimentService eeService;
@Autowired
private ExpressionExperimentSetService expressionExperimentSetService;
@Autowired
- private TaxonService taxonService;
- @Autowired
private SearchService searchService;
@Autowired
private ArrayDesignService arrayDesignService;
@@ -98,17 +91,31 @@ public abstract class ExpressionExperimentManipulatingCLI extends AbstractAuthen
protected AuditTrailService auditTrailService;
@Autowired
protected AuditEventService auditEventService;
+ @Autowired
+ protected EntityLocator entityLocator;
+ @Autowired
+ protected EntityUrlBuilder entityUrlBuilder;
/**
* Single-experiment mode.
*/
private boolean singleExperimentMode = false;
+ /**
+ * Default to all datasets if no options are supplied.
+ */
+ private boolean defaultToAll = false;
+
/**
* Try to use references instead of actual entities.
*/
private boolean useReferencesIfPossible = false;
+ /**
+ * Abort processing experiments if an error occurs.
+ */
+ private boolean abortOnError = false;
+
/**
* Process all experiments.
*/
@@ -138,13 +145,9 @@ public abstract class ExpressionExperimentManipulatingCLI extends AbstractAuthen
*/
private Path excludeFile;
- /**
- * Force processing of EEs regardless of their troubled status.
- */
- protected boolean force = false;
-
protected ExpressionExperimentManipulatingCLI() {
- setRequireLogin( true );
+ super( ExpressionExperiment.class );
+ setRequireLogin();
}
@Override
@@ -153,7 +156,7 @@ public CommandGroup getCommandGroup() {
}
@Override
- protected void buildOptions( Options options ) {
+ protected final void buildOptions( Options options ) {
Option expOption = Option.builder( "e" ).hasArg().argName( "shortname" ).desc(
"Expression experiment short name. Most tools recognize comma-delimited values given on the command line, "
+ "and if this option is omitted (and none other provided), the tool will be applied to all expression experiments." )
@@ -161,10 +164,14 @@ protected void buildOptions( Options options ) {
options.addOption( expOption );
- if ( singleExperimentMode )
+ if ( singleExperimentMode ) {
+ buildExperimentOptions( options );
return;
+ }
- options.addOption( "all", false, "Process all expression experiments" );
+ if ( !defaultToAll ) {
+ options.addOption( "all", false, "Process all expression experiments" );
+ }
Option eeFileListOption = Option.builder( "f" ).hasArg().type( Path.class ).argName( "file" )
.desc( "File with list of short names or IDs of expression experiments (one per line; use instead of '-e')" )
@@ -191,15 +198,25 @@ protected void buildOptions( Options options ) {
options.addOption( excludeEeOption );
addBatchOption( options );
+
+ buildExperimentOptions( options );
+ }
+
+ protected void buildExperimentOptions( Options options ) {
+
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- Assert.isTrue( commandLine.hasOption( "all" ) || commandLine.hasOption( "eeset" )
- || commandLine.hasOption( "e" ) || commandLine.hasOption( 'f' ) || commandLine.hasOption( 'q' ),
- "At least one of -all, -e, -eeset, -f, or -q must be provided." );
- this.force = commandLine.hasOption( "force" );
- this.all = commandLine.hasOption( "all" );
+ protected final void processOptions( CommandLine commandLine ) throws ParseException {
+ boolean hasAnyDatasetOptions = commandLine.hasOption( "all" ) || commandLine.hasOption( "eeset" )
+ || commandLine.hasOption( "e" ) || commandLine.hasOption( 'f' ) || commandLine.hasOption( 'q' );
+ Assert.isTrue( hasAnyDatasetOptions || defaultToAll, "At least one of -all, -e, -eeset, -f, or -q must be provided." );
+ super.processOptions( commandLine );
+ if ( defaultToAll && !hasAnyDatasetOptions ) {
+ this.all = true;
+ } else {
+ this.all = commandLine.hasOption( "all" );
+ }
if ( commandLine.hasOption( 'e' ) ) {
String optionValue = commandLine.getOptionValue( 'e' );
Assert.isTrue( StringUtils.isNotBlank( optionValue ), "List of EE identifiers must not be blank." );
@@ -210,12 +227,17 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
this.query = commandLine.getOptionValue( 'q' );
this.taxonName = commandLine.getOptionValue( 't' );
this.excludeFile = commandLine.getParsedOptionValue( 'x' );
+ processExperimentOptions( commandLine );
+ }
+
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
+
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
// intentionally a TreeSet over IDs, to prevent proxy initialization via hashCode()
- Set<BioAssaySet> expressionExperiments = new TreeSet<>( Comparator.comparing( BioAssaySet::getId ) );
+ Collection<BioAssaySet> expressionExperiments = new TreeSet<>( Comparator.comparing( BioAssaySet::getId ) );
if ( all ) {
if ( useReferencesIfPossible ) {
@@ -245,7 +267,7 @@ protected void doWork() throws Exception {
}
if ( query != null ) {
- Taxon taxon = this.taxonName != null ? locateTaxon( this.taxonName ) : null;
+ Taxon taxon = this.taxonName != null ? entityLocator.locateTaxon( this.taxonName ) : null;
log.info( "Processing all experiments that match query " + query + ( taxon != null ? " in taxon " + taxon : "" ) );
try {
expressionExperiments.addAll( this.findExpressionExperimentsByQuery( query, taxon ) );
@@ -262,7 +284,7 @@ protected void doWork() throws Exception {
}
}
- if ( !force && !expressionExperiments.isEmpty() ) {
+ if ( !isForce() && !expressionExperiments.isEmpty() ) {
if ( isAutoSeek() ) {
if ( this.getAutoSeekEventType() == null ) {
@@ -276,25 +298,42 @@ protected void doWork() throws Exception {
this.removeTroubledExperiments( expressionExperiments );
}
+ expressionExperiments = preprocessBioAssaySets( expressionExperiments );
+
if ( expressionExperiments.isEmpty() ) {
throw new RuntimeException( "No expression experiments matched the given options." );
} else if ( expressionExperiments.size() == 1 ) {
BioAssaySet ee = expressionExperiments.iterator().next();
- log.info( "Final dataset: " + experimentToString( ee ) );
+ log.info( "Final dataset: " + formatExperiment( ee ) );
+ processBioAssaySet( expressionExperiments.iterator().next() );
} else {
log.info( String.format( "Final list: %d expression experiments", expressionExperiments.size() ) );
+ processBioAssaySets( expressionExperiments );
}
+ }
- processBioAssaySets( expressionExperiments );
+ /**
+ * Pre-process BioAssays.
+ */
+ protected Collection<BioAssaySet> preprocessBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ return expressionExperiments;
}
+ /**
+ * Process multiple {@link BioAssaySet}.
+ *
+ * This is only called if more than one experiment was found.
+ */
protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
for ( BioAssaySet bas : expressionExperiments ) {
try {
processBioAssaySet( bas );
- addSuccessObject( bas );
} catch ( Exception e ) {
- addErrorObject( bas, e );
+ if ( abortOnError ) {
+ throw e;
+ } else {
+ addErrorObject( bas, e );
+ }
}
}
}
@@ -326,24 +365,18 @@ protected void processExpressionExperiment( ExpressionExperiment expressionExper
/**
* Process an {@link ExpressionExperimentSubSet}.
*/
- protected void processExpressionExperimentSubSet( ExpressionExperimentSubSet expressionExperimentSubSet ) {
+ protected void processExpressionExperimentSubSet( @SuppressWarnings("unused") ExpressionExperimentSubSet expressionExperimentSubSet ) {
throw new UnsupportedOperationException( "This command line does support experiment subsets." );
}
/**
* Process other kinds of {@link BioAssaySet} that are neither experiment nor subset.
*/
- protected void processOtherBioAssaySet( BioAssaySet bas ) {
+ protected void processOtherBioAssaySet( @SuppressWarnings("unused") BioAssaySet bas ) {
throw new UnsupportedOperationException( "This command line does support other kinds of BioAssaySet." );
}
- protected void addForceOption( Options options ) {
- String desc = "Ignore other reasons for skipping experiments (e.g., trouble) and overwrite existing data (see documentation for this tool to see exact behavior if not clear)";
- Option forceOption = Option.builder( "force" ).longOpt( "force" ).desc( desc ).build();
- options.addOption( forceOption );
- }
-
- private void excludeFromFile( Set expressionExperiments, Path excludeEeFileName ) throws IOException {
+ private void excludeFromFile( Collection expressionExperiments, Path excludeEeFileName ) throws IOException {
assert !expressionExperiments.isEmpty();
Collection excludeExperiments;
excludeExperiments = this.readExpressionExperimentListFile( excludeEeFileName );
@@ -357,10 +390,7 @@ private void excludeFromFile( Set expressionExperiments, Path exclu
private List experimentsFromCliList( String[] identifiers ) {
List ees = new ArrayList<>( identifiers.length );
for ( String identifier : identifiers ) {
- ExpressionExperiment expressionExperiment = this.locateExpressionExperiment( identifier );
- if ( expressionExperiment == null ) {
- continue;
- }
+ ExpressionExperiment expressionExperiment = entityLocator.locateExpressionExperiment( identifier, useReferencesIfPossible );
if ( !useReferencesIfPossible ) {
expressionExperiment = eeService.thawLite( expressionExperiment );
}
@@ -431,43 +461,6 @@ private Collection findExpressionExperimentsByQuery( Strin
return ees;
}
- /**
- * Attempt to locate an experiment using the given identifier.
- */
- @Nullable
- private ExpressionExperiment locateExpressionExperiment( String identifier ) {
- Assert.isTrue( StringUtils.isNotBlank( identifier ), "Expression experiment ID or short name must be provided" );
- identifier = StringUtils.strip( identifier );
- ExpressionExperiment ee;
- try {
- Long id = Long.parseLong( identifier );
- if ( useReferencesIfPossible ) {
- // this is never null, but may produce ObjectNotFoundException later on
- return eeService.loadReference( id );
- } else if ( ( ee = eeService.load( id ) ) != null ) {
- log.debug( "Found " + ee + " by ID" );
- return ee;
- } else {
- return null;
- }
- } catch ( NumberFormatException e ) {
- // can be safely ignored, we'll attempt to use it as a short name
- }
- if ( ( ee = eeService.findByShortName( identifier ) ) != null ) {
- log.debug( "Found " + ee + " by short name" );
- return ee;
- }
- if ( ( ee = eeService.findOneByAccession( identifier ) ) != null ) {
- log.debug( "Found " + ee + " by accession" );
- return ee;
- }
- if ( ( ee = eeService.findOneByName( identifier ) ) != null ) {
- log.debug( "Found " + ee + " by name" );
- return ee;
- }
- log.warn( "Could not locate any experiment with identifier or name " + identifier );
- return null;
- }
/**
* Load expression experiments based on a list of short names or IDs in a file. Only the first column of the file is
@@ -479,10 +472,7 @@ private Collection readExpressionExperimentListFile( Path
log.info( String.format( "Found %d experiment identifiers in %s", idlist.size(), fileName ) );
int count = 0;
for ( String id : idlist ) {
- ExpressionExperiment ee = locateExpressionExperiment( id );
- if ( ee == null ) {
- continue;
- }
+ ExpressionExperiment ee = entityLocator.locateExpressionExperiment( id, useReferencesIfPossible );
count++;
ees.add( ee );
if ( idlist.size() > 500 && count > 0 && count % 500 == 0 ) {
@@ -501,146 +491,34 @@ private void removeTroubledExperiments( Collection expressionExperi
log.warn( "No experiments to remove troubled from" );
return;
}
+
// it's not possible to check the curation details directly as that might trigger proxy initialization
- List troubledIds = eeService.loadIds( Filters.by( eeService.getFilter( "curationDetails.troubled", Boolean.class, Filter.Operator.eq, true ) ), null );
-
- // for subsets, check if the source experiment is troubled
- Set troubledExpressionExperiments = expressionExperiments.stream()
- .filter( ee -> {
- // for subsets, check source experiment troubled flag
- if ( ee instanceof ExpressionExperimentSubSet ) {
- return troubledIds.contains( ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getId() );
- } else {
- return troubledIds.contains( ee.getId() );
- }
- } )
- .collect( toIdentifiableSet() );
+ Set<Long> troubledIds = new HashSet<>( eeService.loadTroubledIds() );
// only retain non-troubled experiments
- expressionExperiments.removeAll( troubledExpressionExperiments );
-
- if ( !troubledExpressionExperiments.isEmpty() ) {
- log.info( String.format( "Removed %s troubled experiments, leaving %d to be processed; use -force to include those.",
- experimentsToString( troubledExpressionExperiments ), expressionExperiments.size() ) );
- }
- }
-
- /**
- * @param auditable auditable
- * @param eventClass can be null
- * @return boolean
- */
- protected boolean noNeedToRun( Auditable auditable, Class<? extends AuditEventType> eventClass ) {
- boolean needToRun = true;
- Date skipIfLastRunLaterThan = this.getLimitingDate();
- List events = this.auditEventService.getEvents( auditable );
-
- boolean okToRun = true; // assume okay unless indicated otherwise
-
- // figure out if we need to run it by date; or if there is no event of the given class; "Fail" type events don't
- // count.
- for ( int j = events.size() - 1; j >= 0; j-- ) {
- AuditEvent event = events.get( j );
- if ( event == null ) {
- continue; // legacy of ordered-list which could end up with gaps; should not be needed any more
- }
- AuditEventType eventType = event.getEventType();
- if ( eventType != null && eventClass != null && eventClass.isAssignableFrom( eventType.getClass() )
- && !eventType.getClass().getSimpleName().startsWith( "Fail" ) ) {
- if ( skipIfLastRunLaterThan != null ) {
- if ( event.getDate().after( skipIfLastRunLaterThan ) ) {
- log.info( auditable + ": " + " run more recently than " + skipIfLastRunLaterThan );
- addErrorObject( auditable, "Run more recently than " + skipIfLastRunLaterThan );
- needToRun = false;
- }
+ AtomicInteger removedTroubledExperiments = new AtomicInteger();
+ expressionExperiments.removeIf( ee -> {
+ // for subsets, check source experiment troubled flag
+ if ( ee instanceof ExpressionExperimentSubSet ) {
+ if ( troubledIds.contains( ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment().getId() ) ) {
+ removedTroubledExperiments.incrementAndGet();
+ return true;
} else {
- needToRun = false; // it has been run already at some point
+ return false;
}
+ } else if ( troubledIds.contains( ee.getId() ) ) {
+ removedTroubledExperiments.incrementAndGet();
+ return true;
+ } else {
+ return false;
}
+ } );
+ if ( removedTroubledExperiments.get() > 0 ) {
+ log.info( String.format( "Removed %d troubled experiments, leaving %d to be processed; use -%s to include those.",
+ removedTroubledExperiments.get(), expressionExperiments.size(), FORCE_OPTION ) );
}
-
- /*
- * Always skip if the object is curatable and troubled
- */
- if ( auditable instanceof Curatable ) {
- Curatable curatable = ( Curatable ) auditable;
- okToRun = !curatable.getCurationDetails().getTroubled(); //not ok if troubled
-
- // special case for expression experiments - check associated ADs.
- if ( okToRun && curatable instanceof ExpressionExperiment ) {
- for ( ArrayDesign ad : eeService.getArrayDesignsUsed( ( ExpressionExperiment ) auditable ) ) {
- if ( ad.getCurationDetails().getTroubled() ) {
- okToRun = false; // not ok if even one parent AD is troubled, no need to check the remaining ones.
- break;
- }
- }
- }
-
- if ( !okToRun ) {
- addErrorObject( auditable, "Has an active 'trouble' flag" );
- }
- }
-
- return !needToRun || !okToRun;
}
- protected Taxon locateTaxon( String taxonName ) {
- Assert.isTrue( StringUtils.isNotBlank( taxonName ), "Taxon name must be be blank." );
- taxonName = StringUtils.strip( taxonName );
- Taxon taxon;
- try {
- long id = Long.parseLong( taxonName );
- if ( ( taxon = taxonService.load( id ) ) != null ) {
- log.info( "Found " + taxon + " by ID" );
- return taxon;
- }
- if ( ( taxon = taxonService.findByNcbiId( Math.toIntExact( id ) ) ) != null ) {
- log.info( "Found " + taxon + " by NCBI ID" );
- return taxon;
- }
- throw new NullPointerException( "No taxon with ID or NCBI ID " + id );
- } catch ( NumberFormatException e ) {
- // ignore
- }
- if ( ( taxon = taxonService.findByCommonName( taxonName ) ) != null ) {
- log.info( "Found " + taxon + " by common name." );
- return taxon;
- }
- if ( ( taxon = taxonService.findByScientificName( taxonName ) ) != null ) {
- log.info( "Found " + taxon + " by scientific name." );
- return taxon;
- }
- throw new NullPointerException( "Cannot find taxon with name " + taxonName );
- }
-
- protected ArrayDesign locateArrayDesign( String name ) {
- Assert.isTrue( StringUtils.isNotBlank( name ), "Platform name must not be blank." );
- name = StringUtils.strip( name );
- ArrayDesign arrayDesign;
- try {
- long id = Long.parseLong( name );
- if ( ( arrayDesign = arrayDesignService.load( id ) ) != null ) {
- log.info( "Found " + arrayDesign + " by ID." );
- return arrayDesign;
- }
- throw new NullPointerException( "No platform with ID " + id );
- } catch ( NumberFormatException e ) {
- // ignore
- }
- if ( ( arrayDesign = arrayDesignService.findByShortName( name ) ) != null ) {
- log.info( "Found " + arrayDesign + " by short name." );
- return arrayDesign;
- }
- if ( ( arrayDesign = arrayDesignService.findOneByName( name ) ) != null ) {
- log.info( "Found " + arrayDesign + " by name." );
- return arrayDesign;
- }
- if ( ( arrayDesign = arrayDesignService.findOneByAlternateName( name ) ) != null ) {
- log.info( "Found " + arrayDesign + " by alternate name." );
- return arrayDesign;
- }
- throw new NullPointerException( "No platform found with ID or name " + name );
- }
/**
* Refresh a dataset for Gemma Web.
@@ -669,11 +547,22 @@ protected void refreshExpressionExperimentFromGemmaWeb( ExpressionExperiment ee,
* Enable the single-experiment mode.
*/
protected void setSingleExperimentMode() {
+ Assert.state( !this.singleExperimentMode, "Single experiment mode is already enabled." );
this.singleExperimentMode = true;
}
/**
- * Set this to true to allow reference to be retrieved instead of actual entities.
+ * Default to all datasets if no options are provided.
+ *
+ * This is a very dangerous setting that should be combined with {@link #useReferencesIfPossible}.
+ */
+ public void setDefaultToAll() {
+ Assert.state( !this.defaultToAll, "Default to all is already enabled." );
+ this.defaultToAll = true;
+ }
+
+ /**
+ * Set this to allow references to be retrieved instead of actual entities.
*
* This only works for entities retrieved by ID.
*
@@ -683,31 +572,56 @@ protected void setSingleExperimentMode() {
* The default is false.
*/
protected void setUseReferencesIfPossible() {
+ Assert.state( !this.useReferencesIfPossible, "Use references if possible is already enabled." );
this.useReferencesIfPossible = true;
}
+ /**
+ * Indicate if this CLI should abort on error or move on to the next experiment.
+ */
+ public boolean isAbortOnError() {
+ return abortOnError;
+ }
+
+ /**
+ * Set this to stop processing experiments if an error occurs.
+ */
+ protected void setAbortOnError() {
+ Assert.state( !this.abortOnError, "Abort on error is already enabled." );
+ this.abortOnError = true;
+ }
+
/**
* Render an experiment to string, with special handling in case of an uninitialized proxy.
+ *
+ * Use this for printing datasets if {@link #useReferencesIfPossible} is set, to prevent {@link org.hibernate.LazyInitializationException}.
*/
- private String experimentToString( BioAssaySet bas ) {
+ protected String formatExperiment( BioAssaySet bas ) {
if ( Hibernate.isInitialized( bas ) ) {
- return String.valueOf( bas );
+ return bas + " " + entityUrlBuilder.fromHostUrl().entity( bas ).web().toUriString();
} else if ( bas instanceof ExpressionExperiment ) {
- return "ExpressionExperiment Id=" + bas.getId();
+ return "ExpressionExperiment Id=" + bas.getId() + " " + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperiment ) bas ).web().toUriString();
} else if ( bas instanceof ExpressionExperimentSubSet ) {
- return "ExpressionExperimentSubSet Id=" + bas.getId();
+ return "ExpressionExperimentSubSet Id=" + bas.getId() + entityUrlBuilder.fromHostUrl().entity( ( ExpressionExperimentSubSet ) bas ).web().toUriString();
} else {
return "BioAssaySet Id=" + bas.getId();
}
}
- private String experimentsToString( Collection<? extends BioAssaySet> bas ) {
- if ( bas.isEmpty() ) {
- return "no experiments";
- } else if ( bas.size() == 1 ) {
- return experimentToString( bas.iterator().next() );
- } else {
- return bas.size() + " experiments";
+ @Override
+ protected boolean noNeedToRun( ExpressionExperiment auditable, @Nullable Class<? extends AuditEventType> eventClass ) {
+ if ( super.noNeedToRun( auditable, eventClass ) ) {
+ return true;
+ }
+
+ // special case for expression experiments - check associated ADs.
+ for ( ArrayDesign ad : eeService.getArrayDesignsUsed( auditable ) ) {
+ if ( ad.getCurationDetails().getTroubled() ) {
+ addErrorObject( auditable, "Associated platform " + ad.getShortName() + " has an active troubled flag, use - " + FORCE_OPTION + "to process anyway." );
+ return true; // not ok if even one parent AD is troubled, no need to check the remaining ones.
+ }
}
+
+ return false;
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java
index 7b196003e4..df41760261 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPlatformSwitchCli.java
@@ -21,14 +21,12 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
-import org.apache.commons.cli.ParseException;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.loader.expression.ExpressionExperimentPlatformSwitchService;
import ubic.gemma.model.common.auditAndSecurity.eventType.ExpressionExperimentPlatformSwitchEvent;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
-import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
/**
* Switch the array design used to the merged one.
@@ -37,17 +35,14 @@
*/
public class ExpressionExperimentPlatformSwitchCli extends ExpressionExperimentManipulatingCLI {
- private String arrayDesignName = null;
-
@Autowired
private ExpressionExperimentPlatformSwitchService serv;
- @Autowired
- private ArrayDesignService arrayDesignService;
-
@Autowired
private AuditTrailService ats;
+ private String arrayDesignName = null;
+
@Override
public String getCommandName() {
return "switchExperimentPlatform";
@@ -59,8 +54,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
Option arrayDesignOption = Option.builder( "a" ).hasArg().argName( "Array design" ).desc(
"Array design short name to be switched to - no need to specify if the platforms used by the EE are merged" )
.longOpt( "array" ).build();
@@ -69,8 +63,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) {
if ( commandLine.hasOption( 'a' ) ) {
this.arrayDesignName = commandLine.getOptionValue( 'a' );
}
@@ -81,10 +74,7 @@ protected void processExpressionExperiment( ExpressionExperiment expressionExper
expressionExperiment = this.eeService.thawLite( expressionExperiment );
ArrayDesign ad;
if ( this.arrayDesignName != null ) {
- ad = this.locateArrayDesign( this.arrayDesignName );
- if ( ad == null ) {
- throw new RuntimeException( "Unknown array design" );
- }
+ ad = entityLocator.locateArrayDesign( this.arrayDesignName );
serv.switchExperimentToArrayDesign( expressionExperiment, ad );
ats.addUpdateEvent( expressionExperiment, ExpressionExperimentPlatformSwitchEvent.class, "Switched to use " + ad );
} else {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPrimaryPubCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPrimaryPubCli.java
index 29154542e5..c1e62e6c5e 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPrimaryPubCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentPrimaryPubCli.java
@@ -53,10 +53,19 @@ public class ExpressionExperimentPrimaryPubCli extends ExpressionExperimentManip
private ExpressionExperimentService ees;
@Autowired
private PersisterHelper persisterHelper;
+ private PubMedXMLFetcher fetcher;
+ private ExpressionExperimentBibRefFinder finder;
@Value("${entrez.efetch.apikey}")
private String ncbiApiKey;
+ @Override
+ public void afterPropertiesSet() throws Exception {
+ super.afterPropertiesSet();
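+ // build the PubMed fetcher and bibref finder here so the injected NCBI API key is available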
+ this.fetcher = new PubMedXMLFetcher( ncbiApiKey );
+ this.finder = new ExpressionExperimentBibRefFinder( ncbiApiKey );
+ }
+
private String pubmedIdFilename;
private Map<String, Integer> pubmedIds = new HashMap<>();
@@ -71,8 +80,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
Option pubmedOption = Option.builder( "pubmedIDFile" ).hasArg().argName( "pubmedIDFile" ).desc(
"A text file which contains the list of pubmed IDs associated with each experiment ID. "
+ "If the pubmed ID is not found, it will try to use the existing pubmed ID associated "
@@ -85,8 +93,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( "pmidFile" ) ) {
this.pubmedIdFilename = commandLine.getOptionValue( "pmidFile" );
try {
@@ -97,72 +104,21 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
}
+ // collect some statistics
+ Collection<String> nullPubCount;
+ Collection<String> samePubCount;
+ Collection<String> diffPubCount;
+ Collection<String> failedEe;
+
@Override
protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
- PubMedXMLFetcher fetcher = new PubMedXMLFetcher( ncbiApiKey );
-
// collect some statistics
- Collection<String> nullPubCount = new ArrayList<>();
- Collection<String> samePubCount = new ArrayList<>();
- Collection<String> diffPubCount = new ArrayList<>();
- Collection<String> failedEe = new ArrayList<>();
-
- ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder( ncbiApiKey );
- for ( BioAssaySet bioassay : expressionExperiments ) {
- if ( !( bioassay instanceof ExpressionExperiment ) ) {
- log.info( bioassay.getName() + " is not an ExpressionExperiment" );
- continue;
- }
- ExpressionExperiment experiment = ( ExpressionExperiment ) bioassay;
- try {
- // if ( experiment.getPrimaryPublication() != null ) continue;
- if ( experiment.getPrimaryPublication() == null ) {
- log.warn( experiment + " has no existing primary publication, will attempt to find" );
- } else {
- log.info( experiment.getPrimaryPublication() + " has a primary publication, updating" );
- }
- experiment = ees.thawLite( experiment );
+ nullPubCount = new ArrayList<>();
+ samePubCount = new ArrayList<>();
+ diffPubCount = new ArrayList<>();
+ failedEe = new ArrayList<>();
- // get from GEO or get from a file
- BibliographicReference ref = fetcher.retrieveByHTTP( pubmedIds.get( experiment.getShortName() ) );
-
- if ( ref == null ) {
- if ( this.pubmedIdFilename != null ) {
- log.warn( "Pubmed ID for " + experiment.getShortName() + " was not found in "
- + this.pubmedIdFilename );
- }
- try {
- ref = finder.locatePrimaryReference( experiment );
- } catch ( IOException e ) {
- log.error( e );
- continue;
- }
-
- if ( ref == null ) {
- log.error( "No ref for " + experiment );
- failedEe.add( experiment.getShortName() );
- continue;
- }
- }
-
- // collect some statistics
- if ( experiment.getPrimaryPublication() == null ) {
- nullPubCount.add( experiment.getShortName() );
- } else if ( experiment.getPrimaryPublication().getPubAccession().getAccession()
- .equals( pubmedIds.get( experiment.getShortName() ).toString() ) ) {
- samePubCount.add( experiment.getShortName() );
- } else {
- diffPubCount.add( experiment.getShortName() );
- }
-
- log.info( "Found pubAccession " + ref.getPubAccession().getAccession() + " for " + experiment );
- ref = ( BibliographicReference ) persisterHelper.persist( ref );
- experiment.setPrimaryPublication( ref );
- ees.update( experiment );
- } catch ( Exception e ) {
- log.error( experiment.getShortName() + " (id=" + experiment.getId() + ") update failed.", e );
- }
- }
+ super.processBioAssaySets( expressionExperiments );
// print statistics
log.info( "\n\n========== Summary ==========" );
@@ -178,6 +134,60 @@ protected void processBioAssaySets( Collection expressionExperiment
log.info( "No publications found: " + Arrays.toString( failedEe.toArray() ) );
}
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment experiment ) {
+ try {
+ // if ( experiment.getPrimaryPublication() != null ) continue;
+ if ( experiment.getPrimaryPublication() == null ) {
+ log.warn( experiment + " has no existing primary publication, will attempt to find" );
+ } else {
+ log.info( experiment.getPrimaryPublication() + " has a primary publication, updating" );
+ }
+ experiment = ees.thawLite( experiment );
+
+ // get from GEO or get from a file
+ BibliographicReference ref = fetcher.retrieveByHTTP( pubmedIds.get( experiment.getShortName() ) );
+
+ if ( ref == null ) {
+ if ( this.pubmedIdFilename != null ) {
+ log.warn( "Pubmed ID for " + experiment.getShortName() + " was not found in "
+ + this.pubmedIdFilename );
+ }
+ try {
+ ref = finder.locatePrimaryReference( experiment );
+ } catch ( IOException e ) {
+ addErrorObject( experiment, e );
+ log.error( e );
+ return;
+ }
+
+ if ( ref == null ) {
+ addErrorObject( experiment, "No ref for " + experiment );
+ failedEe.add( experiment.getShortName() );
+ return;
+ }
+ }
+
+ // collect some statistics
+ if ( experiment.getPrimaryPublication() == null ) {
+ nullPubCount.add( experiment.getShortName() );
+ } else if ( experiment.getPrimaryPublication().getPubAccession().getAccession()
+ .equals( pubmedIds.get( experiment.getShortName() ).toString() ) ) {
+ samePubCount.add( experiment.getShortName() );
+ } else {
+ diffPubCount.add( experiment.getShortName() );
+ }
+
+ log.info( "Found pubAccession " + ref.getPubAccession().getAccession() + " for " + experiment );
+ ref = ( BibliographicReference ) persisterHelper.persist( ref );
+ experiment.setPrimaryPublication( ref );
+ ees.update( experiment );
+ addSuccessObject( experiment );
+ } catch ( Exception e ) {
+ addErrorObject( experiment, experiment.getShortName() + " (id=" + experiment.getId() + ") update failed.", e );
+ }
+ }
+
/**
* Reads pubmedID and experiment short name from the file and stores it in a HashMap. E.g.
* 22438826 GSE27715 22340501 GSE35802
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentVectorsManipulatingCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentVectorsManipulatingCli.java
new file mode 100644
index 0000000000..21eb2b4f55
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExpressionExperimentVectorsManipulatingCli.java
@@ -0,0 +1,150 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.util.Assert;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.DataVector;
+import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
+import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+
+import javax.annotation.Nullable;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.stream.Collectors;
+
+/**
+ * Base class for CLI tools that manipulate expression experiment vectors.
+ * @author poirigui
+ */
+public abstract class ExpressionExperimentVectorsManipulatingCli<T extends DataVector> extends ExpressionExperimentManipulatingCLI {
+
+ @Autowired
+ private QuantitationTypeService quantitationTypeService;
+
+ @Autowired
+ private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ /**
+ * The data vector type this CLI is working with.
+ */
+ private final Class<T> dataVectorType;
+
+ private boolean quantitationTypeIdentifierRequired = false;
+ private boolean usePreferredQuantitationType = false;
+
+ @Nullable
+ private String qtIdentifier;
+
+ protected ExpressionExperimentVectorsManipulatingCli( Class<T> dataVectorType ) {
+ this.dataVectorType = dataVectorType;
+ }
+
+ /**
+ * Makes it so that the quantitation type identifier is required.
+ *
+ * This is incompatible with {@link #setUsePreferredQuantitationType()}.
+ */
+ public void setQuantitationTypeIdentifierRequired() {
+ Assert.state( !this.quantitationTypeIdentifierRequired, "Quantitation type identifier is already required" );
+ Assert.state( !this.usePreferredQuantitationType, "Preferred quantitation type is enabled, cannot require an identifier." );
+ this.quantitationTypeIdentifierRequired = true;
+ }
+
+ /**
+ * Use the preferred QT if no identifier is provided, otherwise process all the QTs.
+ *
+ * This is incompatible with {@link #setQuantitationTypeIdentifierRequired()}.
+ */
+ public void setUsePreferredQuantitationType() {
+ Assert.state( !this.usePreferredQuantitationType, "Use preferred quantitation type is already set" );
+ Assert.state( !this.quantitationTypeIdentifierRequired, "Quantitation type identifier is required, cannot default to the preferred one." );
+ this.usePreferredQuantitationType = true;
+ }
+
+ @Override
+ protected final void buildExperimentOptions( Options options ) {
+ options.addOption( Option.builder( "qt" )
+ .longOpt( "quantitation-type" )
+ .hasArg()
+ .required( quantitationTypeIdentifierRequired )
+ .desc( "Identifier of the quantitation type to use"
+ + ( quantitationTypeIdentifierRequired ? ""
+ : " (defaults to " + ( usePreferredQuantitationType ? "the preferred one" : "all of them" ) + ")" ) )
+ .build() );
+ buildExperimentVectorsOptions( options );
+ }
+
+ protected void buildExperimentVectorsOptions( Options options ) {
+
+ }
+
+ @Override
+ protected final void processExperimentOptions( CommandLine commandLine ) throws ParseException {
+ qtIdentifier = commandLine.getOptionValue( "qt" );
+ processExperimentVectorsOptions( commandLine );
+ }
+
+ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws ParseException {
+
+ }
+
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) {
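+ // resolve the quantitation types to process: an explicit -qt identifier wins, then the preferred QT if enabled, otherwise all QTs mapped to the vector type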
+ Collection<QuantitationType> qts;
+ if ( qtIdentifier != null ) {
+ qts = Collections.singleton( entityLocator.locateQuantitationType( expressionExperiment, qtIdentifier, quantitationTypeService.getMappedDataVectorType( dataVectorType ) ) );
+ } else if ( usePreferredQuantitationType ) {
+ qts = quantitationTypeService.getMappedDataVectorType( dataVectorType ).stream()
+ .map( vt -> locatePreferredQuantitationType( expressionExperiment, vt ) )
+ .collect( Collectors.toSet() );
+ } else {
+ qts = quantitationTypeService.getMappedDataVectorType( dataVectorType ).stream()
+ .flatMap( vt -> quantitationTypeService.findByExpressionExperiment( expressionExperiment, vt ).stream() )
+ .collect( Collectors.toSet() );
+ }
+ for ( QuantitationType qt : qts ) {
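+ // a failure on one QT only aborts the run if abort-on-error is enabled; otherwise it is recorded and the next QT is processed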
+ try {
+ processExpressionExperimentVectors( expressionExperiment, qt );
+ } catch ( Exception e ) {
+ if ( isAbortOnError() ) {
+ throw e;
+ } else {
+ addErrorObject( expressionExperiment, "Error while processing " + qt, e );
+ }
+ }
+ }
+ }
+
+ /**
+ * Process a set of vectors identified by a {@link QuantitationType}.
+ */
+ protected abstract void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt );
+
+ private QuantitationType locatePreferredQuantitationType( ExpressionExperiment expressionExperiment, Class<? extends DataVector> dataVectorType ) {
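+ // raw vectors use the experiment's preferred QT, processed vectors are expected to have exactly one QT, and single-cell vectors use the preferred single-cell QT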
+ if ( RawExpressionDataVector.class.isAssignableFrom( dataVectorType ) ) {
+ return eeService.getPreferredQuantitationType( expressionExperiment );
+ } else if ( ProcessedExpressionDataVector.class.isAssignableFrom( dataVectorType ) ) {
+ Collection<QuantitationType> results = quantitationTypeService.findByExpressionExperiment( expressionExperiment, dataVectorType );
+ if ( results.isEmpty() ) {
+ return null;
+ } else if ( results.size() > 1 ) {
+ throw new IllegalStateException( expressionExperiment + " has more than one set of processed vectors." );
+ } else {
+ return results.iterator().next();
+ }
+ } else if ( SingleCellExpressionDataVector.class.isAssignableFrom( dataVectorType ) ) {
+ return singleCellExpressionExperimentService.getPreferredSingleCellQuantitationType( expressionExperiment )
+ .orElseThrow( () -> new IllegalStateException( expressionExperiment + " does not have a preferred set of single-cell vectors." ) );
+ } else {
+ throw new IllegalArgumentException( "Unsupported data vector type: " + dataVectorType );
+ }
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseAdderCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseAdderCli.java
index 6a20734039..33df8e04b4 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseAdderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseAdderCli.java
@@ -70,7 +70,7 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
ExternalDatabase created = externalDatabaseService.create( ExternalDatabase.Factory.newInstance( name, type ) );
log.info( "Created " + created );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseOverviewCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseOverviewCli.java
index 1db4dfce14..540dd5c09c 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseOverviewCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseOverviewCli.java
@@ -47,7 +47,7 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
externalDatabaseService.loadAllWithAuditTrail().stream()
.sorted( Comparator.comparing( ExternalDatabase::getLastUpdated, Comparator.nullsLast( Comparator.reverseOrder() ) ) )
.forEachOrdered( ed -> addSuccessObject( ed, summarize( ed ) ) );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCli.java
index 660932a35b..c370dbb5d3 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCli.java
@@ -39,7 +39,7 @@ public class ExternalDatabaseUpdaterCli extends AbstractAuthenticatedCLI {
private Date lastUpdated;
public ExternalDatabaseUpdaterCli() {
- setRequireLogin( true );
+ setRequireLogin();
}
@Override
@@ -114,7 +114,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
ExternalDatabase ed = requireNonNull( externalDatabaseService.findByNameWithAuditTrail( name ),
String.format( "No database with name %s.", name ) );
if ( description != null ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalFileGeneLoaderCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalFileGeneLoaderCLI.java
index 87e4b7d4ab..7852b4b334 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalFileGeneLoaderCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ExternalFileGeneLoaderCLI.java
@@ -22,6 +22,7 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.loader.genome.gene.ExternalFileGeneLoaderService;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
import ubic.gemma.core.util.CLI;
@@ -36,11 +37,19 @@
*/
public class ExternalFileGeneLoaderCLI extends AbstractAuthenticatedCLI {
+ @Autowired
+ private ExternalFileGeneLoaderService loader;
+
private String directGeneInputFileName = null;
private String taxonName;
public ExternalFileGeneLoaderCLI() {
- setRequireLogin( true );
+ setRequireLogin();
+ }
+
+ @Override
+ public String getCommandName() {
+ return "loadGenesFromFile";
}
@Override
@@ -48,6 +57,11 @@ public String getShortDesc() {
return "loading genes from a non-NCBI files; only used for species like salmon";
}
+ @Override
+ public CommandGroup getCommandGroup() {
+ return CLI.CommandGroup.SYSTEM;
+ }
+
/**
* This method is called at the end of processCommandLine
*/
@@ -68,12 +82,6 @@ protected void processOptions( CommandLine commandLine ) {
}
- @Override
- public String getCommandName() {
- return "loadGenesFromFile";
- }
-
- @SuppressWarnings("static-access")
@Override
protected void buildOptions( Options options ) {
Option directGene = Option.builder( "f" )
@@ -86,19 +94,11 @@ protected void buildOptions( Options options ) {
options.addOption( taxonNameOption );
}
- @Override
- protected void doWork() throws Exception {
- this.processGeneList();
- }
-
/**
* Main entry point to service class which reads a gene file and persists the genes in that file.
*/
- @SuppressWarnings({ "unused", "WeakerAccess" }) // Possible external use
- public void processGeneList() {
-
- ExternalFileGeneLoaderService loader = this.getBean( ExternalFileGeneLoaderService.class );
-
+ @Override
+ protected void doAuthenticatedWork() throws Exception {
try {
int count = loader.load( directGeneInputFileName, taxonName );
System.out.println( count + " genes loaded successfully " );
@@ -114,12 +114,5 @@ public void processGeneList() {
System.out.println( "Gene file persisting error: " + e.getMessage() );
throw new RuntimeException( e );
}
-
}
-
- @Override
- public CommandGroup getCommandGroup() {
- return CLI.CommandGroup.SYSTEM;
- }
-
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FactorValueMigratorCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FactorValueMigratorCLI.java
index f1184fccee..5554ea9943 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FactorValueMigratorCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FactorValueMigratorCLI.java
@@ -160,7 +160,7 @@ private Long parseLongIfNonBlank( String s ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
if ( noop ) {
log.info( "Noop mode enabled, no statements will be saved." );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
index 6d823e5e4a..27e28cfe91 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java
@@ -60,7 +60,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
if ( autoLoadOntologies ) {
throw new IllegalArgumentException( "Auto-loading of ontologies is enabled, disable it by setting load.ontologies=false in Gemma.properties." );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FixOntologyTermLabelsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FixOntologyTermLabelsCli.java
index 2ee1b8ab0b..dfdb758979 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FixOntologyTermLabelsCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FixOntologyTermLabelsCli.java
@@ -66,7 +66,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
if ( autoLoadOntologies ) {
throw new IllegalArgumentException( "Auto-loading of ontologies is enabled, disable it by setting load.ontologies=false in Gemma.properties." );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/GeeqCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/GeeqCli.java
index 9994be9c31..4e05e8a452 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/GeeqCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/GeeqCli.java
@@ -50,21 +50,17 @@ public String getShortDesc() {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( 'm' ) ) {
this.mode = GeeqService.ScoreMode.valueOf( commandLine.getOptionValue( 'm' ) );
}
}
@Override
- protected void buildOptions( Options options ) {
-
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
super.addAutoOption( options, GeeqEvent.class );
super.addLimitingDateOption( options );
super.addForceOption( options );
-
Option modeOption = Option.builder( "m" ).longOpt( "mode" )
.desc( "If specified, switches the scoring mode. By default the mode is set to 'all'" //
+ "\n Possible values are:" //
@@ -78,8 +74,7 @@ protected void buildOptions( Options options ) {
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- if ( !force && this.noNeedToRun( ee, GeeqEvent.class ) ) {
- log.info( "Can't or don't need to run " + ee );
+ if ( this.noNeedToRun( ee, GeeqEvent.class ) ) {
return;
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/GemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/GemmaCLI.java
index adca9fd621..553e546873 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/GemmaCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/GemmaCLI.java
@@ -350,7 +350,7 @@ private static void printHelp( Options options, @Nullable SortedMap --help'." );
- HelpUtils.printHelp( writer, "", options, false, null, footer.toString() );
+ HelpUtils.printHelp( writer, "gemma-cli [options] [commandName] [commandOptions]", options, null, footer.toString() );
}
@Value
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/GenerateDatabaseUpdateCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/GenerateDatabaseUpdateCli.java
index 0ea5180c36..30f0af32cb 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/GenerateDatabaseUpdateCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/GenerateDatabaseUpdateCli.java
@@ -20,6 +20,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
+import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
@@ -29,17 +30,24 @@
*/
public class GenerateDatabaseUpdateCli extends AbstractCLI {
+ private static final String
+ CREATE_OPTION = "c",
+ VENDOR_OPTION = "vendor",
+ OUTPUT_FILE_OPTION = "o";
+
@Autowired
private DataSource dataSource;
@Autowired
private LocalSessionFactoryBean factory;
- @Nullable
- private Path outputFile;
+ private boolean create;
private Dialect dialect;
+ @Nullable
+ private Path outputFile;
+
@Nullable
@Override
public String getCommandName() {
@@ -49,7 +57,7 @@ public String getCommandName() {
@Nullable
@Override
public String getShortDesc() {
- return "Generate a script to update the database";
+ return "Generate SQL statements to update the database";
}
@Override
@@ -59,14 +67,16 @@ public CommandGroup getCommandGroup() {
@Override
protected void buildOptions( Options options ) {
- options.addOption( "d", "dialect", true, "Dialect to use to generate SQL statements (either mysql or h2, defaults to mysql)" );
- options.addOption( Option.builder( "o" ).longOpt( "output-file" ).hasArg().type( Path.class ).desc( "File destination for the update script (defaults to stdout)" ).build() );
+ options.addOption( CREATE_OPTION, "create", false, "Generate a creation script" );
+ options.addOption( VENDOR_OPTION, "vendor", true, "Vendor to use to generate SQL statements (either mysql or h2, defaults to mysql)" );
+ options.addOption( Option.builder( OUTPUT_FILE_OPTION ).longOpt( "output-file" ).hasArg().type( Path.class ).desc( "File destination for the update script (defaults to stdout)" ).build() );
}
@Override
protected void processOptions( CommandLine commandLine ) throws ParseException {
- if ( commandLine.hasOption( "d" ) ) {
- String dialectStr = commandLine.getOptionValue( "d" );
+ create = commandLine.hasOption( CREATE_OPTION );
+ if ( commandLine.hasOption( VENDOR_OPTION ) ) {
+ String dialectStr = commandLine.getOptionValue( VENDOR_OPTION );
if ( "mysql".equalsIgnoreCase( dialectStr ) ) {
dialect = new MySQL57InnoDBDialect();
} else if ( "h2".equalsIgnoreCase( dialectStr ) ) {
@@ -78,22 +88,27 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
log.info( "No dialect specified, defaulting to MySQL 5.7." );
dialect = new MySQL57InnoDBDialect();
}
- outputFile = commandLine.getParsedOptionValue( "o" );
+ outputFile = commandLine.getParsedOptionValue( OUTPUT_FILE_OPTION );
}
@Override
protected void doWork() throws Exception {
- DatabaseMetadata dm;
- try ( Connection c = dataSource.getConnection() ) {
- dm = new DatabaseMetadata( c, dialect, factory.getConfiguration() );
+ List<String> sqlStatements;
+ if ( create ) {
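+ // emit the full schema creation DDL derived from the Hibernate mappings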
+ sqlStatements = Arrays.asList( factory.getConfiguration().generateSchemaCreationScript( dialect ) );
+ } else {
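+ // compare the live database metadata against the mappings to produce incremental update statements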
+ DatabaseMetadata dm;
+ try ( Connection c = dataSource.getConnection() ) {
+ dm = new DatabaseMetadata( c, dialect, factory.getConfiguration() );
+ }
+ sqlStatements = factory.getConfiguration()
+ .generateSchemaUpdateScriptList( dialect, dm )
+ .stream()
+ .map( SchemaUpdateScript::getScript )
+ .collect( Collectors.toList() );
}
- List<String> sqlUpdates = factory.getConfiguration()
- .generateSchemaUpdateScriptList( dialect, dm )
- .stream()
- .map( SchemaUpdateScript::getScript )
- .collect( Collectors.toList() );
try ( PrintWriter writer = getWriter() ) {
- for ( String sqlUpdate : sqlUpdates ) {
+ for ( String sqlUpdate : sqlStatements ) {
writer.println( sqlUpdate + ";" );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/GenericGenelistDesignGenerator.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/GenericGenelistDesignGenerator.java
index 0f93a49fef..8f8c261fff 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/GenericGenelistDesignGenerator.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/GenericGenelistDesignGenerator.java
@@ -109,7 +109,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
ArrayDesign platform = arrayDesignService.findByShortName( this.platformShortName );
platform = arrayDesignService.thaw( platform );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/GeoGrabberCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/GeoGrabberCli.java
index 85e06bbe3b..5a6b02fcb8 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/GeoGrabberCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/GeoGrabberCli.java
@@ -230,7 +230,7 @@ protected void processOptions( CommandLine commandLine ) throws org.apache.commo
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
switch ( mode ) {
case GET_PLATFORMS:
getPlatforms();
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
index 69234205a5..31dd0007a6 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java
@@ -25,7 +25,6 @@
import org.apache.commons.lang3.time.StopWatch;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.util.FileTools;
import ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisConfig;
import ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisConfig.NormalizationMethod;
@@ -49,7 +48,7 @@
import ubic.gemma.persistence.service.association.coexpression.CoexpressionService;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.genome.taxon.TaxonService;
-import ubic.gemma.persistence.util.EntityUtils;
+import ubic.gemma.persistence.util.IdentifiableUtils;
import java.io.File;
import java.io.IOException;
@@ -100,9 +99,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
-
+ protected void buildExperimentOptions( Options options ) {
super.addLimitingDateOption( options );
Option nodeDegreeUpdate = Option.builder( "n" ).desc( "Update the node degree for taxon given by -t option. All other options ignored." )
@@ -210,12 +207,10 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
-
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( "delete" ) ) {
this.deleteAnalyses = true;
- this.force = true;
+ setForce();
return;
} else if ( commandLine.hasOption( "init" ) ) {
initializeFromOldData = true;
@@ -329,18 +324,15 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
@Override
- protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ protected void doAuthenticatedWork() throws Exception {
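+ // the special modes (initializing from old data, updating node degrees, loading vectors from a file) bypass the usual per-experiment processing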
if ( initializeFromOldData ) {
log.info( "Initializing links from old data for " + taxon );
linkAnalysisPersister.initializeLinksFromOldData( taxon );
- return;
} else if ( updateNodeDegree ) {
// we waste some time here getting the experiments.
this.loadTaxon();
coexpressionService.updateNodeDegrees( taxon );
- }
-
- if ( this.dataFileName != null ) {
+ } else if ( this.dataFileName != null ) {
/*
* Read vectors from file. Could provide as a matrix, but it's easier to provide vectors (less mess in later
* code)
@@ -363,7 +355,6 @@ protected void processBioAssaySets( Collection expressionExperiment
QuantitationType qtype = this.makeQuantitationType();
- ByteArrayConverter bArrayConverter = new ByteArrayConverter();
try ( InputStream data = Files.newInputStream( this.dataFileName ) ) {
DoubleMatrix<String, String> matrix = simpleExpressionDataLoaderService.parse( data );
@@ -371,10 +362,9 @@ protected void processBioAssaySets( Collection expressionExperiment
BioAssayDimension bad = this.makeBioAssayDimension( arrayDesign, matrix );
for ( int i = 0; i < matrix.rows(); i++ ) {
- byte[] bData = bArrayConverter.doubleArrayToBytes( matrix.getRow( i ) );
ProcessedExpressionDataVector vector = ProcessedExpressionDataVector.Factory.newInstance();
- vector.setData( bData );
+ vector.setDataAsDoubles( matrix.getRow( i ) );
CompositeSequence cs = csMap.get( matrix.getRowName( i ) );
if ( cs == null ) {
@@ -396,40 +386,74 @@ protected void processBioAssaySets( Collection expressionExperiment
this.linkAnalysisService.processVectors( taxon, dataVectors, filterConfig, linkAnalysisConfig );
} else {
+ super.doAuthenticatedWork();
+ }
+ }
- /*
- * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
- */
- List<BioAssaySet> sees = new ArrayList<>( expressionExperiments );
+ @Override
+ protected Collection<BioAssaySet> preprocessBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ /*
+ * Do in decreasing order of size, to help capture more links earlier - reduces fragmentation.
+ */
+ List<BioAssaySet> sees = new ArrayList<>( expressionExperiments );
- if ( expressionExperiments.size() > 1 ) {
- log.info( "Sorting data sets by number of samples, doing large data sets first." );
+ if ( expressionExperiments.size() > 1 ) {
+ log.info( "Sorting data sets by number of samples, doing large data sets first." );
- Collection<ExpressionExperimentValueObject> vos = eeService
- .loadValueObjectsByIds( EntityUtils.getIds( expressionExperiments ), true );
- final Map<Long, ExpressionExperimentValueObject> idMap = EntityUtils.getIdMap( vos );
+ Collection<ExpressionExperimentValueObject> vos = eeService
+ .loadValueObjectsByIds( IdentifiableUtils.getIds( expressionExperiments ), true );
+ final Map<Long, ExpressionExperimentValueObject> idMap = IdentifiableUtils.getIdMap( vos );
- sees.sort( ( o1, o2 ) -> {
+ sees.sort( ( o1, o2 ) -> {
- ExpressionExperimentValueObject e1 = idMap.get( o1.getId() );
- ExpressionExperimentValueObject e2 = idMap.get( o2.getId() );
+ ExpressionExperimentValueObject e1 = idMap.get( o1.getId() );
+ ExpressionExperimentValueObject e2 = idMap.get( o2.getId() );
- assert e1 != null : "No valueobject: " + e2;
- assert e2 != null : "No valueobject: " + e1;
+ assert e1 != null : "No valueobject: " + e2;
+ assert e2 != null : "No valueobject: " + e1;
- return -e1.getBioMaterialCount().compareTo( e2.getBioMaterialCount() );
+ return -e1.getBioMaterialCount().compareTo( e2.getBioMaterialCount() );
- } );
- }
+ } );
+ }
- for ( BioAssaySet ee : sees ) {
- if ( ee instanceof ExpressionExperiment ) {
- this.processExperiment( ( ExpressionExperiment ) ee );
- } else {
- throw new UnsupportedOperationException( "Can't handle non-EE BioAssaySets yet" );
- }
+ return sees;
+ }
+
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
+ ee = eeService.thaw( ee );
+
+ if ( this.deleteAnalyses ) {
+ log.info( "======= Deleting coexpression analysis (if any) for: " + ee );
+ if ( !linkAnalysisPersister.deleteAnalyses( ee ) ) {
+ throw new RuntimeException( "Seems to not have any eligible link analysis to remove" );
}
+ return;
+ }
+
+ /*
+ * If we're not using the database, always run it.
+ */
+ if ( linkAnalysisConfig.isUseDb() && this.noNeedToRun( ee, LinkAnalysisEvent.class ) ) {
+ return;
+ }
+
+ /*
+ * Note that auditing is handled by the service.
+ */
+ StopWatch sw = new StopWatch();
+ sw.start();
+
+ if ( linkAnalysisConfig.isTextOut() ) {
+ linkAnalysisConfig.setOutputFile( new File( FileTools.cleanForFileName( ee.getShortName() ) + "-links.txt" ) );
}
+
+ log.info( "==== Starting: [" + ee.getShortName() + "] ======" );
+
+ linkAnalysisService.process( ee, filterConfig, linkAnalysisConfig );
+ log.info( "==== Done: [" + ee.getShortName() + "] ======" );
+ log.info( "Time elapsed: " + String.format( "%.2f", sw.getTime() / 1000.0 / 60.0 ) + " minutes" );
}
private void buildFilterConfigOptions( Options options ) {
@@ -520,42 +544,4 @@ private QuantitationType makeQuantitationType() {
qtype.setIsRatio( false ); // this shouldn't get used, just filled in to keep everybody happy.
return qtype;
}
-
- private void processExperiment( ExpressionExperiment ee ) {
- ee = eeService.thaw( ee );
-
- if ( this.deleteAnalyses ) {
- log.info( "======= Deleting coexpression analysis (if any) for: " + ee );
- if ( !linkAnalysisPersister.deleteAnalyses( ee ) ) {
- throw new RuntimeException( "Seems to not have any eligible link analysis to remove" );
- }
- return;
- }
-
- /*
- * If we're not using the database, always run it.
- */
- if ( linkAnalysisConfig.isUseDb() && !force && this.noNeedToRun( ee, LinkAnalysisEvent.class ) ) {
- log.info( "Can't or Don't need to run " + ee );
- return;
- }
-
- /*
- * Note that auditing is handled by the service.
- */
- StopWatch sw = new StopWatch();
- sw.start();
-
- if ( linkAnalysisConfig.isTextOut() ) {
- linkAnalysisConfig.setOutputFile( new File( FileTools.cleanForFileName( ee.getShortName() ) + "-links.txt" ) );
- }
-
- log.info( "==== Starting: [" + ee.getShortName() + "] ======" );
-
- linkAnalysisService.process( ee, filterConfig, linkAnalysisConfig );
- log.info( "==== Done: [" + ee.getShortName() + "] ======" );
- log.info( "Time elapsed: " + String.format( "%.2f", sw.getTime() / 1000.0 / 60.0 ) + " minutes" );
-
- }
-
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ListQuantitationTypesCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ListQuantitationTypesCli.java
new file mode 100644
index 0000000000..a0413ec3de
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ListQuantitationTypesCli.java
@@ -0,0 +1,42 @@
+package ubic.gemma.core.apps;
+
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.DataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+
+/**
+ * @author poirigui
+ */
+public class ListQuantitationTypesCli extends ExpressionExperimentVectorsManipulatingCli<DataVector> {
+
+ public ListQuantitationTypesCli() {
+ super( DataVector.class );
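+ // only entity references are needed for listing; defaults to all datasets when none are specified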
+ setUseReferencesIfPossible();
+ setDefaultToAll();
+ }
+
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment expressionExperiment ) {
+ System.out.println( formatExperiment( expressionExperiment ) );
+ super.processExpressionExperiment( expressionExperiment );
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ System.out.println( "\t" + qt );
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "listQuantitationTypes";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "List the available quantitation types for an experiment.";
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadExpressionDataCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadExpressionDataCli.java
index 62eab97bac..a778b0ad4d 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadExpressionDataCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadExpressionDataCli.java
@@ -26,7 +26,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.preprocess.PreprocessingException;
import ubic.gemma.core.analysis.preprocess.PreprocessorService;
-import ubic.gemma.core.loader.expression.geo.GeoDomainObjectGenerator;
import ubic.gemma.core.loader.expression.geo.service.GeoService;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
import ubic.gemma.model.common.description.DatabaseEntry;
@@ -37,9 +36,8 @@
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Paths;
import java.util.Collection;
/**
@@ -132,7 +130,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
if ( accessions == null && accessionFile == null ) {
throw new IllegalArgumentException(
"You must specific either a file or accessions on the command line" );
@@ -176,8 +174,7 @@ protected void doWork() throws Exception {
if ( accessionFile != null ) {
log.info( "Loading accessions from " + accessionFile );
- InputStream is = new FileInputStream( accessionFile );
- try ( BufferedReader br = new BufferedReader( new InputStreamReader( is ) ) ) {
+ try ( BufferedReader br = Files.newBufferedReader( Paths.get( accessionFile ) ) ) {
String accession;
while ( ( accession = br.readLine() ) != null ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadSimpleExpressionDataCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadSimpleExpressionDataCli.java
index 939ad29bad..32bd42e416 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadSimpleExpressionDataCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LoadSimpleExpressionDataCli.java
@@ -116,7 +116,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
if ( this.fileName != null ) {
log.info( "Loading experiments from " + this.fileName );
InputStream is = new FileInputStream( new File( this.dirName, this.fileName ) );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/MeshTermFetcherCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/MeshTermFetcherCli.java
index 119eebb041..3ae269161b 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/MeshTermFetcherCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/MeshTermFetcherCli.java
@@ -67,7 +67,6 @@ public CommandGroup getCommandGroup() {
return CLI.CommandGroup.MISC;
}
- @SuppressWarnings("static-access")
@Override
protected void buildOptions( Options options ) {
Option fileOption = Option.builder( "f" )
@@ -80,6 +79,16 @@ protected void buildOptions( Options options ) {
options.addOption( "m", "Use major subjects only" );
}
+ @Override
+ protected void processOptions( CommandLine commandLine ) {
+ if ( commandLine.hasOption( 'f' ) ) {
+ this.file = commandLine.getOptionValue( 'f' );
+ }
+ if ( commandLine.hasOption( 'm' ) ) {
+ this.majorTopicsOnly = true;
+ }
+ }
+
@Override
protected void doWork() throws Exception {
PubMedXMLFetcher fetcher = new PubMedXMLFetcher( ncbiApiKey );
@@ -102,16 +111,6 @@ protected void doWork() throws Exception {
}
}
- @Override
- protected void processOptions( CommandLine commandLine ) {
- if ( commandLine.hasOption( 'f' ) ) {
- this.file = commandLine.getOptionValue( 'f' );
- }
- if ( commandLine.hasOption( 'm' ) ) {
- this.majorTopicsOnly = true;
- }
- }
-
private Collection readIdsFromFile( String inFile ) throws IOException {
log.info( "Reading " + inFile );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/MultifunctionalityCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/MultifunctionalityCli.java
index b6347da61d..c43828df59 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/MultifunctionalityCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/MultifunctionalityCli.java
@@ -17,6 +17,7 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.service.GeneMultifunctionalityPopulationService;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
import ubic.gemma.core.util.CLI;
@@ -28,10 +29,15 @@
*/
public class MultifunctionalityCli extends AbstractAuthenticatedCLI {
+ @Autowired
+ private GeneMultifunctionalityPopulationService gfs;
+ @Autowired
+ private TaxonService taxonService;
+
private Taxon taxon;
public MultifunctionalityCli() {
- setRequireLogin( true );
+ setRequireLogin();
}
@Override
@@ -39,38 +45,29 @@ public String getCommandName() {
return "updateMultifunc";
}
- @SuppressWarnings("static-access")
+
@Override
- protected void buildOptions( Options options ) {
- Option taxonOption = Option.builder( "t" ).hasArg()
- .desc( "Taxon to process" ).longOpt( "taxon" )
- .build();
- options.addOption( taxonOption );
+ public String getShortDesc() {
+ return "Update or create gene multifunctionality metrics";
}
@Override
- protected void doWork() throws Exception {
- GeneMultifunctionalityPopulationService gfs = this.getBean( GeneMultifunctionalityPopulationService.class );
-
- if ( this.taxon != null ) {
- gfs.updateMultifunctionality( taxon );
- } else {
- gfs.updateMultifunctionality();
- }
+ public CommandGroup getCommandGroup() {
+ return CLI.CommandGroup.SYSTEM;
}
@Override
- public String getShortDesc() {
- return "Update or create gene multifunctionality metrics";
+ protected void buildOptions( Options options ) {
+ Option taxonOption = Option.builder( "t" ).hasArg()
+ .desc( "Taxon to process" ).longOpt( "taxon" )
+ .build();
+ options.addOption( taxonOption );
}
@Override
-
protected void processOptions( CommandLine commandLine ) {
-
if ( commandLine.hasOption( 't' ) ) {
String taxonName = commandLine.getOptionValue( 't' );
- TaxonService taxonService = this.getBean( TaxonService.class );
this.taxon = taxonService.findByCommonName( taxonName );
if ( taxon == null ) {
log.error( "ERROR: Cannot find taxon " + taxonName );
@@ -79,8 +76,11 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- public CommandGroup getCommandGroup() {
- return CLI.CommandGroup.SYSTEM;
+ protected void doAuthenticatedWork() {
+ if ( this.taxon != null ) {
+ gfs.updateMultifunctionality( taxon );
+ } else {
+ gfs.updateMultifunctionality();
+ }
}
-
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLI.java
index 1508cdca69..d0305b9f06 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLI.java
@@ -28,7 +28,6 @@
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.model.common.description.ExternalDatabases;
-import ubic.gemma.model.common.description.LocalFile;
import ubic.gemma.model.genome.Taxon;
import ubic.gemma.persistence.persister.Persister;
import ubic.gemma.persistence.service.association.Gene2GOAssociationService;
@@ -75,7 +74,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
NCBIGene2GOAssociationLoader gene2GOAssLoader = new NCBIGene2GOAssociationLoader();
gene2GOAssLoader.setPersisterHelper( persisterHelper );
@@ -85,21 +84,19 @@ protected void doWork() throws Exception {
HttpFetcher fetcher = new HttpFetcher();
- Collection<LocalFile> files;
+ Collection<File> files;
if ( filePath != null ) {
File f = new File( filePath );
if ( !f.canRead() ) {
throw new IOException( "Cannot read from " + filePath );
}
files = new HashSet<>();
- LocalFile lf = LocalFile.Factory.newInstance();
- lf.setLocalURL( f.toURI() );
- files.add( lf );
+ files.add( f );
} else {
files = fetcher.fetch( "ftp://ftp.ncbi.nih.gov/gene/DATA/" + NCBIGene2GOAssociationLoaderCLI.GENE2GO_FILE );
}
assert files.size() == 1;
- LocalFile gene2Gofile = files.iterator().next();
+ File gene2Gofile = files.iterator().next();
log.info( "Removing all old GO associations" );
gene2GOAssociationService.removeAll();
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/NcbiGeneLoaderCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/NcbiGeneLoaderCLI.java
index 6aef05a2d6..c027c92e18 100755
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/NcbiGeneLoaderCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/NcbiGeneLoaderCLI.java
@@ -60,7 +60,7 @@ public class NcbiGeneLoaderCLI extends AbstractAuthenticatedCLI {
private Integer startNcbiId = null;
public NcbiGeneLoaderCLI() {
- setRequireLogin( true );
+ setRequireLogin();
}
@Override
@@ -98,7 +98,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
loader = new NcbiGeneLoader();
loader.setTaxonService( taxonService );
loader.setPersisterHelper( persisterHelper );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataComputeCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataComputeCLI.java
index c2198cf87a..2f8890d836 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataComputeCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataComputeCLI.java
@@ -23,8 +23,8 @@
import org.apache.commons.cli.ParseException;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.analysis.preprocess.PreprocessorService;
-import ubic.gemma.core.analysis.preprocess.QuantitationMismatchPreprocessingException;
-import ubic.gemma.core.datastructure.matrix.SuspiciousValuesForQuantitationException;
+import ubic.gemma.core.analysis.preprocess.QuantitationTypeDetectionRelatedPreprocessingException;
+import ubic.gemma.core.analysis.preprocess.detect.SuspiciousValuesForQuantitationException;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
@@ -59,8 +59,7 @@ public class ProcessedDataComputeCLI extends ExpressionExperimentManipulatingCLI
private boolean ignoreQuantitationMismatch = false;
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
addForceOption( options );
addLimitingDateOption( options );
options.addOption( UPDATE_DIAGNOSTICS_OPTION, false,
@@ -71,8 +70,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
this.updateDiagnostics = commandLine.hasOption( UPDATE_DIAGNOSTICS_OPTION );
this.updateRanks = commandLine.hasOption( UPDATE_RANKS_OPTION );
this.ignoreQuantitationMismatch = commandLine.hasOption( IGNORE_QUANTITATION_MISMATCH_OPTION );
@@ -90,8 +88,8 @@ public String getShortDesc() {
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- if ( expressionExperimentService.isTroubled( ee ) && !force ) {
- log.info( "Skipping troubled experiment " + ee.getShortName() );
+ if ( expressionExperimentService.isTroubled( ee ) && !isForce() ) {
+ addErrorObject( ee, "Skipping troubled experiment " + ee.getShortName() + ", use -" + FORCE_OPTION + " to process." );
return;
}
try {
@@ -122,17 +120,17 @@ protected void processExpressionExperiment( ExpressionExperiment ee ) {
// Note the auditing is done by the service.
addSuccessObject( ee );
- } catch ( QuantitationMismatchPreprocessingException e ) {
- // TODO: e.getCause().getQuantitationType();
- QuantitationType qt = e.getCause().getQuantitationType();
+ } catch ( QuantitationTypeDetectionRelatedPreprocessingException e ) {
if ( e.getCause() instanceof SuspiciousValuesForQuantitationException ) {
+ SuspiciousValuesForQuantitationException actual = ( SuspiciousValuesForQuantitationException ) e.getCause();
+ QuantitationType qt = actual.getQuantitationType();
addErrorObject( String.format( "%s:\n%s", ee, qt ), String.format( "The following issues were found in expression data:\n\n - %s\n\nYou may ignore this by setting the -%s option.",
- ( ( SuspiciousValuesForQuantitationException ) e.getCause() )
+ actual
.getSuspiciousValues().stream()
.map( SuspiciousValuesForQuantitationException.SuspiciousValueResult::toString )
.collect( Collectors.joining( "\n - " ) ), IGNORE_QUANTITATION_MISMATCH_OPTION ) );
} else {
- addErrorObject( String.format( "%s:\n%s", ee, qt ), e.getCause().getMessage() );
+ addErrorObject( ee, e );
}
} catch ( Exception e ) {
addErrorObject( ee, e );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataDeleterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataDeleterCli.java
new file mode 100644
index 0000000000..dc001ea469
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ProcessedDataDeleterCli.java
@@ -0,0 +1,36 @@
+package ubic.gemma.core.apps;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.loader.expression.DataDeleterService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+
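+/**
+ * Deletes the processed expression data vectors of an experiment.
+ */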
+public class ProcessedDataDeleterCli extends ExpressionExperimentVectorsManipulatingCli<ProcessedExpressionDataVector> {
+
+ @Autowired
+ private DataDeleterService dataDeleterService;
+
+ public ProcessedDataDeleterCli() {
+ super( ProcessedExpressionDataVector.class );
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "deleteProcessedData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Delete processed expression data";
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ dataDeleterService.deleteProcessedData( ee );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/PubMedLoaderCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/PubMedLoaderCli.java
index d8fa7d9d70..0d1f86852e 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/PubMedLoaderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/PubMedLoaderCli.java
@@ -21,6 +21,7 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.loader.entrez.pubmed.PubMedService;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
@@ -33,43 +34,42 @@
*/
public class PubMedLoaderCli extends AbstractAuthenticatedCLI {
+ @Autowired
+ private PubMedService pms;
+
private String directory;
@Override
- public CommandGroup getCommandGroup() {
- return CommandGroup.MISC;
+ public String getCommandName() {
+ return "pubmedLoad";
}
@Override
- public String getCommandName() {
- return "pubmedLoad";
+ public String getShortDesc() {
+ return "Loads PubMed records into the database from XML files";
+ }
+
+ @Override
+ public CommandGroup getCommandGroup() {
+ return CommandGroup.MISC;
}
- @SuppressWarnings("static-access")
@Override
protected void buildOptions( Options options ) {
Option fileOption = Option.builder( "d" ).required().hasArg().argName( "Directory" )
.desc( "Directory of PubMed XML files to load" ).longOpt( "dir" ).build();
options.addOption( fileOption );
-
}
@Override
- protected void doWork() throws Exception {
- PubMedService pms = this.getBean( PubMedService.class );
+ protected void doAuthenticatedWork() {
pms.loadFromDirectory( new File( directory ) );
}
- @Override
- public String getShortDesc() {
- return "Loads PubMed records into the database from XML files";
- }
-
@Override
protected void processOptions( CommandLine commandLine ) {
if ( commandLine.hasOption( 'd' ) ) {
this.directory = commandLine.getOptionValue( 'd' );
}
}
-
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqBatchInfoCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqBatchInfoCli.java
index b224d3df79..e4c8e4f195 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqBatchInfoCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqBatchInfoCli.java
@@ -48,19 +48,18 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
super.addForceOption( options );
}
@Override
- protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ protected Collection<BioAssaySet> preprocessBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
log.info( "Checking folders for existing experiments in " + fastqRootDir );
- super.processBioAssaySets( expressionExperiments );
+ return super.preprocessBioAssaySets( expressionExperiments );
}
@Override
protected void processExpressionExperiment( ExpressionExperiment ee ) {
- batchService.fillBatchInformation( ee, this.force );
+ batchService.fillBatchInformation( ee, isForce() );
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqDataAddCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqDataAddCli.java
index 3d6440901d..c92f190a9a 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqDataAddCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/RNASeqDataAddCli.java
@@ -18,7 +18,6 @@
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import org.apache.commons.io.FileUtils;
import org.springframework.beans.factory.annotation.Autowired;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.io.reader.DoubleMatrixReader;
@@ -29,13 +28,12 @@
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentMetaFileType;
-import java.io.File;
import java.io.IOException;
-import java.nio.file.StandardCopyOption;
-import java.text.MessageFormat;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.Collection;
/**
@@ -50,6 +48,12 @@ public class RNASeqDataAddCli extends ExpressionExperimentManipulatingCLI {
private static final String METADATAOPT = "rlen";
private static final String RPKM_FILE_OPT = "rpkm";
private static final String MULTIQC_METADATA_FILE_OPT = "multiqc";
+
+ @Autowired
+ private DataUpdater serv;
+ @Autowired
+ private ExpressionDataFileService expressionDataFileService;
+
private boolean allowMissingSamples = false;
private String countFile = null;
private Boolean isPairedReads = null;
@@ -57,14 +61,7 @@ public class RNASeqDataAddCli extends ExpressionExperimentManipulatingCLI {
private Integer readLength = null;
private String rpkmFile = null;
private boolean justbackfillLog2cpm = false;
- private File qualityControlReportFile = null;
-
- @Autowired
- private DataUpdater serv;
- @Autowired
- private ExpressionDataFileService expressionDataFileService;
- @Autowired
- private ArrayDesignService arrayDesignService;
+ private Path qualityControlReportFile = null;
@Override
public String getCommandName() {
@@ -77,8 +74,7 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
options.addOption( Option.builder( RNASeqDataAddCli.RPKM_FILE_OPT ).longOpt( null ).desc( "File with RPKM data" ).argName( "file path" ).hasArg().build() );
options.addOption( Option.builder( RNASeqDataAddCli.COUNT_FILE_OPT ).longOpt( null ).desc( "File with count data" ).argName( "file path" ).hasArg().build() );
@@ -95,9 +91,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
-
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( commandLine.hasOption( "log2cpm" ) ) {
this.justbackfillLog2cpm = true;
@@ -151,55 +145,47 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
this.platformName = commandLine.getOptionValue( "a" );
if ( commandLine.hasOption( RNASeqDataAddCli.MULTIQC_METADATA_FILE_OPT ) ) {
- qualityControlReportFile = new File( commandLine.getOptionValue( RNASeqDataAddCli.MULTIQC_METADATA_FILE_OPT ) );
- if ( !qualityControlReportFile.isFile() || !qualityControlReportFile.canRead() ) {
+ qualityControlReportFile = Paths.get( commandLine.getOptionValue( RNASeqDataAddCli.MULTIQC_METADATA_FILE_OPT ) );
+ if ( !Files.exists( qualityControlReportFile ) || !Files.isReadable( qualityControlReportFile ) ) {
throw new IllegalArgumentException( "The MultiQC report file must exist and be readable." );
}
}
}
@Override
- protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
- if ( expressionExperiments.isEmpty() ) {
- throw new RuntimeException( "No experiment to be processed. Check in the logs above for troubled experiments." );
+ protected void processBioAssaySets( Collection<BioAssaySet> bas ) {
+ if ( !justbackfillLog2cpm ) {
+ throw new IllegalArgumentException( "Sorry, can only process one experiment with this tool, unless -log2cpm is used." );
}
+ super.processBioAssaySets( bas );
+ }
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
if ( this.justbackfillLog2cpm ) {
- for ( BioAssaySet bas : expressionExperiments ) {
- try {
- ExpressionExperiment ee = ( ExpressionExperiment ) bas;
- QuantitationType qt = this.eeService.getPreferredQuantitationType( ee );
- if ( qt == null )
- throw new IllegalArgumentException( "No preferred quantitation type for " + ee.getShortName() );
- if ( !qt.getType().equals( StandardQuantitationType.COUNT ) ) {
- log.warn( "Preferred data is not counts for " + ee );
- addErrorObject( ee.getShortName(), "Preferred data is not counts" );
- continue;
- }
- serv.log2cpmFromCounts( ee, qt );
- addSuccessObject( ee );
- } catch ( Exception e ) {
- addErrorObject( bas, e );
+ try {
+ QuantitationType qt = this.eeService.getPreferredQuantitationType( ee );
+ if ( qt == null )
+ throw new IllegalArgumentException( "No preferred quantitation type for " + ee.getShortName() );
+ if ( !qt.getType().equals( StandardQuantitationType.COUNT ) ) {
+ log.warn( "Preferred data is not counts for " + ee );
+ addErrorObject( ee.getShortName(), "Preferred data is not counts" );
+ return;
+ }
+ serv.log2cpmFromCounts( ee, qt );
+ addSuccessObject( ee );
+ } catch ( Exception e ) {
+ addErrorObject( ee, e );
}
+ return;
}
/*
* Usual cases.
*/
- if ( expressionExperiments.size() > 1 ) {
- throw new IllegalArgumentException( "Sorry, can only process one experiment with this tool." );
- }
- ArrayDesign targetArrayDesign = this.locateArrayDesign( this.platformName );
+ ArrayDesign targetArrayDesign = entityLocator.locateArrayDesign( this.platformName );
- ExpressionExperiment ee = ( ExpressionExperiment ) expressionExperiments.iterator().next();
-
- if ( expressionExperiments.size() > 1 ) {
- log
- .warn( "This CLI can only deal with one experiment at a time; only the first one will be processed" );
- }
- DoubleMatrixReader reader = new DoubleMatrixReader();
try {
+ DoubleMatrixReader reader = new DoubleMatrixReader();
DoubleMatrix<String, String> countMatrix = null;
DoubleMatrix<String, String> rpkmMatrix = null;
if ( this.countFile != null ) {
@@ -212,22 +198,18 @@ protected void processBioAssaySets( Collection expressionExperiment
serv.addCountData( ee, targetArrayDesign, countMatrix, rpkmMatrix, readLength, isPairedReads,
allowMissingSamples );
-
} catch ( IOException e ) {
- throw new RuntimeException( "Failed while processing " + ee, e );
+ addErrorObject( ee, "Failed to add count and RPKM data.", e );
+ return;
}
/* copy metadata files */
if ( qualityControlReportFile != null ) {
- File destinationFile = expressionDataFileService.getMetadataFile( ee, ExpressionExperimentMetaFileType.MUTLQC_REPORT );
- if ( destinationFile.exists() ) {
- log.warn( MessageFormat.format( "Replacing existing RNA-Seq quality control report located at {0}.", destinationFile ) );
- }
try {
- FileUtils.forceMkdirParent( destinationFile );
- FileUtils.copyFile( qualityControlReportFile, destinationFile, StandardCopyOption.REPLACE_EXISTING );
+ Path dest = expressionDataFileService.copyMetadataFile( ee, qualityControlReportFile, ExpressionExperimentMetaFileType.MUTLQC_REPORT, true );
+ log.info( "Copied QC report file to " + dest + "." );
} catch ( IOException e ) {
- addErrorObject( ee, String.format( "Could not copy the MultiQC report from %s to %s", qualityControlReportFile, destinationFile ), e );
+ addErrorObject( ee, "Could not copy the MultiQC report.", e );
}
}
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/RawDataDeleterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/RawDataDeleterCli.java
new file mode 100644
index 0000000000..97b663a5d8
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/RawDataDeleterCli.java
@@ -0,0 +1,39 @@
+package ubic.gemma.core.apps;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.loader.expression.DataDeleterService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+
+/**
+ * @author poirigui
+ */
+public class RawDataDeleterCli extends ExpressionExperimentVectorsManipulatingCli<RawExpressionDataVector> {
+
+ @Autowired
+ private DataDeleterService dataDeleterService;
+
+ public RawDataDeleterCli() {
+ super( RawExpressionDataVector.class );
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "deleteRawData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Delete raw expression data";
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ dataDeleterService.deleteRawData( ee, qt );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/RawExpressionDataWriterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/RawExpressionDataWriterCli.java
new file mode 100644
index 0000000000..6b0941d572
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/RawExpressionDataWriterCli.java
@@ -0,0 +1,87 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.core.analysis.service.ExpressionDataFileUtils;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
+import ubic.gemma.model.expression.experiment.BioAssaySet;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collection;
+import java.util.zip.GZIPOutputStream;
+
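+/**
+ * Writes the raw expression data for a given quantitation type to a gzipped tab-delimited file.
+ */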
+public class RawExpressionDataWriterCli extends ExpressionExperimentVectorsManipulatingCli<RawExpressionDataVector> {
+
+ @Autowired
+ private ExpressionDataFileService expressionDataFileService;
+
+ @Nullable
+ private Path outputFile;
+
+ public RawExpressionDataWriterCli() {
+ super( RawExpressionDataVector.class );
+ setUsePreferredQuantitationType();
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "getRawDataMatrix";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Write raw data matrix to a; gene information is included if available.";
+ }
+
+ @Override
+ protected void buildExperimentVectorsOptions( Options options ) {
+ options.addOption( Option.builder( "o" ).longOpt( "output-file" ).hasArg().type( Path.class ).build() );
+ addForceOption( options );
+ }
+
+ @Override
+ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws ParseException {
+ this.outputFile = commandLine.getParsedOptionValue( "o" );
+ }
+
+ @Override
+ protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ if ( outputFile != null ) {
+ throw new IllegalArgumentException( "The -o/--output-file option cannot be used with multiple experiments." );
+ }
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
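+ // write to the path supplied via -o/--output-file if any, otherwise derive the standard tabular data filename for this quantitation type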
+ Path f;
+ if ( outputFile != null ) {
+ f = outputFile;
+ } else {
+ f = Paths.get( ExpressionDataFileUtils.getDataOutputFilename( ee, qt, ExpressionDataFileUtils.TABULAR_BULK_DATA_FILE_SUFFIX ) );
+ }
+ if ( !isForce() && Files.exists( f ) ) {
+ throw new RuntimeException( "Output file " + f + " already exists, use -force to overwrite." );
+ }
+ try ( Writer writer = new OutputStreamWriter( new GZIPOutputStream( Files.newOutputStream( f ) ), StandardCharsets.UTF_8 ) ) {
+ int written = expressionDataFileService.writeRawExpressionData( ee, qt, writer );
+ addSuccessObject( ee, "Wrote " + written + " vectors for " + qt + " to " + f + "." );
+ } catch ( IOException e ) {
+ throw new RuntimeException( e );
+ }
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/RefreshExperimentCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/RefreshExperimentCli.java
index 2441a2ba54..c3bc2cc601 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/RefreshExperimentCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/RefreshExperimentCli.java
@@ -27,16 +27,14 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
options.addOption( "v", "refreshVectors", false, "Refresh cache of raw and processed data vectors" );
options.addOption( "r", "refreshReports", false, "Refresh cache of experiment reports (i.e. batch information, diff ex. analyses, etc.)" );
addThreadsOption( options );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
refreshVectors = commandLine.hasOption( 'v' );
refreshReports = commandLine.hasOption( 'r' );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ReplaceDataCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ReplaceDataCli.java
index eaf77a2628..d3933e3d03 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ReplaceDataCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ReplaceDataCli.java
@@ -63,15 +63,13 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
options.addOption( Option.builder( "file" ).longOpt( null ).desc( "Path to file with tab-delimited data, first column = probe ids, first row = sample IDs (e.g. GEO GSM#)" ).argName( "file path" ).hasArg().required().build() );
super.addForceOption( options );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
this.file = commandLine.getOptionValue( "file" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SVDCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SVDCli.java
index 294c350195..a2169b1e3b 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/SVDCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SVDCli.java
@@ -45,19 +45,18 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
super.addForceOption( options );
}
@Override
- protected void processExpressionExperiment( ExpressionExperiment bas ) {
- if ( !force && this.noNeedToRun( bas, PCAAnalysisEvent.class ) ) {
- throw new IllegalArgumentException( "Already has PCA; use -force to override" );
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
+ if ( this.noNeedToRun( ee, PCAAnalysisEvent.class ) ) {
+ return;
}
- log.info( "Processing: " + bas );
+ log.info( "Processing: " + ee );
try {
- svdService.svd( bas.getId() );
+ svdService.svd( ee );
} catch ( SVDException e ) {
throw new RuntimeException( e );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregateDeleterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregateDeleterCli.java
new file mode 100644
index 0000000000..56af62f52a
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregateDeleterCli.java
@@ -0,0 +1,41 @@
+package ubic.gemma.core.apps;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.loader.expression.DataDeleterService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+
+/**
+ * @author poirigui
+ */
+public class SingleCellDataAggregateDeleterCli extends ExpressionExperimentVectorsManipulatingCli<RawExpressionDataVector> {
+
+ @Autowired
+ private DataDeleterService dataDeleterService;
+
+ public SingleCellDataAggregateDeleterCli() {
+ super( RawExpressionDataVector.class );
+ setSingleExperimentMode();
+ setQuantitationTypeIdentifierRequired();
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "deleteSingleCellDataAggregate";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Delete a single-cell data aggregate";
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ dataDeleterService.deleteSingleCellDataAggregate( ee, qt );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregatorCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregatorCli.java
new file mode 100644
index 0000000000..2bdc733a93
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataAggregatorCli.java
@@ -0,0 +1,154 @@
+package ubic.gemma.core.apps;
+
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import ubic.gemma.core.analysis.preprocess.PreprocessorService;
+import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentAggregatorService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentSplitService;
+import ubic.gemma.persistence.service.expression.experiment.UnsupportedScaleTypeForAggregationException;
+import ubic.gemma.persistence.util.EntityUrlBuilder;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
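+/**
+ * Splits an experiment by cell type and aggregates its single-cell vectors into pseudo-bulk raw data.
+ */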
+@CommonsLog
+public class SingleCellDataAggregatorCli extends ExpressionExperimentVectorsManipulatingCli<SingleCellExpressionDataVector> {
+
+ @Autowired
+ private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ @Autowired
+ private ExpressionDataFileService expressionDataFileService;
+
+ @Autowired
+ private AggregatorHelperService helperService;
+
+ @Autowired
+ private PreprocessorService preprocessorService;
+
+ @Nullable
+ private String ctaName;
+ private boolean makePreferred;
+
+ public SingleCellDataAggregatorCli() {
+ super( SingleCellExpressionDataVector.class );
+ setUsePreferredQuantitationType();
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "aggregateSingleCellData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Aggregate single cell data into pseudo-bulks";
+ }
+
+ @Override
+ protected void buildExperimentVectorsOptions( Options options ) {
+ options.addOption( "cta", "cell-type-assignment", true, "Name of the cell type assignment to use (defaults to the preferred one)" );
+ options.addOption( "p", "make-preferred", false, "Make the resulting aggregated data the preferred raw data for the experiment" );
+ }
+
+ @Override
+ protected void processExperimentVectorsOptions( CommandLine commandLine ) {
+ ctaName = commandLine.getOptionValue( "cta" );
+ makePreferred = commandLine.hasOption( "p" );
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment expressionExperiment, QuantitationType qt ) {
+ log.info( "Splitting single cell data into pseudo-bulks for: " + expressionExperiment + " and " + qt );
+
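+ // resolve the cell type assignment: use the one named via -cta if provided, otherwise fall back to the preferred assignment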
+ CellTypeAssignment cta;
+ if ( ctaName != null ) {
+ cta = entityLocator.locateCellTypeAssignment( expressionExperiment, qt, ctaName );
+ } else {
+ cta = singleCellExpressionExperimentService.getPreferredCellTypeAssignment( expressionExperiment, qt )
+ .orElseThrow( () -> new IllegalStateException( expressionExperiment + " does not have a preferred cell-type assignment for " + qt + "." ) );
+ }
+
+ QuantitationType newQt;
+ try {
+ newQt = helperService.splitAndAggregate( expressionExperiment, qt, cta, makePreferred );
+ addSuccessObject( expressionExperiment, "Aggregated single-cell data into " + newQt + "." );
+ } catch ( UnsupportedScaleTypeForAggregationException e ) {
+ addErrorObject( expressionExperiment, String.format( "Aggregation is not support for data of scale type %s, change it first in the GUI %s.",
+ qt.getScale(), entityUrlBuilder.fromHostUrl().entity( expressionExperiment ).web().edit().toUriString() ), e );
+ return;
+ }
+
+ // create/recreate processed vectors
+ if ( newQt.getIsPreferred() ) {
+ log.info( "Creating a data file for " + newQt + "..." );
+ try ( ExpressionDataFileService.LockedPath lockedFile = expressionDataFileService.writeOrLocateRawExpressionDataFile( expressionExperiment, newQt, true ) ) {
+ addSuccessObject( expressionExperiment, "Created a data file for " + newQt + ": " + lockedFile.getPath() );
+ } catch ( IOException e ) {
+ addErrorObject( expressionExperiment, "Failed to generate a data file for " + newQt + ".", e );
+ }
+
+ log.info( "Reprocessing experiment since a new set of raw data vectors was added or replaced..." );
+ try {
+ preprocessorService.process( expressionExperiment );
+ addSuccessObject( expressionExperiment, "Post-processed data from " + newQt + "." );
+ } catch ( Exception e ) {
+ addErrorObject( expressionExperiment, "Failed to post-process the data from " + newQt + ".", e );
+ }
+ }
+ }
+
+ /**
+ * Simple service to split and aggregate in a transaction.
+ */
+ @Service
+ public static class AggregatorHelperService {
+
+ @Autowired
+ private SingleCellExpressionExperimentSplitService singleCellExpressionExperimentSplitService;
+
+ @Autowired
+ private SingleCellExpressionExperimentAggregatorService singleCellExpressionExperimentAggregatorService;
+
+ @Autowired
+ private EntityUrlBuilder entityUrlBuilder;
+
+ @Transactional
+ public QuantitationType splitAndAggregate( ExpressionExperiment expressionExperiment, QuantitationType qt, CellTypeAssignment cta, boolean makePreferred ) {
+ List<ExpressionExperimentSubSet> subsets = singleCellExpressionExperimentSplitService.splitByCellType( expressionExperiment, cta );
+ int longestSubsetName = subsets.stream().map( ExpressionExperimentSubSet::getName ).mapToInt( String::length ).max().orElse( 0 );
+ log.info( String.format( "Created %d subsets of %s for each cell type:\n\t%s", subsets.size(), expressionExperiment,
+ subsets.stream().map( subset -> StringUtils.rightPad( subset.getName(), longestSubsetName ) + "\t" + entityUrlBuilder.fromHostUrl().entity( subset ).web().toUri() ).collect( Collectors.joining( "\n\t" ) ) ) );
+
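+ // collect the pseudo-bulk bioassays in a stable order: by subset, then by bioassay name within each subset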
+ List<BioAssay> cellBAs = new ArrayList<>();
+ for ( ExpressionExperimentSubSet subset : subsets ) {
+ subset.getBioAssays().stream()
+ .sorted( Comparator.comparing( BioAssay::getName ) )
+ .forEach( cellBAs::add );
+ }
+
+ QuantitationType newQt = singleCellExpressionExperimentAggregatorService.aggregateVectors( expressionExperiment, qt, cellBAs, makePreferred );
+ log.info( "Aggregated single-cell data for " + qt + " into pseudo-bulks with quantitation type " + newQt + "." );
+ return newQt;
+ }
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDeleterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDeleterCli.java
new file mode 100644
index 0000000000..26f165da96
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDeleterCli.java
@@ -0,0 +1,39 @@
+package ubic.gemma.core.apps;
+
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.loader.expression.DataDeleterService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+
+/**
+ * @author poirigui
+ */
+public class SingleCellDataDeleterCli extends ExpressionExperimentVectorsManipulatingCli<SingleCellExpressionDataVector> {
+
+ @Autowired
+ private DataDeleterService dataDeleterService;
+
+ public SingleCellDataDeleterCli() {
+ super( SingleCellExpressionDataVector.class );
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "deleteSingleCellData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Delete single cell data and any related data files";
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ dataDeleterService.deleteSingleCellData( ee, qt );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDownloaderCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDownloaderCli.java
new file mode 100644
index 0000000000..5b5faf96e6
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataDownloaderCli.java
@@ -0,0 +1,509 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.commons.lang3.time.StopWatch;
+import org.apache.commons.net.ftp.FTPClient;
+import org.apache.commons.net.ftp.FTPFile;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import ubic.gemma.core.loader.expression.geo.GeoFamilyParser;
+import ubic.gemma.core.loader.expression.geo.model.GeoSample;
+import ubic.gemma.core.loader.expression.geo.model.GeoSeries;
+import ubic.gemma.core.loader.expression.geo.singleCell.GeoSingleCellDetector;
+import ubic.gemma.core.loader.expression.singleCell.SingleCellDataLoader;
+import ubic.gemma.core.loader.expression.singleCell.SingleCellDataType;
+import ubic.gemma.core.loader.util.ftp.FTPClientFactory;
+import ubic.gemma.core.loader.util.ftp.FTPClientFactoryImpl;
+import ubic.gemma.core.util.AbstractCLI;
+import ubic.gemma.core.util.ProgressInputStream;
+import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
+import ubic.gemma.model.expression.bioAssay.BioAssay;
+import ubic.gemma.model.expression.bioAssayData.SingleCellDimension;
+import ubic.gemma.model.expression.biomaterial.BioMaterial;
+
+import javax.annotation.Nullable;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;
+
+import static java.util.Objects.requireNonNull;
+
+@Component
+public class SingleCellDataDownloaderCli extends AbstractCLI {
+
+ private static final String
+ ACCESSIONS_FILE_OPTION = "f",
+ ACCESSIONS_OPTION = "e",
+ SUMMARY_OUTPUT_FILE_OPTION = "s",
+ RESUME_OPTION = "r",
+ RETRY_OPTION = "retry",
+ FETCH_THREADS_OPTION = "fetchThreads";
+
+ private static final String
+ SAMPLE_ACCESSIONS_OPTION = "sampleAccessions",
+ DATA_TYPE_OPTION = "dataType",
+ SUPPLEMENTARY_FILE_OPTION = "supplementaryFile";
+
+ /**
+ * Only applicable if dataType is set to MEX.
+ */
+ private static final String
+ MEX_BARCODES_FILE_SUFFIX = "mexBarcodesFile",
+ MEX_FEATURES_FILE_SUFFIX = "mexFeaturesFile",
+ MEX_MATRIX_FILE_SUFFIX = "mexMatrixFile";
+
+ private static final String[] SUMMARY_HEADER = new String[] { "geo_accession", "data_type", "number_of_samples", "number_of_cells", "number_of_genes", "additional_supplementary_files", "comment" };
+
+ private static final String
+ UNKNOWN_INDICATOR = "UNKNOWN",
+ UNSUPPORTED_INDICATOR = "UNSUPPORTED",
+ FAILED_INDICATOR = "FAILED";
+
+ @Autowired
+ private FTPClientFactory ftpClientFactory;
+
+ @Value("${geo.local.datafile.basepath}")
+ private File geoSeriesDownloadPath;
+
+ @Value("${geo.local.singleCellData.basepath}")
+ private File singleCellDataBasePath;
+
+ private final Set<String> accessions = new HashSet<>();
+ @Nullable
+ private Path summaryOutputFile;
+ private boolean resume;
+ private String[] retry;
+ @Nullable
+ private Number fetchThreads;
+
+ // single-accession options
+ @Nullable
+ private Set<String> sampleAccessions;
+ @Nullable
+ private SingleCellDataType dataType;
+ @Nullable
+ private String supplementaryFile;
+
+ // MEX options
+ @Nullable
+ private String barcodesFileSuffix;
+ @Nullable
+ private String featuresFileSuffix;
+ @Nullable
+ private String matrixFileSuffix;
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "downloadSingleCellData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Download single cell data from GEO.\nFor the moment, only GEO series accessions are supported.";
+ }
+
+ @Override
+ public CommandGroup getCommandGroup() {
+ return CommandGroup.MISC;
+ }
+
+ @Override
+ protected void buildOptions( Options options ) {
+ // options are consistent with those of LoadExpressionDataCli
+ options.addOption( Option.builder( ACCESSIONS_FILE_OPTION ).longOpt( "file" ).type( File.class ).hasArg().desc( "File containing accessions to download." ).build() );
+ options.addOption( Option.builder( ACCESSIONS_OPTION ).longOpt( "acc" ).hasArg().desc( "Comma-delimited list of accessions to download." ).build() );
+ options.addOption( Option.builder( SUMMARY_OUTPUT_FILE_OPTION ).longOpt( "summary-output-file" ).type( File.class ).hasArg().desc( "File to write the summary output to. This is used to keep track of progress and resume download with -r/--resume." ).build() );
+ options.addOption( Option.builder( RESUME_OPTION ).longOpt( "resume" ).desc( "Resume download from a previous invocation of this command. Requires -s/--summary-output-file to be set and refer to an existing file." ).build() );
+ options.addOption( Option.builder( RETRY_OPTION ).longOpt( "retry" ).hasArg().desc( "Retry problematic datasets. Possible values are: '" + UNSUPPORTED_INDICATOR + "', '" + UNKNOWN_INDICATOR + "' or '" + FAILED_INDICATOR + "', or any combination delimited by ','. Requires -r/--resume option to be set." ).build() );
+ options.addOption( Option.builder( FETCH_THREADS_OPTION ).longOpt( "fetch-threads" ).hasArg().type( Number.class ).desc( "Number of threads to use for downloading files. Default is " + GeoSingleCellDetector.DEFAULT_NUMBER_OF_FETCH_THREADS + ". Use -threads/--threads for processing series in parallel." ).build() );
+ options.addOption( Option.builder( SAMPLE_ACCESSIONS_OPTION ).longOpt( "sample-accessions" ).hasArg().desc( "Comma-delimited list of sample accessions to download." ).build() );
+ options.addOption( Option.builder( DATA_TYPE_OPTION ).longOpt( "data-type" ).hasArg().desc( "Data type. Possible values are: " + Arrays.stream( SingleCellDataType.values() ).map( Enum::name ).collect( Collectors.joining( ", " ) ) + ". Only works if a single accession is passed to -e/--acc." ).build() );
+ options.addOption( Option.builder( SUPPLEMENTARY_FILE_OPTION ).longOpt( "supplementary-file" ).hasArgs().desc( "Supplementary file to download. Only works if a single accession is passed to -e/--acc and -dataType is specified." ).build() );
+ options.addOption( Option.builder( MEX_BARCODES_FILE_SUFFIX ).longOpt( "mex-barcodes-file" ).hasArg().desc( "Suffix to use to detect MEX barcodes file. Only works if -dataType/--data-type is set to MEX." ).build() );
+ options.addOption( Option.builder( MEX_FEATURES_FILE_SUFFIX ).longOpt( "mex-features-file" ).hasArg().desc( "Suffix to use to detect MEX features file. Only works if -dataType/--data-type is set to MEX." ).build() );
+ options.addOption( Option.builder( MEX_MATRIX_FILE_SUFFIX ).longOpt( "mex-matrix-file" ).hasArg().desc( "Suffix to use to detect MEX matrix file. Only works if -dataType/--data-type is set to MEX." ).build() );
+ addBatchOption( options );
+ addThreadsOption( options );
+ }
+
+ @Override
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
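+ // "single accession mode": exactly one GSE passed via -e/--acc and no -f/--file; required for the sample-, data type- and supplementary file-specific options below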
+ boolean singleAccessionMode = commandLine.hasOption( ACCESSIONS_OPTION )
+ && !commandLine.getOptionValue( ACCESSIONS_OPTION ).contains( "," )
+ && !commandLine.hasOption( ACCESSIONS_FILE_OPTION );
+ if ( commandLine.hasOption( ACCESSIONS_OPTION ) ) {
+ Arrays.stream( StringUtils.split( commandLine.getOptionValue( ACCESSIONS_OPTION ), ',' ) )
+ .filter( geoAccession -> {
+ if ( !geoAccession.startsWith( "GSE" ) ) {
+ log.warn( "Unsupported accession " + geoAccession );
+ return false;
+ }
+ return true;
+ } ).forEach( accessions::add );
+ if ( singleAccessionMode && accessions.size() != 1 ) {
+ throw new IllegalStateException( "In single accession mode, exactly one supported accession must be supplied." );
+ }
+ }
+ if ( commandLine.hasOption( ACCESSIONS_FILE_OPTION ) ) {
+ if ( singleAccessionMode ) {
+ throw new IllegalStateException( "The -" + ACCESSIONS_FILE_OPTION + " option cannot be used in single accession mode." );
+ }
+ Path inputFile = ( ( File ) commandLine.getParsedOptionValue( ACCESSIONS_FILE_OPTION ) ).toPath();
+ try ( Stream<String> lines = Files.lines( inputFile ) ) {
+ lines.skip( 1 )
+ .filter( StringUtils::isNotBlank )
+ .map( line -> line.split( "\t", 2 )[0] )
+ .filter( geoAccession -> {
+ if ( !geoAccession.startsWith( "GSE" ) ) {
+ log.warn( "Unsupported accession " + geoAccession );
+ return false;
+ }
+ return true;
+ } )
+ .forEach( accessions::add );
+ } catch ( IOException e ) {
+ throw new RuntimeException( e );
+ }
+ }
+ if ( commandLine.hasOption( SUMMARY_OUTPUT_FILE_OPTION ) ) {
+ if ( singleAccessionMode ) {
+ throw new IllegalStateException( "The -" + SUMMARY_OUTPUT_FILE_OPTION + " option cannot be used in single accession mode." );
+ }
+ summaryOutputFile = ( ( File ) commandLine.getParsedOptionValue( SUMMARY_OUTPUT_FILE_OPTION ) ).toPath();
+ } else {
+ summaryOutputFile = null;
+ }
+ resume = commandLine.hasOption( RESUME_OPTION );
+ if ( commandLine.hasOption( RETRY_OPTION ) ) {
+ retry = StringUtils.split( commandLine.getOptionValue( RETRY_OPTION ), "," );
+ for ( String r : retry ) {
+ if ( !r.equals( UNKNOWN_INDICATOR ) && !r.equals( UNSUPPORTED_INDICATOR ) && !r.equals( FAILED_INDICATOR ) ) {
+ throw new IllegalArgumentException( String.format( "Value for the %s option must be one of: %s, %s or %s.",
+ RETRY_OPTION, UNKNOWN_INDICATOR, UNSUPPORTED_INDICATOR, FAILED_INDICATOR ) );
+ }
+ }
+ } else {
+ retry = null;
+ }
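+ // when resuming, accessions already recorded in the summary file are skipped, except those whose status matches -retry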
+ if ( resume ) {
+ if ( singleAccessionMode ) {
+ throw new IllegalArgumentException( "The -" + RESUME_OPTION + " option cannot be used in single accession mode." );
+ }
+ if ( summaryOutputFile == null ) {
+ throw new IllegalArgumentException( "The -" + RESUME_OPTION + " option requires the -" + SUMMARY_OUTPUT_FILE_OPTION + " option to be provided." );
+ }
+ AtomicInteger accessionsToRetry = new AtomicInteger( 0 );
+ try ( Stream<String> lines = Files.lines( summaryOutputFile ) ) {
+ Set<String> accessionsToRemove = lines.skip( 1 )
+ .filter( line -> {
+ if ( retry != null ) {
+ String dataType = line.split( "\t", 3 )[1];
+ if ( ArrayUtils.contains( retry, dataType ) ) {
+ accessionsToRetry.incrementAndGet();
+ return false;
+ }
+ }
+ return true;
+ } )
+ .map( line -> line.split( "\t", 2 )[0] )
+ .collect( Collectors.toSet() );
+ if ( accessionsToRemove.isEmpty() ) {
+ throw new RuntimeException( String.format( "No accessions were found in %s, is the file empty?", summaryOutputFile ) );
+ }
+ if ( !accessions.containsAll( accessionsToRemove ) ) {
+ throw new RuntimeException( String.format( "Some of the accessions from %s were not found as input, are you sure this is the right summary file?.", summaryOutputFile ) );
+ }
+ accessions.removeAll( accessionsToRemove );
+ log.info( String.format( "Resuming download, %d accessions were already processed%s...",
+ accessionsToRemove.size(),
+ accessionsToRetry.get() > 0 ? " and " + accessionsToRetry.get() + " will be retried" : "" ) );
+ } catch ( IOException e ) {
+ throw new RuntimeException( e );
+ }
+ } else if ( retry != null ) {
+ throw new IllegalArgumentException( "The -" + RETRY_OPTION + " option requires the -" + RESUME_OPTION + " option to be provided." );
+ }
+ if ( commandLine.hasOption( FETCH_THREADS_OPTION ) ) {
+ fetchThreads = commandLine.getParsedOptionValue( FETCH_THREADS_OPTION );
+ }
+ if ( commandLine.hasOption( SAMPLE_ACCESSIONS_OPTION ) ) {
+ if ( !singleAccessionMode ) {
+ throw new IllegalArgumentException( "The -sampleAccessions/--sample-accessions option requires that only one accession be supplied via -e/--acc." );
+ }
+ sampleAccessions = new HashSet<>( Arrays.asList( StringUtils.split( commandLine.getOptionValue( SAMPLE_ACCESSIONS_OPTION ), ',' ) ) );
+ }
+ if ( commandLine.hasOption( DATA_TYPE_OPTION ) ) {
+ if ( !singleAccessionMode ) {
+ throw new IllegalArgumentException( "The -dataType/--data-type option requires that only one accession be supplied via -e/--acc." );
+ }
+ dataType = SingleCellDataType.valueOf( commandLine.getOptionValue( DATA_TYPE_OPTION ).toUpperCase() );
+ }
+ if ( commandLine.hasOption( MEX_BARCODES_FILE_SUFFIX ) || commandLine.hasOption( MEX_FEATURES_FILE_SUFFIX ) || commandLine.hasOption( MEX_MATRIX_FILE_SUFFIX ) ) {
+ if ( dataType != SingleCellDataType.MEX ) {
+ throw new IllegalArgumentException( "The -mexBarcodes, -mexFeatures and -mexMatrix options are only available if -dataType is set to MEX." );
+ }
+ barcodesFileSuffix = commandLine.getOptionValue( MEX_BARCODES_FILE_SUFFIX, "barcodes.tsv" );
+ featuresFileSuffix = commandLine.getOptionValue( MEX_FEATURES_FILE_SUFFIX, "features.tsv" );
+ matrixFileSuffix = commandLine.getOptionValue( MEX_MATRIX_FILE_SUFFIX, "matrix.mtx" );
+ }
+ if ( commandLine.hasOption( SUPPLEMENTARY_FILE_OPTION ) ) {
+ if ( !singleAccessionMode ) {
+ throw new IllegalArgumentException( "The -supplementaryFile option requires that only one accession be supplied via -e/--acc." );
+ }
+ if ( dataType == null ) {
+ throw new IllegalArgumentException( "The -supplementaryFile option requires the -dataType option to be provided." );
+ }
+ supplementaryFile = commandLine.getOptionValue( SUPPLEMENTARY_FILE_OPTION );
+ }
+ }
+
+ @Override
+ protected void doWork() throws Exception {
+ if ( retry != null ) {
+ log.info( String.format( "Removing accessions marked as %s from %s since they will be reattempted...", String.join( ", ", retry ), summaryOutputFile ) );
+ // rewrite the summary output file to remove the accessions that will be retried since those will be appended
+ assert summaryOutputFile != null;
+ List<String> linesToKeep = new ArrayList<>();
+ linesToKeep.add( String.join( "\t", SUMMARY_HEADER ) );
+ try ( Stream<String> lines = Files.lines( summaryOutputFile ) ) {
+ lines.skip( 1 )
+ .filter( line -> !accessions.contains( line.split( "\t", 2 )[0] ) )
+ .forEach( linesToKeep::add );
+ }
+ Files.write( summaryOutputFile, linesToKeep );
+ }
+
+ try ( GeoSingleCellDetector detector = new GeoSingleCellDetector();
+ CSVPrinter writer = getSummaryOutputFilePrinter() ) {
+ detector.setFTPClientFactory( ftpClientFactory );
+ detector.setDownloadDirectory( singleCellDataBasePath.toPath() );
+ if ( barcodesFileSuffix != null && featuresFileSuffix != null && matrixFileSuffix != null ) {
+ detector.setMexFileSuffixes( barcodesFileSuffix, featuresFileSuffix, matrixFileSuffix );
+ }
+ if ( fetchThreads != null ) {
+ // ensure that each thread can utilize a FTP connection
+ if ( ftpClientFactory instanceof FTPClientFactoryImpl ) {
+ ( ( FTPClientFactoryImpl ) ftpClientFactory ).setMaxTotalConnections( fetchThreads.intValue() );
+ }
+ detector.setNumberOfFetchThreads( fetchThreads.intValue() );
+ }
+ log.info( "Downloading single cell data to " + singleCellDataBasePath + "..." );
+ for ( String geoAccession : accessions ) {
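+ // each series is handled as a batch task; the outcome is appended to the summary file (if provided) in the finally block below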
+ getBatchTaskExecutor().submit( () -> {
+ String detectedDataType = UNKNOWN_INDICATOR;
+ Integer numberOfSamples = null, numberOfCells = null, numberOfGenes = null;
+ List<String> additionalSupplementaryFiles = new ArrayList<>();
+ String comment = "";
+ try {
+ log.info( geoAccession + ": Parsing GEO series metadata..." );
+ GeoSeries series = readSeriesFromGeo( geoAccession );
+ if ( series == null ) {
+ addErrorObject( geoAccession, "The SOFT file does not contain an entry for the series." );
+ comment = "The SOFT file does not contain an entry for the series.";
+ return;
+ }
+ if ( sampleAccessions != null ) {
+ log.info( "Only retaining the following samples from " + geoAccession + ": " + String.join( ", ", sampleAccessions ) );
+ Set<GeoSample> samplesToKeep = series.getSamples().stream()
+ .filter( s -> sampleAccessions.contains( s.getGeoAccession() ) )
+ .collect( Collectors.toSet() );
+ if ( samplesToKeep.size() != sampleAccessions.size() ) {
+ Set<String> availableSamples = series.getSamples().stream().map( GeoSample::getGeoAccession )
+ .filter( Objects::nonNull ).collect( Collectors.toCollection( LinkedHashSet::new ) );
+ String missingSamples = sampleAccessions.stream()
+ .filter( sa -> !availableSamples.contains( sa ) )
+ .collect( Collectors.joining( ", " ) );
+ throw new IllegalArgumentException( String.format( "Not all desired samples were found in %s, the following were missing: %s. The following are available: %s.",
+ geoAccession, missingSamples, String.join( ", ", availableSamples ) ) );
+ }
+ series.keepSamples( samplesToKeep );
+ }
+ if ( detector.hasSingleCellData( series ) ) {
+ if ( dataType != null && supplementaryFile != null ) {
+ detectedDataType = dataType.name();
+ } else {
+ detectedDataType = detector.getSingleCellDataType( series ).name();
+ }
+ additionalSupplementaryFiles.addAll( detector.getAdditionalSupplementaryFiles( series ) );
+ for ( GeoSample sample : series.getSamples() ) {
+ additionalSupplementaryFiles.addAll( detector.getAdditionalSupplementaryFiles( series, sample ) );
+ }
+ if ( dataType != null && supplementaryFile != null ) {
+ detector.downloadSingleCellData( series, dataType,
+ matchSupplementaryFile( series.getSupplementaryFiles(), supplementaryFile ) );
+ } else if ( dataType != null ) {
+ detector.downloadSingleCellData( series, dataType );
+ } else {
+ detector.downloadSingleCellData( series );
+ }
+ // create a dummy platform, we just need to retrieve basic metadata from the loader
+ ArrayDesign platform = new ArrayDesign();
+ List<BioAssay> bas = series.getSamples().stream()
+ .map( GeoSample::getGeoAccession )
+ .map( s -> BioAssay.Factory.newInstance( s, platform, BioMaterial.Factory.newInstance( s ) ) )
+ .collect( Collectors.toList() );
+ SingleCellDataLoader loader = detector.getSingleCellDataLoader( series );
+ numberOfSamples = loader.getSampleNames().size();
+ SingleCellDimension scd = loader.getSingleCellDimension( bas );
+ numberOfCells = scd.getNumberOfCells();
+ numberOfGenes = loader.getGenes().size();
+ addSuccessObject( geoAccession );
+ } else {
+ detectedDataType = UNSUPPORTED_INDICATOR;
+ // consider all supplementary materials as additional
+ additionalSupplementaryFiles.addAll( detector.getAdditionalSupplementaryFiles( series ) );
+ for ( GeoSample sample : series.getSamples() ) {
+ additionalSupplementaryFiles.addAll( detector.getAdditionalSupplementaryFiles( series, sample ) );
+ }
+ }
+ } catch ( Exception e ) {
+ addErrorObject( geoAccession, e );
+ comment = StringUtils.trim( ExceptionUtils.getRootCauseMessage( e ) );
+ if ( !detectedDataType.equals( UNKNOWN_INDICATOR ) ) {
+ comment += " (detected data type: " + detectedDataType + ")";
+ }
+ if ( e instanceof UnsupportedOperationException ) {
+ // this might be caused by downloadSingleCellData() or getSingleCellDataLoader()
+ detectedDataType = UNSUPPORTED_INDICATOR;
+ } else {
+ detectedDataType = FAILED_INDICATOR;
+ }
+ } finally {
+ if ( writer != null ) {
+ try {
+ writer.printRecord(
+ geoAccession, detectedDataType, numberOfSamples, numberOfCells, numberOfGenes,
+ additionalSupplementaryFiles.stream().map( this::formatFilename ).collect( Collectors.joining( ";" ) ),
+ comment );
+ writer.flush(); // for convenience, so that results appear immediately with tail -f
+ } catch ( IOException e ) {
+ log.error( "Failed to append to the summary output file.", e );
+ }
+ }
+ }
+ } );
+ }
+ awaitBatchExecutorService();
+ }
+ }
+
+ /**
+ * Pick a supplementary file from a user-supplied string.
+ */
+ private String matchSupplementaryFile( Collection<String> supplementaryFiles, String supplementaryFile ) {
+ // 1. check for a complete match
+ for ( String f : supplementaryFiles ) {
+ if ( f.equals( supplementaryFile ) ) {
+ return f;
+ }
+ }
+
+ // 2. check for the last component
+ for ( String f : supplementaryFiles ) {
+ if ( FilenameUtils.getName( f ).equals( supplementaryFile ) ) {
+ return f;
+ }
+ }
+
+ throw new IllegalStateException( "No supplementary file matching " + supplementaryFile + " found in: " + StringUtils.join( ", ", supplementaryFiles ) + "." );
+ }
+
+ /**
+ * Format a filename for the summary output file.
+ *
+ * Exclamation marks are used to refer to files within archives (e.g. {@code GSM000012_bundle.tar!/cellids.csv}).
+ */
+ private String formatFilename( String fullPath ) {
+ int afterExclamationMark = fullPath.indexOf( "!" );
+ if ( afterExclamationMark > 0 ) {
+ return FilenameUtils.getName( fullPath.substring( 0, afterExclamationMark ) ) + fullPath.substring( afterExclamationMark );
+ } else {
+ return FilenameUtils.getName( fullPath );
+ }
+ }
+
+ @Nullable
+ private CSVPrinter getSummaryOutputFilePrinter() throws IOException {
+ if ( summaryOutputFile == null ) {
+ return null;
+ }
+ CSVFormat.Builder csvFormatBuilder = CSVFormat.TDF.builder();
+ if ( !resume ) {
+ csvFormatBuilder.setHeader( SUMMARY_HEADER );
+ }
+ return csvFormatBuilder.build().print( Files.newBufferedWriter( summaryOutputFile, resume ? StandardOpenOption.APPEND : StandardOpenOption.CREATE ) );
+ }
+
+ @Nullable
+ private GeoSeries readSeriesFromGeo( String accession ) throws IOException {
+ String remoteFile = String.format( "geo/series/%snnn/%s/soft/%s_family.soft.gz",
+ accession.substring( 0, accession.length() - 3 ), accession, accession );
+ URL softFileUrl = new URL( "ftp://ftp.ncbi.nlm.nih.gov/" + remoteFile );
+ Path dest = geoSeriesDownloadPath.toPath().resolve( accession ).resolve( accession + ".soft.gz" );
+ boolean download = true;
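+ // reuse a previously downloaded SOFT file only if its size matches the remote FTP listing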
+ if ( Files.exists( dest ) ) {
+ FTPClient client = ftpClientFactory.getFtpClient( softFileUrl );
+ try {
+ FTPFile res = client.mlistFile( remoteFile );
+ long expectedLength = res != null ? res.getSize() : -1;
+ if ( expectedLength != -1 && dest.toFile().length() == expectedLength ) {
+ log.info( accession + ": Using existing SOFT file " + dest + "." );
+ download = false;
+ }
+ ftpClientFactory.recycleClient( softFileUrl, client );
+ } catch ( Exception e ) {
+ ftpClientFactory.destroyClient( softFileUrl, client );
+ throw e;
+ }
+ }
+ if ( download ) {
+ log.info( accession + ": Downloading SOFT file to " + dest + "..." );
+ PathUtils.createParentDirectories( dest );
+ StopWatch timer = StopWatch.createStarted();
+ try ( InputStream in = new ProgressInputStream( ftpClientFactory.openStream( softFileUrl ), accession + ".soft.gz", SingleCellDataDownloaderCli.class.getName() ); OutputStream out = Files.newOutputStream( dest ) ) {
+ int downloadedBytes = IOUtils.copy( in, out );
+ if ( downloadedBytes > 0 ) {
+ log.info( String.format( "%s: Done downloading SOFT file (%s in %s @ %.3f MB/s).", accession,
+ FileUtils.byteCountToDisplaySize( downloadedBytes ), timer,
+ ( 1000.0 / ( 1000.0 * 1000.0 ) ) * ( downloadedBytes / timer.getTime() ) ) );
+ }
+ } catch ( IOException e ) {
+ if ( Files.exists( dest ) ) {
+ log.warn( accession + ": An I/O error occurred while downloading the SOFT file, removing " + dest + "...", e );
+ PathUtils.deleteDirectory( dest.getParent() );
+ }
+ throw e;
+ }
+ }
+ try ( InputStream is = new GZIPInputStream( Files.newInputStream( dest ) ) ) {
+ GeoFamilyParser parser = new GeoFamilyParser();
+ parser.parse( is );
+ return requireNonNull( parser.getUniqueResult() ).getSeriesMap().get( accession );
+ }
+ }
+}
\ No newline at end of file
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataLoaderCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataLoaderCli.java
new file mode 100644
index 0000000000..8416046cab
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataLoaderCli.java
@@ -0,0 +1,314 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.*;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.core.analysis.service.ExpressionExperimentDataFileType;
+import ubic.gemma.core.loader.expression.singleCell.AnnDataSingleCellDataLoaderConfig;
+import ubic.gemma.core.loader.expression.singleCell.SingleCellDataLoaderConfig;
+import ubic.gemma.core.loader.expression.singleCell.SingleCellDataLoaderService;
+import ubic.gemma.core.loader.expression.singleCell.SingleCellDataType;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
+import ubic.gemma.model.expression.bioAssayData.CellLevelCharacteristics;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.experiment.BioAssaySet;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.stream.Collectors;
+
+@Component
+public class SingleCellDataLoaderCli extends ExpressionExperimentManipulatingCLI {
+
+ private static final String
+ LOAD_CELL_TYPE_ASSIGNMENT_OPTION = "loadCta",
+ LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION = "loadClc",
+ DATA_TYPE_OPTION = "dataType",
+ DATA_PATH_OPTION = "p",
+ PLATFORM_OPTION = "a",
+ QT_NAME_OPTION = "qtName",
+ PREFERRED_QT_OPTION = "preferredQt",
+ REPLACE_OPTION = "replace",
+ CELL_TYPE_ASSIGNMENT_FILE_OPTION = "ctaFile",
+ CELL_TYPE_ASSIGNMENT_NAME_OPTION = "ctaName",
+ CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION = "ctaProtocol",
+ PREFERRED_CELL_TYPE_ASSIGNMENT = "preferredCta",
+ OTHER_CELL_LEVEL_CHARACTERISTICS_FILE = "clcFile";
+
+ private static final String ANNDATA_OPTION_PREFIX = "annData";
+ private static final String
+ ANNDATA_SAMPLE_FACTOR_NAME_OPTION = ANNDATA_OPTION_PREFIX + "SampleFactorName",
+ ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION = ANNDATA_OPTION_PREFIX + "CellTypeFactorName",
+ ANNDATA_UNKNOWN_CELL_TYPE_INDICATOR_OPTION = ANNDATA_OPTION_PREFIX + "UnknownCellTypeIndicator";
+
+ @Autowired
+ private SingleCellDataLoaderService singleCellDataLoaderService;
+
+ @Autowired
+ private ExpressionDataFileService expressionDataFileService;
+
+ /**
+ * Operation mode when loading data.
+ */
+ private Mode mode;
+
+ enum Mode {
+ LOAD_CELL_TYPE_ASSIGNMENTS,
+ LOAD_CELL_LEVEL_CHARACTERISTICS,
+ LOAD_EVERYTHING
+ }
+
+ @Nullable
+ private String platformName;
+ @Nullable
+ private Path dataPath;
+ @Nullable
+ private SingleCellDataType dataType;
+ @Nullable
+ private String qtName;
+ private boolean preferredQt;
+ private boolean replaceQt;
+ @Nullable
+ private Path cellTypeAssignmentFile;
+ @Nullable
+ private String cellTypeAssignmentName;
+ @Nullable
+ private String cellTypeAssignmentProtocolName;
+ private boolean preferredCellTypeAssignment;
+ @Nullable
+ private Path otherCellLevelCharacteristicsFile;
+
+ // AnnData
+ private String annDataSampleFactorName;
+ private String annDataCellTypeFactorName;
+ private String annDataUnknownCellTypeIndicator;
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "loadSingleCellData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Load single-cell data from either AnnData or 10x MEX format.";
+ }
+
+ @Override
+ protected void buildExperimentOptions( Options options ) {
+ options.addOption( LOAD_CELL_TYPE_ASSIGNMENT_OPTION, "load-cell-type-assignment", false, "Only load cell type assignment. Use -" + QT_NAME_OPTION + " to specify which set of vectors this is applicable to." );
+ options.addOption( LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION, "load-cell-level-characteristics", false, "Only load cell-level characteristics. Use -" + QT_NAME_OPTION + " to specify which set of vectors this is applicable to." );
+ options.addOption( DATA_TYPE_OPTION, "data-type", true, "Data type to import. Must be one of " + Arrays.stream( SingleCellDataType.values() ).map( Enum::name ).collect( Collectors.joining( ", " ) ) + "." );
+ options.addOption( Option.builder( DATA_PATH_OPTION )
+ .longOpt( "data-path" )
+ .hasArg()
+ .type( Path.class )
+ .desc( "Load single-cell data from the given path instead of looking up the download directory. For AnnData and Seurat Disk, it is a file. For MEX it is a directory. Requires the -" + DATA_TYPE_OPTION + " option to be set." )
+ .build() );
+ options.addOption( PLATFORM_OPTION, "platform", true, "Target platform (must already exist in the system)" );
+ options.addOption( QT_NAME_OPTION, "quantitation-type-name", true, "Quantitation type to import (optional, use if more than one is present in data)" );
+ options.addOption( PREFERRED_QT_OPTION, "preferred-quantitation-type", false, "Make the quantitation type the preferred one." );
+ options.addOption( REPLACE_OPTION, "replace", false, "Replace an existing quantitation type. The -" + QT_NAME_OPTION + "/--quantitation-type-name option must be set." );
+ options.addOption( Option.builder( CELL_TYPE_ASSIGNMENT_FILE_OPTION )
+ .longOpt( "cell-type-assignment-file" )
+ .hasArg().type( Path.class )
+ .desc( "Path to a cell type assignment file. If missing, cell type importing will be delegated to a specific loader. For AnnData, you must supply the -" + ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION + " option." )
+ .build() );
+ options.addOption( CELL_TYPE_ASSIGNMENT_NAME_OPTION, "cell-type-assignment-name", true, "Name to use for the cell type assignment. This requires the -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + " option to be set." );
+ options.addOption( CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION, "cell-type-assignment-protocol", true, "An identifier for a protocol describing the cell type assignment. This requires the -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + " option to be set." );
+ options.addOption( PREFERRED_CELL_TYPE_ASSIGNMENT, "preferred-cell-type-assignment", false, "Make the cell type assignment the preferred one." );
+ options.addOption( Option.builder( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE )
+ .longOpt( "cell-level-characteristics-file" )
+ .hasArg().type( Path.class )
+ .desc( "Path to a file containing additional cell-level characteristics to import." )
+ .build() );
+ // for AnnData
+ options.addOption( ANNDATA_SAMPLE_FACTOR_NAME_OPTION, "anndata-sample-factor-name", true, "Name of the factor used for the sample name." );
+ options.addOption( ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION, "anndata-cell-type-factor-name", true, "Name of the factor used for the cell type, incompatible with -" + CELL_TYPE_ASSIGNMENT_FILE_OPTION + "." );
+ options.addOption( ANNDATA_UNKNOWN_CELL_TYPE_INDICATOR_OPTION, "anndata-unknown-cell-type-indicator", true, "Indicator used for missing cell type. Defaults to using the standard -1 categorical code." );
+ }
+
+ @Override
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
+ if ( commandLine.hasOption( LOAD_CELL_TYPE_ASSIGNMENT_OPTION ) && commandLine.hasOption( LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION ) ) {
+ throw new IllegalArgumentException( "Can only choose one of -" + LOAD_CELL_TYPE_ASSIGNMENT_OPTION + " and -" + LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION + " at a time." );
+ }
+ if ( commandLine.hasOption( LOAD_CELL_TYPE_ASSIGNMENT_OPTION ) ) {
+ mode = Mode.LOAD_CELL_TYPE_ASSIGNMENTS;
+ if ( commandLine.hasOption( PLATFORM_OPTION ) ) {
+ throw new IllegalArgumentException( "The -" + PLATFORM_OPTION + " cannot be used with -" + LOAD_CELL_TYPE_ASSIGNMENT_OPTION + "." );
+ }
+ } else if ( commandLine.hasOption( LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION ) ) {
+ mode = Mode.LOAD_CELL_LEVEL_CHARACTERISTICS;
+ if ( commandLine.hasOption( PLATFORM_OPTION ) ) {
+ throw new IllegalArgumentException( "The -" + PLATFORM_OPTION + " cannot be used with -" + LOAD_CELL_LEVEL_CHARACTERISTICS_OPTION + "." );
+ }
+ } else {
+ mode = Mode.LOAD_EVERYTHING;
+ platformName = commandLine.getOptionValue( PLATFORM_OPTION );
+ if ( platformName == null ) {
+ throw new MissingArgumentException( "The -" + PLATFORM_OPTION + " option is required when loading vectors." );
+ }
+ }
+ if ( commandLine.hasOption( DATA_TYPE_OPTION ) ) {
+ dataType = SingleCellDataType.valueOf( commandLine.getOptionValue( DATA_TYPE_OPTION ) );
+ }
+ if ( commandLine.hasOption( DATA_PATH_OPTION ) ) {
+ if ( dataType == null ) {
+ throw new IllegalArgumentException( "The -" + DATA_TYPE_OPTION + " option must be set of a data path is provided." );
+ }
+ dataPath = commandLine.getParsedOptionValue( DATA_PATH_OPTION );
+ }
+ qtName = commandLine.getOptionValue( QT_NAME_OPTION );
+ if ( commandLine.hasOption( REPLACE_OPTION ) ) {
+ if ( qtName == null ) {
+ throw new IllegalArgumentException( "The -" + QT_NAME_OPTION + " option must be set in order to replace an existing set of vectors." );
+ }
+ replaceQt = true;
+ }
+ preferredQt = commandLine.hasOption( PREFERRED_QT_OPTION );
+ cellTypeAssignmentFile = commandLine.getParsedOptionValue( CELL_TYPE_ASSIGNMENT_FILE_OPTION );
+ cellTypeAssignmentName = commandLine.getOptionValue( CELL_TYPE_ASSIGNMENT_NAME_OPTION );
+ cellTypeAssignmentProtocolName = commandLine.getOptionValue( CELL_TYPE_ASSIGNMENT_PROTOCOL_NAME_OPTION );
+ otherCellLevelCharacteristicsFile = commandLine.getParsedOptionValue( OTHER_CELL_LEVEL_CHARACTERISTICS_FILE );
+ preferredCellTypeAssignment = commandLine.hasOption( PREFERRED_CELL_TYPE_ASSIGNMENT );
+ if ( dataType == SingleCellDataType.ANNDATA ) {
+ annDataSampleFactorName = commandLine.getOptionValue( ANNDATA_SAMPLE_FACTOR_NAME_OPTION );
+ annDataCellTypeFactorName = commandLine.getOptionValue( ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION );
+ annDataUnknownCellTypeIndicator = commandLine.getOptionValue( ANNDATA_UNKNOWN_CELL_TYPE_INDICATOR_OPTION );
+ if ( cellTypeAssignmentFile != null && annDataCellTypeFactorName != null ) {
+ throw new IllegalArgumentException( String.format( "The -%s option would override the value of -%s.",
+ CELL_TYPE_ASSIGNMENT_FILE_OPTION, ANNDATA_CELL_TYPE_FACTOR_NAME_OPTION ) );
+ }
+ } else {
+ for ( Option o : commandLine.getOptions() ) {
+ if ( o.getOpt().startsWith( ANNDATA_OPTION_PREFIX ) ) {
+ throw new IllegalArgumentException( String.format( "Options starting with -%s require -%s to be set to %s.",
+ ANNDATA_OPTION_PREFIX, DATA_TYPE_OPTION, SingleCellDataType.ANNDATA ) );
+ }
+ }
+ }
+ }
+
+ @Override
+ protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ if ( dataPath != null || qtName != null || cellTypeAssignmentFile != null || otherCellLevelCharacteristicsFile != null ) {
+ throw new IllegalArgumentException( "Cannot specify a data path, quantitation type name, cell type assignment file or cell-level characteristics file when processing more than one experiment." );
+ }
+ super.processBioAssaySets( expressionExperiments );
+ }
+
+ @Override
+ protected void processExpressionExperiment( ExpressionExperiment ee ) {
+ SingleCellDataLoaderConfig config = getConfigForDataType( dataType );
+ switch ( mode ) {
+ case LOAD_CELL_TYPE_ASSIGNMENTS:
+ Collection<CellTypeAssignment> cta;
+ if ( dataType != null ) {
+ cta = singleCellDataLoaderService.loadCellTypeAssignments( ee, dataType, config );
+ } else {
+ cta = singleCellDataLoaderService.loadCellTypeAssignments( ee, config );
+ }
+ addSuccessObject( ee, "Loaded cell type assignments " + cta );
+ break;
+ case LOAD_CELL_LEVEL_CHARACTERISTICS:
+ Collection<CellLevelCharacteristics> clc;
+ if ( dataType != null ) {
+ clc = singleCellDataLoaderService.loadOtherCellLevelCharacteristics( ee, dataType, config );
+ } else {
+ clc = singleCellDataLoaderService.loadOtherCellLevelCharacteristics( ee, config );
+ }
+ addSuccessObject( ee, "Loaded cell-level characteristics " + clc );
+ break;
+ case LOAD_EVERYTHING:
+ QuantitationType qt;
+ if ( dataType != null ) {
+ qt = singleCellDataLoaderService.load( ee, getPlatform(), dataType, config );
+ } else {
+ qt = singleCellDataLoaderService.load( ee, getPlatform(), config );
+ }
+ if ( qt.getIsSingleCellPreferred() ) {
+ log.info( "Generating MEX data files for preferred QT: " + qt + "..." );
+ try ( ExpressionDataFileService.LockedPath lockedPath = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, true, 500, true ) ) {
+ log.info( "Generated MEX data file for " + qt + " at " + lockedPath.getPath() + "." );
+ } catch ( IOException e ) {
+ throw new RuntimeException( "Failed to generate MEX data files for " + qt + ".", e );
+ }
+ } else if ( replaceQt ) {
+ // attempt to delete the MEX files if they exist since the data was replaced
+ try {
+ expressionDataFileService.deleteDataFile( ee, qt, ExpressionExperimentDataFileType.MEX );
+ } catch ( IOException e ) {
+ throw new RuntimeException( "Failed to delete MEX data files for " + qt + ".", e );
+ }
+ } else {
+ log.info( "Adding a non-preferred QT, no need to generate MEX files." );
+ }
+ addSuccessObject( ee, "Loaded vectors for " + qt );
+ break;
+ default:
+ throw new IllegalArgumentException( "Unknown operation mode " + mode );
+ }
+ }
+
+ /**
+ * Cached platform object.
+ */
+ @Nullable
+ private ArrayDesign platform;
+
+ private ArrayDesign getPlatform() {
+ if ( platformName == null ) {
+ throw new IllegalStateException( "A platform name must be set." );
+ }
+ if ( platform == null ) {
+ platform = entityLocator.locateArrayDesign( platformName );
+ }
+ return platform;
+ }
+
+ private SingleCellDataLoaderConfig getConfigForDataType( @Nullable SingleCellDataType dataType ) {
+ SingleCellDataLoaderConfig.SingleCellDataLoaderConfigBuilder<?, ?> configBuilder;
+ if ( dataType == SingleCellDataType.ANNDATA ) {
+ configBuilder = AnnDataSingleCellDataLoaderConfig.builder()
+ .sampleFactorName( annDataSampleFactorName )
+ .cellTypeFactorName( annDataCellTypeFactorName )
+ .unknownCellTypeIndicator( annDataUnknownCellTypeIndicator );
+ } else {
+ configBuilder = SingleCellDataLoaderConfig.builder();
+ }
+ if ( dataPath != null ) {
+ configBuilder.dataPath( dataPath );
+ }
+ if ( qtName != null ) {
+ configBuilder
+ .quantitationTypeName( qtName )
+ .replaceExistingQuantitationType( replaceQt );
+ }
+ configBuilder.markQuantitationTypeAsPreferred( preferredQt );
+ if ( cellTypeAssignmentFile != null ) {
+ configBuilder
+ .cellTypeAssignmentPath( cellTypeAssignmentFile )
+ .markSingleCellTypeAssignmentAsPreferred( preferredCellTypeAssignment );
+ if ( cellTypeAssignmentName != null ) {
+ configBuilder
+ .cellTypeAssignmentName( cellTypeAssignmentName );
+ }
+ if ( cellTypeAssignmentProtocolName != null ) {
+ configBuilder
+ .cellTypeAssignmentProtocol( entityLocator.locateProtocol( cellTypeAssignmentProtocolName ) );
+ }
+ }
+ if ( otherCellLevelCharacteristicsFile != null ) {
+ configBuilder.otherCellLevelCharacteristicsFile( otherCellLevelCharacteristicsFile );
+ }
+ return configBuilder.build();
+ }
+}
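getConfigForDataType() above is a direct mapping from CLI options onto the loader configuration builders. A minimal sketch of the same assembly done in code, assuming the builder methods chain as the SuperBuilder-style generics suggest (the class name, factor names and file path below are hypothetical):

    import java.nio.file.Paths;
    import ubic.gemma.core.loader.expression.singleCell.AnnDataSingleCellDataLoaderConfig;
    import ubic.gemma.core.loader.expression.singleCell.SingleCellDataLoaderConfig;

    class AnnDataConfigExample {
        static SingleCellDataLoaderConfig example() {
            return AnnDataSingleCellDataLoaderConfig.builder()
                    .sampleFactorName( "sample_id" )       // hypothetical obs column holding the sample name
                    .cellTypeFactorName( "cell_type" )     // hypothetical obs column holding the cell type
                    .unknownCellTypeIndicator( "unknown" ) // hypothetical value standing for "no cell type"
                    .dataPath( Paths.get( "/data/example.h5ad" ) ) // hypothetical AnnData file
                    .markQuantitationTypeAsPreferred( true )
                    .build();
        }
    }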
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataTransformCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataTransformCli.java
new file mode 100644
index 0000000000..ed1e663a7c
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataTransformCli.java
@@ -0,0 +1,134 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang3.ArrayUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import ubic.gemma.core.loader.expression.singleCell.*;
+import ubic.gemma.core.util.AbstractCLI;
+
+import javax.annotation.Nullable;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.stream.Collectors;
+
+/**
+ * Transform various single-cell formats.
+ * @author poirigui
+ */
+@Component
+public class SingleCellDataTransformCli extends AbstractCLI {
+
+ private static final String PYTHON_OPTION = "python";
+
+ @Value("${python.exe}")
+ private String pythonExecutable;
+
+ private SingleCellInputOutputFileTransformation transformation;
+
+ public SingleCellDataTransformCli() {
+ setAllowPositionalArguments();
+ }
+
+ @Override
+ protected void buildOptions( Options options ) {
+ options.addOption( PYTHON_OPTION, true, "Override the Python executable to use (defaults to " + pythonExecutable + ")" );
+ }
+
+ @Override
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
+ if ( commandLine.hasOption( PYTHON_OPTION ) ) {
+ pythonExecutable = commandLine.getOptionValue( PYTHON_OPTION );
+ }
+ LinkedList<String> positionalArguments = new LinkedList<>( commandLine.getArgList() );
+ if ( positionalArguments.isEmpty() ) {
+ throw new ParseException( "No operation specified. Possible values are: transpose, pack, sortBySample, sample." );
+ }
+ String operation = positionalArguments.removeFirst();
+ if ( positionalArguments.size() < 2 ) {
+ throw usageException( operation );
+ }
+ Path inputFile, outputFile;
+ inputFile = Paths.get( positionalArguments.removeFirst() );
+ outputFile = Paths.get( positionalArguments.removeFirst() );
+ switch ( operation ) {
+ case "transpose":
+ transformation = new SingleCellDataTranspose();
+ break;
+ case "pack":
+ transformation = new SingleCellDataPack();
+ break;
+ case "sortBySample":
+ if ( positionalArguments.size() != 1 ) {
+ throw usageException( operation, "sampleColumnName" );
+ }
+ transformation = new SingleCellDataSortBySample();
+ ( ( SingleCellDataSortBySample ) transformation )
+ .setSampleColumnName( positionalArguments.removeFirst() );
+ break;
+ case "sample":
+ if ( positionalArguments.size() != 2 ) {
+ throw usageException( operation, "numberOfCells", "numberOfGenes" );
+ }
+ transformation = new SingleCellDataSample();
+ ( ( SingleCellDataSample ) transformation )
+ .setNumberOfCells( Integer.parseInt( positionalArguments.removeFirst() ) );
+ ( ( SingleCellDataSample ) transformation )
+ .setNumberOfGenes( Integer.parseInt( positionalArguments.removeFirst() ) );
+ break;
+ default:
+ throw new ParseException( "Unknown operation: " + operation + ". Possible values are: transpose, pack, sortBySample, sample." );
+ }
+ if ( transformation instanceof AbstractPythonScriptBasedAnnDataTransformation ) {
+ ( ( AbstractPythonScriptBasedAnnDataTransformation ) transformation )
+ .setPythonExecutable( pythonExecutable );
+ }
+ transformation.setInputFile( inputFile );
+ transformation.setInputDataType( SingleCellDataType.ANNDATA );
+ transformation.setOutputFile( outputFile );
+ transformation.setOutputDataType( SingleCellDataType.ANNDATA );
+ }
+
+ @Override
+ protected void doWork() throws Exception {
+ transformation.perform();
+ }
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "transformSingleCellData";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Transform single-cell data in various ways";
+ }
+
+ @Override
+ public CommandGroup getCommandGroup() {
+ return CommandGroup.EXPERIMENT;
+ }
+
+ @Nullable
+ private String operation;
+ private String[] args = { "inputFile", "outputFile" };
+
+ @Override
+ protected String getUsage() {
+ return String.format( "gemma-cli [options] %s [commandOptions] %s %s",
+ getCommandName(),
+ operation != null ? operation : "",
+ Arrays.stream( args ).map( a -> "<" + a + ">" ).collect( Collectors.joining( " " ) ) );
+ }
+
+ private ParseException usageException( String op, String... args ) {
+ this.operation = op;
+ this.args = ArrayUtils.addAll( this.args, args );
+ return new ParseException( "" );
+ }
+}
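The CLI above only wires an input file, an output file and a few parameters onto a transformation object before calling perform(). A sketch of the same wiring for the transpose case, done programmatically (the class name, file paths and interpreter path below are hypothetical; the instanceof guard mirrors the one above and only applies if the transformation is Python-backed):

    import java.nio.file.Paths;
    import ubic.gemma.core.loader.expression.singleCell.AbstractPythonScriptBasedAnnDataTransformation;
    import ubic.gemma.core.loader.expression.singleCell.SingleCellDataTranspose;
    import ubic.gemma.core.loader.expression.singleCell.SingleCellDataType;
    import ubic.gemma.core.loader.expression.singleCell.SingleCellInputOutputFileTransformation;

    class TransposeExample {
        static void run() throws Exception {
            SingleCellInputOutputFileTransformation transformation = new SingleCellDataTranspose();
            if ( transformation instanceof AbstractPythonScriptBasedAnnDataTransformation ) {
                ( ( AbstractPythonScriptBasedAnnDataTransformation ) transformation )
                        .setPythonExecutable( "/usr/bin/python3" ); // hypothetical interpreter path
            }
            transformation.setInputFile( Paths.get( "in.h5ad" ) );   // hypothetical input
            transformation.setInputDataType( SingleCellDataType.ANNDATA );
            transformation.setOutputFile( Paths.get( "out.h5ad" ) ); // hypothetical output
            transformation.setOutputDataType( SingleCellDataType.ANNDATA );
            transformation.perform();
        }
    }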
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataWriterCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataWriterCli.java
new file mode 100644
index 0000000000..93d97093e6
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SingleCellDataWriterCli.java
@@ -0,0 +1,178 @@
+package ubic.gemma.core.apps;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import ubic.gemma.core.analysis.service.ExpressionDataFileService;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.common.quantitationtype.ScaleType;
+import ubic.gemma.model.expression.bioAssayData.SingleCellExpressionDataVector;
+import ubic.gemma.model.expression.experiment.BioAssaySet;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPOutputStream;
+
+@SuppressWarnings("unused")
+public class SingleCellDataWriterCli extends ExpressionExperimentVectorsManipulatingCli<SingleCellExpressionDataVector> {
+
+ public SingleCellDataWriterCli() {
+ super( SingleCellExpressionDataVector.class );
+ setUsePreferredQuantitationType();
+ }
+
+ enum MatrixFormat {
+ TABULAR,
+ MEX
+ }
+
+ @Autowired
+ private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ @Autowired
+ private QuantitationTypeService quantitationTypeService;
+
+ @Autowired
+ private ExpressionDataFileService expressionDataFileService;
+
+ private MatrixFormat format;
+ @Nullable
+ private ScaleType scaleType;
+ private boolean useEnsemblIds;
+ private boolean useStreaming;
+ private int fetchSize;
+ private boolean standardLocation;
+ @Nullable
+ private Path outputFile;
+
+ @Nullable
+ @Override
+ public String getCommandName() {
+ return "getSingleCellDataMatrix";
+ }
+
+ @Nullable
+ @Override
+ public String getShortDesc() {
+ return "Write single-cell data matrix to a file; gene information is included if available.";
+ }
+
+ @Override
+ protected void buildExperimentVectorsOptions( Options options ) {
+ options.addOption( "format", "format", true, "Format to write the matrix for (possible values: tabular, MEX, defaults to tabular)" );
+ options.addOption( "scaleType", "scale-type", true, "Scale type to use when generating data to disk (possible values: " + Arrays.stream( ScaleType.values() ).map( Enum::name ).collect( Collectors.joining( ", " ) ) + ")." );
+ options.addOption( "useEnsemblIds", "use-ensembl-ids", false, "Use Ensembl IDs instead of official gene symbols (only for MEX output)" );
+ options.addOption( "noStreaming", "no-streaming", false, "Use in-memory storage instead streaming for retrieving and writing vectors (defaults to false)" );
+ options.addOption( Option.builder( "fetchSize" ).longOpt( "fetch-size" ).hasArg( true ).type( Integer.class ).desc( "Fetch size to use when retrieving vectors, incompatible with -noStreaming/--no-streaming." ).build() );
+ options.addOption( "standardLocation", "standard-location", false, "Write the file to the standard location under, this is incompatible with -scaleType/--scale-type, -useEnsemblIds/--use-ensembl-ids and -o/--output." );
+ options.addOption( Option.builder( "o" ).longOpt( "output" ).hasArg( true ).type( Path.class ).desc( "Destination for the matrix file, or a directory if -format is set to MEX." ).build() );
+ addForceOption( options );
+ }
+
+ @Override
+ protected void processExperimentVectorsOptions( CommandLine commandLine ) throws ParseException {
+ this.useEnsemblIds = commandLine.hasOption( "useEnsemblIds" );
+ if ( commandLine.hasOption( "noStreaming" ) && commandLine.hasOption( "fetchSize" ) ) {
+ throw new ParseException( "Cannot use -noStreaming/--no-streaming and -fetchSize/--fetch-size at the same time." );
+ }
+ this.useStreaming = !commandLine.hasOption( "noStreaming" );
+ this.fetchSize = commandLine.getParsedOptionValue( "fetchSize", 30 );
+ if ( commandLine.hasOption( "format" ) ) {
+ this.format = MatrixFormat.valueOf( commandLine.getOptionValue( "format" ).toUpperCase() );
+ } else {
+ this.format = MatrixFormat.TABULAR;
+ }
+ if ( commandLine.hasOption( "scaleType" ) ) {
+ this.scaleType = ScaleType.valueOf( commandLine.getOptionValue( "scaleType" ).toUpperCase() );
+ }
+ this.standardLocation = commandLine.hasOption( "standardLocation" );
+ this.outputFile = commandLine.getParsedOptionValue( "o" );
+ if ( standardLocation && scaleType != null ) {
+ throw new ParseException( "Cannot use -standardLocation/--standard-location and -scaleType/--scale-type at the same time." );
+ }
+ if ( standardLocation && outputFile != null ) {
+ throw new ParseException( "Cannot use -standardLocation/--standard-location and -o/--output at the same time." );
+ }
+ if ( standardLocation && useEnsemblIds ) {
+ throw new ParseException( "Data cannot be written to the standard location using Ensembl IDs." );
+ }
+ }
+
+ @Override
+ protected void processBioAssaySets( Collection<BioAssaySet> expressionExperiments ) {
+ if ( !standardLocation ) {
+ throw new IllegalStateException( "Can only process multiple experiments with -standardLocation/--standard-location option." );
+ }
+ super.processBioAssaySets( expressionExperiments );
+ }
+
+ @Override
+ protected void processExpressionExperimentVectors( ExpressionExperiment ee, QuantitationType qt ) {
+ try {
+ switch ( format ) {
+ case TABULAR:
+ if ( standardLocation ) {
+ try ( ExpressionDataFileService.LockedPath path = expressionDataFileService.writeOrLocateTabularSingleCellExpressionData( ee, qt, useStreaming, fetchSize, isForce() ) ) {
+ addSuccessObject( ee, "Written vectors for " + qt + " to " + path.getPath() + "." );
+ }
+ } else {
+ try ( Writer writer = new OutputStreamWriter( openOutputFile( isForce() ), StandardCharsets.UTF_8 ) ) {
+ int written = expressionDataFileService.writeTabularSingleCellExpressionData( ee, qt, scaleType, useStreaming, fetchSize, writer );
+ addSuccessObject( ee, "Wrote " + written + " vectors for " + qt + "." );
+ }
+ }
+ break;
+ case MEX:
+ if ( standardLocation ) {
+ try ( ExpressionDataFileService.LockedPath path = expressionDataFileService.writeOrLocateMexSingleCellExpressionData( ee, qt, useStreaming, fetchSize, isForce() ) ) {
+ addSuccessObject( ee, "Successfully written vectors for " + qt + " to " + path.getPath() + "." );
+ }
+ } else if ( outputFile == null || outputFile.toString().endsWith( ".tar" ) || outputFile.toString().endsWith( ".tar.gz" ) ) {
+ log.warn( "Writing MEX to a stream requires a lot of memory and cannot be streamed, you can cancel this any anytime with Ctrl-C." );
+ try ( OutputStream stream = openOutputFile( isForce() ) ) {
+ int written = expressionDataFileService.writeMexSingleCellExpressionData( ee, qt, scaleType, useEnsemblIds, stream );
+ addSuccessObject( ee, "Wrote " + written + " vectors for " + qt + ( useEnsemblIds ? " using Ensembl IDs " : "" ) + "." );
+ }
+ } else {
+ if ( !isForce() && Files.exists( outputFile ) ) {
+ throw new RuntimeException( outputFile + " already exists, use -force/--force to override." );
+ }
+ int written = expressionDataFileService.writeMexSingleCellExpressionData( ee, qt, scaleType, useEnsemblIds, useStreaming, fetchSize, isForce(), outputFile );
+ addSuccessObject( ee, "Wrote " + written + " vectors for " + qt + ( useEnsemblIds ? " using Ensembl IDs " : "" ) + "." );
+ }
+ break;
+ }
+ } catch ( IOException e ) {
+ addErrorObject( ee, e );
+ }
+ }
+
+ private OutputStream openOutputFile( boolean overwriteExisting ) throws IOException {
+ if ( outputFile != null ) {
+ if ( !overwriteExisting && Files.exists( outputFile ) ) {
+ throw new RuntimeException( outputFile + " already exists, use -force/--force to override." );
+ }
+ if ( outputFile.toString().endsWith( ".gz" ) ) {
+ return new GZIPOutputStream( Files.newOutputStream( outputFile ) );
+ } else {
+ return Files.newOutputStream( outputFile );
+ }
+ } else {
+ return System.out;
+ }
+ }
+}
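openOutputFile() above combines overwrite protection, a stdout fallback and on-the-fly gzip compression keyed on the file extension. A standalone sketch of just the gzip-aware part (the class name is hypothetical and the -force handling is omitted):

    import java.io.IOException;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.zip.GZIPOutputStream;

    class OutputHelper {
        /** Open a destination for writing, compressing on the fly when the name ends with ".gz". */
        static OutputStream open( Path file ) throws IOException {
            OutputStream out = Files.newOutputStream( file );
            return file.toString().endsWith( ".gz" ) ? new GZIPOutputStream( out ) : out;
        }
    }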
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/SplitExperimentCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/SplitExperimentCli.java
index 60f3eb1a48..63c5c00360 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/SplitExperimentCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/SplitExperimentCli.java
@@ -54,6 +54,7 @@ public class SplitExperimentCli extends ExpressionExperimentManipulatingCLI {
private String factorName;
public SplitExperimentCli() {
+ super();
setSingleExperimentMode();
}
@@ -68,16 +69,14 @@ public String getShortDesc() {
}
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
options.addOption( Option.builder( FACTOR_OPTION ).hasArg()
.desc( "ID numbers, categories or names of the factor to use, with spaces replaced by underscores (must not be 'batch')" )
.build() );
}
@Override
- protected void processOptions( CommandLine commandLine ) throws ParseException {
- super.processOptions( commandLine );
+ protected void processExperimentOptions( CommandLine commandLine ) throws ParseException {
if ( !commandLine.hasOption( FACTOR_OPTION ) ) {
throw new IllegalArgumentException( "Please specify the factor" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/TaxonLoaderCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/TaxonLoaderCli.java
index 0845ad105d..d456b65e12 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/TaxonLoaderCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/TaxonLoaderCli.java
@@ -20,12 +20,13 @@
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
+import org.springframework.beans.factory.annotation.Autowired;
import ubic.gemma.core.loader.genome.taxon.TaxonFetcher;
import ubic.gemma.core.loader.genome.taxon.TaxonLoader;
import ubic.gemma.core.util.AbstractAuthenticatedCLI;
-import ubic.gemma.model.common.description.LocalFile;
import ubic.gemma.persistence.persister.PersisterHelper;
+import java.io.File;
import java.util.Collection;
/**
@@ -33,6 +34,9 @@
*/
public class TaxonLoaderCli extends AbstractAuthenticatedCLI {
+ @Autowired
+ private PersisterHelper persisterHelper;
+
@Override
public String getCommandName() {
return "loadTaxa";
@@ -59,12 +63,12 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
TaxonFetcher tf = new TaxonFetcher();
- Collection<LocalFile> files = tf.fetch();
- LocalFile names = null;
- for ( LocalFile file : files ) {
- if ( file.getLocalURL().toString().endsWith( "names.dmp" ) ) {
+ Collection<File> files = tf.fetch();
+ File names = null;
+ for ( File file : files ) {
+ if ( file.toString().endsWith( "names.dmp" ) ) {
names = file;
}
}
@@ -74,8 +78,8 @@ protected void doWork() throws Exception {
}
TaxonLoader tl = new TaxonLoader();
- tl.setPersisterHelper( this.getBean( PersisterHelper.class ) );
- int numLoaded = tl.load( names.asFile() );
+ tl.setPersisterHelper( persisterHelper );
+ int numLoaded = tl.load( names );
log.info( "Loaded " + numLoaded + " taxa" );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java
index 7508447639..70afd61272 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java
@@ -11,6 +11,8 @@
import javax.annotation.Nullable;
import java.util.Date;
+import static ubic.gemma.core.util.OptionsUtils.addDateOption;
+
public class UpdateEE2CCli extends AbstractAuthenticatedCLI {
private static final String
@@ -57,7 +59,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( sinceLastUpdate, truncate );
if ( updated > 0 ) {
try {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEe2AdCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEe2AdCli.java
index 6ad5b0d0a7..5e63af30f3 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEe2AdCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEe2AdCli.java
@@ -10,6 +10,8 @@
import javax.annotation.Nullable;
import java.util.Date;
+import static ubic.gemma.core.util.OptionsUtils.addDateOption;
+
public class UpdateEe2AdCli extends AbstractAuthenticatedCLI {
private static final String SINCE_OPTION = "s";
@@ -51,7 +53,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
int updated = tableMaintenanceUtil.updateExpressionExperiment2ArrayDesignEntries( sinceLastUpdate );
if ( updated > 0 ) {
try {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateGene2CsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateGene2CsCli.java
index 4b4c994710..4d91622c6e 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateGene2CsCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateGene2CsCli.java
@@ -46,7 +46,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
int updated = tableMaintenanceUtil.updateGene2CsEntries( force );
if ( updated > 0 ) {
try {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdatePubMedCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdatePubMedCli.java
index 2b04713905..df00c7a692 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdatePubMedCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdatePubMedCli.java
@@ -71,7 +71,7 @@ protected void buildOptions( Options options ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
Map toFetch = new HashMap<>();
Collection<ExpressionExperiment> ees = eeserv.getExperimentsLackingPublications();
for ( ExpressionExperiment ee : ees ) {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/VectorMergingCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/VectorMergingCli.java
index 05bf04400e..35169813d8 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/apps/VectorMergingCli.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/VectorMergingCli.java
@@ -38,8 +38,7 @@ public class VectorMergingCli extends ExpressionExperimentManipulatingCLI {
private PreprocessorService preprocessorService;
@Override
- protected void buildOptions( Options options ) {
- super.buildOptions( options );
+ protected void buildExperimentOptions( Options options ) {
super.addForceOption( options );
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/package-info.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/package-info.java
new file mode 100644
index 0000000000..810418a546
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/package-info.java
@@ -0,0 +1,7 @@
+/**
+ *
+ */
+@ParametersAreNonnullByDefault
+package ubic.gemma.core.apps;
+
+import javax.annotation.ParametersAreNonnullByDefault;
\ No newline at end of file
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/loader/entrez/pubmed/PubMedSearcher.java b/gemma-cli/src/main/java/ubic/gemma/core/loader/entrez/pubmed/PubMedSearcher.java
index ee7da2ce89..3564e8ac64 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/loader/entrez/pubmed/PubMedSearcher.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/loader/entrez/pubmed/PubMedSearcher.java
@@ -45,7 +45,7 @@ public class PubMedSearcher extends AbstractAuthenticatedCLI {
private boolean persist = false;
public PubMedSearcher() {
- setAllowPositionalArguments( true );
+ setAllowPositionalArguments();
}
@Override
@@ -70,7 +70,7 @@ protected void processOptions( CommandLine commandLine ) {
}
@Override
- protected void doWork() throws Exception {
+ protected void doAuthenticatedWork() throws Exception {
PubMedSearch pubMedSearcher = new PubMedSearch( ncbiApiKey );
Collection refs = pubMedSearcher
.searchAndRetrieveByHTTP( this.args );
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAuthenticatedCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAuthenticatedCLI.java
index 8714854c90..8890fc72db 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAuthenticatedCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAuthenticatedCLI.java
@@ -28,7 +28,6 @@
import org.springframework.security.core.Authentication;
import org.springframework.security.core.context.SecurityContextHolder;
-import javax.annotation.Nullable;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -90,22 +89,24 @@ public void afterPropertiesSet() throws Exception {
}
}
- @Override
- protected final void beforeWork() {
- authenticate();
+ /**
+ * Indicate that the command requires authentication.
+ */
+ protected void setRequireLogin() {
+ this.requireLogin = true;
}
@Override
- protected final void afterWork( @Nullable Exception e ) {
- SecurityContextHolder.clearContext();
+ protected final void doWork() throws Exception {
+ try {
+ authenticate();
+ doAuthenticatedWork();
+ } finally {
+ SecurityContextHolder.clearContext();
+ }
}
- /**
- * Indicate if the command requires authentication.
- */
- public void setRequireLogin( boolean requireLogin ) {
- this.requireLogin = requireLogin;
- }
+ protected abstract void doAuthenticatedWork() throws Exception;
/**
* check username and password.
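The change above turns doWork() into a final template method: it authenticates, delegates to the new doAuthenticatedWork() hook, and always clears the security context afterwards. A hypothetical subclass illustrating that contract (all names below are made up; the empty option hooks are only there to keep the sketch self-contained):

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.Options;
    import ubic.gemma.core.util.AbstractAuthenticatedCLI;

    public class ExampleAuthenticatedCli extends AbstractAuthenticatedCLI {

        @Override
        public String getCommandName() {
            return "exampleAuthenticated"; // hypothetical command name
        }

        @Override
        public String getShortDesc() {
            return "Illustrates the doAuthenticatedWork() template.";
        }

        @Override
        protected void buildOptions( Options options ) {
            // no extra options for this sketch
        }

        @Override
        protected void processOptions( CommandLine commandLine ) {
            // nothing to process
        }

        @Override
        protected void doAuthenticatedWork() {
            // Runs with the security context populated by authenticate(); the context is
            // cleared in the finally block of the final doWork() shown above.
        }
    }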
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAutoSeekingCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAutoSeekingCLI.java
new file mode 100644
index 0000000000..2d0e0839b2
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractAutoSeekingCLI.java
@@ -0,0 +1,216 @@
+package ubic.gemma.core.util;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.util.Assert;
+import ubic.gemma.model.common.auditAndSecurity.AuditEvent;
+import ubic.gemma.model.common.auditAndSecurity.Auditable;
+import ubic.gemma.model.common.auditAndSecurity.curation.Curatable;
+import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType;
+import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService;
+
+import javax.annotation.Nullable;
+import javax.annotation.OverridingMethodsMustInvokeSuper;
+import java.util.Date;
+import java.util.List;
+
+import static java.util.Objects.requireNonNull;
+import static ubic.gemma.core.util.OptionsUtils.addDateOption;
+
+/**
+ * Provide auto-seeking capabilities to a CLI.
+ *
+ * This allows CLIs to process entities that lack a certain {@link AuditEvent} or that haven't been updated since a
+ * certain date.
+ * @param <T> the type of entity being sought
+ */
+public abstract class AbstractAutoSeekingCLI<T extends Auditable> extends AbstractAuthenticatedCLI {
+
+ private static final String AUTO_OPTION_NAME = "auto";
+ private static final String LIMITING_DATE_OPTION = "mdate";
+ protected static final String FORCE_OPTION = "force";
+
+ @Autowired
+ private AuditEventService auditEventService;
+
+ private final Class<T> entityClass;
+
+ /**
+ * Automatically identify which entities to run the tool on. To enable call addAutoOption.
+ */
+ private boolean autoSeek;
+
+ /**
+ * The event type to look for the lack of, when using auto-seek.
+ */
+ @Nullable
+ private Class<? extends AuditEventType> autoSeekEventType;
+
+ /**
+ * Date used to identify which entities to run the tool on (e.g., those which were run less recently than mDate). To
+ * enable call addLimitingDateOption.
+ */
+ @Nullable
+ private Date limitingDate;
+
+ /**
+ * Force entities to be run, regardless of the other auto-seeking options.
+ */
+ private boolean force = false;
+
+ protected AbstractAutoSeekingCLI( Class<T> entityClass ) {
+ this.entityClass = entityClass;
+ }
+
+ /**
+ * Add the {@code -auto} option.
+ *
+ * The auto option value can be retrieved with {@link #isAutoSeek()}.
+ */
+ protected void addAutoOption( Options options ) {
+ Assert.state( !options.hasOption( AUTO_OPTION_NAME ), "The -" + AUTO_OPTION_NAME + " option was already added." );
+ options.addOption( Option.builder( AUTO_OPTION_NAME )
+ .desc( "Attempt to process entities that need processing based on workflow criteria." )
+ .build() );
+ }
+
+ /**
+ * Add the {@code -auto} option for a specific {@link AuditEventType}.
+ *
+ * The event type can be retrieved with {@link #getAutoSeekEventType()}.
+ */
+ protected void addAutoOption( Options options, Class<? extends AuditEventType> autoSeekEventType ) {
+ addAutoOption( options );
+ this.autoSeekEventType = autoSeekEventType;
+ }
+
+ /**
+ * Add the {@code -mdate} option.
+ *
+ * The limiting date can be retrieved with {@link #getLimitingDate()}.
+ */
+ protected void addLimitingDateOption( Options options ) {
+ Assert.state( !options.hasOption( LIMITING_DATE_OPTION ), "The -" + LIMITING_DATE_OPTION + " option was already added." );
+ addDateOption( LIMITING_DATE_OPTION, null, "Constrain to run only on entities with analyses older than the given date. "
+ + "For example, to run only on entities that have not been analyzed in the last 10 days, use '-10d'. "
+ + "If there is no record of when the analysis was last run, it will be run.", options );
+ }
+
+ protected void addForceOption( Options options ) {
+ Assert.state( !force, "Force mode is enabled for this CLI, you cannot add the -force/--force option." );
+ Assert.state( !options.hasOption( FORCE_OPTION ), "The -" + FORCE_OPTION + " option was already added." );
+ String desc = "Ignore other reasons for skipping experiments (e.g., trouble) and overwrite existing data (see documentation for this tool to see exact behavior if not clear)";
+ options.addOption( FORCE_OPTION, "force", false, desc );
+ }
+
+ /**
+ * Indicate if auto-seek is enabled.
+ */
+ protected boolean isAutoSeek() {
+ return autoSeek;
+ }
+
+ /**
+ * Indicate the event to be used for auto-seeking.
+ */
+ protected Class<? extends AuditEventType> getAutoSeekEventType() {
+ return requireNonNull( autoSeekEventType, "This CLI was not configured with a specific event type for auto-seek." );
+ }
+
+ /**
+ * Obtain the limiting date (i.e. starting date at which entities should be processed).
+ */
+ @Nullable
+ protected Date getLimitingDate() {
+ if ( limitingDate != null ) {
+ log.info( "Analyses will be run only if last was older than " + limitingDate );
+ }
+ return limitingDate;
+ }
+
+ /**
+ * Check if forcing is enabled.
+ */
+ protected boolean isForce() {
+ return force;
+ }
+
+ /**
+ * Enable the forcing mode.
+ */
+ protected void setForce() {
+ Assert.state( !this.force, "Force mode is already enabled." );
+ this.force = true;
+ }
+
+ @Override
+ @OverridingMethodsMustInvokeSuper
+ protected void processOptions( CommandLine commandLine ) throws ParseException {
+ if ( commandLine.hasOption( LIMITING_DATE_OPTION ) && commandLine.hasOption( AUTO_OPTION_NAME ) ) {
+ throw new IllegalArgumentException( String.format( "Please only select one of -%s or -%s", LIMITING_DATE_OPTION, AUTO_OPTION_NAME ) );
+ }
+
+ if ( commandLine.hasOption( LIMITING_DATE_OPTION ) ) {
+ this.limitingDate = commandLine.getParsedOptionValue( LIMITING_DATE_OPTION );
+ }
+
+ this.autoSeek = commandLine.hasOption( AUTO_OPTION_NAME );
+
+ if ( commandLine.hasOption( FORCE_OPTION ) ) {
+ this.force = true;
+ }
+ }
+
+ /**
+ * Check if the given auditable can be skipped.
+ * @param auditable auditable
+ * @param eventClass can be null
+ * @return boolean
+ */
+ protected boolean noNeedToRun( T auditable, @Nullable Class<? extends AuditEventType> eventClass ) {
+ if ( force ) {
+ return false;
+ }
+
+ Date skipIfLastRunLaterThan = this.getLimitingDate();
+ List<AuditEvent> events = this.auditEventService.getEvents( auditable );
+
+ // figure out if we need to run it by date; or if there is no event of the given class; "Fail" type events don't
+ // count.
+ for ( int j = events.size() - 1; j >= 0; j-- ) {
+ AuditEvent event = events.get( j );
+ if ( event == null ) {
+ continue; // legacy of ordered-list which could end up with gaps; should not be needed any more
+ }
+ AuditEventType eventType = event.getEventType();
+ if ( eventClass != null && eventClass.isInstance( eventType ) && !eventType.getClass().getSimpleName().startsWith( "Fail" ) ) {
+ if ( skipIfLastRunLaterThan != null ) {
+ if ( event.getDate().after( skipIfLastRunLaterThan ) ) {
+ log.info( auditable + ": " + " run more recently than " + skipIfLastRunLaterThan );
+ addErrorObject( auditable, "Run more recently than " + skipIfLastRunLaterThan + ", use - " + FORCE_OPTION + "to process anyway." );
+ return true;
+ }
+ } else {
+ // it has been run already at some point
+ return true;
+ }
+ }
+ }
+
+ if ( auditable instanceof Curatable ) {
+ Curatable curatable = ( Curatable ) auditable;
+ if ( curatable.getCurationDetails().getTroubled() ) {
+ /*
+ * Always skip if the object is curatable and troubled
+ */
+ addErrorObject( auditable, "Has an active troubled flag, use -" + FORCE_OPTION + " to process anyway." );
+ return true;
+ }
+ }
+
+ return false;
+ }
+}
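A hypothetical subclass showing how the new auto-seeking helpers are meant to be combined, assuming ExpressionExperiment satisfies the Auditable bound as its curation handling elsewhere in this diff suggests (the class and command names below are made up):

    import org.apache.commons.cli.CommandLine;
    import org.apache.commons.cli.Options;
    import org.apache.commons.cli.ParseException;
    import ubic.gemma.core.util.AbstractAutoSeekingCLI;
    import ubic.gemma.model.expression.experiment.ExpressionExperiment;

    public class ExampleSeekingCli extends AbstractAutoSeekingCLI<ExpressionExperiment> {

        public ExampleSeekingCli() {
            super( ExpressionExperiment.class );
        }

        @Override
        public String getCommandName() {
            return "exampleSeek"; // hypothetical command name
        }

        @Override
        public String getShortDesc() {
            return "Illustrates the -auto, -mdate and -force helpers.";
        }

        @Override
        protected void buildOptions( Options options ) {
            addAutoOption( options );         // adds -auto
            addLimitingDateOption( options ); // adds -mdate
            addForceOption( options );        // adds -force
        }

        @Override
        protected void processOptions( CommandLine commandLine ) throws ParseException {
            super.processOptions( commandLine ); // picks up -auto, -mdate and -force
        }

        @Override
        protected void doAuthenticatedWork() {
            // For each candidate experiment 'ee' (lookup elided), noNeedToRun( ee, null )
            // skips troubled entities and those already processed since the limiting date.
        }
    }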
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
index 687f6df887..1c0f6e990a 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java
@@ -23,24 +23,17 @@
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.springframework.beans.factory.BeanFactory;
-import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.Assert;
-import ubic.gemma.model.common.auditAndSecurity.eventType.AuditEventType;
import javax.annotation.Nullable;
-import java.io.*;
-import java.nio.file.Files;
-import java.util.Date;
+import java.io.PrintWriter;
+import java.nio.file.Path;
import java.util.List;
-import java.util.TimeZone;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
-import static java.util.Objects.requireNonNull;
-
/**
* Basic implementation of the {@link CLI} interface.
*
@@ -75,41 +68,14 @@ public abstract class AbstractCLI implements CLI {
private static final String THREADS_OPTION = "threads";
private static final String HELP_OPTION = "h";
- private static final String AUTO_OPTION_NAME = "auto";
- private static final String LIMITING_DATE_OPTION = "mdate";
-
private static final String BATCH_FORMAT_OPTION = "batchFormat";
private static final String BATCH_OUTPUT_FILE_OPTION = "batchOutputFile";
- /**
- * When parsing dates, use this as a reference for 'now'.
- */
- private static final Date relativeTo = new Date();
-
- @Autowired
- private BeanFactory ctx;
-
/**
* Indicate if this CLI allows positional arguments.
*/
private boolean allowPositionalArguments = false;
- /* support for convenience options */
- /**
- * Automatically identify which entities to run the tool on. To enable call addAutoOption.
- */
- private boolean autoSeek;
- /**
- * The event type to look for the lack of, when using auto-seek.
- */
- @Nullable
- private Class<? extends AuditEventType> autoSeekEventType;
- /**
- * Date used to identify which entities to run the tool on (e.g., those which were run less recently than mDate). To
- * enable call addLimitingDateOption.
- */
- @Nullable
- private Date limitingDate;
/**
* Number of threads to use for batch processing.
*/
@@ -117,12 +83,12 @@ public abstract class AbstractCLI implements CLI {
/**
* Format to use to summarize batch processing.
*/
- private BatchFormat batchFormat;
+ private BatchTaskExecutorService.BatchFormat batchFormat;
/**
* Destination for batch processing summary.
*/
@Nullable
- private File batchOutputFile;
+ private Path batchOutputFile;
/**
* Indicate if we are "inside" {@link #doWork()}.
@@ -132,29 +98,6 @@ public abstract class AbstractCLI implements CLI {
@Nullable
private BatchTaskExecutorService executorService;
- /**
- * Convenience method to obtain instance of any bean by name.
- *
- * @param <T> the bean class type
- * @param clz class
- * @param name name
- * @return bean
- * @deprecated Use {@link Autowired} to specify your dependencies, this is just a wrapper around the current
- * {@link BeanFactory}.
- */
- @SuppressWarnings("SameParameterValue") // Better for general use
- @Deprecated
- protected <T> T getBean( String name, Class<T> clz ) {
- assert ctx != null : "Spring context was not initialized";
- return ctx.getBean( name, clz );
- }
-
- @Deprecated
- protected <T> T getBean( Class<T> clz ) {
- assert ctx != null : "Spring context was not initialized";
- return ctx.getBean( clz );
- }
-
@Override
public Options getOptions() {
Options options = new Options();
@@ -233,78 +176,41 @@ public int executeCommand( String... args ) {
}
} finally {
if ( executorService != null ) {
- try {
- // always summarize processing, even if an error is thrown
- summarizeBatchProcessing();
- } catch ( IOException e ) {
- log.error( "Failed to summarize batch processing.", e );
- } finally {
- executorService = null;
- }
+ // always summarize processing, even if an error is thrown
+ executorService.summarizeBatchProcessing();
+ executorService = null;
}
}
}
- private void printHelp( Options options, PrintWriter writer ) {
- HelpUtils.printHelp( writer, getCommandName(), options, allowPositionalArguments, getShortDesc(), null );
- }
-
/**
- * Add the {@code -auto} option.
+ * Describe the intended usage for the command.
*
- * The auto option value can be retrieved with {@link #isAutoSeek()}.
+ * This will be included in the 'Usage: ...' error message when the CLI is misused.
*/
- protected void addAutoOption( Options options ) {
- options.addOption( Option.builder( AUTO_OPTION_NAME )
- .desc( "Attempt to process entities that need processing based on workflow criteria." )
- .build() );
- }
-
- /**
- * Add the {@code -auto} option for a specific {@link AuditEventType}.
- *
- * The event type can be retrieved with {@link #getAutoSeekEventType()}.
- */
- protected void addAutoOption( Options options, Class<? extends AuditEventType> autoSeekEventType ) {
- addAutoOption( options );
- this.autoSeekEventType = autoSeekEventType;
+ protected String getUsage() {
+ return "gemma-cli [options] " + this.getCommandName() + " [commandOptions]" + ( allowPositionalArguments ? " [files]" : "" );
}
- /**
- * Add the {@code -mdate} option.
- *
- * The limiting date can be retrieved with {@link #getLimitingDate()}.
- */
- protected void addLimitingDateOption( Options options ) {
- addDateOption( LIMITING_DATE_OPTION, null, "Constrain to run only on entities with analyses older than the given date. "
- + "For example, to run only on entities that have not been analyzed in the last 10 days, use '-10d'. "
- + "If there is no record of when the analysis was last run, it will be run.", options );
+ private void printHelp( Options options, PrintWriter writer ) {
+ HelpUtils.printHelp( writer, getUsage(), options, getShortDesc(), null );
}
- /**
- * Add a date option with support for fuzzy dates (i.e. one month ago).
- * @see DateConverterImpl
- */
- protected void addDateOption( String name, String longOpt, String desc, Options options ) {
- options.addOption( Option.builder( name )
- .longOpt( longOpt )
- .desc( desc )
- .hasArg()
- .type( Date.class )
- .converter( new DateConverterImpl( relativeTo, TimeZone.getDefault() ) ).build() );
- }
/**
* Add the {@code -threads} option.
*
- * This is used to configure the internal batch processing thread pool which can be used with
- * {@link #getBatchTaskExecutor()}. You may also use {@link #getNumThreads()} to retrieve the number of threads to
- * use.
+ * This is used to configure the internal batch processing thread pool which can be used with {@link #getBatchTaskExecutor()}.
+ *
+ * You may also use {@link #getNumThreads()} to retrieve the number of threads to use.
*/
protected void addThreadsOption( Options options ) {
- options.addOption( Option.builder( THREADS_OPTION ).argName( "numThreads" ).hasArg()
+ Assert.state( !options.hasOption( THREADS_OPTION ), "The -" + THREADS_OPTION + " option was already added." );
+ options.addOption( Option.builder( THREADS_OPTION )
+ .longOpt( "threads" )
+ .argName( "numThreads" ).hasArg()
.desc( "Number of threads to use for batch processing." )
- .type( Number.class )
+ .type( Number.class ) // FIXME: this should be an Integer.class
.build() );
}
@@ -314,8 +220,9 @@ protected void addThreadsOption( Options options ) {
* These options allow the user to control how and where batch processing results are summarized.
*/
protected void addBatchOption( Options options ) {
+ Assert.state( !options.hasOption( BATCH_FORMAT_OPTION ), "The -" + BATCH_FORMAT_OPTION + " option was already added." );
options.addOption( BATCH_FORMAT_OPTION, true, "Format to use to the batch summary" );
- options.addOption( Option.builder( BATCH_OUTPUT_FILE_OPTION ).hasArg().type( File.class ).desc( "Output file to use for the batch summary (default is standard output)" ).build() );
+ options.addOption( Option.builder( BATCH_OUTPUT_FILE_OPTION ).hasArg().type( Path.class ).desc( "Output file to use for the batch summary (default is standard output)" ).build() );
}
/**
@@ -324,33 +231,8 @@ protected void addBatchOption( Options options ) {
*
* Those arguments can be retrieved in {@link #processOptions(CommandLine)} by using {@link CommandLine#getArgList()}.
*/
- protected void setAllowPositionalArguments( @SuppressWarnings("SameParameterValue") boolean allowPositionalArguments ) {
- this.allowPositionalArguments = allowPositionalArguments;
- }
-
- /**
- * Indicate if auto-seek is enabled.
- */
- protected boolean isAutoSeek() {
- return autoSeek;
- }
-
- /**
- * Indicate the event to be used for auto-seeking.
- */
- protected Class<? extends AuditEventType> getAutoSeekEventType() {
- return requireNonNull( autoSeekEventType, "This CLI was not configured with a specific event type for auto-seek." );
- }
-
- /**
- * Obtain the limiting date (i.e. starting date at which entities should be processed).
- */
- @Nullable
- protected Date getLimitingDate() {
- if ( limitingDate != null ) {
- log.info( "Analyses will be run only if last was older than " + limitingDate );
- }
- return limitingDate;
+ protected void setAllowPositionalArguments() {
+ this.allowPositionalArguments = true;
}
protected int getNumThreads() {
@@ -358,7 +240,6 @@ protected int getNumThreads() {
}
private void buildStandardOptions( Options options ) {
- log.debug( "Creating standard options" );
options.addOption( HELP_OPTION, "help", false, "Print this message" );
}
@@ -376,16 +257,6 @@ private void buildStandardOptions( Options options ) {
* purposes.
*/
private void processStandardOptions( CommandLine commandLine ) throws ParseException {
- if ( commandLine.hasOption( LIMITING_DATE_OPTION ) && commandLine.hasOption( AbstractCLI.AUTO_OPTION_NAME ) ) {
- throw new IllegalArgumentException( String.format( "Please only select one of -%s or -%s", LIMITING_DATE_OPTION, AUTO_OPTION_NAME ) );
- }
-
- if ( commandLine.hasOption( LIMITING_DATE_OPTION ) ) {
- this.limitingDate = commandLine.getParsedOptionValue( LIMITING_DATE_OPTION );
- }
-
- this.autoSeek = commandLine.hasOption( AbstractCLI.AUTO_OPTION_NAME );
-
if ( commandLine.hasOption( THREADS_OPTION ) ) {
this.numThreads = ( ( Number ) commandLine.getParsedOptionValue( THREADS_OPTION ) ).intValue();
if ( this.numThreads < 1 ) {
@@ -397,12 +268,12 @@ private void processStandardOptions( CommandLine commandLine ) throws ParseExcep
if ( commandLine.hasOption( BATCH_FORMAT_OPTION ) ) {
try {
- this.batchFormat = BatchFormat.valueOf( commandLine.getOptionValue( BATCH_FORMAT_OPTION ).toUpperCase() );
+ this.batchFormat = BatchTaskExecutorService.BatchFormat.valueOf( commandLine.getOptionValue( BATCH_FORMAT_OPTION ).toUpperCase() );
} catch ( IllegalArgumentException e ) {
throw new ParseException( String.format( "Unsupported batch format: %s.", commandLine.getOptionValue( BATCH_FORMAT_OPTION ) ) );
}
} else {
- this.batchFormat = commandLine.hasOption( BATCH_OUTPUT_FILE_OPTION ) ? BatchFormat.TSV : BatchFormat.TEXT;
+ this.batchFormat = commandLine.hasOption( BATCH_OUTPUT_FILE_OPTION ) ? BatchTaskExecutorService.BatchFormat.TSV : BatchTaskExecutorService.BatchFormat.TEXT;
}
this.batchOutputFile = commandLine.getParsedOptionValue( BATCH_OUTPUT_FILE_OPTION );
}
@@ -422,33 +293,18 @@ private void processStandardOptions( CommandLine commandLine ) throws ParseExcep
* Default workflow of a CLI.
*/
private void work() throws Exception {
- beforeWork();
- Exception doWorkException = null;
try {
insideDoWork = true;
- try {
- doWork();
- } catch ( Exception e2 ) {
- doWorkException = e2;
- throw doWorkException;
- }
+ doWork();
} finally {
insideDoWork = false;
- afterWork( doWorkException );
}
}
- /**
- * Override this to perform any setup before {@link #doWork()}.
- */
- protected void beforeWork() {
-
- }
-
/**
* Command line implementation.
*
- * This is called after {@link #buildOptions(Options)}, {@link #processOptions(CommandLine)} and {@link #beforeWork()},
+ * This is called after {@link #buildOptions(Options)} and {@link #processOptions(CommandLine)}.
* so the implementation can assume that all its arguments have already been initialized and any setup behaviour
* have been performed.
*
@@ -458,16 +314,6 @@ protected void beforeWork() {
*/
protected abstract void doWork() throws Exception;
- /**
- * Override this to perform any cleanup after {@link #doWork()}.
- *
- * This is always invoked regardless of the outcome of {@link #doWork()}.
- * @param exception the exception thrown by {@link #doWork()} if any, else null
- */
- protected void afterWork( @Nullable Exception exception ) {
-
- }
-
/**
* Prompt the user for a confirmation or raise an exception to abort the {@link #doWork()} method.
*/
@@ -483,6 +329,16 @@ protected void promptConfirmationOrAbort( String message ) throws Exception {
throw new WorkAbortedException( "Confirmation failed, the command cannot proceed." );
}
+ /**
+ * Exception raised when {@link #doWork()} is aborted by the user.
+ */
+ private static class WorkAbortedException extends Exception {
+
+ private WorkAbortedException( String message ) {
+ super( message );
+ }
+ }
+
/**
* Add a success object to indicate success in a batch processing.
* @param successObject object that was processed
@@ -533,19 +389,6 @@ protected void addErrorObject( @Nullable Object errorObject, Exception exception
log.error( "Error while processing " + ( errorObject != null ? errorObject : "unknown object" ), exception );
}
- /**
- * Create an {@link ExecutorService} to be used for running batch tasks.
- */
- protected ExecutorService createBatchTaskExecutorService() {
- Assert.isNull( executorService, "There is already a batch task ExecutorService." );
- ThreadFactory threadFactory = new SimpleThreadFactory( "gemma-cli-batch-thread-" );
- if ( this.numThreads > 1 ) {
- return Executors.newFixedThreadPool( this.numThreads, threadFactory );
- } else {
- return Executors.newSingleThreadExecutor( threadFactory );
- }
- }
-
/**
* Obtain an executor for running batch tasks.
*
@@ -559,14 +402,30 @@ protected final ExecutorService getBatchTaskExecutor() {
return getBatchTaskExecutorInternal();
}
+ /**
+ * Obtain the internal batch task executor; this is kept separate from {@link #getBatchTaskExecutor()} because {@link BatchTaskExecutorService} is package-private.
+ */
private BatchTaskExecutorService getBatchTaskExecutorInternal() {
Assert.isTrue( insideDoWork, "Batch tasks can only be submitted in doWork()." );
if ( executorService == null ) {
- executorService = new BatchTaskExecutorService( createBatchTaskExecutorService() );
+ executorService = new BatchTaskExecutorService( createBatchTaskExecutorService(), batchFormat, batchOutputFile );
}
return executorService;
}
+ /**
+ * Create an {@link ExecutorService} to be used for running batch tasks.
+ */
+ protected ExecutorService createBatchTaskExecutorService() {
+ Assert.isNull( executorService, "There is already a batch task ExecutorService." );
+ ThreadFactory threadFactory = new SimpleThreadFactory( "gemma-cli-batch-thread-" );
+ if ( this.numThreads > 1 ) {
+ return Executors.newFixedThreadPool( this.numThreads, threadFactory );
+ } else {
+ return Executors.newSingleThreadExecutor( threadFactory );
+ }
+ }
+
/**
* Await the completion of all batch tasks.
*/
@@ -579,52 +438,8 @@ protected void awaitBatchExecutorService() throws InterruptedException {
return;
}
log.info( String.format( "Awaiting for %d/%d batch tasks to finish...", executorService.getSubmittedTasks() - executorService.getCompletedTasks(), executorService.getSubmittedTasks() ) );
- while ( !executorService.awaitTermination( 5, TimeUnit.SECONDS ) ) {
+ while ( !executorService.awaitTermination( 30, TimeUnit.SECONDS ) ) {
log.info( String.format( "Completed %d/%d batch tasks.", executorService.getCompletedTasks(), executorService.getSubmittedTasks() ) );
}
}
-
- /**
- * Print out a summary of what the program did. Useful when analyzing lists of experiments etc. Use the
- * 'successObjects' and 'errorObjects'
- */
- private void summarizeBatchProcessing() throws IOException {
- Assert.notNull( executorService );
- if ( executorService.getBatchProcessingResults().isEmpty() ) {
- return;
- }
- if ( batchFormat != BatchFormat.SUPPRESS && batchOutputFile != null ) {
- log.info( String.format( "Batch processing summary will be written to %s", batchOutputFile.getAbsolutePath() ) );
- }
- try ( Writer dest = batchOutputFile != null ? new OutputStreamWriter( Files.newOutputStream( batchOutputFile.toPath() ) ) : null ) {
- switch ( batchFormat ) {
- case TEXT:
- new BatchTaskExecutorServiceSummarizer( executorService ).summarizeBatchProcessingToText( dest != null ? dest : System.out );
- break;
- case TSV:
- new BatchTaskExecutorServiceSummarizer( executorService ).summarizeBatchProcessingToTsv( dest != null ? dest : System.out );
- break;
- case SUPPRESS:
- break;
- }
- }
- }
-
- enum BatchFormat {
- TEXT,
- TSV,
- SUPPRESS
- }
-
- /**
- * Exception raised when a {@link #doWork()} aborted by the user.
- *
- * @author poirigui
- */
- private static class WorkAbortedException extends Exception {
-
- private WorkAbortedException( String message ) {
- super( message );
- }
- }
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorService.java b/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorService.java
index 452ba8712f..6dd7380f5f 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorService.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorService.java
@@ -1,17 +1,16 @@
package ubic.gemma.core.util;
import lombok.Value;
+import lombok.extern.apachecommons.CommonsLog;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.lang3.exception.ExceptionUtils;
import javax.annotation.Nullable;
-import javax.annotation.ParametersAreNonnullByDefault;
-import java.io.File;
import java.io.IOException;
-import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -23,9 +22,18 @@
/**
* A task executor that automatically reports errors in batch tasks.
*/
-@ParametersAreNonnullByDefault
+@CommonsLog
class BatchTaskExecutorService extends AbstractDelegatingExecutorService {
+ enum BatchFormat {
+ TEXT,
+ TSV,
+ SUPPRESS
+ }
+
+ private final BatchFormat batchFormat;
+ private final Path batchOutputFile;
+
private final AtomicInteger batchTaskCounter = new AtomicInteger( 0 );
private final AtomicInteger completedBatchTasks = new AtomicInteger( 0 );
@@ -36,8 +44,10 @@ class BatchTaskExecutorService extends AbstractDelegatingExecutorService {
private final List<BatchProcessingResult> batchProcessingResults = Collections.synchronizedList( new ArrayList<>() );
private boolean hasErrorObjects = false;
- public BatchTaskExecutorService( ExecutorService delegate ) {
+ public BatchTaskExecutorService( ExecutorService delegate, BatchFormat batchFormat, @Nullable Path batchOutputFile ) {
super( delegate );
+ this.batchFormat = batchFormat;
+ this.batchOutputFile = batchOutputFile;
}
private final ThreadLocal<Boolean> wasSuccessObjectAdded = ThreadLocal.withInitial( () -> false );
@@ -102,6 +112,13 @@ int getSubmittedTasks() {
return batchTaskCounter.get();
}
+ /**
+ * Indicate if error objects have been reported.
+ */
+ boolean hasErrorObjects() {
+ return hasErrorObjects;
+ }
+
/**
* Add a success object to indicate success in a batch processing.
*
@@ -150,18 +167,8 @@ void addErrorObject( @Nullable Object errorObject, Exception exception ) {
addBatchProcessingResult( new BatchProcessingResult( true, errorObject, exception.getMessage(), exception ) );
}
- /**
- * Indicate if error objects have been reported.
- */
- boolean hasErrorObjects() {
- return hasErrorObjects;
- }
-
- List<BatchProcessingResult> getBatchProcessingResults() {
- return batchProcessingResults;
- }
-
private void addBatchProcessingResult( BatchProcessingResult result ) {
+ batchProcessingResults.add( result );
if ( result.isError() ) {
wasErrorObjectAdded.set( true );
hasErrorObjects = true;
@@ -174,7 +181,7 @@ private void addBatchProcessingResult( BatchProcessingResult result ) {
* Represents an individual result in a batch processing.
*/
@Value
- static class BatchProcessingResult {
+ private static class BatchProcessingResult {
boolean isError;
@Nullable
Object source;
@@ -206,4 +213,72 @@ public String toString() {
return buf.toString();
}
}
+
+ /**
+ * Print out a summary of what the program did, based on the recorded batch processing results. Useful when
+ * analyzing lists of experiments, etc.
+ */
+ void summarizeBatchProcessing() {
+ if ( batchProcessingResults.isEmpty() ) {
+ return;
+ }
+ if ( batchFormat != BatchFormat.SUPPRESS && batchOutputFile != null ) {
+ log.info( String.format( "Batch processing summary will be written to %s", batchOutputFile ) );
+ }
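+ // write the summary to the requested output file, falling back to standard output when no file was provided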
+ try ( Writer dest = batchOutputFile != null ? Files.newBufferedWriter( batchOutputFile ) : null ) {
+ switch ( batchFormat ) {
+ case TEXT:
+ summarizeBatchProcessingToText( dest != null ? dest : System.out );
+ break;
+ case TSV:
+ summarizeBatchProcessingToTsv( dest != null ? dest : System.out );
+ break;
+ case SUPPRESS:
+ break;
+ default:
+ throw new IllegalStateException( "Unsupported batch format " + batchFormat );
+ }
+ } catch ( IOException e ) {
+ log.error( "Failed to summarize batch processing.", e );
+ }
+ }
+
+ private void summarizeBatchProcessingToText( Appendable dest ) throws IOException {
+ List<BatchProcessingResult> successObjects = batchProcessingResults.stream().filter( bp -> !bp.isError() ).collect( Collectors.toList() );
+ if ( !successObjects.isEmpty() ) {
+ dest.append( "---------------------\nSuccessfully processed " )
+ .append( String.valueOf( successObjects.size() ) )
+ .append( " objects:\n" );
+ for ( BatchTaskExecutorService.BatchProcessingResult result : successObjects ) {
+ dest.append( String.valueOf( result ) ).append( "\n" );
+ }
+ dest.append( "---------------------\n" );
+ }
+
+ List<BatchProcessingResult> errorObjects = batchProcessingResults.stream().filter( BatchTaskExecutorService.BatchProcessingResult::isError ).collect( Collectors.toList() );
+ if ( !errorObjects.isEmpty() ) {
+ if ( !successObjects.isEmpty() ) {
+ dest.append( "\n" );
+ }
+ dest.append( "---------------------\nErrors occurred during the processing of " )
+ .append( String.valueOf( errorObjects.size() ) )
+ .append( " objects:\n" );
+ for ( BatchTaskExecutorService.BatchProcessingResult result : errorObjects ) {
+ dest.append( String.valueOf( result ) ).append( "\n" );
+ }
+ dest.append( "---------------------\n" );
+ }
+ }
+
+ private void summarizeBatchProcessingToTsv( Appendable dest ) throws IOException {
+ try ( CSVPrinter printer = new CSVPrinter( dest, CSVFormat.TDF ) ) {
+ for ( BatchTaskExecutorService.BatchProcessingResult result : batchProcessingResults ) {
+ printer.printRecord(
+ result.getSource(),
+ result.isError() ? "ERROR" : "SUCCESS",
+ result.getMessage(),
+ result.getThrowable() != null ? ExceptionUtils.getRootCauseMessage( result.getThrowable() ) : null );
+ }
+ }
+ }
}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorServiceSummarizer.java b/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorServiceSummarizer.java
deleted file mode 100644
index 7a9084121c..0000000000
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/BatchTaskExecutorServiceSummarizer.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package ubic.gemma.core.util;
-
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVPrinter;
-import org.apache.commons.lang3.exception.ExceptionUtils;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.stream.Collectors;
-
-/**
- * Summarizes the processing of a batch of tasks.
- * @author poirigui
- */
-class BatchTaskExecutorServiceSummarizer {
-
- private final BatchTaskExecutorService batchTaskExecutorService;
-
- BatchTaskExecutorServiceSummarizer( BatchTaskExecutorService batchTaskExecutorService ) {
- this.batchTaskExecutorService = batchTaskExecutorService;
- }
-
- void summarizeBatchProcessingToText( Appendable dest ) throws IOException {
- List<BatchTaskExecutorService.BatchProcessingResult> successObjects = batchTaskExecutorService.getBatchProcessingResults().stream().filter( bp -> !bp.isError() ).collect( Collectors.toList() );
- if ( !successObjects.isEmpty() ) {
- StringBuilder buf = new StringBuilder();
- buf.append( "\n---------------------\nSuccessfully processed " ).append( successObjects.size() )
- .append( " objects:\n" );
- for ( BatchTaskExecutorService.BatchProcessingResult result : successObjects ) {
- buf.append( result ).append( "\n" );
- }
- buf.append( "---------------------\n" );
- dest.append( buf );
- }
-
- List<BatchTaskExecutorService.BatchProcessingResult> errorObjects = batchTaskExecutorService.getBatchProcessingResults().stream().filter( BatchTaskExecutorService.BatchProcessingResult::isError ).collect( Collectors.toList() );
- if ( !errorObjects.isEmpty() ) {
- StringBuilder buf = new StringBuilder();
- buf.append( "\n---------------------\nErrors occurred during the processing of " )
- .append( errorObjects.size() ).append( " objects:\n" );
- for ( BatchTaskExecutorService.BatchProcessingResult result : errorObjects ) {
- buf.append( result ).append( "\n" );
- }
- buf.append( "---------------------\n" );
- dest.append( buf );
- }
- }
-
- void summarizeBatchProcessingToTsv( Appendable dest ) throws IOException {
- try ( CSVPrinter printer = new CSVPrinter( dest, CSVFormat.TDF ) ) {
- for ( BatchTaskExecutorService.BatchProcessingResult result : batchTaskExecutorService.getBatchProcessingResults() ) {
- printer.printRecord(
- result.getSource(),
- result.isError() ? "ERROR" : "SUCCESS",
- result.getMessage(),
- result.getThrowable() != null ? ExceptionUtils.getRootCauseMessage( result.getThrowable() ) : null );
- }
- }
- }
-}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/DateConverterImpl.java b/gemma-cli/src/main/java/ubic/gemma/core/util/DateConverterImpl.java
deleted file mode 100644
index 20f53eb44e..0000000000
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/DateConverterImpl.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package ubic.gemma.core.util;
-
-import org.apache.commons.cli.Converter;
-import org.ocpsoft.prettytime.nlp.PrettyTimeParser;
-import ubic.basecode.util.DateUtil;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.List;
-import java.util.Locale;
-import java.util.TimeZone;
-
-/**
- * A converter for parsing dates supporting various formats.
- *
- * - most ISO 8601 date and date time with or without UTC offset
- * - {@code +1d, -1m, -1h} as per {@link DateUtil#getRelativeDate(Date, String)}
- * - natural language (i.e. five hours ago, last week, etc. using {@link PrettyTimeParser}
- *
- * @author poirigui
- */
-public class DateConverterImpl implements Converter<Date, ParseException> {
-
- /**
- * Exact date formats to attempt before resorting to natural language parsing.
- */
- private final SimpleDateFormat[] exactDateFormats = {
- // ISO 8601
- new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ssX", Locale.ENGLISH ),
- new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH ),
- new SimpleDateFormat( "yyyy-MM-dd", Locale.ENGLISH ),
- new SimpleDateFormat( "yyyy-MM", Locale.ENGLISH ),
- new SimpleDateFormat( "yyyy", Locale.ENGLISH )
- };
-
- private final Date relativeTo;
- private final PrettyTimeParser parser;
-
- /**
- * @param relativeTo date relative to which duration are interpreted
- * @param timeZone when parsing date, use this time zone as a reference
- */
- public DateConverterImpl( Date relativeTo, TimeZone timeZone ) {
- this.relativeTo = relativeTo;
- this.parser = new PrettyTimeParser( timeZone );
- for ( SimpleDateFormat format : exactDateFormats ) {
- format.setTimeZone( timeZone );
- }
- }
-
- @Override
- public Date apply( String string ) throws ParseException {
- for ( SimpleDateFormat format : exactDateFormats ) {
- try {
- return format.parse( string );
- } catch ( ParseException e ) {
- // ignore
- }
- }
- try {
- return DateUtil.getRelativeDate( relativeTo, string );
- } catch ( IllegalArgumentException e ) {
- // ignore
- }
- List<Date> candidates = parser.parse( string, relativeTo );
- if ( candidates.isEmpty() ) {
- throw new ParseException( "No suitable date found.", 0 );
- }
- if ( candidates.size() > 1 ) {
- throw new ParseException( "More than one date is specified.", 0 );
- }
- return candidates.iterator().next();
- }
-}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocator.java b/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocator.java
new file mode 100644
index 0000000000..10a65f5235
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocator.java
@@ -0,0 +1,32 @@
+package ubic.gemma.core.util;
+
+import ubic.gemma.model.common.protocol.Protocol;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.bioAssayData.DataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.genome.Taxon;
+
+import java.util.Collection;
+
+/**
+ * Locate various entities using identifiers supplied by the CLI.
+ * @author poirigui
+ */
+public interface EntityLocator {
+
+ Taxon locateTaxon( String identifier );
+
+ ArrayDesign locateArrayDesign( String identifier );
+
+ ExpressionExperiment locateExpressionExperiment( String identifier, boolean useReferencesIfPossible );
+
+ Protocol locateProtocol( String protocolName );
+
+ QuantitationType locateQuantitationType( ExpressionExperiment ee, String qt, Class<? extends DataVector> vectorType );
+
+ QuantitationType locateQuantitationType( ExpressionExperiment ee, String qt, Collection<Class<? extends DataVector>> vectorType );
+
+ CellTypeAssignment locateCellTypeAssignment( ExpressionExperiment expressionExperiment, QuantitationType qt, String cta );
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocatorImpl.java b/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocatorImpl.java
new file mode 100644
index 0000000000..0fd3b4b0bc
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/EntityLocatorImpl.java
@@ -0,0 +1,219 @@
+package ubic.gemma.core.util;
+
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+import org.springframework.util.Assert;
+import ubic.gemma.model.common.protocol.Protocol;
+import ubic.gemma.model.common.quantitationtype.QuantitationType;
+import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
+import ubic.gemma.model.expression.bioAssayData.CellTypeAssignment;
+import ubic.gemma.model.expression.bioAssayData.DataVector;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+import ubic.gemma.model.genome.Taxon;
+import ubic.gemma.persistence.service.common.protocol.ProtocolService;
+import ubic.gemma.persistence.service.common.quantitationtype.NonUniqueQuantitationTypeByNameException;
+import ubic.gemma.persistence.service.common.quantitationtype.QuantitationTypeService;
+import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
+import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
+import ubic.gemma.persistence.service.expression.experiment.SingleCellExpressionExperimentService;
+import ubic.gemma.persistence.service.genome.taxon.TaxonService;
+
+import java.util.Collection;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static java.util.Objects.requireNonNull;
+
+@CommonsLog
+@Component
+public class EntityLocatorImpl implements EntityLocator {
+
+ @Autowired
+ private ExpressionExperimentService eeService;
+ @Autowired
+ private TaxonService taxonService;
+ @Autowired
+ private ArrayDesignService arrayDesignService;
+ @Autowired
+ private ProtocolService protocolService;
+ @Autowired
+ private QuantitationTypeService quantitationTypeService;
+ @Autowired
+ private SingleCellExpressionExperimentService singleCellExpressionExperimentService;
+
+ @Override
+ public Taxon locateTaxon( String identifier ) {
+ Assert.isTrue( StringUtils.isNotBlank( identifier ), "Taxon name must be be blank." );
+ identifier = StringUtils.strip( identifier );
+ Taxon taxon;
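+ // try the identifier as a Gemma ID, then an NCBI ID, then a common name and finally a scientific name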
+ try {
+ long id = Long.parseLong( identifier );
+ if ( ( taxon = taxonService.load( id ) ) != null ) {
+ log.info( "Found " + taxon + " by ID" );
+ return taxon;
+ }
+ if ( ( taxon = taxonService.findByNcbiId( Math.toIntExact( id ) ) ) != null ) {
+ log.info( "Found " + taxon + " by NCBI ID" );
+ return taxon;
+ }
+ throw new NullPointerException( "No taxon with ID or NCBI ID " + id );
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ if ( ( taxon = taxonService.findByCommonName( identifier ) ) != null ) {
+ log.info( "Found " + taxon + " by common name." );
+ return taxon;
+ }
+ if ( ( taxon = taxonService.findByScientificName( identifier ) ) != null ) {
+ log.info( "Found " + taxon + " by scientific name." );
+ return taxon;
+ }
+ throw new NullPointerException( "Cannot find taxon with name " + identifier );
+ }
+
+ @Override
+ public ArrayDesign locateArrayDesign( String identifier ) {
+ Assert.isTrue( StringUtils.isNotBlank( identifier ), "Platform name must not be blank." );
+ identifier = StringUtils.strip( identifier );
+ ArrayDesign arrayDesign;
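+ // try the identifier as an ID, then a short name, a full name and finally an alternate name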
+ try {
+ long id = Long.parseLong( identifier );
+ if ( ( arrayDesign = arrayDesignService.load( id ) ) != null ) {
+ log.info( "Found " + arrayDesign + " by ID." );
+ return arrayDesign;
+ } else {
+ throw new NullPointerException( "No platform with ID " + id );
+ }
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ if ( ( arrayDesign = arrayDesignService.findByShortName( identifier ) ) != null ) {
+ log.info( "Found " + arrayDesign + " by short name." );
+ return arrayDesign;
+ }
+ if ( ( arrayDesign = arrayDesignService.findOneByName( identifier ) ) != null ) {
+ log.info( "Found " + arrayDesign + " by name." );
+ return arrayDesign;
+ }
+ if ( ( arrayDesign = arrayDesignService.findOneByAlternateName( identifier ) ) != null ) {
+ log.info( "Found " + arrayDesign + " by alternate name." );
+ return arrayDesign;
+ }
+ throw new NullPointerException( "No platform found with ID or name matching " + identifier );
+ }
+
+ /**
+ * Attempt to locate an experiment using the given identifier.
+ */
+ @Override
+ public ExpressionExperiment locateExpressionExperiment( String identifier, boolean useReferencesIfPossible ) {
+ Assert.isTrue( StringUtils.isNotBlank( identifier ), "Expression experiment ID or short name must be provided" );
+ identifier = StringUtils.strip( identifier );
+ ExpressionExperiment ee;
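+ // try the identifier as a numeric ID, then a short name, an accession and finally a name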
+ try {
+ Long id = Long.parseLong( identifier );
+ if ( useReferencesIfPossible ) {
+ // this is never null, but may produce ObjectNotFoundException later on
+ return eeService.loadReference( id );
+ } else if ( ( ee = eeService.load( id ) ) != null ) {
+ log.debug( "Found " + ee + " by ID" );
+ return ee;
+ } else {
+ throw new NullPointerException( "No experiment found with ID " + id );
+ }
+ } catch ( NumberFormatException e ) {
+ // can be safely ignored, we'll attempt to use it as a short name
+ }
+ if ( ( ee = eeService.findByShortName( identifier ) ) != null ) {
+ log.debug( "Found " + ee + " by short name" );
+ return ee;
+ }
+ if ( ( ee = eeService.findOneByAccession( identifier ) ) != null ) {
+ log.debug( "Found " + ee + " by accession" );
+ return ee;
+ }
+ if ( ( ee = eeService.findOneByName( identifier ) ) != null ) {
+ log.debug( "Found " + ee + " by name" );
+ return ee;
+ }
+ throw new NullPointerException( "Could not locate any experiment with identifier or name matching " + identifier );
+ }
+
+ @Override
+ public Protocol locateProtocol( String protocolName ) {
+ try {
+ long id = Long.parseLong( protocolName );
+ return protocolService.load( id );
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ return requireNonNull( protocolService.findByName( protocolName ),
+ "Could not locate any protocol with identifier or name matching " + protocolName );
+ }
+
+ @Override
+ public QuantitationType locateQuantitationType( ExpressionExperiment ee, String qt, Class<? extends DataVector> vectorType ) {
+ QuantitationType result;
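+ // try the identifier as a quantitation type ID first, then as a quantitation type name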
+ try {
+ if ( ( result = quantitationTypeService.loadByIdAndVectorType( Long.parseLong( qt ), ee, vectorType ) ) != null ) {
+ return result;
+ }
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ try {
+ if ( ( result = quantitationTypeService.findByNameAndVectorType( ee, qt, vectorType ) ) != null ) {
+ return result;
+ }
+ } catch ( NonUniqueQuantitationTypeByNameException e ) {
+ throw new RuntimeException( e );
+ }
+ Collection<QuantitationType> possibleValues = quantitationTypeService.findByExpressionExperiment( ee, vectorType );
+ throw new NullPointerException( String.format( "No quantitation type in %s for %s matching %s.%s",
+ ee, vectorType.getSimpleName(), qt,
+ !possibleValues.isEmpty() ? " Possible values are: " + possibleValues.stream().map( QuantitationType::getName ).collect( Collectors.joining( ", " ) ) : "" ) );
+ }
+
+ @Override
+ public QuantitationType locateQuantitationType( ExpressionExperiment ee, String qt, Collection<Class<? extends DataVector>> vectorTypes ) {
+ QuantitationType result;
+ for ( Class<? extends DataVector> vectorType : vectorTypes ) {
+ try {
+ if ( ( result = quantitationTypeService.loadByIdAndVectorType( Long.parseLong( qt ), ee, vectorType ) ) != null ) {
+ return result;
+ }
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ try {
+ if ( ( result = quantitationTypeService.findByNameAndVectorType( ee, qt, vectorType ) ) != null ) {
+ return result;
+ }
+ } catch ( NonUniqueQuantitationTypeByNameException e ) {
+ throw new RuntimeException( e );
+ }
+ }
+ Collection<QuantitationType> possibleValues = quantitationTypeService.findByExpressionExperiment( ee, vectorTypes );
+ throw new NullPointerException( String.format( "No quantitation type in %s for any of %s matching %s.%s",
+ ee, vectorTypes.stream().map( Class::getSimpleName ).collect( Collectors.joining( ", " ) ), qt,
+ !possibleValues.isEmpty() ? " Possible values are: " + possibleValues.stream().map( QuantitationType::getName ).collect( Collectors.joining( ", " ) ) : "" )
+
+ );
+ }
+
+ @Override
+ public CellTypeAssignment locateCellTypeAssignment( ExpressionExperiment expressionExperiment, QuantitationType qt, String cta ) {
+ try {
+ Optional<CellTypeAssignment> c = singleCellExpressionExperimentService.getCellTypeAssignment( expressionExperiment, qt, Long.parseLong( cta ) );
+ if ( c.isPresent() ) {
+ return c.get();
+ }
+ } catch ( NumberFormatException e ) {
+ // ignore
+ }
+ return singleCellExpressionExperimentService.getCellTypeAssignment( expressionExperiment, qt, cta )
+ .orElseThrow( () -> new NullPointerException( "Could not locate any cell type assignment with identifier or name matching " + cta ) );
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/HelpUtils.java b/gemma-cli/src/main/java/ubic/gemma/core/util/HelpUtils.java
index 1adfc401f2..b7bc3bea4d 100644
--- a/gemma-cli/src/main/java/ubic/gemma/core/util/HelpUtils.java
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/HelpUtils.java
@@ -4,6 +4,7 @@
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.StringUtils;
+import javax.annotation.Nullable;
import java.io.PrintWriter;
public class HelpUtils {
@@ -17,8 +18,7 @@ public class HelpUtils {
formatter.setSyntaxPrefix( "Usage: " );
}
- public static void printHelp( PrintWriter writer, String commandName, Options options, boolean allowPositionalArguments, String header, String footer ) {
- String syntax = "gemma-cli " + commandName + " [options]" + ( allowPositionalArguments ? " [arguments]" : "" );
+ public static void printHelp( PrintWriter writer, String syntax, Options options, @Nullable String header, @Nullable String footer ) {
if ( StringUtils.isBlank( header ) ) {
header = HEADER;
} else {
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/OptionsUtils.java b/gemma-cli/src/main/java/ubic/gemma/core/util/OptionsUtils.java
new file mode 100644
index 0000000000..8b318c882c
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/OptionsUtils.java
@@ -0,0 +1,101 @@
+package ubic.gemma.core.util;
+
+import org.apache.commons.cli.Converter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.ocpsoft.prettytime.nlp.PrettyTimeParser;
+import ubic.basecode.util.DateUtil;
+
+import javax.annotation.Nullable;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.List;
+import java.util.Locale;
+import java.util.TimeZone;
+
+public class OptionsUtils {
+
+ /**
+ * When parsing dates, use this as a reference for 'now'.
+ */
+ private static final Date DEFAULT_RELATIVE_TO = new Date();
+
+ private static final TimeZone DEFAULT_TIME_ZONE = TimeZone.getDefault();
+
+ /**
+ * Add a date option with support for fuzzy dates (e.g. one month ago).
+ * @see DateConverterImpl
+ */
+ public static void addDateOption( String name, @Nullable String longOpt, String desc, Options options ) {
+ options.addOption( Option.builder( name )
+ .longOpt( longOpt )
+ .desc( desc )
+ .hasArg()
+ .type( Date.class )
+ .converter( new DateConverterImpl( DEFAULT_RELATIVE_TO, DEFAULT_TIME_ZONE ) ).build() );
+ }
+
+ /**
+ * A converter for parsing dates supporting various formats.
+ *
+ * - most ISO 8601 dates and date-times, with or without a UTC offset
+ * - {@code +1d, -1m, -1h} as per {@link DateUtil#getRelativeDate(Date, String)}
+ * - natural language (e.g. five hours ago, last week, etc.) using {@link PrettyTimeParser}
+ *
+ * @author poirigui
+ */
+ static class DateConverterImpl implements Converter<Date, ParseException> {
+
+ /**
+ * Exact date formats to attempt before resorting to natural language parsing.
+ */
+ private final SimpleDateFormat[] exactDateFormats = {
+ // ISO 8601
+ new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ssX", Locale.ENGLISH ),
+ new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH ),
+ new SimpleDateFormat( "yyyy-MM-dd", Locale.ENGLISH ),
+ new SimpleDateFormat( "yyyy-MM", Locale.ENGLISH ),
+ new SimpleDateFormat( "yyyy", Locale.ENGLISH )
+ };
+
+ private final Date relativeTo;
+ private final PrettyTimeParser parser;
+
+ /**
+ * @param relativeTo date relative to which durations are interpreted
+ * @param timeZone time zone used as a reference when parsing dates
+ */
+ public DateConverterImpl( Date relativeTo, TimeZone timeZone ) {
+ this.relativeTo = relativeTo;
+ this.parser = new PrettyTimeParser( timeZone );
+ for ( SimpleDateFormat format : exactDateFormats ) {
+ format.setTimeZone( timeZone );
+ }
+ }
+
+ @Override
+ public Date apply( String string ) throws ParseException {
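+ // try exact date formats first, then relative offsets (e.g. +1d), and finally natural-language parsing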
+ for ( SimpleDateFormat format : exactDateFormats ) {
+ try {
+ return format.parse( string );
+ } catch ( ParseException e ) {
+ // ignore
+ }
+ }
+ try {
+ return DateUtil.getRelativeDate( relativeTo, string );
+ } catch ( IllegalArgumentException e ) {
+ // ignore
+ }
+ List<Date> candidates = parser.parse( string, relativeTo );
+ if ( candidates.isEmpty() ) {
+ throw new ParseException( "No suitable date found.", 0 );
+ }
+ if ( candidates.size() > 1 ) {
+ throw new ParseException( "More than one date is specified.", 0 );
+ }
+ return candidates.iterator().next();
+ }
+ }
+}
diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/package-info.java b/gemma-cli/src/main/java/ubic/gemma/core/util/package-info.java
new file mode 100644
index 0000000000..92e70fbd10
--- /dev/null
+++ b/gemma-cli/src/main/java/ubic/gemma/core/util/package-info.java
@@ -0,0 +1,7 @@
+/**
+ * Utilities for building Gemma command line tools.
+ */
+@ParametersAreNonnullByDefault
+package ubic.gemma.core.util;
+
+import javax.annotation.ParametersAreNonnullByDefault;
\ No newline at end of file
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/ArrayDesignMergeCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/ArrayDesignMergeCliTest.java
index dec6f49462..047497c22e 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/ArrayDesignMergeCliTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/ArrayDesignMergeCliTest.java
@@ -10,11 +10,12 @@
import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import ubic.gemma.core.analysis.report.ArrayDesignReportService;
import ubic.gemma.core.context.TestComponent;
import ubic.gemma.core.loader.expression.arrayDesign.ArrayDesignMergeService;
+import ubic.gemma.core.util.EntityLocator;
import ubic.gemma.core.util.GemmaRestApiClient;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
@@ -29,7 +30,7 @@
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class ArrayDesignMergeCliTest extends AbstractJUnit4SpringContextTests {
+public class ArrayDesignMergeCliTest extends BaseCliTest {
@Configuration
@TestComponent
@@ -74,6 +75,11 @@ public ArrayDesignService arrayDesignService() {
public GemmaRestApiClient gemmaRestApiClient() {
return mock();
}
+
+ @Bean
+ public EntityLocator entityLocator() {
+ return mock();
+ }
}
@Autowired
@@ -85,9 +91,12 @@ public GemmaRestApiClient gemmaRestApiClient() {
@Autowired
private ArrayDesignService arrayDesignService;
+ @Autowired
+ private EntityLocator entityLocator;
+
@After
public void tearDown() {
- reset( arrayDesignService );
+ reset( entityLocator, arrayDesignService );
}
@Test
@@ -96,9 +105,9 @@ public void test() {
ArrayDesign a = ArrayDesign.Factory.newInstance();
ArrayDesign b = ArrayDesign.Factory.newInstance();
ArrayDesign c = ArrayDesign.Factory.newInstance();
- when( arrayDesignService.findByShortName( "1" ) ).thenReturn( a );
- when( arrayDesignService.findByShortName( "2" ) ).thenReturn( b );
- when( arrayDesignService.findByShortName( "3" ) ).thenReturn( c );
+ when( entityLocator.locateArrayDesign( "1" ) ).thenReturn( a );
+ when( entityLocator.locateArrayDesign( "2" ) ).thenReturn( b );
+ when( entityLocator.locateArrayDesign( "3" ) ).thenReturn( c );
when( arrayDesignService.thaw( any( ArrayDesign.class ) ) ).thenAnswer( args -> args.getArgument( 0 ) );
when( arrayDesignService.thaw( anyCollection() ) ).thenAnswer( args -> args.getArgument( 0 ) );
Collection<ArrayDesign> otherPlatforms = new HashSet<>( Arrays.asList( b, c ) );
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCliTest.java
index d3c13c21dc..dbc0cd98a8 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCliTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/ExternalDatabaseUpdaterCliTest.java
@@ -11,15 +11,15 @@
import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
+import ubic.gemma.core.context.TestComponent;
import ubic.gemma.core.security.authentication.UserManager;
import ubic.gemma.core.util.GemmaRestApiClient;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.model.common.auditAndSecurity.User;
import ubic.gemma.model.common.description.DatabaseType;
import ubic.gemma.model.common.description.ExternalDatabase;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.persistence.service.common.description.ExternalDatabaseService;
-import ubic.gemma.core.context.TestComponent;
import java.net.MalformedURLException;
import java.net.URL;
@@ -29,7 +29,7 @@
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class ExternalDatabaseUpdaterCliTest extends AbstractJUnit4SpringContextTests {
+public class ExternalDatabaseUpdaterCliTest extends BaseCliTest {
@Configuration
@TestComponent
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorCLITest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorCLITest.java
index 5d31ae98c0..323c16550e 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorCLITest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorCLITest.java
@@ -12,16 +12,16 @@
import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import org.springframework.transaction.PlatformTransactionManager;
+import ubic.gemma.core.context.TestComponent;
import ubic.gemma.core.util.GemmaRestApiClient;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.expression.experiment.Statement;
import ubic.gemma.persistence.service.expression.experiment.FactorValueMigratorService;
import ubic.gemma.persistence.service.expression.experiment.FactorValueMigratorServiceImpl;
import ubic.gemma.persistence.service.expression.experiment.FactorValueService;
-import ubic.gemma.core.context.TestComponent;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
@@ -31,7 +31,7 @@
@Deprecated
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class FactorValueMigratorCLITest extends AbstractJUnit4SpringContextTests {
+public class FactorValueMigratorCLITest extends BaseCliTest {
@Configuration
@TestComponent
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorServiceTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorServiceTest.java
index 1cf7ba3a1c..6e05afd241 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorServiceTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/FactorValueMigratorServiceTest.java
@@ -7,22 +7,22 @@
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.test.context.ContextConfiguration;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import org.springframework.transaction.PlatformTransactionManager;
+import ubic.gemma.core.context.TestComponent;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.expression.experiment.Statement;
import ubic.gemma.persistence.service.expression.experiment.FactorValueMigratorService;
import ubic.gemma.persistence.service.expression.experiment.FactorValueMigratorServiceImpl;
import ubic.gemma.persistence.service.expression.experiment.FactorValueService;
-import ubic.gemma.core.context.TestComponent;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.*;
@Deprecated
@ContextConfiguration
-public class FactorValueMigratorServiceTest extends AbstractJUnit4SpringContextTests {
+public class FactorValueMigratorServiceTest extends BaseCliTest {
@Configuration
@TestComponent
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/FindObsoleteTermsCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/FindObsoleteTermsCliTest.java
index 07753beaa5..7f5db7445e 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/FindObsoleteTermsCliTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/FindObsoleteTermsCliTest.java
@@ -11,10 +11,10 @@
import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import ubic.gemma.core.context.TestComponent;
import ubic.gemma.core.ontology.OntologyService;
import ubic.gemma.core.util.GemmaRestApiClient;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.core.util.test.TestPropertyPlaceholderConfigurer;
import java.util.concurrent.TimeUnit;
@@ -26,7 +26,7 @@
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class FindObsoleteTermsCliTest extends AbstractJUnit4SpringContextTests {
+public class FindObsoleteTermsCliTest extends BaseCliTest {
@Configuration
@TestComponent
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLITest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLITest.java
index 65f2319edf..26883b2271 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLITest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/NCBIGene2GOAssociationLoaderCLITest.java
@@ -12,6 +12,7 @@
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.core.util.test.category.SlowTest;
import ubic.gemma.model.common.description.DatabaseType;
import ubic.gemma.model.common.description.ExternalDatabase;
@@ -27,7 +28,7 @@
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class NCBIGene2GOAssociationLoaderCLITest extends AbstractJUnit4SpringContextTests {
+public class NCBIGene2GOAssociationLoaderCLITest extends BaseCliTest {
@Configuration
@TestComponent
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/RNASeqDataAddCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/RNASeqDataAddCliTest.java
index 229f7b8037..4b4a3fc858 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/apps/RNASeqDataAddCliTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/RNASeqDataAddCliTest.java
@@ -14,12 +14,13 @@
import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestExecutionListeners;
-import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
-import ubic.gemma.persistence.service.genome.gene.GeneService;
+import ubic.gemma.core.context.TestComponent;
import ubic.gemma.core.loader.expression.DataUpdater;
import ubic.gemma.core.search.SearchService;
+import ubic.gemma.core.util.EntityLocator;
import ubic.gemma.core.util.GemmaRestApiClient;
+import ubic.gemma.core.util.test.BaseCliTest;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditEventService;
@@ -27,8 +28,9 @@
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService;
+import ubic.gemma.persistence.service.genome.gene.GeneService;
import ubic.gemma.persistence.service.genome.taxon.TaxonService;
-import ubic.gemma.core.context.TestComponent;
+import ubic.gemma.persistence.util.EntityUrlBuilder;
import java.io.IOException;
@@ -37,7 +39,7 @@
@ContextConfiguration
@TestExecutionListeners(WithSecurityContextTestExecutionListener.class)
-public class RNASeqDataAddCliTest extends AbstractJUnit4SpringContextTests {
+public class RNASeqDataAddCliTest extends BaseCliTest {
@Configuration
@TestComponent
@@ -108,6 +110,16 @@ public AuditEventService auditEventService() {
public GemmaRestApiClient gemmaRestApiClient() {
return mock();
}
+
+ @Bean
+ public EntityLocator entityLocator() {
+ return mock();
+ }
+
+ @Bean
+ public EntityUrlBuilder entityUrlBuilder() {
+ return new EntityUrlBuilder( "https://gemma.msl.ubc.ca" );
+ }
}
@Autowired
@@ -122,6 +134,9 @@ public GemmaRestApiClient gemmaRestApiClient() {
@Autowired
private ArrayDesignService arrayDesignService;
+ @Autowired
+ private EntityLocator entityLocator;
+
private ArrayDesign ad;
private ExpressionExperiment ee;
private String rpkmFile;
@@ -132,9 +147,9 @@ public void setUp() throws IOException {
ee = new ExpressionExperiment();
ee.setId( 1L );
rpkmFile = new ClassPathResource( "ubic/gemma/core/apps/test.rpkm.txt" ).getFile().getAbsolutePath();
- when( expressionExperimentService.findByShortName( "GSE000001" ) ).thenReturn( ee );
+ when( entityLocator.locateExpressionExperiment( "GSE000001", false ) ).thenReturn( ee );
when( expressionExperimentService.thawLite( any() ) ).thenAnswer( a -> a.getArgument( 0 ) );
- when( arrayDesignService.findByShortName( "test" ) ).thenReturn( ad );
+ when( entityLocator.locateArrayDesign( "test" ) ).thenReturn( ad );
}
@After
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/apps/SingleCellDataDownloaderCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/apps/SingleCellDataDownloaderCliTest.java
new file mode 100644
index 0000000000..65590669f9
--- /dev/null
+++ b/gemma-cli/src/test/java/ubic/gemma/core/apps/SingleCellDataDownloaderCliTest.java
@@ -0,0 +1,37 @@
+package ubic.gemma.core.apps;
+
+import org.junit.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Import;
+import org.springframework.test.context.ContextConfiguration;
+import ubic.gemma.core.context.TestComponent;
+import ubic.gemma.core.loader.util.ftp.FTPConfig;
+import ubic.gemma.core.util.test.BaseCliTest;
+
+import static ubic.gemma.core.util.test.Assertions.assertThat;
+
+@ContextConfiguration
+public class SingleCellDataDownloaderCliTest extends BaseCliTest {
+
+ @Configuration
+ @TestComponent
+ @Import(FTPConfig.class)
+ static class CC {
+ @Bean
+ public SingleCellDataDownloaderCli singleCellDataDownloaderCli() {
+ return new SingleCellDataDownloaderCli();
+ }
+ }
+
+ @Autowired
+ private SingleCellDataDownloaderCli singleCellDataDownloaderCli;
+
+ @Test
+ public void test() {
+ assertThat( singleCellDataDownloaderCli )
+ .withCommand( "-e", "GSE1234" )
+ .succeeds();
+ }
+}
\ No newline at end of file
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/MarkCLIsAsPrototypesTest.java b/gemma-cli/src/test/java/ubic/gemma/core/util/MarkCLIsAsPrototypesTest.java
index a42794c042..f91f9f2e6d 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/util/MarkCLIsAsPrototypesTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/MarkCLIsAsPrototypesTest.java
@@ -8,7 +8,7 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.gemma.core.apps.TestCli;
-import ubic.gemma.core.util.test.BaseCLIIntegrationTest;
+import ubic.gemma.core.util.test.BaseCliIntegrationTest;
import javax.annotation.Nullable;
@@ -19,7 +19,7 @@
* Test various behaviours of CLIs when injected as bean.
* @author poirigui
*/
-public class MarkCLIsAsPrototypesTest extends BaseCLIIntegrationTest {
+public class MarkCLIsAsPrototypesTest extends BaseCliIntegrationTest {
@Autowired
private BeanFactory beanFactory;
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/DateConverterImplTest.java b/gemma-cli/src/test/java/ubic/gemma/core/util/OptionsUtilsTest.java
similarity index 88%
rename from gemma-cli/src/test/java/ubic/gemma/core/util/DateConverterImplTest.java
rename to gemma-cli/src/test/java/ubic/gemma/core/util/OptionsUtilsTest.java
index de589173ed..0be65e9ed5 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/util/DateConverterImplTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/OptionsUtilsTest.java
@@ -10,7 +10,7 @@
import static org.assertj.core.api.Assertions.assertThat;
-public class DateConverterImplTest {
+public class OptionsUtilsTest {
private static TimeZone tz;
@@ -26,10 +26,10 @@ public static void resetTimeZone() {
}
private final Date relativeTo = new Date();
- private final DateConverterImpl c = new DateConverterImpl( relativeTo, TimeZone.getTimeZone( "America/Vancouver" ) );
+ private final OptionsUtils.DateConverterImpl c = new OptionsUtils.DateConverterImpl( relativeTo, TimeZone.getTimeZone( "America/Vancouver" ) );
@Test
- public void test() throws ParseException {
+ public void testParseFuzzyDate() throws ParseException {
assertThat( c.apply( "2019-01-01" ) )
.hasYear( 2019 ).hasMonth( 1 ).hasDayOfMonth( 1 );
assertThat( c.apply( "2019-01-01 02:12:11" ) )
@@ -37,7 +37,7 @@ public void test() throws ParseException {
}
@Test
- public void testIso8601() throws ParseException {
+ public void testParseIso8601Date() throws ParseException {
assertThat( c.apply( "2019" ) )
.hasYear( 2019 ).hasMonth( 1 ).hasDayOfMonth( 1 );
assertThat( c.apply( "2019-01" ) )
@@ -61,7 +61,7 @@ public void testIso8601() throws ParseException {
}
@Test
- public void testWords() throws ParseException {
+ public void testParseWords() throws ParseException {
assertThat( c.apply( "now" ) ).isCloseTo( relativeTo, 10 );
assertThat( c.apply( "yesterday" ) )
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/test/Assertions.java b/gemma-cli/src/test/java/ubic/gemma/core/util/test/Assertions.java
new file mode 100644
index 0000000000..253bfb0e37
--- /dev/null
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/test/Assertions.java
@@ -0,0 +1,10 @@
+package ubic.gemma.core.util.test;
+
+import ubic.gemma.core.util.CLI;
+
+public class Assertions {
+
+ public static CliAssert assertThat( CLI cli ) {
+ return new CliAssert( cli );
+ }
+}
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCLIIntegrationTest.java b/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliIntegrationTest.java
similarity index 75%
rename from gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCLIIntegrationTest.java
rename to gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliIntegrationTest.java
index cfbbe259db..eaca89a6fa 100644
--- a/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCLIIntegrationTest.java
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliIntegrationTest.java
@@ -7,5 +7,6 @@
* @author poirigui
*/
@ActiveProfiles("cli")
-public abstract class BaseCLIIntegrationTest extends BaseIntegrationTest {
+public abstract class BaseCliIntegrationTest extends BaseIntegrationTest {
+
}
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliTest.java b/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliTest.java
new file mode 100644
index 0000000000..ba882fd3ac
--- /dev/null
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/test/BaseCliTest.java
@@ -0,0 +1,13 @@
+package ubic.gemma.core.util.test;
+
+import org.springframework.test.context.ActiveProfiles;
+
+/**
+ * Base class for CLI tests.
+ *
+ * Use {@link BaseCliIntegrationTest} for integration tests.
+ */
+@ActiveProfiles("cli")
+public abstract class BaseCliTest extends BaseTest {
+
+}
diff --git a/gemma-cli/src/test/java/ubic/gemma/core/util/test/CliAssert.java b/gemma-cli/src/test/java/ubic/gemma/core/util/test/CliAssert.java
new file mode 100644
index 0000000000..b2d4f9625c
--- /dev/null
+++ b/gemma-cli/src/test/java/ubic/gemma/core/util/test/CliAssert.java
@@ -0,0 +1,23 @@
+package ubic.gemma.core.util.test;
+
+import org.assertj.core.api.AbstractAssert;
+import ubic.gemma.core.util.CLI;
+
+public class CliAssert extends AbstractAssert<CliAssert, CLI> {
+
+ private String[] command;
+
+ public CliAssert( CLI cli ) {
+ super( cli, CliAssert.class );
+ }
+
+ public CliAssert withCommand( String... command ) {
+ this.command = command;
+ return myself;
+ }
+
+ public CliAssert succeeds() {
+ this.objects.assertEqual( info, actual.executeCommand( command ), 0 );
+ return myself;
+ }
+}
diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml
index c17ad58160..8d9e696887 100644
--- a/gemma-core/pom.xml
+++ b/gemma-core/pom.xml
@@ -57,7 +57,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
- <version>3.8.0</version>
+ <version>3.8.1</version>
<id>unpack-spring-security-test</id>
@@ -166,6 +166,13 @@
<version>3.11.1</version>
+
+ <dependency>
+ <groupId>io.swagger.core.v3</groupId>
+ <artifactId>swagger-annotations</artifactId>
+ <version>${swagger.version}</version>
+ </dependency>
+
<groupId>javax.xml.bind</groupId>
@@ -191,6 +198,23 @@
<version>4.2.2.GA</version>
+
+ <dependency>
+ <groupId>com.googlecode.matrix-toolkits-java</groupId>
+ <artifactId>mtj</artifactId>
+ <version>1.0.4</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.hdf5group</groupId>
+ <artifactId>hdf5</artifactId>
+ <version>${hdf5.version}</version>
+ <scope>system</scope>
+ <systemPath>${hdf5.jarPath}</systemPath>
+ </dependency>
+
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/GeneCoexpressionSearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/GeneCoexpressionSearchServiceImpl.java
index 774e504bf4..33fd5ec701 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/GeneCoexpressionSearchServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/GeneCoexpressionSearchServiceImpl.java
@@ -33,7 +33,7 @@
import ubic.gemma.persistence.service.association.coexpression.CoexpressionService;
import ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
-import ubic.gemma.persistence.util.EntityUtils;
+import ubic.gemma.persistence.util.IdentifiableUtils;
import java.util.*;
@@ -95,7 +95,7 @@ private List addExtCoexpressionValueObjects( GeneVal
}
// database hit. loadValueObjects is too slow.
- Map<Long, GeneValueObject> coexpedGenes = EntityUtils
+ Map<Long, GeneValueObject> coexpedGenes = IdentifiableUtils
.getIdMap( geneService.loadValueObjectsByIds( coexpGenes ) );
for ( CoexpressionValueObject cvo : coexp ) {
@@ -188,7 +188,7 @@ private CoexpressionMetaValueObject doCoexpressionSearch( Collection input
return result;
}
- Collection<Long> eeIds = EntityUtils.getIds( eevos );
+ Collection<Long> eeIds = IdentifiableUtils.getIds( eevos );
Map<Long, List<CoexpressionValueObject>> allCoexpressions;
@@ -252,7 +252,7 @@ private CoexpressionMetaValueObject doCoexpressionSearch( Collection input
Set<Long> queryGeneIds = allCoexpressions.keySet();
assert genes.containsAll( queryGeneIds );
- Map<Long, GeneValueObject> idMap = EntityUtils.getIdMap( geneService.loadValueObjectsByIds( queryGeneIds ) );
+ Map<Long, GeneValueObject> idMap = IdentifiableUtils.getIdMap( geneService.loadValueObjectsByIds( queryGeneIds ) );
int k = 0;
for ( Long queryGene : queryGeneIds ) {
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysis.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysis.java
index e15e12cb61..9951777616 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysis.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysis.java
@@ -23,8 +23,6 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import ubic.basecode.dataStructure.Link;
-import ubic.basecode.dataStructure.matrix.MatrixUtil;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.math.CorrelationStats;
import ubic.basecode.math.Stats;
import ubic.gemma.core.analysis.expression.coexpression.links.LinkAnalysisConfig.SingularThreshold;
@@ -142,8 +140,7 @@ public CoexpCorrelationDistribution getCorrelationDistribution() {
DoubleArrayList histogramArrayList = this.metricMatrix.getHistogramArrayList();
result.setNumBins( histogramArrayList.size() );
- ByteArrayConverter bac = new ByteArrayConverter();
- result.setBinCounts( bac.doubleArrayToBytes( MatrixUtil.fromList( histogramArrayList ).toArray() ) );
+ result.setBinCounts( histogramArrayList.elements() );
return result;
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisPersisterImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisPersisterImpl.java
index 8684282015..97e53576d4 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisPersisterImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisPersisterImpl.java
@@ -19,7 +19,7 @@
import ubic.gemma.persistence.service.association.coexpression.CoexpressionService;
import ubic.gemma.persistence.service.association.coexpression.LinkCreator;
import ubic.gemma.persistence.service.association.coexpression.NonPersistentNonOrderedCoexpLink;
-import ubic.gemma.persistence.util.EntityUtils;
+import ubic.gemma.persistence.util.IdentifiableUtils;
import java.util.*;
@@ -63,7 +63,7 @@ public boolean deleteAnalyses( BioAssaySet ee ) {
@Override
public void initializeLinksFromOldData( Taxon t ) {
Collection<Gene> genes = geneService.loadAll( t );
- Map<Long, Gene> idMap = EntityUtils.getIdMap( genes );
+ Map<Long, Gene> idMap = IdentifiableUtils.getIdMap( genes );
/*
* First count the old links for every gene, and remove genes that have too few. That set of genes has to be
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
index e7eb1000a3..55685945a4 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/coexpression/links/LinkAnalysisServiceImpl.java
@@ -29,12 +29,12 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import ubic.basecode.dataStructure.Link;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.gemma.core.analysis.preprocess.InsufficientProbesException;
import ubic.gemma.core.analysis.preprocess.OutlierDetails;
import ubic.gemma.core.analysis.preprocess.OutlierDetectionService;
import ubic.gemma.core.analysis.preprocess.SVDRelatedPreprocessingException;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchEffectDetails;
+import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService;
import ubic.gemma.core.analysis.preprocess.filter.FilterConfig;
import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.core.analysis.preprocess.filter.InsufficientSamplesException;
@@ -59,7 +59,6 @@
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService;
-import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchInformationService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import java.io.IOException;
@@ -255,8 +254,7 @@ private void diagnoseCorrelationDistribution( ExpressionExperiment ee, CoexpCorr
/*
* Find the median, etc.
*/
- ByteArrayConverter bac = new ByteArrayConverter();
- double[] binCounts = bac.byteArrayToDoubles( corrDist.getBinCounts() );
+ double[] binCounts = corrDist.getBinCounts();
int numBins = binCounts.length;
DoubleMatrix1D histogram = new DenseDoubleMatrix1D( binCounts );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisUtil.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisUtil.java
index 0da329f786..152a67af14 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisUtil.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalysisUtil.java
@@ -21,7 +21,7 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrix;
+import ubic.gemma.core.datastructure.matrix.BulkExpressionDataMatrix;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.BioAssaySet;
@@ -46,9 +46,9 @@ public class DifferentialExpressionAnalysisUtil {
* false otherwise. When determining completeness, a biomaterial's factor values are only considered if they are
* equivalent to one of the input experimental factors.
*
- * @param factors to consider completeness for.
- * @param expressionExperiment the experiment
- * @return true if block complete
+ * @param factors to consider completeness for.
+ * @param expressionExperiment the experiment
+ * @return true if block complete
*/
@SuppressWarnings("BooleanMethodIsAlwaysInverted") // Better semantics
public static boolean blockComplete( BioAssaySet expressionExperiment, Collection<ExperimentalFactor> factors ) {
@@ -74,9 +74,9 @@ public static boolean blockComplete( BioAssaySet expressionExperiment, Collectio
/**
* See if there are at least two samples for each factor value combination.
*
- * @param expressionExperiment the experiment
- * @param factors factors
- * @return true if there are replicates
+ * @param expressionExperiment the experiment
+ * @param factors factors
+ * @return true if there are replicates
*/
static boolean checkBiologicalReplicates( BioAssaySet expressionExperiment,
Collection<ExperimentalFactor> factors ) {
@@ -121,9 +121,9 @@ static boolean checkBiologicalReplicates( BioAssaySet expressionExperiment,
* Check that the factorValues are measurements, or that there are at least two assays for at least one factor
* value. Otherwise the model fit will be perfect and pvalues will not be returned.
*
- * @param experimentalFactor exp. factor
- * @param expressionExperiment the experiment
- * @return true if it's okay, false otherwise.
+ * @param experimentalFactor exp. factor
+ * @param expressionExperiment the experiment
+ * @return true if it's okay, false otherwise.
*/
public static boolean checkValidForLm( BioAssaySet expressionExperiment, ExperimentalFactor experimentalFactor ) {
@@ -143,7 +143,7 @@ public static boolean checkValidForLm( BioAssaySet expressionExperiment, Experim
Map<FactorValue, Integer> counts = new HashMap<>();
for ( BioAssay ba : expressionExperiment.getBioAssays() ) {
BioMaterial bm = ba.getSampleUsed();
- for ( FactorValue fv : bm.getFactorValues() ) {
+ for ( FactorValue fv : bm.getAllFactorValues() ) {
if ( fv.getExperimentalFactor().equals( experimentalFactor ) ) {
if ( !counts.containsKey( fv ) ) {
@@ -172,10 +172,10 @@ public static boolean checkValidForLm( BioAssaySet expressionExperiment, Experim
/**
* Returns a List of all the different types of biomaterials across all bioassays in the experiment.
*
- * @param matrix matrix
- * @return list of biomaterials
+ * @param matrix matrix
+ * @return list of biomaterials
*/
- public static List<BioMaterial> getBioMaterialsForBioAssays( ExpressionDataMatrix<?> matrix ) {
+ public static List<BioMaterial> getBioMaterialsForBioAssays( BulkExpressionDataMatrix<?> matrix ) {
List<BioMaterial> biomaterials = new ArrayList<>();
@@ -205,9 +205,9 @@ public static List getBioMaterialsForBioAssays( ExpressionDataMatri
* factors, and all factor values from 1 factor have been paired with all factor values from the other factors,
* across all biomaterials.
*
- * @param biomaterials biomaterials
- * @param experimentalFactors exp. factors
- * @return false if not a complete block design.
+ * @param biomaterials biomaterials
+ * @param experimentalFactors exp. factors
+ * @return false if not a complete block design.
*/
private static boolean checkBlockDesign( Collection<BioMaterial> biomaterials,
Collection<ExperimentalFactor> experimentalFactors ) {
@@ -219,7 +219,7 @@ private static boolean checkBlockDesign( Collection biomaterials,
Map, BioMaterial> seenPairings = new HashMap<>();
for ( BioMaterial m : biomaterials ) {
- Collection<FactorValue> factorValuesFromBioMaterial = m.getFactorValues();
+ Collection<FactorValue> factorValuesFromBioMaterial = m.getAllFactorValues();
if ( factorValuesFromBioMaterial.size() < experimentalFactors.size() ) {
DifferentialExpressionAnalysisUtil.log
@@ -264,8 +264,8 @@ private static boolean checkBlockDesign( Collection biomaterials,
/**
* Generates all possible factor value pairings for the given experimental factors.
*
- * @param experimentalFactors exp. factors
- * @return A collection of hashSets, where each hashSet is a pairing.
+ * @param experimentalFactors exp. factors
+ * @return A collection of hashSets, where each hashSet is a pairing.
*/
private static Collection> generateFactorValuePairings(
Collection experimentalFactors ) {
@@ -312,7 +312,7 @@ private static Collection filterFactorValuesFromBiomaterials( Colle
Collection<BioMaterial> biomaterialsWithGivenFactorValues = new HashSet<>();
int numHaveAny = 0;
for ( BioMaterial b : biomaterials ) {
- Collection biomaterialFactorValues = b.getFactorValues();
+ Collection biomaterialFactorValues = b.getAllFactorValues();
Set factorValuesToConsider = new HashSet<>( biomaterialFactorValues );
for ( FactorValue biomaterialFactorValue : biomaterialFactorValues ) {
numHaveAny++;
@@ -354,7 +354,7 @@ private static List getBioMaterials( BioAssaySet ee ) {
*/
private static Collection<FactorValue> getRelevantFactorValues( Collection<ExperimentalFactor> factors,
BioMaterial biomaterial ) {
- Collection<FactorValue> factorValues = biomaterial.getFactorValues();
+ Collection<FactorValue> factorValues = biomaterial.getAllFactorValues();
Collection<FactorValue> factorValuesToCheck = new HashSet<>();
for ( FactorValue factorValue : factorValues ) {
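
The getFactorValues() -> getAllFactorValues() switch above recurs throughout this patch; the intent (per the sub-biomaterial TODO in SplitExperimentServiceImpl further down) is to also consider factor values inherited from a source biomaterial. A minimal sketch of the replicate count in checkValidForLm after the switch, assuming the Gemma classpath:

    Map<FactorValue, Integer> counts = new HashMap<>();
    for ( BioAssay ba : expressionExperiment.getBioAssays() ) {
        BioMaterial bm = ba.getSampleUsed();
        for ( FactorValue fv : bm.getAllFactorValues() ) { // includes inherited factor values
            if ( fv.getExperimentalFactor().equals( experimentalFactor ) ) {
                counts.merge( fv, 1, Integer::sum );       // assays per factor value
            }
        }
    }
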
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalyzerServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalyzerServiceImpl.java
index c82d403b1c..51f3ae287e 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalyzerServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/DifferentialExpressionAnalyzerServiceImpl.java
@@ -24,7 +24,6 @@
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.math.distribution.Histogram;
import ubic.basecode.util.FileTools;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
@@ -229,8 +228,8 @@ public DifferentialExpressionAnalysis persistAnalysis( ExpressionExperiment expr
// we do this here because now we have IDs for everything.
if ( config.getMakeArchiveFile() ) {
- try {
- expressionDataFileService.writeDiffExArchiveFile( expressionExperiment, analysis, config );
+ try ( ExpressionDataFileService.LockedPath lockedPath = expressionDataFileService.writeDiffExAnalysisArchiveFile( analysis, config ) ) {
+ log.info( "Create archive file at " + lockedPath.getPath() );
} catch ( IOException e ) {
DifferentialExpressionAnalyzerServiceImpl.log
.error( "Unable to save the data to a file: " + e.getMessage() );
@@ -293,8 +292,7 @@ private void addPvalueDistribution( ExpressionAnalysisResultSet resultSet ) {
PvalueDistribution pvd = PvalueDistribution.Factory.newInstance();
pvd.setNumBins( 100 );
- ByteArrayConverter bac = new ByteArrayConverter();
- pvd.setBinCounts( bac.doubleArrayToBytes( pvalHist.getArray() ) );
+ pvd.setBinCounts( pvalHist.getArray() );
resultSet.setPvalueDistribution( pvd ); // do not save yet.
}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/GeneDifferentialExpressionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/GeneDifferentialExpressionServiceImpl.java
index 4672152a5b..40fa187284 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/GeneDifferentialExpressionServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/GeneDifferentialExpressionServiceImpl.java
@@ -29,7 +29,7 @@
import ubic.gemma.model.genome.Gene;
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.persistence.service.analysis.expression.diff.DifferentialExpressionResultService;
-import ubic.gemma.persistence.util.EntityUtils;
+import ubic.gemma.persistence.util.IdentifiableUtils;
import java.util.*;
@@ -113,7 +113,7 @@ public Collection getDifferentialExpression(
return devos;
Map> results = differentialExpressionResultService
- .find( gene, EntityUtils.getIds( ees ) );
+ .find( gene, IdentifiableUtils.getIds( ees ) );
timer.stop();
if ( timer.getTime() > 1000 ) {
GeneDifferentialExpressionServiceImpl.log.info( "Diff ex results: " + timer.getTime() + " ms" );
@@ -222,7 +222,7 @@ public DifferentialExpressionMetaAnalysisValueObject getDifferentialExpressionMe
* for the meta analysis. The results returned are for all factors, not just the factors we are seeking.
*/
Map> resultsMap = differentialExpressionResultService
- .find( g, EntityUtils.getIds( activeExperiments ) );
+ .find( g, IdentifiableUtils.getIds( activeExperiments ) );
GeneDifferentialExpressionServiceImpl.log
.debug( resultsMap.size() + " results for " + g + " in " + activeExperiments );
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
index 7e56770521..9a4cb1ee2b 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/expression/diff/LinearModelAnalyzer.java
@@ -28,6 +28,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.DisposableBean;
+import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Component;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
@@ -36,11 +37,11 @@
import ubic.basecode.math.DescriptiveWithMissing;
import ubic.basecode.math.MathUtil;
import ubic.basecode.math.linearmodels.*;
-import ubic.gemma.model.expression.experiment.ExperimentalDesignUtils;
+import ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionException;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
-import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrixUtil;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixColumnSort;
-import ubic.gemma.core.datastructure.matrix.MatrixWriter;
+import ubic.gemma.core.datastructure.matrix.io.MatrixWriter;
+import ubic.gemma.core.util.BuildInfo;
import ubic.gemma.model.analysis.expression.diff.*;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
@@ -51,6 +52,7 @@
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Gene;
+import ubic.gemma.persistence.util.EntityUrlBuilder;
import java.io.File;
import java.io.FileWriter;
@@ -58,6 +60,8 @@
import java.util.*;
import java.util.concurrent.*;
+import static ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionUtils.filterAndLog2Transform;
+
/**
* Handles fitting linear models with continuous or fixed-level covariates. Data are always log-transformed.
* Interactions can be included if a DifferentialExpressionAnalysisConfig is passed as an argument to 'run'. Currently
@@ -97,7 +101,7 @@ public static void populateFactorValuesFromBASet( BioAssaySet ee, ExperimentalFa
Collection<FactorValue> fvs ) {
for ( BioAssay ba : ee.getBioAssays() ) {
BioMaterial bm = ba.getSampleUsed();
- for ( FactorValue fv : bm.getFactorValues() ) {
+ for ( FactorValue fv : bm.getAllFactorValues() ) {
if ( fv.getExperimentalFactor().equals( f ) ) {
fvs.add( fv );
}
@@ -146,6 +150,11 @@ public static BioAssayDimension createBADMap( List columnsToUse ) {
return reorderedDim;
}
+ @Autowired
+ private EntityUrlBuilder entityUrlBuilder;
+ @Autowired
+ private BuildInfo buildInfo;
+
/**
* Executor used for performing analyses in the background while the current thread is reporting progress.
*
@@ -331,9 +340,6 @@ public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset,
*/
ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService
.getProcessedExpressionDataMatrix( subset.getSourceExperiment() );
- if ( dmatrix == null ) {
- throw new RuntimeException( String.format( "There are no processed EVs for %s.", subset.getSourceExperiment() ) );
- }
ExperimentalFactor ef = config.getSubsetFactor();
Collection<BioMaterial> bmTmp = new HashSet<>();
@@ -345,7 +351,7 @@ public DifferentialExpressionAnalysis run( ExpressionExperimentSubSet subset,
FactorValue subsetFactorValue = null;
for ( BioMaterial bm : samplesInSubset ) {
- Collection<FactorValue> fvs = bm.getFactorValues();
+ Collection<FactorValue> fvs = bm.getAllFactorValues();
for ( FactorValue fv : fvs ) {
if ( fv.getExperimentalFactor().equals( ef ) ) {
if ( subsetFactorValue == null ) {
@@ -394,9 +400,6 @@ public Collection run( ExpressionExperiment expr
ExpressionDataDoubleMatrix dmatrix = expressionDataMatrixService
.getProcessedExpressionDataMatrix( expressionExperiment );
- if ( dmatrix == null ) {
- throw new RuntimeException( String.format( "There are no processed EVs for %s.", expressionExperiment ) );
- }
return this.run( expressionExperiment, dmatrix, config );
@@ -594,11 +597,11 @@ private boolean checkIfNeedToTreatAsIntercept( ExperimentalFactor experimentalFa
*/
private void outputForDebugging( ExpressionDataDoubleMatrix dmatrix,
ObjectMatrix designMatrix ) {
- MatrixWriter mw = new MatrixWriter();
+ MatrixWriter mw = new MatrixWriter( entityUrlBuilder, buildInfo );
try ( FileWriter writer = new FileWriter( File.createTempFile( "data.", ".txt" ) );
FileWriter out = new FileWriter( File.createTempFile( "design.", ".txt" ) ) ) {
- mw.write( writer, dmatrix, null, true, false );
+ mw.write( dmatrix, writer );
ubic.basecode.io.writer.MatrixWriter dem = new ubic.basecode.io.writer.MatrixWriter<>(
out );
@@ -720,7 +723,11 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
/*
* FIXME: remove columns that are marked as outliers, this will make some steps cleaner
*/
- expressionData = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform( expressionData );
+ try {
+ expressionData = filterAndLog2Transform( expressionData );
+ } catch ( QuantitationTypeConversionException e ) {
+ throw new RuntimeException( e );
+ }
DoubleMatrix bareFilteredDataMatrix = expressionData.getMatrix();
DoubleMatrix1D librarySizes = getLibrarySizes( config, expressionData );
@@ -741,7 +748,7 @@ private DifferentialExpressionAnalysis doAnalysis( BioAssaySet bioAssaySet,
final Map rawResults = this
.runAnalysis( bareFilteredDataMatrix, finalDataMatrix, properDesignMatrix, librarySizes, config );
- if ( rawResults.size() == 0 ) {
+ if ( rawResults.isEmpty() ) {
LinearModelAnalyzer.log.error( "Got no results from the analysis" );
return null;
}
@@ -1339,7 +1346,7 @@ private Map makeSubSets( DifferentialEx
for ( BioMaterial sample : samplesUsed ) {
boolean ok = false;
- for ( FactorValue fv : sample.getFactorValues() ) {
+ for ( FactorValue fv : sample.getAllFactorValues() ) {
if ( fv.getExperimentalFactor().equals( subsetFactor ) ) {
subSetSamples.get( fv ).add( sample );
ok = true;
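
Two call-site changes in this file are worth reading together: the matrix writer now needs the autowired EntityUrlBuilder and BuildInfo, and filterAndLog2Transform is a static import that throws a checked QuantitationTypeConversionException. A minimal sketch combining both, assuming only the signatures used in these hunks (the log field is illustrative):

    ExpressionDataDoubleMatrix transformed;
    try {
        transformed = filterAndLog2Transform( dmatrix ); // checked since the QT-conversion refactoring
    } catch ( QuantitationTypeConversionException e ) {
        throw new RuntimeException( e );
    }
    MatrixWriter mw = new MatrixWriter( entityUrlBuilder, buildInfo );
    try ( FileWriter writer = new FileWriter( File.createTempFile( "data.", ".txt" ) ) ) {
        mw.write( transformed, writer );
    } catch ( IOException e ) {
        log.warn( "Could not write the debugging matrix", e );
    }
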
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FailedToComputeSingularValueDecomposition.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FailedToComputeSingularValueDecomposition.java
deleted file mode 100644
index 6589854ad1..0000000000
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FailedToComputeSingularValueDecomposition.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package ubic.gemma.core.analysis.preprocess;
-
-import ubic.gemma.core.analysis.preprocess.svd.SVDException;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-
-public class FailedToComputeSingularValueDecomposition extends PreprocessingException {
- public FailedToComputeSingularValueDecomposition( ExpressionExperiment ee, SVDException cause ) {
- super( ee, cause );
- }
-}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FilteringRelatedPreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FilteringRelatedPreprocessingException.java
new file mode 100644
index 0000000000..af7e775d21
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/FilteringRelatedPreprocessingException.java
@@ -0,0 +1,19 @@
+package ubic.gemma.core.analysis.preprocess;
+
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+public class FilteringRelatedPreprocessingException extends PreprocessingException {
+
+ private final FilteringException cause;
+
+ public FilteringRelatedPreprocessingException( ExpressionExperiment ee, FilteringException cause ) {
+ super( ee, cause );
+ this.cause = cause;
+ }
+
+ @Override
+ public FilteringException getCause() {
+ return cause;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/MeanVarianceServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/MeanVarianceServiceImpl.java
index da5e4e9780..8557c1f94e 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/MeanVarianceServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/MeanVarianceServiceImpl.java
@@ -21,12 +21,10 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.math.linearmodels.MeanVarianceEstimator;
+import ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionException;
import ubic.gemma.core.analysis.service.ExpressionDataMatrixService;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
-import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrixUtil;
-import ubic.gemma.model.common.auditAndSecurity.eventType.FailedMeanVarianceUpdateEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.MeanVarianceUpdateEvent;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation;
@@ -34,6 +32,8 @@
import ubic.gemma.persistence.service.common.auditAndSecurity.AuditTrailService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
+import static ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionUtils.filterAndLog2Transform;
+
/**
* Manage the mean-variance relationship.
*
@@ -43,7 +43,6 @@
public class MeanVarianceServiceImpl implements MeanVarianceService {
private static final Log log = LogFactory.getLog( MeanVarianceServiceImpl.class );
- private static final ByteArrayConverter bac = new ByteArrayConverter();
@Autowired
private ExpressionExperimentService expressionExperimentService;
@@ -78,8 +77,8 @@ public MeanVarianceRelation create( ExpressionExperiment ee, boolean forceRecomp
throw new IllegalStateException( "Did not find any preferred quantitation type. Mean-variance relation was not computed." );
}
try {
- intensities = ExpressionDataDoubleMatrixUtil.filterAndLog2Transform( intensities );
- } catch ( UnsupportedQuantitationScaleConversionException e ) {
+ intensities = filterAndLog2Transform( intensities );
+ } catch ( QuantitationTypeConversionException e ) {
log.warn( "Problem log transforming data. Check that the appropriate log scale is used. Mean-variance will be computed as is." );
}
@@ -110,8 +109,8 @@ private MeanVarianceRelation calculateMeanVariance( ExpressionDataDoubleMatrix m
MeanVarianceRelation mvr = MeanVarianceRelation.Factory.newInstance();
if ( mve.getMeanVariance() != null ) {
- mvr.setMeans( bac.doubleArrayToBytes( mve.getMeanVariance().viewColumn( 0 ).toArray() ) );
- mvr.setVariances( bac.doubleArrayToBytes( mve.getMeanVariance().viewColumn( 1 ).toArray() ) );
+ mvr.setMeans( mve.getMeanVariance().viewColumn( 0 ).toArray() );
+ mvr.setVariances( mve.getMeanVariance().viewColumn( 1 ).toArray() );
}
return mvr;
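
With the ByteArrayConverter gone here as well, the mean-variance relation stores both columns as double[]. A hypothetical consumer, assuming MeanVarianceRelation exposes getters matching the setters used above:

    double[] means = mvr.getMeans();           // assumed getter, mirrors setMeans( double[] )
    double[] variances = mvr.getVariances();   // assumed getter, mirrors setVariances( double[] )
    for ( int i = 0; i < means.length; i++ ) {
        log.debug( String.format( "mean=%.3f variance=%.3f", means[i], variances[i] ) );
    }
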
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionService.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionService.java
index 3fe88fc772..bc16b6a0db 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionService.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionService.java
@@ -15,6 +15,7 @@
package ubic.gemma.core.analysis.preprocess;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
@@ -40,7 +41,7 @@ public interface OutlierDetectionService {
* @return the information about the identified outliers.
* @see #identifyOutliersByMedianCorrelation(DoubleMatrix)
*/
- Collection<OutlierDetails> identifyOutliersByMedianCorrelation( ExpressionExperiment ee );
+ Collection<OutlierDetails> identifyOutliersByMedianCorrelation( ExpressionExperiment ee ) throws FilteringException;
/**
* Identify outliers by sorting by median, then looking for non-overlap of first quartile-second quartile range
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionServiceImpl.java
index 0aabd98f1d..40954ad3dc 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/OutlierDetectionServiceImpl.java
@@ -20,6 +20,7 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.persistence.service.analysis.expression.sampleCoexpression.SampleCoexpressionAnalysisService;
@@ -51,7 +52,7 @@ public Collection getOutlierDetails( ExpressionExperiment ee ) {
}
@Override
- public Collection<OutlierDetails> identifyOutliersByMedianCorrelation( ExpressionExperiment ee ) {
+ public Collection<OutlierDetails> identifyOutliersByMedianCorrelation( ExpressionExperiment ee ) throws FilteringException {
DoubleMatrix cormat = sampleCoexpressionAnalysisService.loadBestMatrix( ee );
if ( cormat == null ) {
cormat = sampleCoexpressionAnalysisService.compute( ee, sampleCoexpressionAnalysisService.prepare( ee ) );
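
Since identifyOutliersByMedianCorrelation now declares FilteringException, callers must handle the case where the processed data cannot be filtered. A hypothetical caller-side sketch (names illustrative, Gemma classpath assumed):

    try {
        Collection<OutlierDetails> outliers = outlierDetectionService.identifyOutliersByMedianCorrelation( ee );
        log.info( outliers.size() + " potential outlier(s) detected in " + ee );
    } catch ( FilteringException e ) {
        log.warn( "Could not produce a filtered data matrix for outlier detection in " + ee, e );
    }
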
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java
index 85d8c658ce..bfa491884d 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessingException.java
@@ -15,14 +15,22 @@
package ubic.gemma.core.analysis.preprocess;
import org.apache.commons.lang3.exception.ExceptionUtils;
+import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationException;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
/**
* Allows us to catch preprocessing errors and handle them correctly.
*
- * The main kind of preprocessing exceptions are {@link ubic.gemma.core.analysis.preprocess.filter.FilteringException}
- * and {@link ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationException}.
- *
+ * The main kinds of preprocessing exceptions are:
+ *
+ * - {@link QuantitationTypeDetectionRelatedPreprocessingException} when QT type cannot be detected from data or when
+ * the detected one disagrees with the assigned one
+ * - {@link QuantitationTypeConversionRelatedPreprocessingException} when a desired QT conversion is not possible
+ * - {@link FilteringException} when processed data cannot be filtered
+ * - {@link BatchInfoPopulationException} when batch info cannot be detected, populated, etc.
+ * - {@link SVDRelatedPreprocessingException} when singular value decomposition fails
+ *
* @author Paul
*/
public class PreprocessingException extends RuntimeException {
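
Given the subclasses enumerated in the updated javadoc, callers of the preprocessor can branch on the specific failure; each new subclass overrides getCause() to return the precise checked exception it wraps. A minimal sketch, assuming the PreprocessorService#process signature used later in this patch:

    try {
        preprocessorService.process( ee, false, false );
    } catch ( QuantitationTypeDetectionRelatedPreprocessingException e ) {
        log.warn( "Quantitation type could not be detected for " + ee + ": " + e.getCause().getMessage() );
    } catch ( QuantitationTypeConversionRelatedPreprocessingException e ) {
        log.warn( "Quantitation type conversion failed for " + ee + ": " + e.getCause().getMessage() );
    } catch ( FilteringRelatedPreprocessingException e ) {
        log.warn( "Filtering failed for " + ee + ": " + e.getCause().getMessage() );
    } catch ( PreprocessingException e ) {
        log.error( "Preprocessing failed for " + ee + ".", e );
    }
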
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
index bc61aa95f1..52f3035648 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/PreprocessorServiceImpl.java
@@ -23,12 +23,14 @@
import org.springframework.transaction.annotation.Transactional;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalyzerService;
import ubic.gemma.core.analysis.preprocess.batcheffects.ExpressionExperimentBatchCorrectionService;
+import ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionException;
+import ubic.gemma.core.analysis.preprocess.detect.QuantitationTypeDetectionException;
+import ubic.gemma.core.analysis.preprocess.filter.FilteringException;
import ubic.gemma.core.analysis.preprocess.svd.SVDException;
-import ubic.gemma.core.analysis.preprocess.svd.SVDServiceHelper;
+import ubic.gemma.core.analysis.preprocess.svd.SVDService;
import ubic.gemma.core.analysis.report.ExpressionExperimentReportService;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
-import ubic.gemma.core.datastructure.matrix.QuantitationMismatchException;
import ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysis;
import ubic.gemma.model.common.auditAndSecurity.eventType.BatchCorrectionEvent;
import ubic.gemma.model.common.auditAndSecurity.eventType.FailedMeanVarianceUpdateEvent;
@@ -51,6 +53,7 @@
import java.util.LinkedList;
@Service
+@Transactional(propagation = Propagation.NEVER)
public class PreprocessorServiceImpl implements PreprocessorService {
private static final Log log = LogFactory.getLog( PreprocessorServiceImpl.class );
@@ -72,7 +75,7 @@ public class PreprocessorServiceImpl implements PreprocessorService {
@Autowired
private SampleCoexpressionAnalysisService sampleCoexpressionAnalysisService;
@Autowired
- private SVDServiceHelper svdService;
+ private SVDService svdService;
@Autowired
private TwoChannelMissingValues twoChannelMissingValueService;
@Autowired
@@ -83,7 +86,6 @@ public class PreprocessorServiceImpl implements PreprocessorService {
private GeeqService geeqService;
@Override
- @Transactional(propagation = Propagation.NEVER)
public void process( ExpressionExperiment ee, boolean ignoreQuantitationMismatch, boolean ignoreDiagnosticsFailure ) throws PreprocessingException {
StopWatch timer = new StopWatch();
timer.start();
@@ -113,21 +115,26 @@ public void process( ExpressionExperiment ee, boolean ignoreQuantitationMismatch
*/
private void batchCorrect( ExpressionExperiment ee ) throws PreprocessingException {
if ( !expressionExperimentBatchCorrectionService.checkCorrectability( ee ) ) {
+ log.warn( ee + " is not batch-correctable, will not perform ComBat." );
return;
}
- Collection<ProcessedExpressionDataVector> vecs = this.getProcessedExpressionDataVectors( ee );
+ Collection<ProcessedExpressionDataVector> vecs;
+ try {
+ vecs = this.getProcessedExpressionDataVectors( ee );
+ } catch ( QuantitationTypeConversionException e ) {
+ throw new QuantitationTypeConversionRelatedPreprocessingException( ee, e );
+ }
ExpressionDataDoubleMatrix correctedData = this.getCorrectedData( ee, vecs );
// Convert to vectors (persist QT)
- processedExpressionDataVectorService.replaceProcessedDataVectors( ee, correctedData.toProcessedDataVectors() );
+ int replaced = processedExpressionDataVectorService.replaceProcessedDataVectors( ee, correctedData.toProcessedDataVectors(), false );
- auditTrailService.addUpdateEvent( ee, BatchCorrectionEvent.class, "ComBat batch correction", "" );
+ auditTrailService.addUpdateEvent( ee, BatchCorrectionEvent.class, String.format( "ComBat batch correction, vectors were replaced with %d batch-corrected ones.", replaced ) );
}
@Override
- @Transactional(propagation = Propagation.NEVER)
public void processDiagnostics( ExpressionExperiment ee ) throws PreprocessingException {
this.processForSampleCorrelation( ee );
this.processForMeanVarianceRelation( ee );
@@ -138,16 +145,17 @@ public void processDiagnostics( ExpressionExperiment ee ) throws PreprocessingEx
private void processVectorCreate( ExpressionExperiment ee, boolean ignoreQuantitationMismatch ) throws PreprocessingException {
try {
- processedExpressionDataVectorService.computeProcessedExpressionData( ee, ignoreQuantitationMismatch );
- } catch ( QuantitationMismatchException e ) {
+ processedExpressionDataVectorService.createProcessedDataVectors( ee, true, ignoreQuantitationMismatch );
+ } catch ( QuantitationTypeDetectionException e ) {
// wrap it in a runtime exception, which will result in a rollback of the current transaction
- throw new QuantitationMismatchPreprocessingException( ee, e );
+ throw new QuantitationTypeDetectionRelatedPreprocessingException( ee, e );
+ } catch ( QuantitationTypeConversionException e ) {
+ throw new QuantitationTypeConversionRelatedPreprocessingException( ee, e );
}
}
/**
* Refresh the batch status of the data set.
- * @param ee
*/
private void processBatchInfo( ExpressionExperiment ee ) {
expressionExperimentReportService.recalculateExperimentBatchInfo( ee );
@@ -241,14 +249,18 @@ private void processForPca( ExpressionExperiment ee ) throws SVDRelatedPreproces
private void processForSampleCorrelation( ExpressionExperiment ee ) throws SampleCoexpressionRelatedPreprocessingException {
try {
sampleCoexpressionAnalysisService.compute( ee, sampleCoexpressionAnalysisService.prepare( ee ) );
- } catch ( RuntimeException e ) {
+ } catch ( FilteringException e ) {
+ auditTrailService.addUpdateEvent( ee, FailedSampleCorrelationAnalysisEvent.class, null, e );
+ throw new FilteringRelatedPreprocessingException( ee, e );
+ } catch ( Exception e ) {
auditTrailService.addUpdateEvent( ee, FailedSampleCorrelationAnalysisEvent.class, null, e );
throw new SampleCoexpressionRelatedPreprocessingException( ee, e );
}
}
private void removeInvalidatedData( ExpressionExperiment expExp ) {
- dataFileService.deleteAllFiles( expExp );
+ dataFileService.deleteAllProcessedDataFiles( expExp );
+ dataFileService.deleteAllAnalysisFiles( expExp );
}
private void checkQuantitationType( ExpressionDataDoubleMatrix correctedData ) {
@@ -294,12 +306,12 @@ private ExpressionDataDoubleMatrix getCorrectedData( ExpressionExperiment ee,
*
* @return processed data vectors; if they don't exist, create them. They will be thawed in either case.
*/
- private Collection<ProcessedExpressionDataVector> getProcessedExpressionDataVectors( ExpressionExperiment ee ) {
+ private Collection<ProcessedExpressionDataVector> getProcessedExpressionDataVectors( ExpressionExperiment ee ) throws QuantitationTypeConversionException {
Collection<ProcessedExpressionDataVector> vecs = processedExpressionDataVectorService
.getProcessedDataVectorsAndThaw( ee );
if ( vecs.isEmpty() ) {
log.info( String.format( "No processed vectors for %s, they will be computed from raw data...", ee ) );
- this.processedExpressionDataVectorService.computeProcessedExpressionData( ee );
+ this.processedExpressionDataVectorService.createProcessedDataVectors( ee, true );
vecs = this.processedExpressionDataVectorService.getProcessedDataVectorsAndThaw( ee );
}
return vecs;
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationMismatchPreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationMismatchPreprocessingException.java
deleted file mode 100644
index 597f71cc1a..0000000000
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationMismatchPreprocessingException.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package ubic.gemma.core.analysis.preprocess;
-
-import ubic.gemma.core.datastructure.matrix.QuantitationMismatchException;
-import ubic.gemma.model.expression.experiment.ExpressionExperiment;
-
-public class QuantitationMismatchPreprocessingException extends PreprocessingException {
-
- public QuantitationMismatchPreprocessingException( ExpressionExperiment ee, QuantitationMismatchException cause ) {
- super( ee, cause );
- }
-
- @Override
- public synchronized QuantitationMismatchException getCause() {
- return ( QuantitationMismatchException ) super.getCause();
- }
-}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeConversionRelatedPreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeConversionRelatedPreprocessingException.java
new file mode 100644
index 0000000000..9c28498b7e
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeConversionRelatedPreprocessingException.java
@@ -0,0 +1,19 @@
+package ubic.gemma.core.analysis.preprocess;
+
+import ubic.gemma.core.analysis.preprocess.convert.QuantitationTypeConversionException;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+public class QuantitationTypeConversionRelatedPreprocessingException extends PreprocessingException {
+
+ private final QuantitationTypeConversionException cause;
+
+ public QuantitationTypeConversionRelatedPreprocessingException( ExpressionExperiment ee, QuantitationTypeConversionException cause ) {
+ super( ee, cause );
+ this.cause = cause;
+ }
+
+ @Override
+ public QuantitationTypeConversionException getCause() {
+ return cause;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeDetectionRelatedPreprocessingException.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeDetectionRelatedPreprocessingException.java
new file mode 100644
index 0000000000..25a70a076c
--- /dev/null
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/QuantitationTypeDetectionRelatedPreprocessingException.java
@@ -0,0 +1,23 @@
+package ubic.gemma.core.analysis.preprocess;
+
+import ubic.gemma.core.analysis.preprocess.detect.QuantitationTypeDetectionException;
+import ubic.gemma.model.expression.experiment.ExpressionExperiment;
+
+/**
+ * An exception that wraps a {@link QuantitationTypeDetectionException}.
+ * @author poirigui
+ */
+public class QuantitationTypeDetectionRelatedPreprocessingException extends PreprocessingException {
+
+ private final QuantitationTypeDetectionException cause;
+
+ public QuantitationTypeDetectionRelatedPreprocessingException( ExpressionExperiment ee, QuantitationTypeDetectionException cause ) {
+ super( ee, cause );
+ this.cause = cause;
+ }
+
+ @Override
+ public QuantitationTypeDetectionException getCause() {
+ return cause;
+ }
+}
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
index 9c5916759d..b319d2c160 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/SplitExperimentServiceImpl.java
@@ -25,10 +25,12 @@
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
+import org.springframework.util.Assert;
import ubic.gemma.core.analysis.preprocess.batcheffects.BatchInfoPopulationServiceImpl;
import ubic.gemma.core.analysis.service.ExpressionDataFileService;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrix;
+import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixBuilder;
import ubic.gemma.model.analysis.expression.ExpressionExperimentSet;
import ubic.gemma.model.common.description.Characteristic;
import ubic.gemma.model.common.description.DatabaseEntry;
@@ -183,6 +185,8 @@ public ExpressionExperimentSet split( ExpressionExperiment toSplit, Experimental
BioMaterial bm = ba.getSampleUsed();
// identify samples we want to include
+ // TODO: support sub-biomaterials and use getAllFactorValues() instead; we also need to implement
+ // cloneBioMaterial() accordingly
for ( FactorValue fv : bm.getFactorValues() ) {
if ( fv.equals( splitValue ) ) {
assert !bms.contains( bm );
@@ -535,7 +539,9 @@ private BioAssay cloneBioAssay( BioAssay ba ) {
clone.setSequenceReadCount( ba.getSequenceReadCount() );
clone.setSequenceReadLength( ba.getSequenceReadLength() );
- clone.setSampleUsed( this.cloneBioMaterial( ba.getSampleUsed(), clone ) );
+ BioMaterial sampleClone = this.cloneBioMaterial( ba.getSampleUsed() );
+ clone.setSampleUsed( sampleClone );
+ sampleClone.getBioAssaysUsedIn().add( clone );
clone.setAccession( this.cloneAccession( ba.getAccession() ) );
return clone;
@@ -553,8 +559,8 @@ private DatabaseEntry cloneAccession( DatabaseEntry de ) {
return clone;
}
- private BioMaterial cloneBioMaterial( BioMaterial bm, BioAssay ba ) {
- assert ba.getId() == null; // should be a clone
+ private BioMaterial cloneBioMaterial( BioMaterial bm ) {
+ Assert.isNull( bm.getSourceBioMaterial(), "Cannot split an experiment with biomaterials that have a source biomaterial." );
BioMaterial clone = BioMaterial.Factory.newInstance();
clone.setName( bm.getName() + " (Split)" ); // it is important we make a new name, so we don't confuse this with the previous one in findOrCreate();
clone.setDescription( bm.getDescription() );
@@ -562,7 +568,6 @@ private BioMaterial cloneBioMaterial( BioMaterial bm, BioAssay ba ) {
clone.setExternalAccession( this.cloneAccession( bm.getExternalAccession() ) );
clone.setSourceTaxon( bm.getSourceTaxon() );
clone.setTreatments( this.cloneTreatments( bm.getTreatments() ) );
- clone.getBioAssaysUsedIn().add( ba );
// Factor values are done separately
return clone;
}
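
The clone wiring above moves the bioassay/biomaterial back-reference out of cloneBioMaterial(): the association is now established in cloneBioAssay() so both sides stay in sync. A condensed sketch of the pattern (Gemma classpath assumed):

    BioMaterial sampleClone = this.cloneBioMaterial( ba.getSampleUsed() ); // rejects sub-biomaterials
    clone.setSampleUsed( sampleClone );
    sampleClone.getBioAssaysUsedIn().add( clone ); // keep the bidirectional association consistent
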
diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/TwoChannelMissingValuesImpl.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/TwoChannelMissingValuesImpl.java
index 314d9151d8..9cf8f17756 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/TwoChannelMissingValuesImpl.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/TwoChannelMissingValuesImpl.java
@@ -25,14 +25,14 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
-import ubic.basecode.io.ByteArrayConverter;
import ubic.basecode.math.distribution.Histogram;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
+import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixBuilder;
import ubic.gemma.core.datastructure.matrix.ExpressionDataMatrixRowElement;
import ubic.gemma.model.common.auditAndSecurity.eventType.MissingValueAnalysisEvent;
import ubic.gemma.model.common.quantitationtype.*;
import ubic.gemma.model.expression.bioAssayData.BioAssayDimension;
-import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector;
+import ubic.gemma.model.expression.bioAssayData.BulkExpressionDataVector;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
@@ -132,7 +132,7 @@ public Collection computeMissingValues( ExpressionExper
timer.stop();
this.logTimeInfo( timer, procVectors.size() + rawVectors.size() );
- Collection<? extends DesignElementDataVector> builderVectors = new HashSet<>(
+ Collection<? extends BulkExpressionDataVector> builderVectors = new HashSet<>(
rawVectors.isEmpty() ? procVectors : rawVectors );
ExpressionDataMatrixBuilder builder = new ExpressionDataMatrixBuilder( builderVectors );
@@ -228,8 +228,6 @@ private Collection computeMissingValues( ExpressionExpe
return results;
}
- ByteArrayConverter converter = new ByteArrayConverter();
-
int count = 0;
ExpressionDataDoubleMatrix baseChannel = signalChannelA == null ? signalChannelB : signalChannelA;
@@ -242,7 +240,7 @@ private Collection computeMissingValues( ExpressionExpe
source.getQuantitationTypes().add( present );
for ( ExpressionDataMatrixRowElement element : baseChannel.getRowElements() ) {
count = this.examineVector( source, preferred, signalChannelA, signalChannelB, bkgChannelA, bkgChannelB,
- signalToNoiseThreshold, extraMissingValueIndicators, results, converter, count, baseChannel,
+ signalToNoiseThreshold, extraMissingValueIndicators, results, count, baseChannel,
signalThreshold, present, element );
}
@@ -263,7 +261,7 @@ private int examineVector( ExpressionExperiment source, ExpressionDataDoubleMatr
ExpressionDataDoubleMatrix signalChannelA, ExpressionDataDoubleMatrix signalChannelB,
ExpressionDataDoubleMatrix bkgChannelA, ExpressionDataDoubleMatrix bkgChannelB,
double signalToNoiseThreshold, Collection