From 196c53116b060712420e112e51611b3c5963d106 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 15:32:00 -0400 Subject: [PATCH 01/11] unit test for slurm submit file generation for FiniteVolume solver --- .../vcell/message/server/htc/HtcProxy.java | 2 +- .../message/server/htc/slurm/SlurmProxy.java | 49 +++--- .../server/htc/slurm/SlurmProxyTest.java | 125 ++++++++++++- .../SimID_274514696_0__0.simtask.xml | 155 +++++++++++++++++ .../V_REL_274514696_0_0.slurm.sub | 164 ++++++++++++++++++ 5 files changed, 458 insertions(+), 37 deletions(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java index cb19ed6884..642f145457 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java @@ -170,7 +170,7 @@ public HtcProxy(CommandService commandService, String htcUser){ * @throws ExecutableException */ public abstract HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_external, ExecutableCommand.Container commandSet, - int ncpus, double memSize, Collection postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException; + int ncpus, double memSize, Collection postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException, IOException; public abstract HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, File sub_file_external, File optProblemInputFile,File optProblemOutputFile,File optReportFile)throws ExecutableException; public abstract HtcProxy cloneThreadsafe(); diff --git 
a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java index 8b53127d08..0d56692f5d 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java @@ -1,15 +1,5 @@ package cbit.vcell.message.server.htc.slurm; -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.StringReader; -import java.util.*; - -import org.vcell.util.FileUtils; -import org.vcell.util.document.KeyValue; -import org.vcell.util.exe.ExecutableException; - import cbit.vcell.message.server.cmd.CommandService; import cbit.vcell.message.server.cmd.CommandService.CommandOutput; import cbit.vcell.message.server.cmd.CommandServiceLocal; @@ -28,6 +18,15 @@ import cbit.vcell.solvers.AbstractSolver; import cbit.vcell.solvers.ExecutableCommand; import edu.uchc.connjur.wb.LineStringBuilder; +import org.vcell.util.FileUtils; +import org.vcell.util.document.KeyValue; +import org.vcell.util.exe.ExecutableException; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.StringReader; +import java.util.*; public class SlurmProxy extends HtcProxy { @@ -811,17 +810,21 @@ private void slurmScriptInit(String jobName, boolean bPowerUser, MemLimitResults } @Override - public HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_external, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException { - try { + public HtcJobID submitJob(String jobName, File sub_file_as_internal_path, File sub_file_with_external_path, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws 
ExecutableException, IOException { + saveJobScript(jobName, sub_file_as_internal_path, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); + return submitJobFile(sub_file_with_external_path); + } + + public void saveJobScript(String jobName, File sub_file_as_internal_path, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask) throws IOException { if (LG.isDebugEnabled()) { LG.debug("generating local SLURM submit script for jobName="+jobName); } SlurmProxy.SbatchSolverComponents sbatchSolverComponents = generateScript(jobName, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); final String SUB = ".sub"; - //String slurmRootName = sub_file_external.getName().substring(0, sub_file_external.getName().length()-SUB.length()); + //String slurmRootName = sub_file_with_external_path.getName().substring(0, sub_file_with_external_path.getName().length()-SUB.length()); //String child = slurmRootName+".sh"; - //File intSolverScriptFile = new File(sub_file_internal.getParentFile(),child); - //File extSolverScriptFile = new File(sub_file_external.getParentFile(),child); + //File intSolverScriptFile = new File(sub_file_as_internal_path.getParentFile(),child); + //File extSolverScriptFile = new File(sub_file_with_external_path.getParentFile(),child); StringBuilder scriptContent = new StringBuilder(); //Write the .slurm.sh File that the .slurm.sub file references and make it executable @@ -1013,8 +1016,8 @@ public HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_ //----------Add solver script path to sbatch file, write the .slurm.sub file String substitutedSbatchCommands = sbatchSolverComponents.getSbatchCommands(); // if(isCompleteMultiTrialArray) { -// substitutedSbatchCommands = substitutedSbatchCommands.replaceAll("#SBATCH -o.*", "#SBATCH -o "+new File(sub_file_external.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); -// substitutedSbatchCommands 
= substitutedSbatchCommands.replaceAll("#SBATCH -e.*", "#SBATCH -e "+new File(sub_file_external.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); +// substitutedSbatchCommands = substitutedSbatchCommands.replaceAll("#SBATCH -o.*", "#SBATCH -o "+new File(sub_file_with_external_path.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); +// substitutedSbatchCommands = substitutedSbatchCommands.replaceAll("#SBATCH -e.*", "#SBATCH -e "+new File(sub_file_with_external_path.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); // substitutedSbatchCommands+= "#SBATCH --array=1-"+slurmArrayCount; // } File tempFile = File.createTempFile("tempSubFile", SUB); @@ -1026,18 +1029,10 @@ public HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_ // move submission file to final location (either locally or remotely). if (LG.isDebugEnabled()) { - LG.debug("moving local SLURM submit file '"+tempFile.getAbsolutePath()+"' to remote file '"+sub_file_external+"'"); + LG.debug("moving local SLURM submit file '"+tempFile.getAbsolutePath()+"' to remote file '"+sub_file_as_internal_path+"'"); } - FileUtils.copyFile(tempFile, sub_file_internal); + FileUtils.copyFile(tempFile, sub_file_as_internal_path); tempFile.delete(); - //---------- - - } catch (IOException ex) { - LG.error(ex); - return null; - } - - return submitJobFile(sub_file_external); } HtcJobID submitJobFile(File sub_file_external) throws ExecutableException { diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index ca8f66d8bd..4fd4eb7650 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -4,33 +4,140 @@ import cbit.vcell.message.server.htc.HtcJobStatus; import cbit.vcell.message.server.htc.HtcProxy.HtcJobInfo; import 
cbit.vcell.message.server.htc.HtcProxy.PartitionStatistics; +import cbit.vcell.messaging.server.SimulationTask; import cbit.vcell.mongodb.VCMongoMessage; +import cbit.vcell.parser.ExpressionException; import cbit.vcell.resource.PropertyLoader; import cbit.vcell.server.HtcJobID; +import cbit.vcell.simdata.PortableCommand; +import cbit.vcell.solvers.ExecutableCommand; +import cbit.vcell.xml.XmlHelper; +import cbit.vcell.xml.XmlParseException; import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.vcell.util.document.KeyValue; +import org.vcell.util.document.User; import org.vcell.util.exe.ExecutableException; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.net.MalformedURLException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Random; +import java.util.stream.Collectors; -@Disabled public class SlurmProxyTest { @BeforeAll - public static void setLogger() throws MalformedURLException + public static void setProperties() throws MalformedURLException { -// System.setProperty("log4j.configurationFile","/Users/schaff/Documents/workspace-modular/vcell/docker/trace.log4j2.xml"); + System.setProperty(PropertyLoader.vcellServerIDProperty,"REL"); + System.setProperty(PropertyLoader.htcLogDirExternal,"/share/apps/vcell3/htclogs"); + System.setProperty(PropertyLoader.slurm_partition,"vcell"); + System.setProperty(PropertyLoader.slurm_reservation,""); + System.setProperty(PropertyLoader.slurm_qos,"vcell"); + System.setProperty(PropertyLoader.primarySimDataDirExternalProperty,"/share/apps/vcell3/users"); + System.setProperty(PropertyLoader.secondarySimDataDirExternalProperty,"/share/apps/vcell7/users"); + System.setProperty(PropertyLoader.jmsSimHostExternal, "rke-wn-01.cam.uchc.edu"); + 
System.setProperty(PropertyLoader.jmsSimPortExternal, "31618"); + System.setProperty(PropertyLoader.jmsSimRestPortExternal, "30163"); + System.setProperty(PropertyLoader.jmsUser, "clientUser"); + System.setProperty(PropertyLoader.jmsPasswordValue, "dummy"); + System.setProperty(PropertyLoader.mongodbHostExternal, "rke-wn-01.cam.uchc.edu"); + System.setProperty(PropertyLoader.mongodbPortExternal, "30019"); + System.setProperty(PropertyLoader.mongodbDatabase, "test"); + System.setProperty(PropertyLoader.vcellSoftwareVersion, "Rel_Version_7.6.0_build_28"); + System.setProperty(PropertyLoader.vcellbatch_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"); + System.setProperty(PropertyLoader.slurm_tmpdir, "/scratch/vcell"); + System.setProperty(PropertyLoader.slurm_central_singularity_dir, "/share/apps/vcell3/singularityImages"); + System.setProperty(PropertyLoader.slurm_local_singularity_dir, "/state/partition1/singularityImages"); + System.setProperty(PropertyLoader.slurm_singularity_module_name, "singularity/vcell-3.10.0"); + System.setProperty(PropertyLoader.simDataDirArchiveExternal, "/share/apps/vcell12/users"); + System.setProperty(PropertyLoader.simDataDirArchiveInternal, "/share/apps/vcell12/users"); + System.setProperty(PropertyLoader.nativeSolverDir_External, "/share/apps/vcell3/nativesolvers"); + System.setProperty(PropertyLoader.jmsBlobMessageMinSize, "100000"); + System.setProperty(PropertyLoader.simulationPostprocessor, "JavaPostprocessor64"); + System.setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); } - - + + @Test + public void testSimJobScript() throws IOException, XmlParseException, ExpressionException { + + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml")); + + SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); + // make temp file + Path submitScript = 
Files.createTempFile("submit_script",".sh"); + File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub"); + String JOB_NAME = "V_REL_274514696_0_0"; + + KeyValue simKey = simTask.getSimKey(); + User simOwner = simTask.getSimulation().getVersion().getOwner(); + final int jobId = simTask.getSimulationJob().getJobIndex(); + + // preprocessor + String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml"; + File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); + List args = new ArrayList<>( 4 ); + args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); + args.add( simTaskFilePathExternal ); + args.add( primaryUserDirExternal.getAbsolutePath() ); + ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); + + // finite volume solver invocation + ExecutableCommand solverCmd = new ExecutableCommand( + new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), + "/usr/local/app/localsolvers/linux64/FiniteVolume_x64", + "/share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput", + "-tid", + "0"); + + // postprocessor + final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; + ExecutableCommand postprocessorCmd = new ExecutableCommand(null,false, false, + PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), + simKey.toString(), + simOwner.getName(), + simOwner.getID().toString(), + Integer.toString(jobId), + Integer.toString(simTask.getTaskID()), + SOLVER_EXIT_CODE_REPLACE_STRING, + subFileExternal.getAbsolutePath()); + postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); + + ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); + commandSet.add(preprocessorCmd); + commandSet.add(solverCmd); + commandSet.add(postprocessorCmd); + + int NUM_CPUs = 1; + int MEM_SIZE_MB = 1000; + ArrayList postProcessingCommands = new 
ArrayList<>(); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); + slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); + String slurmScript = FileUtils.readFileToString(submitScript.toFile()); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } + + private String readTextFileFromResource(String filename) throws IOException { + InputStream inputStream = getClass().getClassLoader().getResourceAsStream(filename); + if (inputStream == null) { + throw new IOException("Resource not found: " + filename); + } + String xmlString; + try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) { + xmlString = reader.lines().collect(Collectors.joining(System.lineSeparator())); + } + return xmlString; + } + + @Disabled // this test is disabled because it requires a running slurm server @Test public void testSingularitySupport() throws IOException, ExecutableException { CommandServiceSshNative cmd = null; @@ -115,12 +222,12 @@ public void testSingularitySupport() throws IOException, ExecutableException { } } - + + @Disabled // this test is disabled because it requires a running slurm server @Test public void testSLURM() throws IOException, ExecutableException { System.setProperty("log4j2.trace","true"); PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, "Test2"); - PropertyLoader.setProperty(PropertyLoader.htcLogDirExternal, "/Volumes/vcell/htclogs"); VCMongoMessage.enabled=false; String partitions[] = new String[] { "vcell", "vcell2" }; PropertyLoader.setProperty(PropertyLoader.slurm_partition, partitions[0]); diff --git a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml new file mode 100644 index 0000000000..05a321af5a --- /dev/null +++ 
b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml @@ -0,0 +1,155 @@ + + + 96485.3321 + 9.64853321E-5 + 1.0E-9 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 1.0 + 1.0 + 10.0 + 0.0 + 1000.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 10.0 + 0.0 + 10.0 + 10.0 + 4.5E-4 + 0.0 + 0.0 + 0.0 + 1.0 + 1.0 + 1.0 + + + + + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + (AreaPerUnitArea_pm * s2_init_molecules_um_2) + (AreaPerUnitVolume_nm / VolumePerUnitVolume_cyt) + (AreaPerUnitVolume_nm / VolumePerUnitVolume_nuc) + (1.0 + x) + (K_s2_total / AreaPerUnitArea_pm) + (VolumePerUnitVolume_cyt * vcRegionVolume('subdomain1')) + (VolumePerUnitVolume_EC * vcRegionVolume('subdomain0')) + (AreaPerUnitVolume_nm * vcRegionVolume('subdomain1')) + (VolumePerUnitVolume_nuc * vcRegionVolume('subdomain1')) + (AreaPerUnitArea_pm * vcRegionArea('subdomain0_subdomain1_membrane')) + vcRegionArea('subdomain0_subdomain1_membrane') + vcRegionVolume('subdomain0') + vcRegionVolume('subdomain1') + + + + + + + + + ( - J_r0 - (KFlux_nm_cyt * J_flux0)) + RanC_cyt_diffusionRate + RanC_cyt_init_uM + + + J_r0 + Ran_cyt_diffusionRate + Ran_cyt_init_uM + + + J_r0 + C_cyt_diffusionRate + C_cyt_init_uM + + + (KFlux_nm_nuc * J_flux0) + RanC_nuc_diffusionRate + RanC_nuc_init_uM + + + + + + + + + + + + + + + + + + + 0.0 + 0.0 + + + 0.0 + 0.0 + + + 0.0 + 0.0 + + + 0.0 + 0.0 + + + + + + + + + + + + + + + 2 + + 1 + + + + + + + + + + + + + + + ((((x - 5.0) ^ 2.0) + ((y - 5.0) ^ 2.0)) < (3.0 ^ 2.0)) + + + 1.0 + + + + + + + + + + + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub new file mode 100644 index 0000000000..11882bea7e --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub @@ -0,0 
+1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274514696_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? 
+ if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! -e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo 
${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274514696 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274514696 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274514696 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274514696 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/FiniteVolume_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/FiniteVolume_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/FiniteVolume_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}FiniteVolume_x64 /share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------FiniteVolume_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + From 9aac07332833d995658f972fa723fadb7137b915 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 17:02:48 -0400 Subject: [PATCH 02/11] unit tests for slurm scripts for CVODE, RK45 and Smoldyn --- .../server/htc/slurm/SlurmProxyTest.java | 181 +++++++++++- .../cvode/SimID_274630682_0__0.simtask.xml | 112 ++++++++ .../cvode/V_REL_274630682_0_0.slurm.sub | 164 +++++++++++ .../SimID_274631114_0__0.simtask.xml | 111 ++++++++ .../V_REL_274631114_0_0.slurm.sub | 137 +++++++++ .../smoldyn/SimID_274630052_0__0.simtask.xml | 264 ++++++++++++++++++ .../smoldyn/V_REL_274630052_0_0.slurm.sub | 164 +++++++++++ 7 files changed, 1132 insertions(+), 1 deletion(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub create mode 100644 vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub create mode 100644 vcell-server/src/test/resources/slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 4fd4eb7650..b0fbad69f4 100644 --- 
a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -67,7 +67,7 @@ public static void setProperties() throws MalformedURLException } @Test - public void testSimJobScript() throws IOException, XmlParseException, ExpressionException { + public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml")); @@ -125,6 +125,185 @@ public void testSimJobScript() throws IOException, XmlParseException, Expression Assertions.assertEquals(expectedSlurmScript, slurmScript); } + @Test + public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, ExpressionException { + + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml")); + + SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); + // make temp file + Path submitScript = Files.createTempFile("submit_script",".sh"); + File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub"); + String JOB_NAME = "V_REL_274630052_0_0"; + + KeyValue simKey = simTask.getSimKey(); + User simOwner = simTask.getSimulation().getVersion().getOwner(); + final int jobId = simTask.getSimulationJob().getJobIndex(); + + // preprocessor + String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml"; + File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); + List args = new ArrayList<>( 4 ); + args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); + args.add( simTaskFilePathExternal ); + args.add( primaryUserDirExternal.getAbsolutePath() ); + ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); + + // finite volume 
solver invocation + ExecutableCommand solverCmd = new ExecutableCommand( + new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), + "/usr/local/app/localsolvers/linux64/smoldyn_x64", + "/share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput", + "-tid", + "0"); + + // postprocessor + final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; + ExecutableCommand postprocessorCmd = new ExecutableCommand(null,false, false, + PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), + simKey.toString(), + simOwner.getName(), + simOwner.getID().toString(), + Integer.toString(jobId), + Integer.toString(simTask.getTaskID()), + SOLVER_EXIT_CODE_REPLACE_STRING, + subFileExternal.getAbsolutePath()); + postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); + + ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); + commandSet.add(preprocessorCmd); + commandSet.add(solverCmd); + commandSet.add(postprocessorCmd); + + int NUM_CPUs = 1; + int MEM_SIZE_MB = 1000; + ArrayList postProcessingCommands = new ArrayList<>(); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); + slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); + String slurmScript = FileUtils.readFileToString(submitScript.toFile()); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } + + @Test + public void testSimJobScriptCVODE() throws IOException, XmlParseException, ExpressionException { + + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml")); + + SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); + // make temp file + Path submitScript = Files.createTempFile("submit_script",".sh"); + File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub"); + String 
JOB_NAME = "V_REL_274630682_0_0"; + + KeyValue simKey = simTask.getSimKey(); + User simOwner = simTask.getSimulation().getVersion().getOwner(); + final int jobId = simTask.getSimulationJob().getJobIndex(); + + // preprocessor + String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml"; + File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); + List args = new ArrayList<>( 4 ); + args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); + args.add( simTaskFilePathExternal ); + args.add( primaryUserDirExternal.getAbsolutePath() ); + ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); + + // finite volume solver invocation + ExecutableCommand solverCmd = new ExecutableCommand( + new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), + "/usr/local/app/localsolvers/linux64/SundialsSolverStandalone_x64", + "/share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput", + "/share/apps/vcell3/users/schaff/SimID_274630682_0_.ida", + "-tid", + "0"); + + // postprocessor + final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; + ExecutableCommand postprocessorCmd = new ExecutableCommand(null,false, false, + PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), + simKey.toString(), + simOwner.getName(), + simOwner.getID().toString(), + Integer.toString(jobId), + Integer.toString(simTask.getTaskID()), + SOLVER_EXIT_CODE_REPLACE_STRING, + subFileExternal.getAbsolutePath()); + postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); + + ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); + commandSet.add(preprocessorCmd); + commandSet.add(solverCmd); + commandSet.add(postprocessorCmd); + + int NUM_CPUs = 1; + int MEM_SIZE_MB = 1000; + ArrayList postProcessingCommands = new ArrayList<>(); + String expectedSlurmScript = 
readTextFileFromResource("slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub"); + slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); + String slurmScript = FileUtils.readFileToString(submitScript.toFile()); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } + + @Test + public void testSimJobScriptRK45() throws IOException, XmlParseException, ExpressionException { + + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml")); + + SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); + // make temp file + Path submitScript = Files.createTempFile("submit_script",".sh"); + File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub"); + String JOB_NAME = "V_REL_274631114_0_0"; + + KeyValue simKey = simTask.getSimKey(); + User simOwner = simTask.getSimulation().getVersion().getOwner(); + final int jobId = simTask.getSimulationJob().getJobIndex(); + + // preprocessor +// String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml"; +// File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); +// List args = new ArrayList<>( 4 ); +// args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); +// args.add( simTaskFilePathExternal ); +// args.add( primaryUserDirExternal.getAbsolutePath() ); +// ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); + + // finite volume solver invocation + ExecutableCommand.LibraryPath libraryPath = null; + ExecutableCommand solverCmd = new ExecutableCommand( + libraryPath, + "JavaSimExe64", + "/share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml", + "/share/apps/vcell3/users/schaff"); + + // postprocessor + final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; + ExecutableCommand 
postprocessorCmd = new ExecutableCommand(null,false, false, + PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), + simKey.toString(), + simOwner.getName(), + simOwner.getID().toString(), + Integer.toString(jobId), + Integer.toString(simTask.getTaskID()), + SOLVER_EXIT_CODE_REPLACE_STRING, + subFileExternal.getAbsolutePath()); + postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); + + ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); +// commandSet.add(preprocessorCmd); + commandSet.add(solverCmd); + commandSet.add(postprocessorCmd); + + int NUM_CPUs = 1; + int MEM_SIZE_MB = 1000; + ArrayList postProcessingCommands = new ArrayList<>(); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub"); + slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); + String slurmScript = FileUtils.readFileToString(submitScript.toFile()); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } + + private String readTextFileFromResource(String filename) throws IOException { InputStream inputStream = getClass().getClassLoader().getResourceAsStream(filename); if (inputStream == null) { diff --git a/vcell-server/src/test/resources/slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml new file mode 100644 index 0000000000..d4f0ab475a --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml @@ -0,0 +1,112 @@ + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 1.0E-9 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 
+ 0.0 + 1000.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 0.0 + 0.0 + 4.493165893949507E-4 + 0.0 + 14891.899581611733 + 124712.10435961554 + 1406.7733692487282 + 3697.013658772733 + 4738.640600365477 + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + + + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + ((Size_cyt * Ran_cyt_init_uM) - (Size_cyt * C_cyt_init_uM)) + ((Size_cyt * RanC_cyt_init_uM) + (Size_cyt * C_cyt_init_uM) + (Size_nuc * RanC_nuc_init_uM)) + (UnitFactor_uM_um3_molecules_neg_1 * Size_pm * s2_init_molecules_um_2) + (Size_nm / Size_cyt) + (Size_nm / Size_nuc) + ((K_Ran_cyt_total + (Size_cyt * C_cyt)) / Size_cyt) + ((K_RanC_cyt_total - (Size_cyt * C_cyt) - (Size_nuc * RanC_nuc)) / Size_cyt) + (K_s2_total / (UnitFactor_uM_um3_molecules_neg_1 * Size_pm)) + + + + + + + + + J_r0 + C_cyt_init_uM + + + (KFlux_nm_nuc * J_flux0) + RanC_nuc_init_uM + + + + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + + + + cloned from 'Copy of combined ida/cvode' owned by user frm +cloned from 'combined ida/cvode' owned by user anu +cloned from 'combined ida/cvode' owned by user schaff +cloned from 'Simulation1' owned by user les + + + + + + 1 + + + 222.22 + 7.7777 + + + + + cloned from 'Copy of combined ida/cvode' owned by user frm +cloned from 'combined ida/cvode' owned by user anu +cloned from 'combined ida/cvode' owned by user schaff +cloned from 'Simulation1' owned by user les + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' 
owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub new file mode 100644 index 0000000000..5c7b4d8d99 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274630682_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! 
-e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630682 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274630682 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274630682 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274630682 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/SundialsSolverStandalone_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/SundialsSolverStandalone_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/SundialsSolverStandalone_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}SundialsSolverStandalone_x64 /share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput /share/apps/vcell3/users/schaff/SimID_274630682_0_.ida -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------SundialsSolverStandalone_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + diff --git a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml new file mode 100644 index 0000000000..9939c8f88c --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml @@ -0,0 +1,111 @@ + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 1.0E-9 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 0.0 + 1000.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 0.0 + 0.0 + 4.493165893949507E-4 + 0.0 + 14891.899581611733 + 124712.10435961554 + 1406.7733692487282 + 3697.013658772733 + 4738.640600365477 + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + + + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + ((Size_cyt * Ran_cyt_init_uM) - (Size_cyt * C_cyt_init_uM)) + ((Size_cyt * RanC_cyt_init_uM) + (Size_cyt * C_cyt_init_uM) + (Size_nuc * RanC_nuc_init_uM)) + (UnitFactor_uM_um3_molecules_neg_1 * Size_pm * s2_init_molecules_um_2) + (Size_nm / Size_cyt) + (Size_nm / Size_nuc) + ((K_Ran_cyt_total + (Size_cyt * C_cyt)) / Size_cyt) + ((K_RanC_cyt_total - (Size_cyt * C_cyt) - (Size_nuc * RanC_nuc)) / Size_cyt) + (K_s2_total / 
(UnitFactor_uM_um3_molecules_neg_1 * Size_pm)) + + + + + + + + + J_r0 + C_cyt_init_uM + + + (KFlux_nm_nuc * J_flux0) + RanC_nuc_init_uM + + + + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + + + + cloned from 'Copy of runge kutta fehlberg' owned by user frm +cloned from 'runge kutta fehlberg' owned by user anu +cloned from 'runge kutta fehlberg' owned by user schaff +cloned from 'Copy of Simulation1' owned by user les + + + + + + 1 + + + 0.01 to 10.0, log, 4 values + + + + + cloned from 'Copy of runge kutta fehlberg' owned by user frm +cloned from 'runge kutta fehlberg' owned by user anu +cloned from 'runge kutta fehlberg' owned by user schaff +cloned from 'Copy of Simulation1' owned by user les + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub new file mode 100644 index 0000000000..6a87add1cc --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub @@ -0,0 +1,137 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274631114_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.log +#SBATCH -e 
/share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? 
+ if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! -e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo 
${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274631114 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274631114 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274631114 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274631114 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + diff --git a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml new file mode 100644 index 0000000000..892d14f2f0 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml @@ -0,0 +1,264 @@ + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 1.0E-9 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 1.0 + 1.0 + 10.0 + 0.0 + 1000.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 10.0 + 0.0 + 10.0 + 0.0 + 10.0 + 100.0 + 100.0 + 66.0 + 5000.0 + 50000.0 + 304.6 + 500.0 + 1414.0 + (1.0 * pow(KMOLE, - 1.0)) + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + 1.0 + 1.0 + 1.0 + + + + + + + + + + + + + + + + + + + + + + + Kf + + + + + + + + (Kr * UnitFactor_uM_um3_molecules_neg_1) + + + + + + + RanC_cyt_initCount + u + u + u + + RanC_cyt_diffusionRate + + + + Ran_cyt_initCount + u + u + u + + Ran_cyt_diffusionRate + + + + C_cyt_initCount + u + u + u + + C_cyt_diffusionRate + + + + + + + + + + + + RanC_nuc_initCount + u + u + u + + RanC_nuc_diffusionRate + + + + + + + + + + + + s2_initCount + u + u + u + + s2_diffusionRate + + + + + + + + + + + + kfl + + + + + + kfl + + + + + + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D 
pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + + + + cloned from 'Copy of smoldyn' owned by user frm +cloned from 'smoldyn' owned by user anu +cloned from 'smoldyn' owned by user schaff +cloned from 'Simulation5' owned by user les + + + + + + + 10.0 + true + true + 4096 + 1 + + 1 + + + + + + + + + cloned from 'Copy of smoldyn' owned by user frm +cloned from 'smoldyn' owned by user anu +cloned from 'smoldyn' owned by user schaff +cloned from 'Simulation5' owned by user les + + + + cloned from 'Site visit _Application0_20111127_1858974515' owned by user frm +cloned from 'Site visit _Application0_20111127_1779663361' owned by user anu +cloned from 'Site visit _Application0_20111127_249495150' owned by user schaff +cloned from 'Site visit _Application0_20111127_192544' owned by user les +cloned from 'img_20110908_141412' owned by user liye +cloned from 'utahProcessed3_NucEdit1727034244' owned by user schaff +cloned from 'utahProcessed3_NucEdit' owned by user frm +NoName + + + + cloned from 'img_20111127_191827126' owned by user frm +cloned from 'img_20111127_1310565439' owned by user anu +cloned from 'img_20111127_1507826175' owned by user schaff +cloned from 'img_20111127_192543' owned by user les +cloned from 'img_20110908_141412' owned by user liye +cloned from 'utahProcessed3_NucEdit1727034244' owned by user schaff +cloned from 'utahProcessed3_NucEdit' owned by user frm +NoNamecloned from 'img_20111127_191827126' owned by user frm +cloned from 'img_20111127_1310565439' owned by user anu +cloned from 'img_20111127_1507826175' owned by user schaff +cloned from 'img_20111127_192543' owned by user les +cloned from 'img_20110908_141412' owned by user liye +cloned from 'utahProcessed3_NucEdit1727034244' owned by user schaff +cloned from 'utahProcessed3_NucEdit' owned by user frm +NoName + + + + + + + + + + + + + + + + + + cloned from 'Site visit 
_Application0_20111127_1858974515' owned by user frm +cloned from 'Site visit _Application0_20111127_1779663361' owned by user anu +cloned from 'Site visit _Application0_20111127_249495150' owned by user schaff +cloned from 'Site visit _Application0_20111127_192544' owned by user les +cloned from 'img_20110908_141412' owned by user liye +cloned from 'utahProcessed3_NucEdit1727034244' owned by user schaff +cloned from 'utahProcessed3_NucEdit' owned by user frm +NoName + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub new file mode 100644 index 0000000000..97ab4c6254 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274630052_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! 
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274630052 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274630052 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274630052 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274630052 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/smoldyn_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/smoldyn_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/smoldyn_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}smoldyn_x64 /share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------smoldyn_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + From e7e29ba7a5e7b9b9b8002052c13e0ef051f65398 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 18:11:07 -0400 Subject: [PATCH 03/11] added test for Adams Moulton solver, refactored native/java tests --- .../server/htc/slurm/SlurmProxyTest.java | 200 +++++------------- .../SimID_274633859_0__0.simtask.xml | 111 ++++++++++ .../V_REL_274633859_0_0.slurm.sub | 137 ++++++++++++ 3 files changed, 303 insertions(+), 145 deletions(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index b0fbad69f4..42a593bf55 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -66,23 +66,21 @@ public static void setProperties() throws MalformedURLException System.setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); } - @Test - public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { + public String createScriptForNativeSolvers(String simTaskResourcePath, String solverExeName, String inputFileSuffix, String outputFileSuffix, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { - SimulationTask simTask = 
XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml")); + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource(simTaskResourcePath)); + KeyValue simKey = simTask.getSimKey(); SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); // make temp file Path submitScript = Files.createTempFile("submit_script",".sh"); - File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274514696_0_0.slurm.sub"); - String JOB_NAME = "V_REL_274514696_0_0"; + File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_"+simKey+"_0_0.slurm.sub"); - KeyValue simKey = simTask.getSimKey(); User simOwner = simTask.getSimulation().getVersion().getOwner(); final int jobId = simTask.getSimulationJob().getJobIndex(); // preprocessor - String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274514696_0__0.simtask.xml"; + String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0__0.simtask.xml"; File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); List args = new ArrayList<>( 4 ); args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); @@ -90,13 +88,16 @@ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException args.add( primaryUserDirExternal.getAbsolutePath() ); ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); - // finite volume solver invocation - ExecutableCommand solverCmd = new ExecutableCommand( - new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), - "/usr/local/app/localsolvers/linux64/FiniteVolume_x64", - "/share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput", - "-tid", - "0"); + ExecutableCommand.LibraryPath libraryPath = new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"); + String command = "/usr/local/app/localsolvers/linux64/"+solverExeName; + String inputFilePath = 
"/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0_."+inputFileSuffix; + String outputFilePath = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0_."+outputFileSuffix; + final ExecutableCommand solverCmd; + if (outputFileSuffix == null) { + solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, "-tid", "0"); + } else { + solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, outputFilePath, "-tid", "0"); + } // postprocessor final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; @@ -119,43 +120,29 @@ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException int NUM_CPUs = 1; int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); - String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); String slurmScript = FileUtils.readFileToString(submitScript.toFile()); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + return slurmScript; } - @Test - public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, ExpressionException { + public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { - SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml")); + SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource(simTaskResourcePath)); + KeyValue simKey = simTask.getSimKey(); SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); // make temp file Path submitScript = Files.createTempFile("submit_script",".sh"); - File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274630052_0_0.slurm.sub"); - String JOB_NAME = "V_REL_274630052_0_0"; + File subFileExternal = new 
File("/share/apps/vcell3/htclogs/V_REL_"+simKey+"_0_0.slurm.sub"); - KeyValue simKey = simTask.getSimKey(); User simOwner = simTask.getSimulation().getVersion().getOwner(); final int jobId = simTask.getSimulationJob().getJobIndex(); - // preprocessor - String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274630052_0__0.simtask.xml"; - File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); - List args = new ArrayList<>( 4 ); - args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); - args.add( simTaskFilePathExternal ); - args.add( primaryUserDirExternal.getAbsolutePath() ); - ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); - - // finite volume solver invocation - ExecutableCommand solverCmd = new ExecutableCommand( - new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), - "/usr/local/app/localsolvers/linux64/smoldyn_x64", - "/share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput", - "-tid", - "0"); + ExecutableCommand.LibraryPath libraryPath = null; + String command = "JavaSimExe64"; + String userDir = "/share/apps/vcell3/users/schaff"; + String simTaskRemoteFilename = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0__0.simtask.xml"; + final ExecutableCommand solverCmd = new ExecutableCommand(libraryPath, command, simTaskRemoteFilename, userDir); // postprocessor final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; @@ -171,138 +158,61 @@ public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, Exp postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); - commandSet.add(preprocessorCmd); commandSet.add(solverCmd); commandSet.add(postprocessorCmd); int NUM_CPUs = 1; int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); - String expectedSlurmScript = 
readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); String slurmScript = FileUtils.readFileToString(submitScript.toFile()); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + return slurmScript; } @Test - public void testSimJobScriptCVODE() throws IOException, XmlParseException, ExpressionException { + public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274514696_0_0"; + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "FiniteVolume_x64", "fvinput", null, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } - SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml")); + @Test + public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274630052_0_0"; + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "smoldyn_x64", "smoldynInput", null, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } - SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); - // make temp file - Path submitScript = Files.createTempFile("submit_script",".sh"); - File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274630682_0_0.slurm.sub"); + @Test + public void testSimJobScriptCVODE() throws 
IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml"; String JOB_NAME = "V_REL_274630682_0_0"; - - KeyValue simKey = simTask.getSimKey(); - User simOwner = simTask.getSimulation().getVersion().getOwner(); - final int jobId = simTask.getSimulationJob().getJobIndex(); - - // preprocessor - String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274630682_0__0.simtask.xml"; - File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); - List args = new ArrayList<>( 4 ); - args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); - args.add( simTaskFilePathExternal ); - args.add( primaryUserDirExternal.getAbsolutePath() ); - ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); - - // finite volume solver invocation - ExecutableCommand solverCmd = new ExecutableCommand( - new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"), - "/usr/local/app/localsolvers/linux64/SundialsSolverStandalone_x64", - "/share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput", - "/share/apps/vcell3/users/schaff/SimID_274630682_0_.ida", - "-tid", - "0"); - - // postprocessor - final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; - ExecutableCommand postprocessorCmd = new ExecutableCommand(null,false, false, - PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), - simKey.toString(), - simOwner.getName(), - simOwner.getID().toString(), - Integer.toString(jobId), - Integer.toString(simTask.getTaskID()), - SOLVER_EXIT_CODE_REPLACE_STRING, - subFileExternal.getAbsolutePath()); - postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); - - ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); - commandSet.add(preprocessorCmd); - commandSet.add(solverCmd); - commandSet.add(postprocessorCmd); - - int NUM_CPUs = 1; - 
int MEM_SIZE_MB = 1000; - ArrayList postProcessingCommands = new ArrayList<>(); + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "SundialsSolverStandalone_x64", "cvodeInput", "ida", JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub"); - slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - String slurmScript = FileUtils.readFileToString(submitScript.toFile()); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @Test public void testSimJobScriptRK45() throws IOException, XmlParseException, ExpressionException { - - SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource("slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml")); - - SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); - // make temp file - Path submitScript = Files.createTempFile("submit_script",".sh"); - File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_274631114_0_0.slurm.sub"); + String simTaskResourcePath = "slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml"; String JOB_NAME = "V_REL_274631114_0_0"; - - KeyValue simKey = simTask.getSimKey(); - User simOwner = simTask.getSimulation().getVersion().getOwner(); - final int jobId = simTask.getSimulationJob().getJobIndex(); - - // preprocessor -// String simTaskFilePathExternal = "/share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml"; -// File primaryUserDirExternal = new File("/share/apps/vcell3/users/schaff"); -// List args = new ArrayList<>( 4 ); -// args.add( PropertyLoader.getRequiredProperty(PropertyLoader.simulationPreprocessor) ); -// args.add( simTaskFilePathExternal ); -// args.add( primaryUserDirExternal.getAbsolutePath() ); -// ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); - - // finite volume solver invocation - ExecutableCommand.LibraryPath 
libraryPath = null; - ExecutableCommand solverCmd = new ExecutableCommand( - libraryPath, - "JavaSimExe64", - "/share/apps/vcell3/users/schaff/SimID_274631114_0__0.simtask.xml", - "/share/apps/vcell3/users/schaff"); - - // postprocessor - final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; - ExecutableCommand postprocessorCmd = new ExecutableCommand(null,false, false, - PropertyLoader.getRequiredProperty(PropertyLoader.simulationPostprocessor), - simKey.toString(), - simOwner.getName(), - simOwner.getID().toString(), - Integer.toString(jobId), - Integer.toString(simTask.getTaskID()), - SOLVER_EXIT_CODE_REPLACE_STRING, - subFileExternal.getAbsolutePath()); - postprocessorCmd.setExitCodeToken(SOLVER_EXIT_CODE_REPLACE_STRING); - - ExecutableCommand.Container commandSet = new ExecutableCommand.Container(); -// commandSet.add(preprocessorCmd); - commandSet.add(solverCmd); - commandSet.add(postprocessorCmd); - - int NUM_CPUs = 1; - int MEM_SIZE_MB = 1000; - ArrayList postProcessingCommands = new ArrayList<>(); + String slurmScript = createScriptForJavaSolvers(simTaskResourcePath, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub"); - slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - String slurmScript = FileUtils.readFileToString(submitScript.toFile()); Assertions.assertEquals(expectedSlurmScript, slurmScript); } + @Test + public void testSimJobScriptAdamsMoulton() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274633859_0_0"; + String slurmScript = createScriptForJavaSolvers(simTaskResourcePath, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub"); + 
Assertions.assertEquals(expectedSlurmScript, slurmScript); + } private String readTextFileFromResource(String filename) throws IOException { InputStream inputStream = getClass().getClassLoader().getResourceAsStream(filename); diff --git a/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml new file mode 100644 index 0000000000..85e4a7b795 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml @@ -0,0 +1,111 @@ + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 1.0E-9 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 0.0 + 1000.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 0.0 + 0.0 + 4.493165893949507E-4 + 0.0 + 14891.899581611733 + 124712.10435961554 + 1406.7733692487282 + 3697.013658772733 + 4738.640600365477 + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + + + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + ((Size_cyt * Ran_cyt_init_uM) - (Size_cyt * C_cyt_init_uM)) + ((Size_cyt * RanC_cyt_init_uM) + (Size_cyt * C_cyt_init_uM) + (Size_nuc * RanC_nuc_init_uM)) + (UnitFactor_uM_um3_molecules_neg_1 * Size_pm * s2_init_molecules_um_2) + (Size_nm / Size_cyt) + (Size_nm / Size_nuc) + ((K_Ran_cyt_total + (Size_cyt * C_cyt)) / Size_cyt) + ((K_RanC_cyt_total - (Size_cyt * C_cyt) - (Size_nuc * RanC_nuc)) / Size_cyt) + (K_s2_total / (UnitFactor_uM_um3_molecules_neg_1 * Size_pm)) + + + + + + + + + J_r0 + C_cyt_init_uM + + + (KFlux_nm_nuc * J_flux0) + RanC_nuc_init_uM + + + + + + cloned from 'non-spatial ODE_generated' owned by user frm +cloned from 'non-spatial ODE_generated' owned by user anu +cloned from 'non-spatial 
ODE_generated' owned by user schaff +cloned from 'non-spatial ODE_generated' owned by user les + + + + cloned from 'Copy of adams moulton' owned by user frm +cloned from 'adams moulton' owned by user anu +cloned from 'adams moulton' owned by user schaff +cloned from 'Copy of Simulation1' owned by user les + + + + + + 1 + + + 0.01 to 10.0, log, 4 values + + + + + cloned from 'Copy of adams moulton' owned by user frm +cloned from 'adams moulton' owned by user anu +cloned from 'adams moulton' owned by user schaff +cloned from 'Copy of Simulation1' owned by user les + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + + + + + cloned from 'nonspatial381239605' owned by user frm +cloned from 'nonspatial1214274674' owned by user anu +cloned from 'nonspatial2060234169' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub new file mode 100644 index 0000000000..453e68274b --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub @@ -0,0 +1,137 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274633859_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! 
-e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + From 0b91c0aeb51400fee54af40d3fb36472719f1f27 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 18:24:20 -0400 Subject: [PATCH 04/11] added test for gibson solver --- .../server/htc/slurm/SlurmProxyTest.java | 24 ++- .../gibson/SimID_274635122_0__0.simtask.xml | 132 ++++++++++++++ .../gibson/V_REL_274635122_0_0.slurm.sub | 164 ++++++++++++++++++ 3 files changed, 315 insertions(+), 5 deletions(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 42a593bf55..361be6f715 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -66,7 +66,7 @@ public static void setProperties() throws MalformedURLException System.setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); } - public String createScriptForNativeSolvers(String simTaskResourcePath, String solverExeName, String inputFileSuffix, String outputFileSuffix, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { + public String createScriptForNativeSolvers(String simTaskResourcePath, String solverExeName, String subcommand, String inputFileSuffix, String outputFileSuffix, String 
JOB_NAME) throws IOException, XmlParseException, ExpressionException { SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource(simTaskResourcePath)); KeyValue simKey = simTask.getSimKey(); @@ -94,9 +94,14 @@ public String createScriptForNativeSolvers(String simTaskResourcePath, String so String outputFilePath = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0_."+outputFileSuffix; final ExecutableCommand solverCmd; if (outputFileSuffix == null) { + new ExecutableCommand(libraryPath, new String[0]); solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, "-tid", "0"); } else { - solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, outputFilePath, "-tid", "0"); + if (subcommand != null) { + solverCmd = new ExecutableCommand(libraryPath, command, subcommand, inputFilePath, outputFilePath, "-tid", "0"); + } else { + solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, outputFilePath, "-tid", "0"); + } } // postprocessor @@ -173,7 +178,7 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml"; String JOB_NAME = "V_REL_274514696_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "FiniteVolume_x64", "fvinput", null, JOB_NAME); + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "FiniteVolume_x64", null,"fvinput", null, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -182,7 +187,7 @@ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = 
"slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml"; String JOB_NAME = "V_REL_274630052_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "smoldyn_x64", "smoldynInput", null, JOB_NAME); + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "smoldyn_x64", null, "smoldynInput", null, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -191,11 +196,20 @@ public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, Exp public void testSimJobScriptCVODE() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml"; String JOB_NAME = "V_REL_274630682_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "SundialsSolverStandalone_x64", "cvodeInput", "ida", JOB_NAME); + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "SundialsSolverStandalone_x64", null, "cvodeInput", "ida", JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } + @Test + public void testSimJobScriptGibson() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274635122_0_0"; + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "VCellStoch_x64", "gibson", "stochInput", "ida", JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript, slurmScript); + } + @Test public void testSimJobScriptRK45() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = 
"slurm_fixtures/runge_kutta_fehlberg/SimID_274631114_0__0.simtask.xml"; diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml new file mode 100644 index 0000000000..b1d0e73894 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml @@ -0,0 +1,132 @@ + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 0.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 0.0 + 0.0 + 100.0 + 0.0 + 14891.899581611733 + 124712.10435961554 + 1406.7733692487282 + 3697.013658772733 + 4738.640600365477 + (1.0 * pow(KMOLE, - 1.0)) + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + + + + + + ((C_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * kfl * Size_nm * RanC_cyt) + (kfl * UnitFactor_molecules_uM_neg_1_um_neg_3 * Size_nm * RanC_nuc) + (Kf * RanC_cyt_Count * UnitFactor_molecules_uM_neg_1_um_neg_3 * UnitFactor_uM_um3_molecules_neg_1) + (Kr * Ran_cyt_Count * C_cyt_Count * UnitFactor_molecules_uM_neg_1_um_neg_3 * UnitFactor_uM_um3_molecules_neg_1 * UnitFactor_uM_um3_molecules_neg_1 / Size_cyt) + ((Ran_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + ((RanC_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + ((RanC_nuc_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_nuc) + (s2_Count / Size_pm) + + + + + + + + RanC_cyt_Count_initCount + Ran_cyt_Count_initCount + C_cyt_Count_initCount + RanC_nuc_Count_initCount + s2_Count_initCount + + P_r0_probabilityRate + -1.0 + 1.0 + 1.0 + + + 
P_r0_reverse_probabilityRate + 1.0 + -1.0 + -1.0 + + + P_flux0_probabilityRate + -1.0 + 1.0 + + + P_flux0_reverse_probabilityRate + 1.0 + -1.0 + + + + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + + + + cloned from 'Copy of Gibson' owned by user frm +cloned from 'Gibson' owned by user anu +cloned from 'Gibson' owned by user schaff +cloned from 'Simulation0' owned by user les + + + + + + + 1 + + + + + + cloned from 'Copy of Gibson' owned by user frm +cloned from 'Gibson' owned by user anu +cloned from 'Gibson' owned by user schaff +cloned from 'Simulation0' owned by user les + + + + cloned from 'nonspatial1435481798' owned by user frm +cloned from 'nonspatial660203233' owned by user anu +cloned from 'nonspatial637508148' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + + + + + cloned from 'nonspatial1435481798' owned by user frm +cloned from 'nonspatial660203233' owned by user anu +cloned from 'nonspatial637508148' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub new file mode 100644 index 0000000000..cc4a0f3f7b --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274635122_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception 
NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274635122 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274635122 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274635122 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274635122 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274635122_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274635122_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/VCellStoch_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/VCellStoch_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/VCellStoch_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}VCellStoch_x64 gibson /share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput /share/apps/vcell3/users/schaff/SimID_274635122_0_.ida -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------VCellStoch_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + From 2d37b6a35e360b617666a13c899af3b30db53d17 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 21:02:07 -0400 Subject: [PATCH 05/11] refactor native solver tests --- .../server/htc/slurm/SlurmProxyTest.java | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 361be6f715..cec84a71a2 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -14,16 +14,12 @@ import cbit.vcell.xml.XmlHelper; import cbit.vcell.xml.XmlParseException; import org.apache.commons.io.FileUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.vcell.util.document.KeyValue; import org.vcell.util.document.User; import org.vcell.util.exe.ExecutableException; import java.io.*; -import java.net.MalformedURLException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -32,10 +28,12 @@ import java.util.Random; import java.util.stream.Collectors; + +@Tag("Fast") public class SlurmProxyTest { @BeforeAll - public static void setProperties() throws MalformedURLException + public static void setProperties() { 
System.setProperty(PropertyLoader.vcellServerIDProperty,"REL"); System.setProperty(PropertyLoader.htcLogDirExternal,"/share/apps/vcell3/htclogs"); @@ -66,7 +64,7 @@ public static void setProperties() throws MalformedURLException System.setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); } - public String createScriptForNativeSolvers(String simTaskResourcePath, String solverExeName, String subcommand, String inputFileSuffix, String outputFileSuffix, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { + public String createScriptForNativeSolvers(String simTaskResourcePath, String[] command, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource(simTaskResourcePath)); KeyValue simKey = simTask.getSimKey(); @@ -89,20 +87,7 @@ public String createScriptForNativeSolvers(String simTaskResourcePath, String so ExecutableCommand preprocessorCmd = new ExecutableCommand(null, false, false,args); ExecutableCommand.LibraryPath libraryPath = new ExecutableCommand.LibraryPath("/usr/local/app/localsolvers/linux64"); - String command = "/usr/local/app/localsolvers/linux64/"+solverExeName; - String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0_."+inputFileSuffix; - String outputFilePath = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0_."+outputFileSuffix; - final ExecutableCommand solverCmd; - if (outputFileSuffix == null) { - new ExecutableCommand(libraryPath, new String[0]); - solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, "-tid", "0"); - } else { - if (subcommand != null) { - solverCmd = new ExecutableCommand(libraryPath, command, subcommand, inputFilePath, outputFilePath, "-tid", "0"); - } else { - solverCmd = new ExecutableCommand(libraryPath, command, inputFilePath, outputFilePath, "-tid", "0"); - } - } + final ExecutableCommand solverCmd = new ExecutableCommand(libraryPath, command); 
// postprocessor final String SOLVER_EXIT_CODE_REPLACE_STRING = "SOLVER_EXIT_CODE_REPLACE_STRING"; @@ -126,8 +111,7 @@ public String createScriptForNativeSolvers(String simTaskResourcePath, String so int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - String slurmScript = FileUtils.readFileToString(submitScript.toFile()); - return slurmScript; + return FileUtils.readFileToString(submitScript.toFile()); } public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { @@ -143,7 +127,7 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ User simOwner = simTask.getSimulation().getVersion().getOwner(); final int jobId = simTask.getSimulationJob().getJobIndex(); - ExecutableCommand.LibraryPath libraryPath = null; + final ExecutableCommand.LibraryPath libraryPath = null; String command = "JavaSimExe64"; String userDir = "/share/apps/vcell3/users/schaff"; String simTaskRemoteFilename = "/share/apps/vcell3/users/schaff/SimID_"+simKey+"_0__0.simtask.xml"; @@ -170,15 +154,19 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - String slurmScript = FileUtils.readFileToString(submitScript.toFile()); - return slurmScript; + return FileUtils.readFileToString(submitScript.toFile()); } @Test public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml"; String JOB_NAME = "V_REL_274514696_0_0"; - String slurmScript = 
createScriptForNativeSolvers(simTaskResourcePath, "FiniteVolume_x64", null,"fvinput", null, JOB_NAME); + + String executable = "/usr/local/app/localsolvers/linux64/FiniteVolume_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274514696_0_.fvinput"; + String[] command = new String[] { executable, inputFilePath, "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -187,7 +175,12 @@ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/smoldyn/SimID_274630052_0__0.simtask.xml"; String JOB_NAME = "V_REL_274630052_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "smoldyn_x64", null, "smoldynInput", null, JOB_NAME); + + String executable = "/usr/local/app/localsolvers/linux64/smoldyn_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274630052_0_.smoldynInput"; + String[] command = new String[] { executable, inputFilePath, "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -196,7 +189,13 @@ public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, Exp public void testSimJobScriptCVODE() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/cvode/SimID_274630682_0__0.simtask.xml"; String JOB_NAME = "V_REL_274630682_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, 
"SundialsSolverStandalone_x64", null, "cvodeInput", "ida", JOB_NAME); + + String executable = "/usr/local/app/localsolvers/linux64/SundialsSolverStandalone_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274630682_0_.cvodeInput"; + String outputFilePath = "/share/apps/vcell3/users/schaff/SimID_274630682_0_.ida"; + String[] command = new String[] { executable, inputFilePath, outputFilePath, "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -205,7 +204,14 @@ public void testSimJobScriptCVODE() throws IOException, XmlParseException, Expre public void testSimJobScriptGibson() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/gibson/SimID_274635122_0__0.simtask.xml"; String JOB_NAME = "V_REL_274635122_0_0"; - String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, "VCellStoch_x64", "gibson", "stochInput", "ida", JOB_NAME); + + String executable = "/usr/local/app/localsolvers/linux64/VCellStoch_x64"; + String subcommand = "gibson"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274635122_0_.stochInput"; + String outputFilePath = "/share/apps/vcell3/users/schaff/SimID_274635122_0_.ida"; + String[] command = new String[] { executable, subcommand, inputFilePath, outputFilePath, "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub"); Assertions.assertEquals(expectedSlurmScript, slurmScript); } @@ -242,7 +248,7 @@ private String readTextFileFromResource(String filename) throws IOException { @Disabled // this test is disabled because it requires a running slurm server @Test - 
public void testSingularitySupport() throws IOException, ExecutableException { + public void testSingularitySupport() { CommandServiceSshNative cmd = null; try { Random r = new Random(); @@ -262,7 +268,7 @@ public void testSingularitySupport() throws IOException, ExecutableException { File sub_file_localpath = new File("/Volumes/vcell/htclogs/"+jobName+".slurm.sub"); File sub_file_remotepath = new File("/share/apps/vcell3/htclogs/"+jobName+".slurm.sub"); - StringBuffer subfileContent = new StringBuffer(); + StringBuilder subfileContent = new StringBuilder(); subfileContent.append("#!/usr/bin/bash\n"); String partition = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_partition); subfileContent.append("#SBATCH --partition="+partition+"\n"); From 0fb37d891e8155a8b246e1a976307450cb276403 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 21:02:17 -0400 Subject: [PATCH 06/11] add Gibson/Milstein Hybrid Stoch, MovingBoundary, NFsim --- .../server/htc/slurm/SlurmProxyTest.java | 58 +++- .../SimID_274641698_0__0.simtask.xml | 132 +++++++++ .../V_REL_274641698_0_0.slurm.sub | 164 +++++++++++ .../SimID_274641196_0__0.simtask.xml | 148 ++++++++++ .../V_REL_274641196_0_0.slurm.sub | 164 +++++++++++ .../nfsim/SimID_274642453_0__0.simtask.xml | 268 ++++++++++++++++++ .../nfsim/V_REL_274642453_0_0.slurm.sub | 164 +++++++++++ 7 files changed, 1092 insertions(+), 6 deletions(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/SimID_274641698_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub create mode 100644 vcell-server/src/test/resources/slurm_fixtures/moving_boundary/SimID_274641196_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub create mode 100644 vcell-server/src/test/resources/slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml create mode 100644 
vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index cec84a71a2..6cf2518d69 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -168,7 +168,7 @@ public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } @Test @@ -182,7 +182,7 @@ public void testSimJobScriptSmoldyn() throws IOException, XmlParseException, Exp String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } @Test @@ -197,7 +197,53 @@ public void testSimJobScriptCVODE() throws IOException, XmlParseException, Expre String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); + } + + @Test + public void testSimJobScriptNFsim() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = 
"slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274642453_0_0"; + + String executable = "/usr/local/app/localsolvers/linux64/NFsim_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput"; + String gdatFilePath = "/share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat"; + String speciesFilePath = "/share/apps/vcell3/users/schaff/SimID_274642453_0_.species"; + String[] command = new String[] { executable, "-seed", "716746135", "-vcell", "-xml", inputFilePath, + "-o", gdatFilePath, "-sim", "1.0", "-ss", speciesFilePath, "-oSteps", "20", "-notf", "-utl", "1000", + "-cb", "-pcmatch", "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); + } + + @Test + public void testSimJobScriptGibsonMilstein() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/gibson_milstein/SimID_274641698_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274641698_0_0"; + + String executable = "/usr/local/app/localsolvers/linux64/Hybrid_EM_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274641698_0_.nc"; + String[] command = new String[] { executable, inputFilePath, "100.0", "10.0", "0.01", "0.1", "-OV", "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); + } + + @Test + public void testSimJobScriptMovingBoundary() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = 
"slurm_fixtures/moving_boundary/SimID_274641196_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274641196_0_0"; + + String executable = "/usr/local/app/localsolvers/linux64/MovingBoundary_x64"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml"; + String[] command = new String[] { executable, "--config", inputFilePath, "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } @Test @@ -213,7 +259,7 @@ public void testSimJobScriptGibson() throws IOException, XmlParseException, Expr String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } @Test @@ -222,7 +268,7 @@ public void testSimJobScriptRK45() throws IOException, XmlParseException, Expres String JOB_NAME = "V_REL_274631114_0_0"; String slurmScript = createScriptForJavaSolvers(simTaskResourcePath, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } @Test @@ -231,7 +277,7 @@ public void testSimJobScriptAdamsMoulton() throws IOException, XmlParseException String JOB_NAME = "V_REL_274633859_0_0"; String slurmScript = createScriptForJavaSolvers(simTaskResourcePath, JOB_NAME); String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub"); - Assertions.assertEquals(expectedSlurmScript, 
slurmScript); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } private String readTextFileFromResource(String filename) throws IOException { diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/SimID_274641698_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/SimID_274641698_0__0.simtask.xml new file mode 100644 index 0000000000..ec651e981e --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/SimID_274641698_0__0.simtask.xml @@ -0,0 +1,132 @@ + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + 96485.3321 + 9.64853321E-5 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 0.0 + 2.0 + 2.0 + 0.001660538783162726 + 1000.0 + 1.0 + 0.0 + 0.0 + 100.0 + 0.0 + 14891.899581611733 + 124712.10435961554 + 1406.7733692487282 + 3697.013658772733 + 4738.640600365477 + (1.0 * pow(KMOLE, - 1.0)) + (1.0 * pow(KMOLE,1.0)) + 0.0 + 0.0 + + + + + + ((C_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + (kfl * (RanC_cyt - RanC_nuc)) + ((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt)) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * kfl * Size_nm * RanC_cyt) + (kfl * UnitFactor_molecules_uM_neg_1_um_neg_3 * Size_nm * RanC_nuc) + (Kf * RanC_cyt_Count * UnitFactor_molecules_uM_neg_1_um_neg_3 * UnitFactor_uM_um3_molecules_neg_1) + (Kr * Ran_cyt_Count * C_cyt_Count * UnitFactor_molecules_uM_neg_1_um_neg_3 * UnitFactor_uM_um3_molecules_neg_1 * UnitFactor_uM_um3_molecules_neg_1 / Size_cyt) + ((Ran_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + ((RanC_cyt_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + ((RanC_nuc_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_nuc) + (s2_Count / Size_pm) + + + + + + + + RanC_cyt_Count_initCount + Ran_cyt_Count_initCount + 
C_cyt_Count_initCount + RanC_nuc_Count_initCount + s2_Count_initCount + + P_r0_probabilityRate + -1.0 + 1.0 + 1.0 + + + P_r0_reverse_probabilityRate + 1.0 + -1.0 + -1.0 + + + P_flux0_probabilityRate + -1.0 + 1.0 + + + P_flux0_reverse_probabilityRate + 1.0 + -1.0 + + + + + + cloned from 'Copy of 3D pde_generated' owned by user frm +cloned from 'Copy of 3D pde_generated' owned by user anu +cloned from 'Copy of 3D pde_generated' owned by user schaff +cloned from 'Copy of 3D pde_generated' owned by user les + + + + cloned from 'Copy of Hybrid Gibson Milstein' owned by user frm +cloned from 'Hybrid Gibson Milstein' owned by user anu +cloned from 'Hybrid Gibson Milstein' owned by user schaff +cloned from 'Simulation0' owned by user les + + + + + + + 1 + + + + + + cloned from 'Copy of Hybrid Gibson Milstein' owned by user frm +cloned from 'Hybrid Gibson Milstein' owned by user anu +cloned from 'Hybrid Gibson Milstein' owned by user schaff +cloned from 'Simulation0' owned by user les + + + + cloned from 'nonspatial1435481798' owned by user frm +cloned from 'nonspatial660203233' owned by user anu +cloned from 'nonspatial637508148' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + + + + + cloned from 'nonspatial1435481798' owned by user frm +cloned from 'nonspatial660203233' owned by user anu +cloned from 'nonspatial637508148' owned by user schaff +cloned from 'nonspatial608887770' owned by user les + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub new file mode 100644 index 0000000000..d512bfad88 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274641698_0_0 +#SBATCH -o 
/share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? 
+ if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! -e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo 
${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641698 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641698 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274641698 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274641698 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641698_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641698_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/Hybrid_EM_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/Hybrid_EM_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/Hybrid_EM_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}Hybrid_EM_x64 /share/apps/vcell3/users/schaff/SimID_274641698_0_.nc 100.0 10.0 0.01 0.1 -OV -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------Hybrid_EM_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + diff --git a/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/SimID_274641196_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/SimID_274641196_0__0.simtask.xml new file mode 100644 index 0000000000..8c35bfc0c0 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/SimID_274641196_0__0.simtask.xml @@ -0,0 +1,148 @@ + + + 96480.0 + 9.648E-5 + 1.0E-9 + 6.02E11 + 3.141592653589793 + 8314.0 + 300.0 + 1.0 + 1.0 + 10.0 + 1000.0 + (1.0 / 602.0) + 10.0 + 10.0 + 0.0 + 0.0 + 0.0 + 0.0 + 0.7 + 1.0 + 0.3 + + + + x + y + (x + y) + RanC_nuc_init_uM + s2_init_molecules_um_2 + (VolumePerUnitVolume_cyt * vcRegionVolume('cell')) + (VolumePerUnitVolume_EC * vcRegionVolume('ec')) + (AreaPerUnitVolume_nm * vcRegionVolume('cell')) + (VolumePerUnitVolume_nuc * vcRegionVolume('cell')) + (AreaPerUnitArea_pm * vcRegionArea('cell_ec_membrane')) + vcRegionArea('cell_ec_membrane') + sproc_0.velocityX + sproc_0.velocityY + sin(t) + cos(t) + vcRegionVolume('cell') + vcRegionVolume('ec') + + + + + + + + + 0.0 + RanC_cyt_diffusionRate + RanC_cyt_init_uM + + + 0.0 + Ran_cyt_diffusionRate + Ran_cyt_init_uM + + + 0.0 + C_cyt_diffusionRate + C_cyt_init_uM + + + + + + + + + + + + + + + + + + + 0.0 + 0.0 + + + 0.0 + 0.0 + + + 0.0 + 0.0 + + + sobj_cell1_ec0_velX + sobj_cell1_ec0_velY + + + + + + + + + + + + + + + 1.0 + FULL_REDIST + EQUI_BOND_REDISTRIBUTE + 5 + NEAREST_NEIGHBOR + + 1 + + + + + + + + + + + + + + + ((((x - 5.0) ^ 2.0) + ((y - 5.0) ^ 2.0)) < (3.0 ^ 2.0)) + + + 1.0 + + + + + + + + + + + + + \ No newline at end of file 
diff --git a/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub new file mode 100644 index 0000000000..6adc1eacf5 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274641196_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! 
-e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274641196 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274641196 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274641196 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274641196 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274641196_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274641196_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/MovingBoundary_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/MovingBoundary_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/MovingBoundary_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}MovingBoundary_x64 --config /share/apps/vcell3/users/schaff/SimID_274641196_0_mb.xml -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------MovingBoundary_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + diff --git a/vcell-server/src/test/resources/slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml new file mode 100644 index 0000000000..f7bd0519b7 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml @@ -0,0 +1,268 @@ + + + cloned from 'Copy of Application0_generated' owned by user danv + + + + + + + 96485.3321 + 9.64853321E-5 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 1.0 + 0.0 + 1.0 + 0.0 + 0.0 + 0.0 + 1.0 + 0.001660538783162726 + 1.0 + 1.0 + 0.3 + (1.0 * pow(KMOLE, - 1.0)) + (1.0 * pow(KMOLE,1.0)) + 0.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ((UnitFactor_uM_um3_molecules_neg_1 * cargo_cyt_Count) / Size_cyt) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * cargo_cyt_Count_init_uM * Size_cyt) + ((UnitFactor_uM_um3_molecules_neg_1 * cargo_ex_Count) / Size_ex) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * cargo_ex_Count_init_uM * Size_ex) + ((UnitFactor_uM_um3_molecules_neg_1 * carrier_cyt_Count) / Size_cyt) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * carrier_cyt_Count_init_uM * Size_cyt) + ((UnitFactor_uM_um3_molecules_neg_1 * carrier_ex_Count) / Size_ex) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * carrier_ex_Count_init_uM * Size_ex) + ((UnitFactor_uM_um3_molecules_neg_1 * complex_cyt_Count) / Size_cyt) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * 
complex_cyt_Count_init_uM * Size_cyt) + ((UnitFactor_uM_um3_molecules_neg_1 * complex_ex_Count) / Size_ex) + (UnitFactor_molecules_uM_neg_1_um_neg_3 * complex_ex_Count_init_uM * Size_ex) + (O0_Cargo_tot_Count / Size_mem) + ((O0_Carrier_tot_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_cyt) + ((UnitFactor_uM_um3_molecules_neg_1 * Kf) / Size_cyt) + + + + + + + + + + + P_cyt_binding_probabilityRate + + + + + + + cargo_cyt_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + cargo_ex_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + carrier_cyt_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + carrier_ex_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + complex_cyt_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + complex_ex_Count_initCount + 0.0 + u + u + + 0.0 + 0.0 + 0.0 + 0.0 + + + + + + cloned from 'Copy of Application0_generated' owned by user danv + + + + + + + + + + true + false + true + true + 1000 + 1 + + 1 + + + + + + + + + cloned from 'non-spatial453902550' owned by user danv +cloned from 'non-spatial1747226078' owned by user mblinov + + + + + + + cloned from 'non-spatial453902550' owned by user danv +cloned from 'non-spatial1747226078' owned by user mblinov + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub new file mode 100644 index 0000000000..70dc6a5a8c --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub @@ -0,0 +1,164 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274642453_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit 
source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274642453 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274642453 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274642453 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274642453 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274642453_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274642453_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/NFsim_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/NFsim_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/NFsim_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}NFsim_x64 -seed 716746135 -vcell -xml /share/apps/vcell3/users/schaff/SimID_274642453_0_.nfsimInput -o /share/apps/vcell3/users/schaff/SimID_274642453_0_.gdat -sim 1.0 -ss /share/apps/vcell3/users/schaff/SimID_274642453_0_.species -oSteps 20 -notf -utl 1000 -cb -pcmatch -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------NFsim_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + + From ab1747d852c73a966ef401eaad2fe377f4653b36 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Wed, 21 Aug 2024 21:39:15 -0400 Subject: [PATCH 07/11] carefull setup and teardown of global properties for slurmProxyTest --- .../server/htc/slurm/SlurmProxyTest.java | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 6cf2518d69..551bc2ba96 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -22,48 +22,64 @@ import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Random; +import java.util.*; import java.util.stream.Collectors; @Tag("Fast") public class SlurmProxyTest { - @BeforeAll - public static void setProperties() + private HashMap originalProperties = new HashMap<>(); + + private void setProperty(String key, String value) { + originalProperties.put(key, System.getProperty(key)); + System.setProperty(key, value); + } + + private void restoreProperties() { + for (String key : originalProperties.keySet()) { + System.setProperty(key, 
originalProperties.get(key)); + } + originalProperties.clear(); + } + + @BeforeEach + public void setup() { - System.setProperty(PropertyLoader.vcellServerIDProperty,"REL"); - System.setProperty(PropertyLoader.htcLogDirExternal,"/share/apps/vcell3/htclogs"); - System.setProperty(PropertyLoader.slurm_partition,"vcell"); - System.setProperty(PropertyLoader.slurm_reservation,""); - System.setProperty(PropertyLoader.slurm_qos,"vcell"); - System.setProperty(PropertyLoader.primarySimDataDirExternalProperty,"/share/apps/vcell3/users"); - System.setProperty(PropertyLoader.secondarySimDataDirExternalProperty,"/share/apps/vcell7/users"); - System.setProperty(PropertyLoader.jmsSimHostExternal, "rke-wn-01.cam.uchc.edu"); - System.setProperty(PropertyLoader.jmsSimPortExternal, "31618"); - System.setProperty(PropertyLoader.jmsSimRestPortExternal, "30163"); - System.setProperty(PropertyLoader.jmsUser, "clientUser"); - System.setProperty(PropertyLoader.jmsPasswordValue, "dummy"); - System.setProperty(PropertyLoader.mongodbHostExternal, "rke-wn-01.cam.uchc.edu"); - System.setProperty(PropertyLoader.mongodbPortExternal, "30019"); - System.setProperty(PropertyLoader.mongodbDatabase, "test"); - System.setProperty(PropertyLoader.vcellSoftwareVersion, "Rel_Version_7.6.0_build_28"); - System.setProperty(PropertyLoader.vcellbatch_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"); - System.setProperty(PropertyLoader.slurm_tmpdir, "/scratch/vcell"); - System.setProperty(PropertyLoader.slurm_central_singularity_dir, "/share/apps/vcell3/singularityImages"); - System.setProperty(PropertyLoader.slurm_local_singularity_dir, "/state/partition1/singularityImages"); - System.setProperty(PropertyLoader.slurm_singularity_module_name, "singularity/vcell-3.10.0"); - System.setProperty(PropertyLoader.simDataDirArchiveExternal, "/share/apps/vcell12/users"); - System.setProperty(PropertyLoader.simDataDirArchiveInternal, "/share/apps/vcell12/users"); - 
System.setProperty(PropertyLoader.nativeSolverDir_External, "/share/apps/vcell3/nativesolvers"); - System.setProperty(PropertyLoader.jmsBlobMessageMinSize, "100000"); - System.setProperty(PropertyLoader.simulationPostprocessor, "JavaPostprocessor64"); - System.setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); + setProperty(PropertyLoader.vcellServerIDProperty,"REL"); + setProperty(PropertyLoader.htcLogDirExternal,"/share/apps/vcell3/htclogs"); + setProperty(PropertyLoader.slurm_partition,"vcell"); + setProperty(PropertyLoader.slurm_reservation,""); + setProperty(PropertyLoader.slurm_qos,"vcell"); + setProperty(PropertyLoader.primarySimDataDirExternalProperty,"/share/apps/vcell3/users"); + setProperty(PropertyLoader.secondarySimDataDirExternalProperty,"/share/apps/vcell7/users"); + setProperty(PropertyLoader.jmsSimHostExternal, "rke-wn-01.cam.uchc.edu"); + setProperty(PropertyLoader.jmsSimPortExternal, "31618"); + setProperty(PropertyLoader.jmsSimRestPortExternal, "30163"); + setProperty(PropertyLoader.jmsUser, "clientUser"); + setProperty(PropertyLoader.jmsPasswordValue, "dummy"); + setProperty(PropertyLoader.mongodbHostExternal, "rke-wn-01.cam.uchc.edu"); + setProperty(PropertyLoader.mongodbPortExternal, "30019"); + setProperty(PropertyLoader.mongodbDatabase, "test"); + setProperty(PropertyLoader.vcellSoftwareVersion, "Rel_Version_7.6.0_build_28"); + setProperty(PropertyLoader.vcellbatch_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"); + setProperty(PropertyLoader.slurm_tmpdir, "/scratch/vcell"); + setProperty(PropertyLoader.slurm_central_singularity_dir, "/share/apps/vcell3/singularityImages"); + setProperty(PropertyLoader.slurm_local_singularity_dir, "/state/partition1/singularityImages"); + setProperty(PropertyLoader.slurm_singularity_module_name, "singularity/vcell-3.10.0"); + setProperty(PropertyLoader.simDataDirArchiveExternal, "/share/apps/vcell12/users"); + 
setProperty(PropertyLoader.simDataDirArchiveInternal, "/share/apps/vcell12/users"); + setProperty(PropertyLoader.nativeSolverDir_External, "/share/apps/vcell3/nativesolvers"); + setProperty(PropertyLoader.jmsBlobMessageMinSize, "100000"); + setProperty(PropertyLoader.simulationPostprocessor, "JavaPostprocessor64"); + setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); } + @AfterEach + public void teardown() { + restoreProperties(); + } + public String createScriptForNativeSolvers(String simTaskResourcePath, String[] command, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { SimulationTask simTask = XmlHelper.XMLToSimTask(readTextFileFromResource(simTaskResourcePath)); From 30587dceecd66ef58fdcaf1e78374d21bab9d2bf Mon Sep 17 00:00:00 2001 From: jcschaff Date: Thu, 22 Aug 2024 08:05:55 -0400 Subject: [PATCH 08/11] fix restoration of properties can cause NullPointerException --- .../server/htc/slurm/SlurmProxyTest.java | 260 +++++++++--------- 1 file changed, 130 insertions(+), 130 deletions(-) diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 551bc2ba96..71f753c6cf 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -1,14 +1,8 @@ package cbit.vcell.message.server.htc.slurm; -import cbit.vcell.message.server.cmd.CommandServiceSshNative; -import cbit.vcell.message.server.htc.HtcJobStatus; -import cbit.vcell.message.server.htc.HtcProxy.HtcJobInfo; -import cbit.vcell.message.server.htc.HtcProxy.PartitionStatistics; import cbit.vcell.messaging.server.SimulationTask; -import cbit.vcell.mongodb.VCMongoMessage; import cbit.vcell.parser.ExpressionException; import cbit.vcell.resource.PropertyLoader; -import cbit.vcell.server.HtcJobID; import 
cbit.vcell.simdata.PortableCommand; import cbit.vcell.solvers.ExecutableCommand; import cbit.vcell.xml.XmlHelper; @@ -17,19 +11,20 @@ import org.junit.jupiter.api.*; import org.vcell.util.document.KeyValue; import org.vcell.util.document.User; -import org.vcell.util.exe.ExecutableException; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.stream.Collectors; @Tag("Fast") public class SlurmProxyTest { - private HashMap originalProperties = new HashMap<>(); + private final HashMap originalProperties = new HashMap<>(); private void setProperty(String key, String value) { originalProperties.put(key, System.getProperty(key)); @@ -38,7 +33,12 @@ private void setProperty(String key, String value) { private void restoreProperties() { for (String key : originalProperties.keySet()) { - System.setProperty(key, originalProperties.get(key)); + String originalPropertyValue = originalProperties.get(key); + if (originalPropertyValue == null) { + System.clearProperty(key); + } else { + System.setProperty(key, originalPropertyValue); + } } originalProperties.clear(); } @@ -308,126 +308,126 @@ private String readTextFileFromResource(String filename) throws IOException { return xmlString; } - @Disabled // this test is disabled because it requires a running slurm server - @Test - public void testSingularitySupport() { - CommandServiceSshNative cmd = null; - try { - Random r = new Random(); - System.setProperty("log4j2.trace","true"); - PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, "Test2"); - PropertyLoader.setProperty(PropertyLoader.htcLogDirExternal, "/Volumes/vcell/htclogs"); - VCMongoMessage.enabled=false; - String partitions[] = new String[] { "vcell", "vcell2" }; - PropertyLoader.setProperty(PropertyLoader.slurm_partition, partitions[1]); - - - cmd = new CommandServiceSshNative(new String[] {"vcell-service.cam.uchc.edu"}, 
"vcell", new File("/Users/schaff/.ssh/schaff_rsa")); - SlurmProxy slurmProxy = new SlurmProxy(cmd, "vcell"); - - String jobName = "V_TEST2_999999999_0_"+r.nextInt(10000); - System.out.println("job name is "+jobName); - File sub_file_localpath = new File("/Volumes/vcell/htclogs/"+jobName+".slurm.sub"); - File sub_file_remotepath = new File("/share/apps/vcell3/htclogs/"+jobName+".slurm.sub"); - - StringBuilder subfileContent = new StringBuilder(); - subfileContent.append("#!/usr/bin/bash\n"); - String partition = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_partition); - subfileContent.append("#SBATCH --partition="+partition+"\n"); - subfileContent.append("#SBATCH -J "+jobName+"\n"); - subfileContent.append("#SBATCH -o /share/apps/vcell3/htclogs/"+jobName+".slurm.log\n"); - subfileContent.append("#SBATCH -e /share/apps/vcell3/htclogs/"+jobName+".slurm.log\n"); - subfileContent.append("#SBATCH --mem=1000M\n"); - subfileContent.append("#SBATCH --no-kill\n"); - subfileContent.append("#SBATCH --no-requeue\n"); - subfileContent.append("env\n"); - subfileContent.append("echo `hostname`\n"); - subfileContent.append("python -c \"some_str = ' ' * 51200000\"\n"); - subfileContent.append("retcode=$?\n"); - subfileContent.append("echo \"return code was $retcode\"\n"); - subfileContent.append("if [[ $retcode == 137 ]]; then\n"); - subfileContent.append(" echo \"job was killed via kill -9 (probably out of memory)\"\n"); - subfileContent.append("fi\n"); - subfileContent.append("sleep 20\n"); - subfileContent.append("exit $retcode\n"); - //subfileContent.append("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles\n"); - //subfileContent.append("source /usr/share/Modules/init/bash\n"); -// subfileContent.append("module load singularity\n"); -// subfileContent.append("if command -v singularity >/dev/null 2>&1; then\n"); -// subfileContent.append(" echo 'singularity command exists'\n"); -// subfileContent.append(" exit 0\n"); -// subfileContent.append("else\n"); 
-// subfileContent.append(" echo 'singularity command not found'\n"); -// subfileContent.append(" exit 1\n"); +// @Disabled // this test is disabled because it requires a running slurm server +// @Test +// public void testSingularitySupport() { +// CommandServiceSshNative cmd = null; +// try { +// Random r = new Random(); +// System.setProperty("log4j2.trace","true"); +// PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, "Test2"); +// PropertyLoader.setProperty(PropertyLoader.htcLogDirExternal, "/Volumes/vcell/htclogs"); +// VCMongoMessage.enabled=false; +// String partitions[] = new String[] { "vcell", "vcell2" }; +// PropertyLoader.setProperty(PropertyLoader.slurm_partition, partitions[1]); +// +// +// cmd = new CommandServiceSshNative(new String[] {"vcell-service.cam.uchc.edu"}, "vcell", new File("/Users/schaff/.ssh/schaff_rsa")); +// SlurmProxy slurmProxy = new SlurmProxy(cmd, "vcell"); +// +// String jobName = "V_TEST2_999999999_0_"+r.nextInt(10000); +// System.out.println("job name is "+jobName); +// File sub_file_localpath = new File("/Volumes/vcell/htclogs/"+jobName+".slurm.sub"); +// File sub_file_remotepath = new File("/share/apps/vcell3/htclogs/"+jobName+".slurm.sub"); +// +// StringBuilder subfileContent = new StringBuilder(); +// subfileContent.append("#!/usr/bin/bash\n"); +// String partition = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_partition); +// subfileContent.append("#SBATCH --partition="+partition+"\n"); +// subfileContent.append("#SBATCH -J "+jobName+"\n"); +// subfileContent.append("#SBATCH -o /share/apps/vcell3/htclogs/"+jobName+".slurm.log\n"); +// subfileContent.append("#SBATCH -e /share/apps/vcell3/htclogs/"+jobName+".slurm.log\n"); +// subfileContent.append("#SBATCH --mem=1000M\n"); +// subfileContent.append("#SBATCH --no-kill\n"); +// subfileContent.append("#SBATCH --no-requeue\n"); +// subfileContent.append("env\n"); +// subfileContent.append("echo `hostname`\n"); +// subfileContent.append("python -c 
\"some_str = ' ' * 51200000\"\n"); +// subfileContent.append("retcode=$?\n"); +// subfileContent.append("echo \"return code was $retcode\"\n"); +// subfileContent.append("if [[ $retcode == 137 ]]; then\n"); +// subfileContent.append(" echo \"job was killed via kill -9 (probably out of memory)\"\n"); // subfileContent.append("fi\n"); - - FileUtils.writeStringToFile(sub_file_localpath, subfileContent.toString()); - HtcJobID htcJobId = slurmProxy.submitJobFile(sub_file_remotepath); - System.out.println("running job "+htcJobId); - HtcJobInfo htcJobInfo = new HtcJobInfo(htcJobId, jobName); - - ArrayList jobInfos = new ArrayList(); - jobInfos.add(htcJobInfo); - - Map jobStatusMap = slurmProxy.getJobStatus(jobInfos); - - int attempts = 0; - while (attempts<80 && (jobStatusMap.get(htcJobInfo)==null || !jobStatusMap.get(htcJobInfo).isDone())){ - try { Thread.sleep(1000); } catch (InterruptedException e){} - jobStatusMap = slurmProxy.getJobStatus(jobInfos); - System.out.println(jobStatusMap.get(htcJobInfo)); - if (attempts==5) { - slurmProxy.killJobs(jobName); - } - attempts++; - } - System.out.println(jobStatusMap.get(htcJobInfo)); - - }catch (Exception e) { - e.printStackTrace(); - Assertions.fail(e.getMessage()); - }finally { - if (cmd != null) { - cmd.close(); - } - } - } - - - @Disabled // this test is disabled because it requires a running slurm server - @Test - public void testSLURM() throws IOException, ExecutableException { - System.setProperty("log4j2.trace","true"); - PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, "Test2"); - VCMongoMessage.enabled=false; - String partitions[] = new String[] { "vcell", "vcell2" }; - PropertyLoader.setProperty(PropertyLoader.slurm_partition, partitions[0]); - - CommandServiceSshNative cmd = null; - try { - cmd = new CommandServiceSshNative(new String[] {"vcell-service.cam.uchc.edu"}, "vcell", new File("/Users/schaff/.ssh/schaff_rsa")); - SlurmProxy slurmProxy = new SlurmProxy(cmd, "vcell"); - Map runningJobs = 
slurmProxy.getRunningJobs(); - for (HtcJobInfo jobInfo : runningJobs.keySet()) { - HtcJobStatus jobStatus = runningJobs.get(jobInfo); - System.out.println("job "+jobInfo.getHtcJobID()+" "+jobInfo.getJobName()+", status="+jobStatus.toString()); - } - for (String partition : partitions) { - PropertyLoader.setProperty(PropertyLoader.slurm_partition, partition); - PartitionStatistics partitionStatistics = slurmProxy.getPartitionStatistics(); - System.out.println("partition statistics for partition "+partition+": "+partitionStatistics); - System.out.println("number of cpus allocated = "+partitionStatistics.numCpusAllocated); - System.out.println("load = "+partitionStatistics.load); - System.out.println("number of cpus total = "+partitionStatistics.numCpusTotal); - } - }catch (Exception e) { - e.printStackTrace(); - Assertions.fail(e.getMessage()); - }finally { - if (cmd != null) { - cmd.close(); - } - } - } +// subfileContent.append("sleep 20\n"); +// subfileContent.append("exit $retcode\n"); +// //subfileContent.append("export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles\n"); +// //subfileContent.append("source /usr/share/Modules/init/bash\n"); +//// subfileContent.append("module load singularity\n"); +//// subfileContent.append("if command -v singularity >/dev/null 2>&1; then\n"); +//// subfileContent.append(" echo 'singularity command exists'\n"); +//// subfileContent.append(" exit 0\n"); +//// subfileContent.append("else\n"); +//// subfileContent.append(" echo 'singularity command not found'\n"); +//// subfileContent.append(" exit 1\n"); +//// subfileContent.append("fi\n"); +// +// FileUtils.writeStringToFile(sub_file_localpath, subfileContent.toString()); +// HtcJobID htcJobId = slurmProxy.submitJobFile(sub_file_remotepath); +// System.out.println("running job "+htcJobId); +// HtcJobInfo htcJobInfo = new HtcJobInfo(htcJobId, jobName); +// +// ArrayList jobInfos = new ArrayList(); +// jobInfos.add(htcJobInfo); +// +// Map jobStatusMap = 
slurmProxy.getJobStatus(jobInfos); +// +// int attempts = 0; +// while (attempts<80 && (jobStatusMap.get(htcJobInfo)==null || !jobStatusMap.get(htcJobInfo).isDone())){ +// try { Thread.sleep(1000); } catch (InterruptedException e){} +// jobStatusMap = slurmProxy.getJobStatus(jobInfos); +// System.out.println(jobStatusMap.get(htcJobInfo)); +// if (attempts==5) { +// slurmProxy.killJobs(jobName); +// } +// attempts++; +// } +// System.out.println(jobStatusMap.get(htcJobInfo)); +// +// }catch (Exception e) { +// e.printStackTrace(); +// Assertions.fail(e.getMessage()); +// }finally { +// if (cmd != null) { +// cmd.close(); +// } +// } +// } +// +// +// @Disabled // this test is disabled because it requires a running slurm server +// @Test +// public void testSLURM() throws IOException, ExecutableException { +// System.setProperty("log4j2.trace","true"); +// PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, "Test2"); +// VCMongoMessage.enabled=false; +// String partitions[] = new String[] { "vcell", "vcell2" }; +// PropertyLoader.setProperty(PropertyLoader.slurm_partition, partitions[0]); +// +// CommandServiceSshNative cmd = null; +// try { +// cmd = new CommandServiceSshNative(new String[] {"vcell-service.cam.uchc.edu"}, "vcell", new File("/Users/schaff/.ssh/schaff_rsa")); +// SlurmProxy slurmProxy = new SlurmProxy(cmd, "vcell"); +// Map runningJobs = slurmProxy.getRunningJobs(); +// for (HtcJobInfo jobInfo : runningJobs.keySet()) { +// HtcJobStatus jobStatus = runningJobs.get(jobInfo); +// System.out.println("job "+jobInfo.getHtcJobID()+" "+jobInfo.getJobName()+", status="+jobStatus.toString()); +// } +// for (String partition : partitions) { +// PropertyLoader.setProperty(PropertyLoader.slurm_partition, partition); +// PartitionStatistics partitionStatistics = slurmProxy.getPartitionStatistics(); +// System.out.println("partition statistics for partition "+partition+": "+partitionStatistics); +// System.out.println("number of cpus allocated = 
"+partitionStatistics.numCpusAllocated); +// System.out.println("load = "+partitionStatistics.load); +// System.out.println("number of cpus total = "+partitionStatistics.numCpusTotal); +// } +// }catch (Exception e) { +// e.printStackTrace(); +// Assertions.fail(e.getMessage()); +// }finally { +// if (cmd != null) { +// cmd.close(); +// } +// } +// } } From ddd6f41055d6d3a240527a4622b7eacb66dd8a9a Mon Sep 17 00:00:00 2001 From: jcschaff Date: Thu, 22 Aug 2024 08:51:44 -0400 Subject: [PATCH 09/11] remove temporary file writing when creating slurm submit script --- .../vcell/message/server/htc/HtcProxy.java | 39 ++- .../message/server/htc/slurm/SlurmProxy.java | 236 ++---------------- .../server/htc/slurm/SlurmProxyTest.java | 13 +- 3 files changed, 33 insertions(+), 255 deletions(-) diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java index 642f145457..d1131ad19a 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java @@ -1,13 +1,12 @@ package cbit.vcell.message.server.htc; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.CharBuffer; -import java.nio.channels.FileChannel; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Collection; @@ -18,7 +17,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.vcell.util.BeanUtils; import org.vcell.util.document.KeyValue; import org.vcell.util.exe.ExecutableException; @@ -224,29 +222,22 @@ public static String createHtcSimJobName(SimTaskInfo simTaskInfo) { return 
HTC_SIMULATION_JOB_NAME_PREFIX+simTaskInfo.simId.toString()+"_"+simTaskInfo.jobIndex+"_"+simTaskInfo.taskId; } - public static void writeUnixStyleTextFile(File file, String javaString) throws IOException { - try (FileOutputStream fos = new FileOutputStream(file)) { - Charset asciiCharset = Charset.forName("US-ASCII"); - CharsetEncoder encoder = asciiCharset.newEncoder(); - CharBuffer unicodeCharBuffer = CharBuffer.wrap(javaString); - ByteBuffer asciiByteBuffer = encoder.encode(unicodeCharBuffer); - byte[] asciiArray = asciiByteBuffer.array(); - ByteBuffer unixByteBuffer = ByteBuffer.allocate(asciiArray.length); - int count = 0; - for (int i=0;i postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException, IOException { - saveJobScript(jobName, sub_file_as_internal_path, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); + String scriptText = createJobScriptText(jobName, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); + Files.writeString(sub_file_as_internal_path.toPath(), scriptText); return submitJobFile(sub_file_with_external_path); } - public void saveJobScript(String jobName, File sub_file_as_internal_path, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask) throws IOException { + public String createJobScriptText(String jobName, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask) throws IOException { if (LG.isDebugEnabled()) { LG.debug("generating local SLURM submit script for jobName="+jobName); } SlurmProxy.SbatchSolverComponents sbatchSolverComponents = generateScript(jobName, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); - final String SUB = ".sub"; - //String slurmRootName = sub_file_with_external_path.getName().substring(0, sub_file_with_external_path.getName().length()-SUB.length()); - //String child = 
slurmRootName+".sh"; - //File intSolverScriptFile = new File(sub_file_as_internal_path.getParentFile(),child); - //File extSolverScriptFile = new File(sub_file_with_external_path.getParentFile(),child); - StringBuilder scriptContent = new StringBuilder(); - //Write the .slurm.sh File that the .slurm.sub file references and make it executable - //Files.write(sbatchSolverComponents.getSingularityCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); + StringBuilder scriptContent = new StringBuilder(); scriptContent.append(sbatchSolverComponents.getSingularityCommands()); - //Files.append(sbatchSolverComponents.getSendFailureMsgCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); scriptContent.append(sbatchSolverComponents.getSendFailureMsgCommands()); - //Files.append(sbatchSolverComponents.getCallExitCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); scriptContent.append(sbatchSolverComponents.getCallExitCommands()); -// Files.append(sbatchSolverComponents.getPreProcessCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); - -// String STARTFLAG_SNIP = "_arrstartflag_"; -// File dataSaveFile = null; -// File stochInputFile = null; -// File startFlagFile = null; -// long numOfTrials = 1; -// if(HtcProxy.isStochMultiTrial(simTask)) {//Find Gibson solver outputfile name from command arguments -// numOfTrials = simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().getNumOfTrials(); -// Files.append("#simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().isHistogram()="+simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().isHistogram()+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("#simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().getNumOfTrials()="+numOfTrials+"\n",intSolverScriptFile , 
Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("#simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().getCustomSeed()="+simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().getCustomSeed()+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("#isMultiTrial="+HtcProxy.isStochMultiTrial(simTask)+"\n" ,intSolverScriptFile, Charset.forName(StandardCharsets.UTF_8.name())); -// List execCommands = commandSet.getExecCommands(); -//// outerloop: -// for (Iterator iterator = execCommands.iterator(); iterator.hasNext();) { -// ExecutableCommand executableCommand = (ExecutableCommand) iterator.next(); -// List commands = executableCommand.getCommands(); -// for (Iterator iterator2 = commands.iterator(); iterator2.hasNext();) { -// String cmdParam = (String) iterator2.next(); -// if(cmdParam.contains("SimID_") && cmdParam.endsWith(SimDataConstants.IDA_DATA_EXTENSION)) { -// dataSaveFile = new File(primaryUserDirExternal,new File(cmdParam).getName()); -// int rand = new Random().nextInt(1000000); -// String idaname=new File(cmdParam).getName(); -// idaname=idaname.substring(0, idaname.length()-SimDataConstants.IDA_DATA_EXTENSION.length()); -// startFlagFile = new File(primaryUserDirExternal,idaname+STARTFLAG_SNIP+rand); -// Files.append("#dataSaveFile="+dataSaveFile+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("#startFlagFile="+startFlagFile+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -//// break outerloop; -// }else if(cmdParam.contains("SimID_") && cmdParam.endsWith(SimDataConstants.STOCHINPUT_DATA_EXTENSION)) { -// stochInputFile = new File(primaryUserDirExternal,new File(cmdParam).getName()); -// Files.append("#stochInputFile="+stochInputFile+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// } -// } -// } -// } -// long TRIALS_PER_ARR_MAX = 
100; -// long slurmArrayCount = (numOfTrials+TRIALS_PER_ARR_MAX-1)/TRIALS_PER_ARR_MAX; -// boolean isCompleteMultiTrialArray = /*(slurmArrayCount > 1) && */HtcProxy.isStochMultiTrial(simTask) && dataSaveFile != null && stochInputFile != null; -// if(isCompleteMultiTrialArray) { -// String jmsrestpswd=PropertyLoader.getSecretValue(null,PropertyLoader.jmsRestPasswordFile); -// String jmshost_sim_external = System.getProperty("vcell.jms.sim.host.external"); -// String jmsrestport_sim_external = System.getProperty("vcell.jms.sim.restport.external"); -// if(jmshost_sim_external == null || jmsrestport_sim_external == null) { -// throw new ExecutableException("Array job expects vcell.jms.sim.host.external and vcell.jms.sim.restport.external to be non-null"); -// } -// Files.append("getprogcnt() {"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("progcnt=`find "+primaryUserDirExternal.getAbsolutePath()+" -name '"+dataSaveFile.getName()+"_*' | wc -l`"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("echo $progcnt"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("}"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //Create progress function -// Files.append("sendprogressevent() {"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("local progcnt=$1"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("if [ $SLURM_ARRAY_TASK_ID -ne 1 ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("return"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //Send 
progress_worker_event -// Files.append("nexttime=$(date +%s)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("let \"diff = $nexttime-$starttime\""+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("if [ $diff -ge 10 ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("starttime=$nexttime"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -//// Files.append("local progcnt=$(getprogcnt)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("arrprog=$(echo \"scale=2; $progcnt/"+numOfTrials+"\" | bc)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append(" echo -en \"POST /api/message/workerEvent?type=queue&JMSPriority=5&JMSTimeToLive=60000&JMSDeliveryMode=nonpersistent&MessageType=WorkerEvent&UserName="+simTask.getUserName()+"&HostName=${HOSTNAME}&SimKey="+simTask.getSimKey().toString()+"&TaskID="+simTask.getTaskID()+"&JobIndex="+simTask.getSimulationJob().getJobIndex()+"&WorkerEvent_Status="+WorkerEvent.JOB_PROGRESS+"&WorkerEvent_Progress=${arrprog}&WorkerEvent_TimePoint=${progcnt} HTTP/1.1\\r\\nHost: "+jmshost_sim_external+"\\r\\nAuthorization: Basic "+jmsrestpswd+"\\r\\nAccept: */*\\r\\nContent-Length: 0\\r\\nContent-Type: application/x-www-form-urlencoded\\r\\n\\r\\n\" | nc "+jmshost_sim_external+" "+jmsrestport_sim_external+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("}"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// //If we'r the first task in slurm array job Remove old data, create startFlagFile to signal other tasks to begin creating new data -// 
Files.append("starttime=$(date +%s)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //slurm array 1 creates array progress file (created with random number name so other slurm arrays wait until it exists -// Files.append("if [ \"$SLURM_ARRAY_TASK_ID\" -eq \""+"1"+"\" ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -//// Files.append(" echo -en \"POST /api/message/workerEvent?type=queue&JMSPriority=5&JMSTimeToLive=60000&JMSDeliveryMode=nonpersistent&MessageType=WorkerEvent&UserName="+simTask.getUserName()+"&HostName=${HOSTNAME}&SimKey="+simTask.getSimKey().toString()+"&TaskID="+simTask.getTaskID()+"&JobIndex="+simTask.getSimulationJob().getJobIndex()+"&WorkerEvent_Status="+WorkerEvent.JOB_STARTING+"&WorkerEvent_Progress=0&WorkerEvent_TimePoint=0 HTTP/1.1\\r\\nHost: "+jmshost_sim_external+"\\r\\nAuthorization: Basic "+jmsrestpswd+"\\r\\nAccept: */*\\r\\nContent-Length: 0\\r\\nContent-Type: application/x-www-form-urlencoded\\r\\n\\r\\n\" | nc "+jmshost_sim_external+" "+jmsrestport_sim_external+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append(sbatchSolverComponents.getPreProcessCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("rm -f "+dataSaveFile.getAbsolutePath()+"*"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("echo -n \"0\" >"+startFlagFile.getAbsolutePath()+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// //Wait for startFlagFile file to be created (other array tasks wait to start for this file) -// Files.append("until [ -f "+startFlagFile.getAbsolutePath()+" ]"+"\n",intSolverScriptFile , 
Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("do"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("\tsleep 2"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("done"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// String s = "if [ $stat -ne 0 ]; then\n" + -// " callExitProcessor $stat\n" + -// " echo returning $stat to Slurm\n" + -// " exit $stat\n" + -// "fi"; -// String s2 = "if [ $stat -ne 0 ]; then\n" + -// " let \"run=$run-1\"\n" + -// " continue\n" + -// "# callExitProcessor $stat\n" + -// "# echo returning $stat to Slurm\n" + -// "# exit $stat\n" + -// "fi"; -// -// String substituedCmd = sbatchSolverComponents.getSolverCommands(); -// int lastIndex = substituedCmd.lastIndexOf(s); -// substituedCmd = substituedCmd.substring(0, lastIndex)+s2+substituedCmd.substring(lastIndex+s.length()); -// //if \[ \$stat \-ne 0 \]\; then\n callExitProcessor \$stat\n echo returning \$stat to Slurm\n exit \$stat\nfi -// Files.append("for (( run=1; run<="+TRIALS_PER_ARR_MAX+"; run++ )); do"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("let \"currcnt = ((${SLURM_ARRAY_TASK_ID}-1)*"+TRIALS_PER_ARR_MAX+")+$run\""+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //exit run loop if we're in last arraytask and there are fewer jobs than full TRIALS_PER_ARR_MAX -// Files.append("if [ $currcnt -gt "+numOfTrials+" ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("break"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //cp .stochInput for each run and change seed -// substituedCmd = 
substituedCmd.replace(".stochInput", ".stochInput_${SLURM_ARRAY_TASK_ID}"); -// //change output file name based on task and run -// String arrRunDataFile = dataSaveFile.getName()+"_"+"${SLURM_ARRAY_TASK_ID}"+"_"+"${run}"; -// substituedCmd = substituedCmd.replace(dataSaveFile.getName(), arrRunDataFile); -//// //Prevent solver c++ from sending progress updates (will be handled in this script) -// substituedCmd = substituedCmd.replace("-tid "+simTask.getTaskID(),""); -// String taskStochInput = stochInputFile.getAbsolutePath()+"_${SLURM_ARRAY_TASK_ID}"; -// Files.append("cp "+stochInputFile.getAbsolutePath()+" "+taskStochInput+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// Files.append("origseed=`grep -oP 'SEED\\s\\K\\w+' "+taskStochInput+"`"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //grep -oP 'SEED\s\K\w+' -// Files.append("let \"newseed = ${origseed}+${currcnt}\""+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("origline=`grep \"SEED.*\" "+taskStochInput+"`"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //update seed -// Files.append("sed -i \"s/${origline}/SEED\t${newseed}/g\" "+taskStochInput+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //change output file name -// //Files.append("sed -i 's/"+dataSaveFile.getName()+"/"+dataSaveFile.getAbsolutePath()+"_"+"${SLURM_ARRAY_TASK_ID}"+"_"+"${run}"+"/g' "+taskStochInput+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append(substituedCmd,intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// //Send progress -// Files.append("progcnt=$(getprogcnt)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("sendprogressevent $progcnt"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// 
Files.append("done"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// //Loop waiting for all array jobs to create sim results (if array task 1) -// Files.append("if [ $SLURM_ARRAY_TASK_ID -eq 1 ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("lastprogcnt=\"0\""+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("maxlooptime=\"300\""+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name()));//5 minutes -// Files.append("slptime=5"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("slpvar=0"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("while : ; do"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("sleep $slptime"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("progcnt=$(getprogcnt)"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// Files.append("if [ $progcnt -ne $lastprogcnt ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("lastprogcnt=$progcnt"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("slpvar=0"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// -// Files.append("if [ $progcnt -eq "+numOfTrials+" ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// 
Files.append(sbatchSolverComponents.getExitCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("echo -en \"POST /api/message/workerEvent?type=queue&JMSPriority=5&JMSTimeToLive=600000&JMSDeliveryMode=persistent&MessageType=WorkerEvent&UserName="+simTask.getUserName()+"&HostName=${HOSTNAME}&SimKey="+simTask.getSimKey().toString()+"&TaskID="+simTask.getTaskID()+"&JobIndex="+simTask.getSimulationJob().getJobIndex()+"&WorkerEvent_Status="+WorkerEvent.JOB_COMPLETED+"&WorkerEvent_Progress=1&WorkerEvent_TimePoint=${progcnt} HTTP/1.1\\r\\nHost: "+jmshost_sim_external+"\\r\\nAuthorization: Basic "+jmsrestpswd+"\\r\\nAccept: */*\\r\\nContent-Length: 0\\r\\nContent-Type: application/x-www-form-urlencoded\\r\\n\\r\\n\" | nc "+jmshost_sim_external+" "+jmsrestport_sim_external+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("exit 0"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name()));//fail -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("let slpvar=$slpvar+$slptime"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("if [ $slpvar -gt $maxlooptime ]"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("then"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("callExitProcessor 1"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name()));//fail -// Files.append("exit 1"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name()));//fail -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("sendprogressevent $progcnt"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("done"+"\n",intSolverScriptFile , 
Charset.forName(StandardCharsets.UTF_8.name())); -// Files.append("fi"+"\n",intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); -// }else { - //Files.append(sbatchSolverComponents.getPreProcessCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); - scriptContent.append(sbatchSolverComponents.getPreProcessCommands()); - //Files.append(sbatchSolverComponents.solverCommands,intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); - scriptContent.append(sbatchSolverComponents.solverCommands); - //Files.append(sbatchSolverComponents.getExitCommands(),intSolverScriptFile , Charset.forName(StandardCharsets.UTF_8.name())); - scriptContent.append(sbatchSolverComponents.getExitCommands()); -// } - //Set ownerRWX = PosixFilePermissions.fromString("rwxr-xr-x"); -// FileAttribute permissions = PosixFilePermissions.asFileAttribute(ownerWritable); - //java.nio.file.Files.setPosixFilePermissions(intSolverScriptFile.toPath(), ownerRWX); - - //----------Add solver script path to sbatch file, write the .slurm.sub file + scriptContent.append(sbatchSolverComponents.getPreProcessCommands()); + scriptContent.append(sbatchSolverComponents.solverCommands); + scriptContent.append(sbatchSolverComponents.getExitCommands()); String substitutedSbatchCommands = sbatchSolverComponents.getSbatchCommands(); -// if(isCompleteMultiTrialArray) { -// substitutedSbatchCommands = substitutedSbatchCommands.replaceAll("#SBATCH -o.*", "#SBATCH -o "+new File(sub_file_with_external_path.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); -// substitutedSbatchCommands = substitutedSbatchCommands.replaceAll("#SBATCH -e.*", "#SBATCH -e "+new File(sub_file_with_external_path.getParent(),slurmRootName+".log").getAbsolutePath()+"_%a"); -// substitutedSbatchCommands+= "#SBATCH --array=1-"+slurmArrayCount; -// } - File tempFile = File.createTempFile("tempSubFile", SUB); -// writeUnixStyleTextFile(tempFile, 
substitutedSbatchCommands+"\n\n"+extSolverScriptFile.getAbsolutePath()+"\n\n"+ -// "#Following commands (if any) are read by JavaPostProcessor64\n"+sbatchSolverComponents.postProcessCommands+"\n"); - writeUnixStyleTextFile(tempFile, substitutedSbatchCommands+"\n\n"+scriptContent.toString()+"\n\n"+ - "#Following commands (if any) are read by JavaPostProcessor64\n"+sbatchSolverComponents.postProcessCommands+"\n"); - - - // move submission file to final location (either locally or remotely). - if (LG.isDebugEnabled()) { - LG.debug("moving local SLURM submit file '"+tempFile.getAbsolutePath()+"' to remote file '"+sub_file_as_internal_path+"'"); - } - FileUtils.copyFile(tempFile, sub_file_as_internal_path); - tempFile.delete(); + String origScriptText = substitutedSbatchCommands+"\n\n"+ + scriptContent.toString()+"\n\n"+ + "#Following commands (if any) are read by JavaPostProcessor64\n"+ + sbatchSolverComponents.postProcessCommands+"\n"; + String scriptText = toUnixStyleText(origScriptText); + return scriptText; } HtcJobID submitJobFile(File sub_file_external) throws ExecutableException { @@ -1115,16 +918,9 @@ public HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, Fi String optReport_container_filename = optReportFile.getAbsolutePath().replace(optReportFile.getParent(),"/simdata"); lsb.write("${cmd_prefix} " + optProblemInput_container_filename + " " + optProblemOutput_container_filename + " " + optReport_container_filename); - File tempFile = File.createTempFile("tempSubFile", ".sub"); - - writeUnixStyleTextFile(tempFile, lsb.toString()); + String scriptText = toUnixStyleText(lsb.toString()); + Files.writeString(sub_file_internal.toPath(), scriptText); - // move submission file to final location (either locally or remotely). 
- if (LG.isDebugEnabled()) { - LG.debug("moving local SLURM submit file '"+tempFile.getAbsolutePath()+"' to remote file '"+sub_file_external+"'"); - } - FileUtils.copyFile(tempFile, sub_file_internal); - tempFile.delete(); } catch (IOException ex) { LG.error(ex); return null; diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 71f753c6cf..9d5060073d 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -7,14 +7,11 @@ import cbit.vcell.solvers.ExecutableCommand; import cbit.vcell.xml.XmlHelper; import cbit.vcell.xml.XmlParseException; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.*; import org.vcell.util.document.KeyValue; import org.vcell.util.document.User; import java.io.*; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -86,8 +83,6 @@ public String createScriptForNativeSolvers(String simTaskResourcePath, String[] KeyValue simKey = simTask.getSimKey(); SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); - // make temp file - Path submitScript = Files.createTempFile("submit_script",".sh"); File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_"+simKey+"_0_0.slurm.sub"); User simOwner = simTask.getSimulation().getVersion().getOwner(); @@ -126,8 +121,7 @@ public String createScriptForNativeSolvers(String simTaskResourcePath, String[] int NUM_CPUs = 1; int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); - slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - return FileUtils.readFileToString(submitScript.toFile()); + return slurmProxy.createJobScriptText(JOB_NAME, commandSet, 
NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); } public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { @@ -136,8 +130,6 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ KeyValue simKey = simTask.getSimKey(); SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); - // make temp file - Path submitScript = Files.createTempFile("submit_script",".sh"); File subFileExternal = new File("/share/apps/vcell3/htclogs/V_REL_"+simKey+"_0_0.slurm.sub"); User simOwner = simTask.getSimulation().getVersion().getOwner(); @@ -169,8 +161,7 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ int NUM_CPUs = 1; int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); - slurmProxy.saveJobScript(JOB_NAME, submitScript.toFile(), commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); - return FileUtils.readFileToString(submitScript.toFile()); + return slurmProxy.createJobScriptText(JOB_NAME, commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); } @Test From 4c129903819a3b90386338859d9802248d4f322d Mon Sep 17 00:00:00 2001 From: jcschaff Date: Thu, 22 Aug 2024 10:15:52 -0400 Subject: [PATCH 10/11] added slurm unit test for Langevin solver --- .../server/htc/slurm/SlurmProxyTest.java | 17 ++ .../langevin/SimID_274672135_0__0.simtask.xml | 206 ++++++++++++++++++ .../langevin/V_REL_274672135_0_0.slurm.sub | 163 ++++++++++++++ 3 files changed, 386 insertions(+) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/langevin/SimID_274672135_0__0.simtask.xml create mode 100644 vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 9d5060073d..0631ff5537 
100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -207,6 +207,23 @@ public void testSimJobScriptCVODE() throws IOException, XmlParseException, Expre Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); } + @Test + public void testSimJobScriptLangevin() throws IOException, XmlParseException, ExpressionException { + String simTaskResourcePath = "slurm_fixtures/langevin/SimID_274672135_0__0.simtask.xml"; + String JOB_NAME = "V_REL_274672135_0_0"; + + String executable = "/usr/local/app/localsolvers/linux64/langevin_x64"; + String outputLog = "/share/apps/vcell3/users/schaff/SimID_274672135_0_.log"; + String messagingConfig = "/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig"; + String inputFilePath = "/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput"; + String[] command = new String[] { executable, "simulate", "--output-log="+outputLog, + "--vc-send-status-config="+messagingConfig, inputFilePath, "0", "-tid", "0" }; + + String slurmScript = createScriptForNativeSolvers(simTaskResourcePath, command, JOB_NAME); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub"); + Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); + } + @Test public void testSimJobScriptNFsim() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = "slurm_fixtures/nfsim/SimID_274642453_0__0.simtask.xml"; diff --git a/vcell-server/src/test/resources/slurm_fixtures/langevin/SimID_274672135_0__0.simtask.xml b/vcell-server/src/test/resources/slurm_fixtures/langevin/SimID_274672135_0__0.simtask.xml new file mode 100644 index 0000000000..665b64ec6b --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/langevin/SimID_274672135_0__0.simtask.xml @@ -0,0 +1,206 @@ + + + + + + + + + + + + 
+ 96485.3321 + 9.64853321E-5 + 6.02214179E11 + 3.141592653589793 + 8314.46261815 + 300.0 + 1.0 + 0.001660538783162726 + 0.0 + 0.0 + 0.0 + 1.0499999999999999E-4 + 8.949999999999999E-4 + 0.009999999999999827 + (1.0 * pow(KMOLE, - 1.0)) + (1.0 * pow(KMOLE,1.0)) + 0.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ((O0_MT0_tot_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_Intracellular) + ((O0_MT1_tot_Count * UnitFactor_uM_um3_molecules_neg_1) / Size_Intracellular) + Kf + Kr + ((UnitFactor_uM_um3_molecules_neg_1 * s0_Count) / Size_Intracellular) + (Size_Intracellular * UnitFactor_molecules_uM_neg_1_um_neg_3 * s0_Count_init_uM) + ((UnitFactor_uM_um3_molecules_neg_1 * s1_Count) / Size_Intracellular) + (Size_Intracellular * UnitFactor_molecules_uM_neg_1_um_neg_3 * s1_Count_init_uM) + + + + + + + + + + + P_r0_probabilityRate + + + + + + + P_r0_reverse_probabilityRate + + + + + + + s0_Count_initCount + 0.0 + 0.0 + 0.0 + + 0.0 + 0.0 + 0.0 + 0.0 + + + + s1_Count_initCount + 0.0 + 0.0 + 0.0 + + 0.0 + 0.0 + 0.0 + 0.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + 1.0E-9 + 1.0E-4 + + 1 + + + + + + + + + + + + + + + (z < 0.09) + + + 1.0 + + + + + + + + + + + + + \ No newline at end of file diff --git a/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub new file mode 100644 index 0000000000..73efd9a977 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub @@ -0,0 +1,163 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J V_REL_274672135_0_0 +#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.log +#SBATCH --mem=4096M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory 
limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB + + +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + if [ ! -e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + +#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- +sendFailureMsg() { + echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274672135 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff 
--msg-job-simkey 274672135 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg + stat=$? + if [[ $stat -ne 0 ]]; then + echo 'failed to send error message, retcode=$stat' + else + echo 'sent failure message' + fi +} +#END---------SlurmProxy.generateScript():sendFailureMsg---------- +#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- +callExitProcessor( ) { + echo exitCommand = ${container_prefix}JavaPostprocessor64 274672135 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.sub + ${container_prefix}JavaPostprocessor64 274672135 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274672135_0_0.slurm.sub +} +#END---------SlurmProxy.generateScript():hasExitProcessor---------- +echo +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaPreprocessor64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/JavaPreprocessor64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff + command="${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff " + $command +stat=$? 
+echo ${cmd_prefix}JavaPreprocessor64 /share/apps/vcell3/users/schaff/SimID_274672135_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaPreprocessor64 +echo "1 date=`date`" + +echo +#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------/usr/local/app/localsolvers/linux64/langevin_x64 +echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/langevin_x64' which overrides container invocations" +nativeExe=/share/apps/vcell3/nativesolvers/langevin_x64 +if [ -e "${nativeExe}" ]; then + cmd_prefix="/share/apps/vcell3/nativesolvers/" +else + cmd_prefix="$container_prefix" +fi +echo "cmd_prefix is '${cmd_prefix}'" +echo "5 date=`date`" +echo command = ${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0 +if [ -z ${LD_LIBRARY_PATH+x} ]; then + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64 +else + export LD_LIBRARY_PATH=/usr/local/app/localsolvers/linux64:$LD_LIBRARY_PATH +fi + command="${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0 " + $command +stat=$? 
+echo ${cmd_prefix}langevin_x64 simulate --output-log=/share/apps/vcell3/users/schaff/SimID_274672135_0_.log --vc-send-status-config=/share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinMessagingConfig /share/apps/vcell3/users/schaff/SimID_274672135_0_.langevinInput 0 -tid 0 returned $stat +if [ $stat -ne 0 ]; then + callExitProcessor $stat + echo returning $stat to Slurm + exit $stat +fi +#END---------SlurmProxy.generateScript():ExecutableCommand----------langevin_x64 +callExitProcessor 0 + + +#Following commands (if any) are read by JavaPostProcessor64 + From abca605e3d3fca064e5a468003fb349802b80a12 Mon Sep 17 00:00:00 2001 From: jcschaff Date: Thu, 22 Aug 2024 11:03:32 -0400 Subject: [PATCH 11/11] add unit test for optimization Slurm submit script --- .../message/server/htc/slurm/SlurmProxy.java | 112 +++++++++--------- .../server/htc/slurm/SlurmProxyTest.java | 22 +++- .../opt/CopasiParest_152878.sub | 90 ++++++++++++++ .../opt/CopasiParest_152878_optProblem.json | 1 + 4 files changed, 167 insertions(+), 58 deletions(-) create mode 100644 vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub create mode 100644 vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878_optProblem.json diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java index a1c60bdab9..4f0c3ea188 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java @@ -816,7 +816,7 @@ public HtcJobID submitJob(String jobName, File sub_file_as_internal_path, File s return submitJobFile(sub_file_with_external_path); } - public String createJobScriptText(String jobName, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask) throws IOException { + String 
createJobScriptText(String jobName, ExecutableCommand.Container commandSet, int ncpus, double memSizeMB, Collection postProcessingCommands, SimulationTask simTask) throws IOException { if (LG.isDebugEnabled()) { LG.debug("generating local SLURM submit script for jobName="+jobName); } @@ -864,63 +864,13 @@ HtcJobID submitJobFile(File sub_file_external) throws ExecutableException { public HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, File sub_file_external, File optProblemInputFile,File optProblemOutputFile,File optReportFile) throws ExecutableException{ try { - if (LG.isDebugEnabled()) { - LG.debug("generating local SLURM submit script for jobName="+jobName); - } -// String text = generateScript(jobName, commandSet, ncpus, memSizeMB, postProcessingCommands, simTask); - LG.info("sub_file_internal: "+sub_file_internal.getAbsolutePath()); - LG.info("sub_file_external: "+sub_file_external.getAbsolutePath()); - LG.info("optProblemInput: "+optProblemInputFile.getAbsolutePath()); - LG.info("optProblemOutput: "+optProblemOutputFile.getAbsolutePath()); - LG.info("optReport: "+optReportFile.getAbsolutePath()); - - String primaryDataDirInternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirInternalProperty); - String primaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirExternalProperty); - String htclogdir_external = PropertyLoader.getRequiredProperty(PropertyLoader.htcLogDirExternal); - String serverid=PropertyLoader.getRequiredProperty(PropertyLoader.vcellServerIDProperty); - String softwareVersion=PropertyLoader.getRequiredProperty(PropertyLoader.vcellSoftwareVersion); - String remote_singularity_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellopt_singularity_image); - String slurm_singularity_local_image_filepath = remote_singularity_image; -// String docker_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellbatch_docker_name); - String slurm_tmpdir = 
PropertyLoader.getRequiredProperty(PropertyLoader.slurm_tmpdir); - String slurm_central_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_central_singularity_dir); - String slurm_local_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_local_singularity_dir); - String slurm_singularity_module_name = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name); - String simDataDirArchiveExternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveExternal); - String simDataDirArchiveInternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveInternal); - File slurm_singularity_central_filepath = new File(slurm_central_singularity_dir,new File(slurm_singularity_local_image_filepath).getName()); - - HtcProxy.MemLimitResults memoryMBAllowed = new HtcProxy.MemLimitResults(256, "Optimization Default"); - String[] environmentVars = new String[] { - "datadir_external="+primaryDataDirExternal, - }; - - LineStringBuilder lsb = new LineStringBuilder(); - slurmScriptInit(jobName, false, memoryMBAllowed, lsb); - File optDataDir = optProblemInputFile.getParentFile(); - File optDataDirExternal = new File(optDataDir.getAbsolutePath().replace(primaryDataDirInternal, primaryDataDirExternal)); - if (!optDataDirExternal.exists() && !optDataDirExternal.mkdir()){ - LG.error("failed to make optimization data directory "+optDataDir.getAbsolutePath()); - } -// if (optDataDirExternal.setWritable(true,false)) - slurmInitSingularity(lsb, optDataDirExternal.getAbsolutePath(), Optional.empty(), htclogdir_external, softwareVersion, - slurm_singularity_local_image_filepath, slurm_tmpdir, slurm_central_singularity_dir, - slurm_local_singularity_dir, simDataDirArchiveExternal, simDataDirArchiveInternal, - slurm_singularity_central_filepath, slurm_singularity_module_name, environmentVars); - - lsb.write(" cmd_prefix=\"$container_prefix\""); - lsb.write("echo \"cmd_prefix is '${cmd_prefix}'\""); - 
lsb.append("echo command = "); - lsb.write("${cmd_prefix}" + ""); - - String optProblemInput_container_filename = optProblemInputFile.getAbsolutePath().replace(optProblemInputFile.getParent(),"/simdata"); - String optProblemOutput_container_filename = optProblemOutputFile.getAbsolutePath().replace(optProblemOutputFile.getParent(),"/simdata"); - String optReport_container_filename = optReportFile.getAbsolutePath().replace(optReportFile.getParent(),"/simdata"); - lsb.write("${cmd_prefix} " + optProblemInput_container_filename + " " + optProblemOutput_container_filename + " " + optReport_container_filename); - - String scriptText = toUnixStyleText(lsb.toString()); + String scriptText = createOptJobScript(jobName, optProblemInputFile, optProblemOutputFile, optReportFile); + LG.info("sub_file_internal: " + sub_file_internal.getAbsolutePath() + + ", sub_file_external: " + sub_file_external.getAbsolutePath() + + ", optProblemInput: " + optProblemInputFile.getAbsolutePath() + + ", optProblemOutput: " + optProblemOutputFile.getAbsolutePath() + + ", optReport: " + optReportFile.getAbsolutePath()); Files.writeString(sub_file_internal.toPath(), scriptText); - } catch (IOException ex) { LG.error(ex); return null; @@ -929,5 +879,53 @@ public HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, Fi return submitJobFile(sub_file_external); } + String createOptJobScript(String jobName, File optProblemInputFile, File optProblemOutputFile, File optReportFile) throws IOException { + String primaryDataDirInternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirInternalProperty); + String primaryDataDirExternal = PropertyLoader.getRequiredProperty(PropertyLoader.primarySimDataDirExternalProperty); + String htclogdir_external = PropertyLoader.getRequiredProperty(PropertyLoader.htcLogDirExternal); + String serverid=PropertyLoader.getRequiredProperty(PropertyLoader.vcellServerIDProperty); + String 
softwareVersion=PropertyLoader.getRequiredProperty(PropertyLoader.vcellSoftwareVersion); + String remote_singularity_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellopt_singularity_image); + String slurm_singularity_local_image_filepath = remote_singularity_image; +// String docker_image = PropertyLoader.getRequiredProperty(PropertyLoader.vcellbatch_docker_name); + String slurm_tmpdir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_tmpdir); + String slurm_central_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_central_singularity_dir); + String slurm_local_singularity_dir = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_local_singularity_dir); + String slurm_singularity_module_name = PropertyLoader.getRequiredProperty(PropertyLoader.slurm_singularity_module_name); + String simDataDirArchiveExternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveExternal); + String simDataDirArchiveInternal = PropertyLoader.getRequiredProperty(PropertyLoader.simDataDirArchiveInternal); + File slurm_singularity_central_filepath = new File(slurm_central_singularity_dir,new File(slurm_singularity_local_image_filepath).getName()); + + MemLimitResults memoryMBAllowed = new MemLimitResults(256, "Optimization Default"); + String[] environmentVars = new String[] { + "datadir_external="+primaryDataDirExternal, + }; + + LineStringBuilder lsb = new LineStringBuilder(); + slurmScriptInit(jobName, false, memoryMBAllowed, lsb); + File optDataDir = optProblemInputFile.getParentFile(); + File optDataDirExternal = new File(optDataDir.getAbsolutePath().replace(primaryDataDirInternal, primaryDataDirExternal)); + if (!optDataDirExternal.exists() && !optDataDirExternal.mkdir()){ + LG.error("failed to make optimization data directory "+optDataDir.getAbsolutePath()); + } +// if (optDataDirExternal.setWritable(true,false)) + slurmInitSingularity(lsb, optDataDirExternal.getAbsolutePath(), Optional.empty(), htclogdir_external, 
softwareVersion, + slurm_singularity_local_image_filepath, slurm_tmpdir, slurm_central_singularity_dir, + slurm_local_singularity_dir, simDataDirArchiveExternal, simDataDirArchiveInternal, + slurm_singularity_central_filepath, slurm_singularity_module_name, environmentVars); + + lsb.write(" cmd_prefix=\"$container_prefix\""); + lsb.write("echo \"cmd_prefix is '${cmd_prefix}'\""); + lsb.append("echo command = "); + lsb.write("${cmd_prefix}" + ""); + + String optProblemInput_container_filename = optProblemInputFile.getAbsolutePath().replace(optProblemInputFile.getParent(),"/simdata"); + String optProblemOutput_container_filename = optProblemOutputFile.getAbsolutePath().replace(optProblemOutputFile.getParent(),"/simdata"); + String optReport_container_filename = optReportFile.getAbsolutePath().replace(optReportFile.getParent(),"/simdata"); + lsb.write("${cmd_prefix} " + optProblemInput_container_filename + " " + optProblemOutput_container_filename + " " + optReport_container_filename); + + return toUnixStyleText(lsb.toString()); + } + } diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java index 0631ff5537..9cf9f78d67 100644 --- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java +++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java @@ -70,6 +70,9 @@ public void setup() setProperty(PropertyLoader.jmsBlobMessageMinSize, "100000"); setProperty(PropertyLoader.simulationPostprocessor, "JavaPostprocessor64"); setProperty(PropertyLoader.simulationPreprocessor, "JavaPreprocessor64"); + + setProperty(PropertyLoader.primarySimDataDirInternalProperty, "/share/apps/vcell3/users"); + setProperty(PropertyLoader.vcellopt_singularity_image, "/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img"); } @AfterEach @@ -121,7 +124,7 @@ public String 
createScriptForNativeSolvers(String simTaskResourcePath, String[] int NUM_CPUs = 1; int MEM_SIZE_MB = 1000; ArrayList postProcessingCommands = new ArrayList<>(); - return slurmProxy.createJobScriptText(JOB_NAME, commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); + return slurmProxy.createJobScriptText(JOB_NAME, commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); } public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_NAME) throws IOException, XmlParseException, ExpressionException { @@ -164,6 +167,23 @@ public String createScriptForJavaSolvers(String simTaskResourcePath, String JOB_ return slurmProxy.createJobScriptText(JOB_NAME, commandSet, NUM_CPUs, MEM_SIZE_MB, postProcessingCommands, simTask); } + /** Test helper: constructs SlurmProxy(null, "vcell"), builds the optimization problem, run-output and report File objects under /share/apps/vcell3/users/parest_data with job_id embedded in each file name, and returns the script text produced by createOptJobScript for JOB_NAME. */ public String createScriptForOptimizations(String JOB_NAME, int job_id) throws IOException, XmlParseException, ExpressionException { + SlurmProxy slurmProxy = new SlurmProxy(null, "vcell"); + File optProblemInputFile = new File("/share/apps/vcell3/users/parest_data/CopasiParest_"+job_id+"_optProblem.json"); + File optProblemOutputFile = new File("/share/apps/vcell3/users/parest_data/CopasiParest_"+job_id+"_optRun.json"); + File optProblemReportFile = new File("/share/apps/vcell3/users/parest_data/CopasiParest_"+job_id+"_optReport.txt"); + return slurmProxy.createOptJobScript(JOB_NAME, optProblemInputFile, optProblemOutputFile, optProblemReportFile); + } + + /** Generates the optimization submit script for job CopasiParest_152878 and checks it against the stored fixture slurm_fixtures/opt/CopasiParest_152878.sub. */ @Test + public void testOptimization() throws IOException, XmlParseException, ExpressionException { + String JOB_NAME = "CopasiParest_152878"; + int job_id = 152878; + String slurmScript = createScriptForOptimizations(JOB_NAME, job_id); + String expectedSlurmScript = readTextFileFromResource("slurm_fixtures/opt/CopasiParest_152878.sub"); + /* both sides are trimmed, so only leading/trailing whitespace differences are tolerated; the script body must match exactly */ Assertions.assertEquals(expectedSlurmScript.trim(), slurmScript.trim()); + } + @Test public void testSimJobScriptFiniteVolume() throws IOException, XmlParseException, ExpressionException { String simTaskResourcePath = 
"slurm_fixtures/finite_volume/SimID_274514696_0__0.simtask.xml"; diff --git a/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub new file mode 100644 index 0000000000..7f45efc7de --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878.sub @@ -0,0 +1,90 @@ +#!/usr/bin/bash +#SBATCH --partition=vcell +#SBATCH --reservation= +#SBATCH --qos=vcell +#SBATCH -J CopasiParest_152878 +#SBATCH -o /share/apps/vcell3/htclogs/CopasiParest_152878.slurm.log +#SBATCH -e /share/apps/vcell3/htclogs/CopasiParest_152878.slurm.log +#SBATCH --mem=256M +#SBATCH --no-kill +#SBATCH --no-requeue +# VCell SlurmProxy memory limit source=Optimization Default +#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- +set -x + +TMPDIR=/scratch/vcell +echo "using TMPDIR=$TMPDIR" +if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi +echo `hostname` + +export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles + +source /usr/share/Modules/init/bash + +module load singularity/vcell-3.10.0 + +echo "job running on host `hostname -f`" + +echo "id is `id`" + +echo "bash version is `bash --version`" +date + +echo ENVIRONMENT +env + +container_prefix= +if command -v singularity >/dev/null 2>&1; then + # + # Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img + # + localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img + if [ ! 
-e "$localSingularityImage" ]; then + echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img + mkdir -p /state/partition1/singularityImages + singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) + flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img" + theStatus=$? + if [ $theStatus -eq 100 ] + then + echo "lock in use, waiting for lock owner to copy singularityImage" + let c=0 + until [ -f $localSingularityImage ] + do + sleep 3 + let c=c+1 + if [ $c -eq 20 ] + then + echo "Exceeded wait time for lock owner to copy singularityImage" + break + fi + done + else + if [ $theStatus -eq 0 ] + then + echo copy succeeded + else + echo copy failed + fi + fi + rm -f ${singularitytempfile} + if [ ! 
-e "$localSingularityImage" ]; then + echo "Failed to copy $localSingularityImage to hpc from central" + exit 1 + else + echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-opt_d6825f4.img + fi + fi + container_prefix="singularity run --bind /share/apps/vcell3/users/parest_data:/simdata --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env datadir_external=/share/apps/vcell3/users " +else + echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " + exit 1 +fi +echo "container_prefix is '${container_prefix}'" +echo "3 date=`date`" +#END---------SlurmProxy.generateScript():slurmInitSingularity---------- + + cmd_prefix="$container_prefix" +echo "cmd_prefix is '${cmd_prefix}'" +echo command = ${cmd_prefix} +${cmd_prefix} /simdata/CopasiParest_152878_optProblem.json /simdata/CopasiParest_152878_optRun.json /simdata/CopasiParest_152878_optReport.txt diff --git a/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878_optProblem.json b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878_optProblem.json new file mode 100644 index 0000000000..a782644730 --- /dev/null +++ b/vcell-server/src/test/resources/slurm_fixtures/opt/CopasiParest_152878_optProblem.json @@ -0,0 +1 @@ 
+{"copasiOptimizationMethod":{"optimizationMethodType":"evolutionaryProgram","optimizationParameter":[{"dataType":"int","paramType":"numberOfGenerations","value":200.0},{"dataType":"int","paramType":"populationSize","value":20.0},{"dataType":"int","paramType":"randomNumberGenerator","value":1.0},{"dataType":"int","paramType":"seed","value":0.0}]},"dataSet":[[0.0,0.0,0.0],[0.2,7.837771465569739E-6,7.731335126235786E-6],[0.4,2.0593843070764626E-5,8.238309058312503E-6],[1.2000000000000002,6.092800158137852E-5,4.901858616668307E-6],[1.6,7.383302982330054E-5,3.717370895040455E-6],[2.0,8.341720488015709E-5,2.844045663870549E-6],[2.4000000000000004,9.05357751654149E-5,2.199007183369077E-6],[2.8000000000000003,9.582350644303182E-5,1.721894658600328E-6],[3.2,9.975160511616322E-5,1.3685810402960113E-6],[3.6,1.0266987090396688E-4,1.1067085357385573E-6],[4.0,1.0483796387456141E-4,9.124911434197557E-7],[4.4,1.0644894024296935E-4,7.683674905977245E-7],[5.0,1.0812336397947287E-4,6.187564192656043E-7],[5.800000000000001,1.0945926255992334E-4,4.995069384264931E-7],[6.6000000000000005,1.1019666203564935E-4,4.3373005052257187E-7],[7.4,1.1060377237367107E-4,3.9742960805785747E-7],[8.200000000000001,1.1082853753977734E-4,3.77392559705245E-7],[9.0,1.1095262992343858E-4,3.663314913622183E-7],[9.8,1.1102114085387212E-4,3.602251284802516E-7],[10.0,1.1103279529923035E-4,3.591864014841431E-7]],"mathModelSbmlContents":"\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n C_cyt_init_uM \n \n \n \n \n RanC_nuc_init_uM \n \n \n \n \n \n \n \n \n 1 \n \n \n KMOLE \n 1 \n \n \n \n \n \n \n \n \n kfl \n \n \n RanC_cyt \n \n \n RanC_nuc \n \n \n \n \n \n \n \n \n \n \n \n Kf \n RanC_cyt \n \n \n \n \n \n \n \n Kr \n Ran_cyt \n \n C_cyt \n \n \n \n \n \n \n \n \n \n \n \n Size_cyt \n Ran_cyt_init_uM \n \n \n \n \n \n Size_cyt \n C_cyt_init_uM \n \n \n \n \n \n \n \n \n \n \n \n Size_cyt \n RanC_cyt_init_uM \n 
\n \n \n Size_cyt \n C_cyt_init_uM \n \n \n \n Size_nuc \n RanC_nuc_init_uM \n \n \n \n \n \n \n \n \n UnitFactor_uM_um3_molecules_neg_1 \n Size_pm \n s2_init_molecules_um_2 \n \n \n \n \n \n \n \n Size_nm \n \n \n 1 \n Size_cyt \n \n \n \n \n \n \n \n \n Size_nm \n \n \n 1 \n Size_nuc \n \n \n \n \n \n \n \n \n \n \n K_Ran_cyt_total \n \n \n Size_cyt \n C_cyt \n \n \n \n \n 1 \n Size_cyt \n \n \n \n \n \n \n \n \n \n \n K_RanC_cyt_total \n \n \n \n \n Size_cyt \n C_cyt \n \n \n \n \n \n \n Size_nuc \n RanC_nuc \n \n \n \n \n \n 1 \n Size_cyt \n \n \n \n \n \n \n \n \n K_s2_total \n \n \n 1 \n \n \n UnitFactor_uM_um3_molecules_neg_1 \n Size_pm \n \n \n \n \n \n \n \n J_r0 \n \n \n \n \n \n \n KFlux_nm_nuc \n J_flux0 \n \n \n \n \n \n","numberOfOptimizationRuns":1,"parameterDescriptionList":[{"initialValue":1.0,"maxValue":10.0,"minValue":0.1,"name":"Kf","scale":1.0},{"initialValue":1000.0,"maxValue":10000.0,"minValue":100.0,"name":"Kr","scale":1000.0}],"referenceVariable":[{"referenceVariableType":"independent","varName":"t"},{"referenceVariableType":"dependent","varName":"Ran_cyt"},{"referenceVariableType":"dependent","varName":"RanC_cyt"}]} \ No newline at end of file