Skip to content

Commit

Permalink
Merge pull request #1337 from virtualcell/decouple-batch
Browse files Browse the repository at this point in the history
Unit tests for SlurmProxy to baseline current behavior.
  • Loading branch information
jcschaff authored Aug 22, 2024
2 parents 9d84243 + abca605 commit 2faada1
Show file tree
Hide file tree
Showing 25 changed files with 3,853 additions and 452 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
package cbit.vcell.message.server.htc;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collection;
Expand All @@ -18,7 +17,6 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.vcell.util.BeanUtils;
import org.vcell.util.document.KeyValue;
import org.vcell.util.exe.ExecutableException;

Expand Down Expand Up @@ -170,7 +168,7 @@ public HtcProxy(CommandService commandService, String htcUser){
* @throws ExecutableException
*/
public abstract HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_external, ExecutableCommand.Container commandSet,
int ncpus, double memSize, Collection<PortableCommand> postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException;
int ncpus, double memSize, Collection<PortableCommand> postProcessingCommands, SimulationTask simTask,File primaryUserDirExternal) throws ExecutableException, IOException;
/**
 * Submits an optimization job to the HTC backend implemented by the concrete subclass.
 *
 * NOTE(review): the parameter descriptions below are inferred from the names only, since no
 * implementation is visible here; confirm against the concrete proxies.
 *
 * @param jobName name of the job to submit
 * @param sub_file_internal submission file (presumably the path as seen by this process; confirm)
 * @param sub_file_external submission file (presumably the path as seen by the cluster; confirm)
 * @param optProblemInputFile optimization problem input file
 * @param optProblemOutputFile optimization problem output file
 * @param optReportFile optimization report file
 * @return the id of the submitted job
 * @throws ExecutableException declared by this signature; the triggering conditions are implementation-specific
 */
public abstract HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, File sub_file_external,
File optProblemInputFile,File optProblemOutputFile,File optReportFile)throws ExecutableException;
/**
 * Returns an {@link HtcProxy} produced by the concrete subclass.
 *
 * NOTE(review): judging by the name, the returned proxy is presumably an independent copy that
 * can be used from another thread; confirm against the implementations.
 *
 * @return an HtcProxy instance
 */
public abstract HtcProxy cloneThreadsafe();
Expand Down Expand Up @@ -224,29 +222,22 @@ public static String createHtcSimJobName(SimTaskInfo simTaskInfo) {
return HTC_SIMULATION_JOB_NAME_PREFIX+simTaskInfo.simId.toString()+"_"+simTaskInfo.jobIndex+"_"+simTaskInfo.taskId;
}

public static void writeUnixStyleTextFile(File file, String javaString) throws IOException {
try (FileOutputStream fos = new FileOutputStream(file)) {
Charset asciiCharset = Charset.forName("US-ASCII");
CharsetEncoder encoder = asciiCharset.newEncoder();
CharBuffer unicodeCharBuffer = CharBuffer.wrap(javaString);
ByteBuffer asciiByteBuffer = encoder.encode(unicodeCharBuffer);
byte[] asciiArray = asciiByteBuffer.array();
ByteBuffer unixByteBuffer = ByteBuffer.allocate(asciiArray.length);
int count = 0;
for (int i=0;i<asciiArray.length;i++){
if (asciiArray[i] != 0x0d){ // skip \r character
unixByteBuffer.put(asciiArray[i]);
count++;
}
}
//do this to not write the zeros at the end of unixByteBuffer
ByteBuffer bb = ByteBuffer.wrap(unixByteBuffer.array(),0,count);

try (FileChannel fc = fos.getChannel()) {
fc.write(bb);
fc.close();
/**
 * Converts text to Unix style by validating that it is pure US-ASCII and removing every
 * carriage-return ({@code \r}, 0x0d) character; all other characters are kept unchanged.
 *
 * @param javaString the text to convert; must not be {@code null}
 * @return the input text with all {@code \r} characters removed
 * @throws IOException a {@link java.nio.charset.CharacterCodingException} if the text contains a
 *         character that cannot be encoded as US-ASCII
 */
public static String toUnixStyleText(String javaString) throws IOException {
    // A fresh encoder reports (throws on) unmappable characters instead of silently replacing them.
    CharsetEncoder encoder = StandardCharsets.US_ASCII.newEncoder();
    ByteBuffer asciiBytes = encoder.encode(CharBuffer.wrap(javaString));

    // Walk only the bytes between position and limit. The backing array returned by array()
    // is not guaranteed to be exactly as long as the encoded content, so reading it in full
    // could leak padding bytes into the result.
    StringBuilder unixText = new StringBuilder(asciiBytes.remaining());
    while (asciiBytes.hasRemaining()) {
        byte b = asciiBytes.get();
        if (b != 0x0d) { // skip \r character
            unixText.append((char) b); // every byte is 7-bit ASCII, so the cast is lossless
        }
    }
    return unixText.toString();
}

/**
 * Returns the file extension used for submission files by the concrete HTC proxy.
 *
 * NOTE(review): whether the value includes a leading dot is not visible here; confirm against
 * the implementations.
 *
 * @return the submission file extension
 */
public abstract String getSubmissionFileExtension();
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
<SimulationTask xmlns="http://sourceforge.net/projects/vcell/vcml" TaskId="0" JobIndex="0" isPowerUser="false">
<MathDescription Name="non-spatial ODE_generated">
<Annotation>cloned from 'non-spatial ODE_generated' owned by user frm
cloned from 'non-spatial ODE_generated' owned by user anu
cloned from 'non-spatial ODE_generated' owned by user schaff
cloned from 'non-spatial ODE_generated' owned by user les</Annotation>
<Constant Name="_F_">96485.3321</Constant>
<Constant Name="_F_nmol_">9.64853321E-5</Constant>
<Constant Name="_K_GHK_">1.0E-9</Constant>
<Constant Name="_N_pmol_">6.02214179E11</Constant>
<Constant Name="_PI_">3.141592653589793</Constant>
<Constant Name="_R_">8314.46261815</Constant>
<Constant Name="_T_">300.0</Constant>
<Constant Name="C_cyt_init_uM">0.0</Constant>
<Constant Name="K_millivolts_per_volt">1000.0</Constant>
<Constant Name="Kf">2.0</Constant>
<Constant Name="kfl">2.0</Constant>
<Constant Name="KMOLE">0.001660538783162726</Constant>
<Constant Name="Kr">1000.0</Constant>
<Constant Name="netValence">1.0</Constant>
<Constant Name="Ran_cyt_init_uM">0.0</Constant>
<Constant Name="RanC_cyt_init_uM">0.0</Constant>
<Constant Name="RanC_nuc_init_uM">4.493165893949507E-4</Constant>
<Constant Name="s2_init_molecules_um_2">0.0</Constant>
<Constant Name="Size_cyt">14891.899581611733</Constant>
<Constant Name="Size_EC">124712.10435961554</Constant>
<Constant Name="Size_nm">1406.7733692487282</Constant>
<Constant Name="Size_nuc">3697.013658772733</Constant>
<Constant Name="Size_pm">4738.640600365477</Constant>
<Constant Name="UnitFactor_uM_um3_molecules_neg_1">(1.0 * pow(KMOLE,1.0))</Constant>
<Constant Name="Voltage_nm">0.0</Constant>
<Constant Name="Voltage_pm">0.0</Constant>
<VolumeVariable Name="C_cyt" Domain="Compartment" />
<VolumeVariable Name="RanC_nuc" Domain="Compartment" />
<Function Name="J_flux0" Domain="Compartment">(kfl * (RanC_cyt - RanC_nuc))</Function>
<Function Name="J_r0" Domain="Compartment">((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt))</Function>
<Function Name="K_Ran_cyt_total" Domain="Compartment">((Size_cyt * Ran_cyt_init_uM) - (Size_cyt * C_cyt_init_uM))</Function>
<Function Name="K_RanC_cyt_total" Domain="Compartment">((Size_cyt * RanC_cyt_init_uM) + (Size_cyt * C_cyt_init_uM) + (Size_nuc * RanC_nuc_init_uM))</Function>
<Function Name="K_s2_total" Domain="Compartment">(UnitFactor_uM_um3_molecules_neg_1 * Size_pm * s2_init_molecules_um_2)</Function>
<Function Name="KFlux_nm_cyt" Domain="Compartment">(Size_nm / Size_cyt)</Function>
<Function Name="KFlux_nm_nuc" Domain="Compartment">(Size_nm / Size_nuc)</Function>
<Function Name="Ran_cyt" Domain="Compartment">((K_Ran_cyt_total + (Size_cyt * C_cyt)) / Size_cyt)</Function>
<Function Name="RanC_cyt" Domain="Compartment">((K_RanC_cyt_total - (Size_cyt * C_cyt) - (Size_nuc * RanC_nuc)) / Size_cyt)</Function>
<Function Name="s2" Domain="Compartment">(K_s2_total / (UnitFactor_uM_um3_molecules_neg_1 * Size_pm))</Function>
<CompartmentSubDomain Name="Compartment">
<BoundaryType Boundary="Xm" Type="Value" />
<BoundaryType Boundary="Xp" Type="Value" />
<BoundaryType Boundary="Ym" Type="Value" />
<BoundaryType Boundary="Yp" Type="Value" />
<BoundaryType Boundary="Zm" Type="Value" />
<BoundaryType Boundary="Zp" Type="Value" />
<OdeEquation Name="C_cyt" SolutionType="Unknown">
<Rate>J_r0</Rate>
<Initial>C_cyt_init_uM</Initial>
</OdeEquation>
<OdeEquation Name="RanC_nuc" SolutionType="Unknown">
<Rate>(KFlux_nm_nuc * J_flux0)</Rate>
<Initial>RanC_nuc_init_uM</Initial>
</OdeEquation>
</CompartmentSubDomain>
<Version Name="non-spatial ODE_generated" KeyValue="252546356" BranchId="84086544" Archived="0" Date="08-Feb-2023 01:54:34" FromVersionable="false">
<Owner Name="schaff" Identifier="17" />
<GroupAccess Type="1" />
<Annotation>cloned from 'non-spatial ODE_generated' owned by user frm
cloned from 'non-spatial ODE_generated' owned by user anu
cloned from 'non-spatial ODE_generated' owned by user schaff
cloned from 'non-spatial ODE_generated' owned by user les</Annotation>
</Version>
</MathDescription>
<Simulation Name="Copy of adams moulton">
<Annotation>cloned from 'Copy of adams moulton' owned by user frm
cloned from 'adams moulton' owned by user anu
cloned from 'adams moulton' owned by user schaff
cloned from 'Copy of Simulation1' owned by user les</Annotation>
<SolverTaskDescription TaskType="Unsteady" UseSymbolicJacobian="false" Solver="Adams-Moulton (Fifth Order, Fixed Time Step)">
<TimeBound StartTime="0.0" EndTime="10.0" />
<TimeStep DefaultTime="0.1" MinTime="1.0E-8" MaxTime="1.0" />
<ErrorTolerance Absolut="1.0E-9" Relative="1.0E-9" />
<OutputOptions KeepEvery="1" KeepAtMost="1000" />
<NumberProcessors>1</NumberProcessors>
</SolverTaskDescription>
<MathOverrides>
<Constant Name="Kf" ConstantArraySpec="1001">0.01 to 10.0, log, 4 values</Constant>
</MathOverrides>
<Version Name="Copy of adams moulton" KeyValue="274633859" BranchId="274633860" Archived="0" Date="21-Aug-2024 21:44:26" FromVersionable="false">
<Owner Name="schaff" Identifier="17" />
<GroupAccess Type="1" />
<Annotation>cloned from 'Copy of adams moulton' owned by user frm
cloned from 'adams moulton' owned by user anu
cloned from 'adams moulton' owned by user schaff
cloned from 'Copy of Simulation1' owned by user les</Annotation>
</Version>
</Simulation>
<Geometry Name="nonspatial381239605" Dimension="0">
<Annotation>cloned from 'nonspatial381239605' owned by user frm
cloned from 'nonspatial1214274674' owned by user anu
cloned from 'nonspatial2060234169' owned by user schaff
cloned from 'nonspatial608887770' owned by user les</Annotation>
<Extent X="10.0" Y="10.0" Z="10.0" />
<Origin X="0.0" Y="0.0" Z="0.0" />
<SubVolume Name="Compartment" Handle="0" Type="Compartmental" KeyValue="252545860" />
<Version Name="nonspatial381239605" KeyValue="252545857" BranchId="84086519" Archived="0" Date="08-Feb-2023 01:53:13" FromVersionable="false">
<Owner Name="schaff" Identifier="17" />
<GroupAccess Type="1" />
<Annotation>cloned from 'nonspatial381239605' owned by user frm
cloned from 'nonspatial1214274674' owned by user anu
cloned from 'nonspatial2060234169' owned by user schaff
cloned from 'nonspatial608887770' owned by user les</Annotation>
</Version>
</Geometry>
</SimulationTask>
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#!/usr/bin/bash
#SBATCH --partition=vcell
#SBATCH --reservation=
#SBATCH --qos=vcell
#SBATCH -J V_REL_274633859_0_0
#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log
#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log
#SBATCH --mem=4096M
#SBATCH --no-kill
#SBATCH --no-requeue
# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB


#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity----------
# Prepares the compute node for running the job inside a Singularity container:
# sets up the scratch directory and environment modules, logs diagnostics, makes sure a
# node-local copy of the container image exists, and finally defines container_prefix,
# the command prefix used by the rest of the script to invoke programs in the container.

# Trace every command into the job log from here on.
set -x

# Scratch directory for the job; created on demand.
TMPDIR=/scratch/vcell
echo "using TMPDIR=$TMPDIR"
if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi
echo `hostname`

# Initialise environment modules so that 'module load' can provide the singularity command.
export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles

source /usr/share/Modules/init/bash

module load singularity/vcell-3.10.0

# Diagnostics written to the job log: host, user identity, bash version, time, environment.
echo "job running on host `hostname -f`"

echo "id is `id`"

echo "bash version is `bash --version`"
date

echo ENVIRONMENT
env

# Stays empty unless the singularity command is available (the else branch below exits).
container_prefix=
if command -v singularity >/dev/null 2>&1; then
#
# Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
#
localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
if [ ! -e "$localSingularityImage" ]; then
echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
mkdir -p /state/partition1/singularityImages
# mktemp -u only generates a unique name (no file is created); the copy below creates it.
singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages)
# Under a non-blocking lock (-n), copy the shared image to the temp name and then move it
# into the node-local location without overwriting (mv -n). If another process already
# holds the lock, flock exits with status 100 (-E 100) and nothing is copied here.
flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img"
theStatus=$?
if [ $theStatus -eq 100 ]
then
# Lock was busy: poll for the local image every 3 seconds, giving up after 20 polls.
echo "lock in use, waiting for lock owner to copy singularityImage"
let c=0
until [ -f $localSingularityImage ]
do
sleep 3
let c=c+1
if [ $c -eq 20 ]
then
echo "Exceeded wait time for lock owner to copy singularityImage"
break
fi
done
else
# We held the lock; theStatus is the exit status of the cp/mv command line run under it.
if [ $theStatus -eq 0 ]
then
echo copy succeeded
else
echo copy failed
fi
fi
# Remove any leftover temp file, then verify that the local image really exists.
rm -f ${singularitytempfile}
if [ ! -e "$localSingularityImage" ]; then
echo "Failed to copy $localSingularityImage to hpc from central"
exit 1
else
echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img
fi
fi
# Command prefix for container invocations; it ends with a space so that a program name
# can be appended directly (e.g. ${container_prefix}JavaSimExe64).
container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL "
else
# Without singularity the job cannot run at all.
echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) "
exit 1
fi
echo "container_prefix is '${container_prefix}'"
echo "3 date=`date`"

#BEGIN---------SlurmProxy.generateScript():sendFailureMsg----------
# Sends a failure message for this simulation job by running SendErrorMsg through the
# container command prefix.
# Arguments:
#   $1 - error message text, passed as the value of --msg-job-errmsg
# Globals:
#   container_prefix - read; command prefix used to invoke the container
#   stat             - written; exit status of the SendErrorMsg invocation
sendFailureMsg() {
    # Log the exact command line before running it.
    echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
    ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
    stat=$?
    if [[ $stat -ne 0 ]]; then
        # Double quotes are required here: single quotes would print the literal text '$stat'
        # instead of the actual return code.
        echo "failed to send error message, retcode=$stat"
    else
        echo 'sent failure message'
    fi
}
#END---------SlurmProxy.generateScript():sendFailureMsg----------
#BEGIN---------SlurmProxy.generateScript():hasExitProcessor----------
# Runs JavaPostprocessor64 through the container command prefix for this simulation job.
# Arguments:
#   $1 - exit status of the solver command, forwarded as an argument to the post-processor
# Globals:
#   container_prefix - read; ends with a space, so the program name is appended directly
# The function's exit status is that of the JavaPostprocessor64 invocation (its last command).
callExitProcessor( ) {
# Log the exact command line before running it.
echo exitCommand = ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub
${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub
}
#END---------SlurmProxy.generateScript():hasExitProcessor----------
# Main execution step: run the JavaSimExe64 solver command for this simulation task, then hand
# its exit status to the exit processor.
echo
echo "1 date=`date`"

echo
#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64
# If a native executable with this name exists it takes precedence: the command is then run
# from the nativesolvers directory instead of through the container prefix.
echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations"
nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64
if [ -e "${nativeExe}" ]; then
cmd_prefix="/share/apps/vcell3/nativesolvers/"
else
cmd_prefix="$container_prefix"
fi
echo "cmd_prefix is '${cmd_prefix}'"
echo "5 date=`date`"
echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff
command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff "
# Expanded unquoted on purpose: word splitting turns the string into the command and its arguments.
$command
# Capture the exit status immediately, before any other command overwrites $?.
stat=$?
echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat
# On failure: run the exit processor with the failing status, then return that status to Slurm.
if [ $stat -ne 0 ]; then
callExitProcessor $stat
echo returning $stat to Slurm
exit $stat
fi
#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64
# Success path: run the exit processor with status 0.
callExitProcessor 0


#Following commands (if any) are read by JavaPostProcessor64


Loading

0 comments on commit 2faada1

Please sign in to comment.