-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1337 from virtualcell/decouple-batch
Unit tests for SlurmProxy to baseline current behavior.
- Loading branch information
Showing
25 changed files
with
3,853 additions
and
452 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
369 changes: 79 additions & 290 deletions
369
vcell-server/src/main/java/cbit/vcell/message/server/htc/slurm/SlurmProxy.java
Large diffs are not rendered by default.
Oops, something went wrong.
580 changes: 443 additions & 137 deletions
580
vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
Large diffs are not rendered by default.
Oops, something went wrong.
111 changes: 111 additions & 0 deletions
111
vcell-server/src/test/resources/slurm_fixtures/adams_moulton/SimID_274633859_0__0.simtask.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
<SimulationTask xmlns="http://sourceforge.net/projects/vcell/vcml" TaskId="0" JobIndex="0" isPowerUser="false"> | ||
<MathDescription Name="non-spatial ODE_generated"> | ||
<Annotation>cloned from 'non-spatial ODE_generated' owned by user frm | ||
cloned from 'non-spatial ODE_generated' owned by user anu | ||
cloned from 'non-spatial ODE_generated' owned by user schaff | ||
cloned from 'non-spatial ODE_generated' owned by user les</Annotation> | ||
<Constant Name="_F_">96485.3321</Constant> | ||
<Constant Name="_F_nmol_">9.64853321E-5</Constant> | ||
<Constant Name="_K_GHK_">1.0E-9</Constant> | ||
<Constant Name="_N_pmol_">6.02214179E11</Constant> | ||
<Constant Name="_PI_">3.141592653589793</Constant> | ||
<Constant Name="_R_">8314.46261815</Constant> | ||
<Constant Name="_T_">300.0</Constant> | ||
<Constant Name="C_cyt_init_uM">0.0</Constant> | ||
<Constant Name="K_millivolts_per_volt">1000.0</Constant> | ||
<Constant Name="Kf">2.0</Constant> | ||
<Constant Name="kfl">2.0</Constant> | ||
<Constant Name="KMOLE">0.001660538783162726</Constant> | ||
<Constant Name="Kr">1000.0</Constant> | ||
<Constant Name="netValence">1.0</Constant> | ||
<Constant Name="Ran_cyt_init_uM">0.0</Constant> | ||
<Constant Name="RanC_cyt_init_uM">0.0</Constant> | ||
<Constant Name="RanC_nuc_init_uM">4.493165893949507E-4</Constant> | ||
<Constant Name="s2_init_molecules_um_2">0.0</Constant> | ||
<Constant Name="Size_cyt">14891.899581611733</Constant> | ||
<Constant Name="Size_EC">124712.10435961554</Constant> | ||
<Constant Name="Size_nm">1406.7733692487282</Constant> | ||
<Constant Name="Size_nuc">3697.013658772733</Constant> | ||
<Constant Name="Size_pm">4738.640600365477</Constant> | ||
<Constant Name="UnitFactor_uM_um3_molecules_neg_1">(1.0 * pow(KMOLE,1.0))</Constant> | ||
<Constant Name="Voltage_nm">0.0</Constant> | ||
<Constant Name="Voltage_pm">0.0</Constant> | ||
<VolumeVariable Name="C_cyt" Domain="Compartment" /> | ||
<VolumeVariable Name="RanC_nuc" Domain="Compartment" /> | ||
<Function Name="J_flux0" Domain="Compartment">(kfl * (RanC_cyt - RanC_nuc))</Function> | ||
<Function Name="J_r0" Domain="Compartment">((Kf * RanC_cyt) - ((Kr * Ran_cyt) * C_cyt))</Function> | ||
<Function Name="K_Ran_cyt_total" Domain="Compartment">((Size_cyt * Ran_cyt_init_uM) - (Size_cyt * C_cyt_init_uM))</Function> | ||
<Function Name="K_RanC_cyt_total" Domain="Compartment">((Size_cyt * RanC_cyt_init_uM) + (Size_cyt * C_cyt_init_uM) + (Size_nuc * RanC_nuc_init_uM))</Function> | ||
<Function Name="K_s2_total" Domain="Compartment">(UnitFactor_uM_um3_molecules_neg_1 * Size_pm * s2_init_molecules_um_2)</Function> | ||
<Function Name="KFlux_nm_cyt" Domain="Compartment">(Size_nm / Size_cyt)</Function> | ||
<Function Name="KFlux_nm_nuc" Domain="Compartment">(Size_nm / Size_nuc)</Function> | ||
<Function Name="Ran_cyt" Domain="Compartment">((K_Ran_cyt_total + (Size_cyt * C_cyt)) / Size_cyt)</Function> | ||
<Function Name="RanC_cyt" Domain="Compartment">((K_RanC_cyt_total - (Size_cyt * C_cyt) - (Size_nuc * RanC_nuc)) / Size_cyt)</Function> | ||
<Function Name="s2" Domain="Compartment">(K_s2_total / (UnitFactor_uM_um3_molecules_neg_1 * Size_pm))</Function> | ||
<CompartmentSubDomain Name="Compartment"> | ||
<BoundaryType Boundary="Xm" Type="Value" /> | ||
<BoundaryType Boundary="Xp" Type="Value" /> | ||
<BoundaryType Boundary="Ym" Type="Value" /> | ||
<BoundaryType Boundary="Yp" Type="Value" /> | ||
<BoundaryType Boundary="Zm" Type="Value" /> | ||
<BoundaryType Boundary="Zp" Type="Value" /> | ||
<OdeEquation Name="C_cyt" SolutionType="Unknown"> | ||
<Rate>J_r0</Rate> | ||
<Initial>C_cyt_init_uM</Initial> | ||
</OdeEquation> | ||
<OdeEquation Name="RanC_nuc" SolutionType="Unknown"> | ||
<Rate>(KFlux_nm_nuc * J_flux0)</Rate> | ||
<Initial>RanC_nuc_init_uM</Initial> | ||
</OdeEquation> | ||
</CompartmentSubDomain> | ||
<Version Name="non-spatial ODE_generated" KeyValue="252546356" BranchId="84086544" Archived="0" Date="08-Feb-2023 01:54:34" FromVersionable="false"> | ||
<Owner Name="schaff" Identifier="17" /> | ||
<GroupAccess Type="1" /> | ||
<Annotation>cloned from 'non-spatial ODE_generated' owned by user frm | ||
cloned from 'non-spatial ODE_generated' owned by user anu | ||
cloned from 'non-spatial ODE_generated' owned by user schaff | ||
cloned from 'non-spatial ODE_generated' owned by user les</Annotation> | ||
</Version> | ||
</MathDescription> | ||
<Simulation Name="Copy of adams moulton"> | ||
<Annotation>cloned from 'Copy of adams moulton' owned by user frm | ||
cloned from 'adams moulton' owned by user anu | ||
cloned from 'adams moulton' owned by user schaff | ||
cloned from 'Copy of Simulation1' owned by user les</Annotation> | ||
<SolverTaskDescription TaskType="Unsteady" UseSymbolicJacobian="false" Solver="Adams-Moulton (Fifth Order, Fixed Time Step)"> | ||
<TimeBound StartTime="0.0" EndTime="10.0" /> | ||
<TimeStep DefaultTime="0.1" MinTime="1.0E-8" MaxTime="1.0" /> | ||
<ErrorTolerance Absolut="1.0E-9" Relative="1.0E-9" /> | ||
<OutputOptions KeepEvery="1" KeepAtMost="1000" /> | ||
<NumberProcessors>1</NumberProcessors> | ||
</SolverTaskDescription> | ||
<MathOverrides> | ||
<Constant Name="Kf" ConstantArraySpec="1001">0.01 to 10.0, log, 4 values</Constant> | ||
</MathOverrides> | ||
<Version Name="Copy of adams moulton" KeyValue="274633859" BranchId="274633860" Archived="0" Date="21-Aug-2024 21:44:26" FromVersionable="false"> | ||
<Owner Name="schaff" Identifier="17" /> | ||
<GroupAccess Type="1" /> | ||
<Annotation>cloned from 'Copy of adams moulton' owned by user frm | ||
cloned from 'adams moulton' owned by user anu | ||
cloned from 'adams moulton' owned by user schaff | ||
cloned from 'Copy of Simulation1' owned by user les</Annotation> | ||
</Version> | ||
</Simulation> | ||
<Geometry Name="nonspatial381239605" Dimension="0"> | ||
<Annotation>cloned from 'nonspatial381239605' owned by user frm | ||
cloned from 'nonspatial1214274674' owned by user anu | ||
cloned from 'nonspatial2060234169' owned by user schaff | ||
cloned from 'nonspatial608887770' owned by user les</Annotation> | ||
<Extent X="10.0" Y="10.0" Z="10.0" /> | ||
<Origin X="0.0" Y="0.0" Z="0.0" /> | ||
<SubVolume Name="Compartment" Handle="0" Type="Compartmental" KeyValue="252545860" /> | ||
<Version Name="nonspatial381239605" KeyValue="252545857" BranchId="84086519" Archived="0" Date="08-Feb-2023 01:53:13" FromVersionable="false"> | ||
<Owner Name="schaff" Identifier="17" /> | ||
<GroupAccess Type="1" /> | ||
<Annotation>cloned from 'nonspatial381239605' owned by user frm | ||
cloned from 'nonspatial1214274674' owned by user anu | ||
cloned from 'nonspatial2060234169' owned by user schaff | ||
cloned from 'nonspatial608887770' owned by user les</Annotation> | ||
</Version> | ||
</Geometry> | ||
</SimulationTask> |
137 changes: 137 additions & 0 deletions
137
vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
#!/usr/bin/bash | ||
#SBATCH --partition=vcell | ||
#SBATCH --reservation= | ||
#SBATCH --qos=vcell | ||
#SBATCH -J V_REL_274633859_0_0 | ||
#SBATCH -o /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log | ||
#SBATCH -e /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.log | ||
#SBATCH --mem=4096M | ||
#SBATCH --no-kill | ||
#SBATCH --no-requeue | ||
# VCell SlurmProxy memory limit source=Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB | ||
|
||
|
||
#BEGIN---------SlurmProxy.generateScript():slurmInitSingularity---------- | ||
# Purpose: prepare the compute node, make sure a node-local copy of the | ||
# singularity image exists, and build $container_prefix, the command prefix | ||
# that later sections of this script prepend to the program they run. | ||
# Exits with status 1 if singularity is unavailable or the image cannot be | ||
# staged locally. | ||
# Trace every command into the job log. | ||
set -x | ||
|
||
# Scratch directory; created on demand if it does not exist yet. | ||
TMPDIR=/scratch/vcell | ||
echo "using TMPDIR=$TMPDIR" | ||
if [ ! -e $TMPDIR ]; then mkdir -p $TMPDIR ; fi | ||
echo `hostname` | ||
|
||
# Set up environment modules so that 'module load' below can find singularity. | ||
export MODULEPATH=/isg/shared/modulefiles:/tgcapps/modulefiles | ||
|
||
source /usr/share/Modules/init/bash | ||
|
||
module load singularity/vcell-3.10.0 | ||
|
||
# Diagnostics only: host, identity, shell version, time and full environment. | ||
echo "job running on host `hostname -f`" | ||
|
||
echo "id is `id`" | ||
|
||
echo "bash version is `bash --version`" | ||
date | ||
|
||
echo ENVIRONMENT | ||
env | ||
|
||
container_prefix= | ||
if command -v singularity >/dev/null 2>&1; then | ||
# | ||
# Copy of singularity image will be downloaded if not found in /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img | ||
# | ||
localSingularityImage=/state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img | ||
if [ ! -e "$localSingularityImage" ]; then | ||
echo "local singularity image $localSingularityImage not found, trying to download to hpc from "/share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img | ||
mkdir -p /state/partition1/singularityImages | ||
# 'mktemp -u' only generates a unique name; the file itself is created by the 'cp' below. | ||
singularitytempfile=$(mktemp -up /share/apps/vcell3/singularityImages) | ||
# Non-blocking lock (-n): if the lock is already held, flock exits with 100 (-E 100) | ||
# without running the copy. Otherwise the image is copied to the temp name and then | ||
# moved into place with 'mv -n', which does not overwrite an existing target. | ||
flock -E 100 -n /tmp/vcellSingularityLock_Rel_Version_7.6.0_build_28.lock sh -c "cp /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img ${singularitytempfile} ; mv -n ${singularitytempfile} /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img" | ||
theStatus=$? | ||
if [ $theStatus -eq 100 ] | ||
then | ||
# The lock is held elsewhere: poll for the image every 3 seconds, giving up after 20 polls. | ||
echo "lock in use, waiting for lock owner to copy singularityImage" | ||
let c=0 | ||
until [ -f $localSingularityImage ] | ||
do | ||
sleep 3 | ||
let c=c+1 | ||
if [ $c -eq 20 ] | ||
then | ||
echo "Exceeded wait time for lock owner to copy singularityImage" | ||
break | ||
fi | ||
done | ||
else | ||
# We ran the copy ourselves; only report its outcome here. The real check is below. | ||
if [ $theStatus -eq 0 ] | ||
then | ||
echo copy succeeded | ||
else | ||
echo copy failed | ||
fi | ||
fi | ||
# Remove the temp file if it is still there (no error if it is not). | ||
rm -f ${singularitytempfile} | ||
# Whatever path was taken above, the job can only continue if the local image now exists. | ||
if [ ! -e "$localSingularityImage" ]; then | ||
echo "Failed to copy $localSingularityImage to hpc from central" | ||
exit 1 | ||
else | ||
echo successful copy from /share/apps/vcell3/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img to /state/partition1/singularityImages/ghcr.io_virtualcell_vcell-batch_d6825f4.img | ||
fi | ||
fi | ||
# The value ends with a space on purpose: later code appends the program name directly | ||
# (e.g. ${container_prefix}JavaPostprocessor64) and relies on word-splitting. | ||
container_prefix="singularity run --bind /share/apps/vcell3/users:/simdata --bind /share/apps/vcell7/users:/simdata_secondary --bind /share/apps/vcell12/users:/share/apps/vcell12/users --bind /share/apps/vcell3/htclogs:/htclogs --bind /scratch/vcell:/solvertmp $localSingularityImage --env java_mem_Xmx=4096M --env jmshost_sim_internal=rke-wn-01.cam.uchc.edu --env jmsport_sim_internal=31618 --env jmsrestport_sim_internal=30163 --env jmsuser=clientUser --env jmspswd=dummy --env jmsblob_minsize=100000 --env mongodbhost_internal=rke-wn-01.cam.uchc.edu --env mongodbport_internal=30019 --env mongodb_database=test --env primary_datadir_external=/share/apps/vcell3/users --env secondary_datadir_external=/share/apps/vcell7/users --env htclogdir_external=/share/apps/vcell3/htclogs --env softwareVersion=Rel_Version_7.6.0_build_28 --env serverid=REL " | ||
else | ||
echo "Required singularity command not found (maybe 'module load singularity/vcell-3.10.0' command didn't work) " | ||
exit 1 | ||
fi | ||
echo "container_prefix is '${container_prefix}'" | ||
echo "3 date=`date`" | ||
#END---------SlurmProxy.generateScript():slurmInitSingularity---------- | ||
|
||
#BEGIN---------SlurmProxy.generateScript():sendFailureMsg---------- | ||
# sendFailureMsg ERRMSG
#
# Reports a job failure by running the SendErrorMsg program through
# ${container_prefix}, passing ERRMSG ($1) as --msg-job-errmsg.
# The full command line is echoed to the job log before it is run.
#
# ${container_prefix} is expanded unquoted on purpose so that it word-splits
# into the launcher command and its arguments.
#
# Side effect: overwrites the script-global variable 'stat' with the exit
# status of the SendErrorMsg command.
sendFailureMsg() {
    echo ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
    ${container_prefix} --msg-userid clientUser --msg-password dummy --msg-host rke-wn-01.cam.uchc.edu --msg-port 31618 --msg-job-host `hostname` --msg-job-userid schaff --msg-job-simkey 274633859 --msg-job-jobindex 0 --msg-job-taskid 0 --msg-job-errmsg "$1" SendErrorMsg
    stat=$?
    if [[ $stat -ne 0 ]]; then
        # Double quotes are required here: inside single quotes $stat is not
        # expanded and the log would show the literal text "$stat".
        echo "failed to send error message, retcode=$stat"
    else
        echo 'sent failure message'
    fi
}
#END---------SlurmProxy.generateScript():sendFailureMsg---------- | ||
#BEGIN---------SlurmProxy.generateScript():hasExitProcessor---------- | ||
# callExitProcessor EXIT_CODE | ||
# Runs JavaPostprocessor64 through ${container_prefix}, passing EXIT_CODE ($1) | ||
# followed by the path of this submit script. The command line is echoed to | ||
# the job log first. ${container_prefix} is left unquoted so that it | ||
# word-splits into the launcher and its arguments; the program name is | ||
# appended with no separator, so the prefix must already end in whitespace. | ||
# The function's return status is that of the JavaPostprocessor64 command. | ||
callExitProcessor( ) { | ||
echo exitCommand = ${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub | ||
${container_prefix}JavaPostprocessor64 274633859 schaff 17 0 0 $1 /share/apps/vcell3/htclogs/V_REL_274633859_0_0.slurm.sub | ||
} | ||
#END---------SlurmProxy.generateScript():hasExitProcessor---------- | ||
echo | ||
echo "1 date=`date`" | ||
|
||
echo | ||
#BEGIN---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 | ||
# Purpose: run JavaSimExe64 on the simulation task file, then hand its exit | ||
# status to callExitProcessor. A non-zero status is also returned to Slurm. | ||
echo "testing existance of native exe '/share/apps/vcell3/nativesolvers/JavaSimExe64' which overrides container invocations" | ||
nativeExe=/share/apps/vcell3/nativesolvers/JavaSimExe64 | ||
# If a native executable exists at this path, run it directly; otherwise run | ||
# the program through the container prefix. | ||
if [ -e "${nativeExe}" ]; then | ||
cmd_prefix="/share/apps/vcell3/nativesolvers/" | ||
else | ||
cmd_prefix="$container_prefix" | ||
fi | ||
echo "cmd_prefix is '${cmd_prefix}'" | ||
echo "5 date=`date`" | ||
echo command = ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff | ||
command="${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff " | ||
# Expanded unquoted on purpose: word-splitting turns the string into the | ||
# program followed by its arguments. | ||
$command | ||
# Capture the exit status immediately, before any other command overwrites $?. | ||
stat=$? | ||
echo ${cmd_prefix}JavaSimExe64 /share/apps/vcell3/users/schaff/SimID_274633859_0__0.simtask.xml /share/apps/vcell3/users/schaff returned $stat | ||
# Failure path: run the exit processor with the failing status, then exit | ||
# the job with that same status. | ||
if [ $stat -ne 0 ]; then | ||
callExitProcessor $stat | ||
echo returning $stat to Slurm | ||
exit $stat | ||
fi | ||
#END---------SlurmProxy.generateScript():ExecutableCommand----------JavaSimExe64 | ||
# Success path: run the exit processor with status 0. | ||
callExitProcessor 0 | ||
|
||
|
||
#Following commands (if any) are read by JavaPostProcessor64 | ||
|
||
|
Oops, something went wrong.