diff --git a/docker/build/Dockerfile-sched-dev b/docker/build/Dockerfile-sched-dev index cc9827cb94..08e0386838 100644 --- a/docker/build/Dockerfile-sched-dev +++ b/docker/build/Dockerfile-sched-dev @@ -68,7 +68,11 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \ maxOdeJobsPerUser="max-ode-jobs-per-user-not-set" \ vcell_ssh_cmd_cmdtimeout="cmdSrvcSshCmdTimeoutMS-not-set" \ vcell_ssh_cmd_restoretimeout="cmdSrvcSshCmdRestoreTimeoutFactor-not-set" \ - maxPdeJobsPerUser="max-pde-jobs-per-user-not-set" + maxPdeJobsPerUser="max-pde-jobs-per-user-not-set" \ + htcMinMemoryMB="htc-min-memory-not-set" \ + htcMaxMemoryMB="htc-max-memory-not-set" \ + htcPowerUserMemoryFloorMB="htc-power-user-memory-floor-not-set" \ + htcPowerUserMemoryMaxMB="htc-power-user-memory-max-not-set" ENV dbpswdfile=/run/secrets/dbpswd \ jmspswdfile=/run/secrets/jmspswd \ @@ -119,4 +123,8 @@ ENTRYPOINT java \ -Dvcell.server.maxPdeJobsPerUser=${maxPdeJobsPerUser} \ -Dvcell.ssh.cmd.cmdtimeout=${vcell_ssh_cmd_cmdtimeout} \ -Dvcell.ssh.cmd.restoretimeout=${vcell_ssh_cmd_restoretimeout} \ - -cp "./lib/*" cbit.vcell.message.server.dispatcher.SimulationDispatcher + -Dvcell.htc.memory.min.mb=${htcMinMemoryMB} \ + -Dvcell.htc.memory.max.mb=${htcMaxMemoryMB} \ + -Dvcell.htc.memory.pu.floor.mb=${htcPowerUserMemoryFloorMB} \ + -Dvcell.htc.memory.pu.max.mb=${htcPowerUserMemoryMaxMB} \ + -cp "./lib/*" cbit.vcell.message.server.dispatcher.SimulationDispatcherMain diff --git a/docker/build/Dockerfile-submit-dev b/docker/build/Dockerfile-submit-dev index 9df249c39b..438ebad206 100644 --- a/docker/build/Dockerfile-submit-dev +++ b/docker/build/Dockerfile-submit-dev @@ -91,7 +91,11 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \ vcell_ssh_cmd_cmdtimeout="cmdSrvcSshCmdTimeoutMS-not-set" \ vcell_ssh_cmd_restoretimeout="cmdSrvcSshCmdRestoreTimeoutFactor-not-set" \ simdatadir_archive_external="simdatadir_archive_external-not-set" \ - simdatadir_archive_internal="simdatadir_archive_internal-not-set" + 
simdatadir_archive_internal="simdatadir_archive_internal-not-set" \ + htcMinMemoryMB="htc-min-memory-not-set" \ + htcMaxMemoryMB="htc-max-memory-not-set" \ + htcPowerUserMemoryFloorMB="htc-power-user-memory-floor-not-set" \ + htcPowerUserMemoryMaxMB="htc-power-user-memory-max-not-set" ENV jmspswdfile=/run/secrets/jmspswd \ jmsrestpswdfile=/run/secrets/jmsrestpswd \ @@ -170,4 +174,8 @@ ENTRYPOINT java \ -Dvcell.simdatadir.archive.external=${simdatadir_archive_external} \ -Dvcell.ssh.cmd.cmdtimeout=${vcell_ssh_cmd_cmdtimeout} \ -Dvcell.ssh.cmd.restoretimeout=${vcell_ssh_cmd_restoretimeout} \ + -Dvcell.htc.memory.min.mb=${htcMinMemoryMB} \ + -Dvcell.htc.memory.max.mb=${htcMaxMemoryMB} \ + -Dvcell.htc.memory.pu.floor.mb=${htcPowerUserMemoryFloorMB} \ + -Dvcell.htc.memory.pu.max.mb=${htcPowerUserMemoryMaxMB} \ -cp "./lib/*" cbit.vcell.message.server.batch.sim.HtcSimulationWorker diff --git a/docker/build/build.sh b/docker/build/build.sh index 600c90beca..e63d040cd8 100755 --- a/docker/build/build.sh +++ b/docker/build/build.sh @@ -130,6 +130,8 @@ build_webapp() { if [[ $? -ne 0 ]]; then echo "failed to build prod"; exit 1; fi build_webapp_common island if [[ $? -ne 0 ]]; then echo "failed to build island"; exit 1; fi + build_webapp_common remote + if [[ $? -ne 0 ]]; then echo "failed to build remote"; exit 1; fi } build_batch() { diff --git a/vcell-core/src/main/java/cbit/rmi/event/WorkerEvent.java b/vcell-core/src/main/java/cbit/rmi/event/WorkerEvent.java index 3c5c4a2392..660d7e9ce6 100644 --- a/vcell-core/src/main/java/cbit/rmi/event/WorkerEvent.java +++ b/vcell-core/src/main/java/cbit/rmi/event/WorkerEvent.java @@ -19,6 +19,8 @@ import cbit.vcell.solver.VCSimulationIdentifier; import cbit.vcell.solver.server.SimulationMessage; +import java.util.ArrayList; + /** * Insert the type's description here. 
* Creation date: (2/5/2004 12:35:20 PM) @@ -36,6 +38,10 @@ public class WorkerEvent extends MessageEvent { public static final int JOB_WORKER_EXIT_NORMAL = 1015; public static final int JOB_WORKER_EXIT_ERROR = 1016; + public static final ArrayList ALL_JOB_EVENTS = new ArrayList<>(){{add(JOB_ACCEPTED); + add(JOB_STARTING); add(JOB_DATA); add(JOB_PROGRESS); add(JOB_FAILURE); add(JOB_COMPLETED); add(JOB_WORKER_ALIVE); + add(JOB_WORKER_EXIT_NORMAL); add(JOB_WORKER_EXIT_ERROR);}}; + private VCSimulationIdentifier vcSimulationIdentifier = null; private int jobIndex = -1; private String hostName = null; diff --git a/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java b/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java index a8d6c5243c..2c5539362c 100644 --- a/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java +++ b/vcell-core/src/main/java/cbit/vcell/resource/PropertyLoader.java @@ -80,6 +80,10 @@ public static void setConfigProvider(VCellConfigProvider configProvider) { public static final String htcPbsHome = record("vcell.htc.pbs.home",ValueType.GEN); public static final String htcSgeHome = record("vcell.htc.sge.home",ValueType.GEN); public static final String htcNodeList = record("vcell.htc.nodelist",ValueType.GEN); + public static final String htcMinMemoryMB = record("vcell.htc.memory.min.mb", ValueType.INT); // minimum memory request in MB, currently 4g + public static final String htcMaxMemoryMB = record("vcell.htc.memory.max.mb", ValueType.INT); // maximum memory request in MB + public static final String htcPowerUserMemoryFloorMB = record("vcell.htc.memory.pu.floor.mb", ValueType.INT); // MIN memory allowed if declared to be a power user, currently 50g (Previously Existing Value) + public static final String htcPowerUserMemoryMaxMB = record("vcell.htc.memory.pu.max.mb", ValueType.INT); // MAX memory allowed if declared to be a power user public static final String htc_vcellfvsolver_docker_name = 
record("vcell.htc.vcellfvsolver.docker.name",ValueType.GEN); public static final String htc_vcellfvsolver_solver_list = record("vcell.htc.vcellfvsolver.solver.list",ValueType.GEN); diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java index 1ab3d64dfd..63575003d7 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/HtcSimulationWorker.java @@ -678,7 +678,11 @@ public static void main(String[] args) throws IOException { PropertyLoader.slurm_qos, PropertyLoader.slurm_partition_pu, PropertyLoader.slurm_reservation_pu, - PropertyLoader.slurm_qos_pu + PropertyLoader.slurm_qos_pu, + PropertyLoader.htcMinMemoryMB, + PropertyLoader.htcMaxMemoryMB, + PropertyLoader.htcPowerUserMemoryFloorMB, + PropertyLoader.htcPowerUserMemoryMaxMB }; diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/SolverPostprocessor.java b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/SolverPostprocessor.java index 8cd7c2667e..5adc524047 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/SolverPostprocessor.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/batch/sim/SolverPostprocessor.java @@ -146,7 +146,11 @@ private static Exception runPostprocessingCommands(String filename, Logger lg) { } private static final String POST_PROCESSOR_PROPERTIES[] = { PropertyLoader.primarySimDataDirInternalProperty, - PropertyLoader.secondarySimDataDirInternalProperty + PropertyLoader.secondarySimDataDirInternalProperty, + PropertyLoader.mongodbDatabase, + PropertyLoader.jmsSimHostInternal, + PropertyLoader.jmsSimPortInternal, + PropertyLoader.jmsBlobMessageUseMongo }; } diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcher.java 
b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcher.java index 09bbce31fb..d612019a4d 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcher.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcher.java @@ -29,21 +29,22 @@ import cbit.vcell.modeldb.AdminDBTopLevel; import cbit.vcell.modeldb.DatabaseServerImpl; import cbit.vcell.mongodb.VCMongoMessage; -import cbit.vcell.resource.OperatingSystemInfo; import cbit.vcell.resource.PropertyLoader; import cbit.vcell.server.*; import cbit.vcell.server.SimulationJobStatus.SchedulerStatus; import cbit.vcell.solver.Simulation; import cbit.vcell.solver.VCSimulationIdentifier; import com.google.gson.Gson; -import com.google.inject.Guice; -import com.google.inject.Injector; +import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.WriterAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; import org.vcell.db.ConnectionFactory; import org.vcell.db.DatabaseService; import org.vcell.db.KeyFactory; -import org.vcell.dependency.server.VCellServerModule; import org.vcell.util.DataAccessException; import org.vcell.util.PermissionException; import org.vcell.util.document.KeyValue; @@ -51,6 +52,7 @@ import org.vcell.util.document.VCellServerID; import org.vcell.util.exe.ExecutableException; +import java.io.StringWriter; import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.*; @@ -71,7 +73,9 @@ public class SimulationDispatcher { /** * minutes between zombie kill runs */ - public static final int ZOMBIE_MINUTES = 1; + public static final int ZOMBIE_MINUTES = 1; + // changed only for testing + static int INITIAL_ZOMBIE_DELAY = 0; /** * minutes between queue 
flushing */ @@ -79,7 +83,7 @@ public class SimulationDispatcher { /** * queue flush wait time */ - public static final long QUEUE_FLUSH_WAITIME = MessageConstants.MINUTE_IN_MS*5; + public final static long QUEUE_FLUSH_WAITIME = MessageConstants.MINUTE_IN_MS*5; private final VCMessagingService vcMessagingService_int; private final VCMessagingService vcMessagingService_sim; @@ -89,16 +93,17 @@ public class SimulationDispatcher { private final VCQueueConsumer simRequestConsumer_int; private final VCRpcMessageHandler rpcMessageHandler_int; - private final SimulationDispatcherEngine simDispatcherEngine = new SimulationDispatcherEngine(); + protected final SimulationDispatcherEngine simDispatcherEngine = new SimulationDispatcherEngine(); - private final DispatchThread dispatchThread; - private final SimulationMonitor simMonitor; + protected final DispatchThread dispatchThread; + protected final SimulationMonitor simMonitor; private final VCMessageSession dispatcherQueueSession_int; private final VCMessageSession clientStatusTopicSession_int; private final VCMessageSession simMonitorThreadSession_sim; private final HtcProxy htcProxy; public static Logger lg = LogManager.getLogger(SimulationDispatcher.class); + public final SimulationService simServiceImpl; public class SimulationServiceImpl implements SimulationService { @@ -144,8 +149,8 @@ public SimulationStatus startSimulation(User user, VCSimulationIdentifier vcSimu // wake up dispatcher thread if (dispatchThread!=null){ try { - synchronized (dispatchThread.notifyObject){ - dispatchThread.notifyObject.notify(); + synchronized (dispatchThread.dispatcherNotifyObject){ + dispatchThread.dispatcherNotifyObject.notify(); } }catch (IllegalMonitorStateException e){ lg.error("failed to notify dispatchThread",e); @@ -238,7 +243,8 @@ private void reloadSpecialUsers() { } public class DispatchThread extends Thread { - Object notifyObject = new Object(); + final Object dispatcherNotifyObject = new Object(); + final Object 
finishListener = new Object(); //used for tests public DispatchThread() { super(); @@ -316,7 +322,7 @@ public void run() { tempSimulationMap.put(simKey, sim); } if (lg.isDebugEnabled()) { - lg.debug("dispatching simKey="+vcSimID+", jobId="+jobStatus.getJobIndex()+", taskId="+jobStatus.getTaskID()); + lg.debug("dispatching simKey={}, jobId={}, taskId={}", vcSimID, jobStatus.getJobIndex(), jobStatus.getTaskID()); } simDispatcherEngine.onDispatch(sim, jobStatus, simulationDatabase, dispatcherQueueSession_int); bDispatchedAnyJobs = true; @@ -331,14 +337,19 @@ public void run() { } catch (Exception ex) { lg.error(ex.getMessage(), ex); } + finally { + synchronized (finishListener){ + finishListener.notify(); + } + } // if there are no messages or no qualified jobs or exceptions, sleep for a few seconds while // this will be interrupted if there is a start request. if (!bDispatchedAnyJobs){ - synchronized (notifyObject) { + synchronized (dispatcherNotifyObject) { try { long waitTime = 5 * MessageConstants.SECOND_IN_MS; - notifyObject.wait(waitTime); + dispatcherNotifyObject.wait(waitTime); } catch (InterruptedException ex) { lg.debug("Dispatch thread wait interrupted", ex); } @@ -354,30 +365,34 @@ public void run() { } class SimulationMonitor implements ThreadFactory, RejectedExecutionHandler { - private ScheduledThreadPoolExecutor executor; + protected final ScheduledThreadPoolExecutor executor; private int threadCount; + ZombieKiller initialZombieKiller = new ZombieKiller(); + QueueFlusher initialQueueFlusher = new QueueFlusher(); /** * synchronizes {@link SimulationDispatcher#onWorkerEventMessage(VCMessage, VCMessageSession)} and * {@link QueueFlusher#flushWorkerEventQueue()} */ - Object notifyObject = new Object(); + final Object monitorNotifyObject = new Object(); public SimulationMonitor( ) { threadCount = 1; executor = new ScheduledThreadPoolExecutor(2,this,this); - executor.scheduleAtFixedRate(new ZombieKiller( ), 0, ZOMBIE_MINUTES, TimeUnit.MINUTES); - 
executor.scheduleAtFixedRate(new QueueFlusher( ), 1,FLUSH_QUEUE_MINUTES,TimeUnit.MINUTES); + executor.scheduleAtFixedRate(initialZombieKiller, INITIAL_ZOMBIE_DELAY, ZOMBIE_MINUTES, TimeUnit.MINUTES); + executor.scheduleAtFixedRate(initialQueueFlusher, 1,FLUSH_QUEUE_MINUTES,TimeUnit.MINUTES); } /** * find and kill zombie processes */ class ZombieKiller implements Runnable { + public static final String noJob = "no jobStatus found in database for running htc job"; + public static final String newJobFound = "newer task found in database for running htc job"; + public static final String jobIsAlreadyDone = "jobStatus Done in database for running htc job"; @Override public void run() { try { traceThread(this); - Map runningJobs = htcProxy.getRunningJobs(); for (HtcJobInfo htcJobInfo : runningJobs.keySet()){ try { @@ -390,13 +405,13 @@ public void run() { String failureMessage = null; boolean killJob = false; if (simJobStatus==null){ - failureMessage = "no jobStatus found in database for running htc job"; + failureMessage = noJob; killJob = true; }else if (simTaskInfo.taskId < simJobStatus.getTaskID()){ - failureMessage = "newer task found in database for running htc job"; + failureMessage = newJobFound; killJob = true; }else if (simJobStatus.getSchedulerStatus().isDone()){ - failureMessage = "jobStatus Done in database for running htc job"; + failureMessage = jobIsAlreadyDone; if (simJobStatus.getSimulationExecutionStatus()==null){ killJob = true; }else{ @@ -409,9 +424,8 @@ public void run() { } if (killJob && HtcProxy.isMySimulationJob(htcJobInfo)){ if (lg.isWarnEnabled()) { - lg.warn("killing " + htcJobInfo + ", " + failureMessage); + lg.warn("killing {}; {}; {}", htcJobInfo, failureMessage, simJobStatus); } - VCMongoMessage.sendZombieJob(simJobStatus,failureMessage,htcJobInfo.getHtcJobID()); htcProxy.killJobSafe(htcJobInfo); } }catch (Exception e){ @@ -431,7 +445,10 @@ public void run() { /** * flush message queue */ - class QueueFlusher implements Runnable { + class 
QueueFlusher implements Runnable { + protected final static String timeOutFailure = "failed: timed out"; + protected final static String unreferencedFailure = "failed: unreferenced simulation"; + protected final Object finishListener = new Object(); //used for tests public void run() { try { traceThread(this); @@ -449,20 +466,24 @@ public void run() { abortStalledOrUnreferencedSimulationTasks(messageFlushTimeMS); } catch (Exception e1) { lg.error(e1.getMessage(), e1); + } finally { + synchronized (finishListener){ + finishListener.notify(); + } } } private void flushWorkerEventQueue() throws VCMessagingException{ VCMessage message = simMonitorThreadSession_sim.createObjectMessage(VCMongoMessage.getServiceStartupTime()); message.setStringProperty(VCMessagingConstants.MESSAGE_TYPE_PROPERTY,MessageConstants.MESSAGE_TYPE_FLUSH_VALUE); - synchronized (notifyObject) { + synchronized (monitorNotifyObject) { simMonitorThreadSession_sim.sendQueueMessage(VCellQueue.WorkerEventQueue, message, false, MessageConstants.MINUTE_IN_MS*5L); try { long startWaitTime = System.currentTimeMillis(); - notifyObject.wait(QUEUE_FLUSH_WAITIME); + monitorNotifyObject.wait(QUEUE_FLUSH_WAITIME); long endWaitTime = System.currentTimeMillis(); long elapsedFlushTime = endWaitTime-startWaitTime; - VCMongoMessage.sendInfo("flushed worker event queue: elapsedTime="+(elapsedFlushTime/1000.0)+" s"); + lg.info("flushed worker event queue: elapsedTime={} s", elapsedFlushTime / 1000.0); if (elapsedFlushTime >= QUEUE_FLUSH_WAITIME){ throw new VCMessagingException("worker event queue flush timed out (>"+QUEUE_FLUSH_WAITIME+" s), considerable message backlog?"); } @@ -514,11 +535,11 @@ private void abortStalledOrUnreferencedSimulationTasks(long messageFlushTimeMS) boolean bUnreferencedSimulation = unreferencedSimKeys.contains(activeJobStatus.getVCSimulationIdentifier().getSimulationKey()); if (bTimedOutSimulation || bUnreferencedSimulation){ - String failureMessage = (bTimedOutSimulation) ? 
("failed: timed out") : ("failed: unreferenced simulation"); - lg.info("obsolete job detected at timestampMS="+currentTimeMS+", status=(" + activeJobStatus + ")"); + String failureMessage = (bTimedOutSimulation) ? timeOutFailure : unreferencedFailure; + lg.info("obsolete job detected at timestampMS={}, status={}", currentTimeMS, activeJobStatus); //SimulationStateMachine simStateMachine = simDispatcherEngine.getSimulationStateMachine(activeJobStatus.getVCSimulationIdentifier().getSimulationKey(), activeJobStatus.getJobIndex()); // lg.debug(simStateMachine.show()); - VCMongoMessage.sendObsoleteJob(activeJobStatus,failureMessage); + lg.warn("{} {}", activeJobStatus, failureMessage); simDispatcherEngine.onSystemAbort(activeJobStatus, failureMessage, simulationDatabase, clientStatusTopicSession_int); if (activeJobStatus.getSimulationExecutionStatus()!=null && activeJobStatus.getSimulationExecutionStatus().getHtcJobID()!=null){ HtcJobID htcJobId = activeJobStatus.getSimulationExecutionStatus().getHtcJobID(); @@ -547,25 +568,38 @@ public Thread newThread(Runnable r) { } } - /** - * Scheduler constructor comment. 
- */ - public SimulationDispatcher() throws Exception { + public static SimulationDispatcher simulationDispatcherCreator(SimulationDatabase simulationDatabase, VCMessagingService messagingServiceInternal, + VCMessagingService messagingServiceSim, HtcProxy htcProxy, boolean startDispatcher){ + return new SimulationDispatcher(simulationDatabase, messagingServiceInternal, messagingServiceSim, htcProxy, startDispatcher); + } + + public static SimulationDispatcher simulationDispatcherCreator() throws SQLException, DataAccessException { ConnectionFactory conFactory = DatabaseService.getInstance().createConnectionFactory(); KeyFactory keyFactory = conFactory.getKeyFactory(); DatabaseServerImpl databaseServerImpl = new DatabaseServerImpl(conFactory, keyFactory); AdminDBTopLevel adminDbTopLevel = new AdminDBTopLevel(conFactory); - this.simulationDatabase = new SimulationDatabaseDirect(adminDbTopLevel, databaseServerImpl, true); + SimulationDatabase simulationDatabase = new SimulationDatabaseDirect(adminDbTopLevel, databaseServerImpl, true); - this.vcMessagingService_int = new VCMessagingServiceActiveMQ(); + VCMessagingService vcMessagingServiceInternal = new VCMessagingServiceActiveMQ(); String jmshost_int = PropertyLoader.getRequiredProperty(PropertyLoader.jmsIntHostInternal); int jmsport_int = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.jmsIntPortInternal)); - this.vcMessagingService_int.setConfiguration(new ServerMessagingDelegate(), jmshost_int, jmsport_int); + vcMessagingServiceInternal.setConfiguration(new ServerMessagingDelegate(), jmshost_int, jmsport_int); - this.vcMessagingService_sim = new VCMessagingServiceActiveMQ(); + VCMessagingService vcMessagingServiceSim = new VCMessagingServiceActiveMQ(); String jmshost_sim = PropertyLoader.getRequiredProperty(PropertyLoader.jmsSimHostInternal); int jmsport_sim = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.jmsSimPortInternal)); - 
this.vcMessagingService_sim.setConfiguration(new ServerMessagingDelegate(), jmshost_sim, jmsport_sim); + vcMessagingServiceSim.setConfiguration(new ServerMessagingDelegate(), jmshost_sim, jmsport_sim); + + return SimulationDispatcher.simulationDispatcherCreator(simulationDatabase, + vcMessagingServiceInternal, vcMessagingServiceSim, SlurmProxy.createRemoteProxy(), true); + } + + private SimulationDispatcher(SimulationDatabase simulationDatabase, VCMessagingService messagingServiceInternal, + VCMessagingService messagingServiceSim, HtcProxy htcProxy, boolean startDispatcher){ + this.simulationDatabase = simulationDatabase; + this.vcMessagingService_int = messagingServiceInternal; + this.vcMessagingService_sim = messagingServiceSim; + QueueListener workerEventListener = new QueueListener() { public void onQueueMessage(VCMessage vcMessage, VCMessageSession session) throws RollbackException { onWorkerEventMessage(vcMessage, session); @@ -579,7 +613,7 @@ public void onQueueMessage(VCMessage vcMessage, VCMessageSession session) throws // // set up consumer for Simulation Request (non-blocking RPC) messages // - SimulationService simServiceImpl = new SimulationServiceImpl(); + simServiceImpl = new SimulationServiceImpl(); VCMessageSelector simRequestSelector = null; threadName = "Sim Request Consumer"; @@ -591,17 +625,17 @@ public void onQueueMessage(VCMessage vcMessage, VCMessageSession session) throws this.dispatcherQueueSession_int = this.vcMessagingService_int.createProducerSession(); this.clientStatusTopicSession_int = this.vcMessagingService_int.createProducerSession(); - this.dispatchThread = new DispatchThread(); - this.dispatchThread.start(); this.simMonitorThreadSession_sim = this.vcMessagingService_sim.createProducerSession(); - this.simMonitor = new SimulationMonitor(); - this.htcProxy = SlurmProxy.createRemoteProxy(); - } + this.htcProxy = htcProxy; + // Wait until all resources are created to start separate threads - public void init() { - + this.simMonitor 
= new SimulationMonitor(); + this.dispatchThread = new DispatchThread(); + if (startDispatcher){ + this.dispatchThread.start(); + } } @@ -617,8 +651,8 @@ private void onWorkerEventMessage(VCMessage vcMessage, VCMessageSession session) if (vcMessage.propertyExists(VCMessagingConstants.MESSAGE_TYPE_PROPERTY) && vcMessage.getStringProperty(VCMessagingConstants.MESSAGE_TYPE_PROPERTY).equals(MessageConstants.MESSAGE_TYPE_FLUSH_VALUE)){ if (simMonitor!=null){ try { - synchronized (simMonitor.notifyObject){ - simMonitor.notifyObject.notify(); + synchronized (simMonitor.monitorNotifyObject){ + simMonitor.monitorNotifyObject.notify(); } }catch (IllegalMonitorStateException e){ lg.warn(e); @@ -657,57 +691,5 @@ private void traceThread(Object source) { " commencing run cycle at " + new SimpleDateFormat("k:m:s").format(new Date( )) ); } } - - /** - * Starts the application. - * @param args an array of command-line arguments - */ - public static void main(java.lang.String[] args) { - - if (args.length != 0) { - System.out.println("No arguments expected: " + SimulationDispatcher.class.getName()); - System.exit(1); - } - - try { - OperatingSystemInfo.getInstance(); - PropertyLoader.loadProperties(REQUIRED_SERVICE_PROPERTIES); - - Injector injector = Guice.createInjector(new VCellServerModule()); - - SimulationDispatcher simulationDispatcher = injector.getInstance(SimulationDispatcher.class); - simulationDispatcher.init(); - - } catch (Throwable e) { - lg.error("uncaught exception initializing SimulationDispatcher: "+e.getLocalizedMessage(), e); - System.exit(1); - } - } - - - private static final String REQUIRED_SERVICE_PROPERTIES[] = { - PropertyLoader.vcellServerIDProperty, - PropertyLoader.installationRoot, - PropertyLoader.dbConnectURL, - PropertyLoader.dbDriverName, - PropertyLoader.dbUserid, - PropertyLoader.dbPasswordFile, - PropertyLoader.userTimezone, - PropertyLoader.mongodbHostInternal, - PropertyLoader.mongodbPortInternal, - PropertyLoader.mongodbDatabase, - 
PropertyLoader.jmsIntHostInternal, - PropertyLoader.jmsIntPortInternal, - PropertyLoader.jmsSimHostInternal, - PropertyLoader.jmsSimPortInternal, - PropertyLoader.jmsUser, - PropertyLoader.jmsPasswordFile, - PropertyLoader.htcUser, - PropertyLoader.jmsBlobMessageUseMongo, - PropertyLoader.maxJobsPerScan, - PropertyLoader.maxOdeJobsPerUser, - PropertyLoader.maxPdeJobsPerUser, - PropertyLoader.slurm_partition - }; } diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherEngine.java b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherEngine.java index 7ee67d4c84..1af3550fc9 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherEngine.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherEngine.java @@ -1,175 +1,168 @@ -/* - * Copyright (C) 1999-2011 University of Connecticut Health Center - * - * Licensed under the MIT License (the "License"). - * You may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.opensource.org/licenses/mit-license.php - */ - -package cbit.vcell.message.server.dispatcher; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.vcell.util.DataAccessException; -import org.vcell.util.document.KeyValue; -import org.vcell.util.document.User; -import org.vcell.util.document.VCellServerID; - -import cbit.rmi.event.WorkerEvent; -import cbit.vcell.message.VCMessageSession; -import cbit.vcell.message.VCMessagingException; -import cbit.vcell.message.messages.StatusMessage; -import cbit.vcell.server.SimulationJobStatus; -import cbit.vcell.server.SimulationJobStatus.SchedulerStatus; -import cbit.vcell.server.UpdateSynchronizationException; -import cbit.vcell.solver.Simulation; -import cbit.vcell.solver.SimulationInfo; -import cbit.vcell.solver.VCSimulationIdentifier; -import cbit.vcell.solver.server.SimulationMessage; - -/** - * Insert the type's description here. - * Creation date: (10/18/2001 4:31:11 PM) - * @author: Jim Schaff - */ -public class SimulationDispatcherEngine { - public static final Logger lg = LogManager.getLogger(SimulationDispatcherEngine.class); - - private HashMap> simStateMachineHash = new HashMap>(); - - /** - * Scheduler constructor comment. 
- */ - public SimulationDispatcherEngine() { - } - - /** - * reset simulation state time stamps in case of transient error in getting running status - */ - void resetTimeStamps( ) { - long now = System.currentTimeMillis(); - for (List lst : simStateMachineHash.values()) { - for (SimulationStateMachine ssm: lst) { - ssm.setSolverProcessTimestamp(now); - } - } - } - - public SimulationStateMachine getSimulationStateMachine(KeyValue simulationKey, int jobIndex) { - List stateMachineList = simStateMachineHash.get(simulationKey); - if (stateMachineList==null){ - stateMachineList = new ArrayList(); - simStateMachineHash.put(simulationKey,stateMachineList); - } - for (SimulationStateMachine stateMachine : stateMachineList){ - if (stateMachine.getJobIndex() == jobIndex){ - return stateMachine; - } - } - SimulationStateMachine newStateMachine = new SimulationStateMachine(simulationKey, jobIndex); - stateMachineList.add(newStateMachine); - return newStateMachine; - } - - public void onDispatch(Simulation simulation, SimulationJobStatus simJobStatus, SimulationDatabase simulationDatabase, VCMessageSession dispatcherQueueSession) throws VCMessagingException, DataAccessException, SQLException{ - KeyValue simulationKey = simJobStatus.getVCSimulationIdentifier().getSimulationKey(); - SimulationStateMachine simStateMachine = getSimulationStateMachine(simulationKey, simJobStatus.getJobIndex()); - - simStateMachine.onDispatch(simulation, simJobStatus, simulationDatabase, dispatcherQueueSession); - } - - public void onStartRequest(VCSimulationIdentifier vcSimID, User user, int simulationScanCount, SimulationDatabase simulationDatabase, VCMessageSession session, VCMessageSession dispatcherQueueSession) throws VCMessagingException, DataAccessException, SQLException { - KeyValue simKey = vcSimID.getSimulationKey(); - - User.SpecialUser myUser = simulationDatabase.getUser(user.getName()); - boolean isAdmin = Arrays.asList(myUser.getMySpecials()).contains(User.SPECIAL_CLAIM.admins); - - 
SimulationInfo simulationInfo = null; - try { - simulationInfo = simulationDatabase.getSimulationInfo(user, simKey); - } catch (DataAccessException ex) { - if (lg.isWarnEnabled()) lg.warn("Bad simulation " + vcSimID); - StatusMessage message = new StatusMessage(new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, - SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Failed to dispatch simulation: "+ ex.getMessage()), null, null), user.getName(), null, null); - message.sendToClient(session); - return; - } - if (simulationInfo == null) { - if (lg.isWarnEnabled()) lg.warn("Can't start, simulation [" + vcSimID + "] doesn't exist in database"); - StatusMessage message = new StatusMessage(new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, - SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Can't start, simulation [" + vcSimID + "] doesn't exist"), null, null), user.getName(), null, null); - message.sendToClient(session); - return; - } - - if (!isAdmin && simulationScanCount > Integer.parseInt(cbit.vcell.resource.PropertyLoader.getRequiredProperty(cbit.vcell.resource.PropertyLoader.maxJobsPerScan))) { - if (lg.isWarnEnabled()) lg.warn("Too many simulations (" + simulationScanCount + ") for parameter scan." 
+ vcSimID); - StatusMessage message = new StatusMessage(new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, - SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Too many simulations (" + simulationScanCount + ") for parameter scan."), null, null), user.getName(), null, null); - message.sendToClient(session); - return; - } - - for (int jobIndex = 0; jobIndex < simulationScanCount; jobIndex++){ - SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, jobIndex); - try { - simStateMachine.onStartRequest(user, vcSimID, simulationDatabase, session); - }catch (UpdateSynchronizationException e){ - simStateMachine.onStartRequest(user, vcSimID, simulationDatabase, session); - } - } - } - - - public void onStopRequest(VCSimulationIdentifier vcSimID, User user, SimulationDatabase simulationDatabase, VCMessageSession session) throws DataAccessException, VCMessagingException, SQLException { - KeyValue simKey = vcSimID.getSimulationKey(); - - SimulationJobStatus[] allActiveSimJobStatusArray = simulationDatabase.getActiveJobs(VCellServerID.getSystemServerID()); - ArrayList simJobStatusArray = new ArrayList(); - for (SimulationJobStatus activeSimJobStatus : allActiveSimJobStatusArray){ - if (activeSimJobStatus.getVCSimulationIdentifier().getSimulationKey().equals(vcSimID.getSimulationKey())){ - simJobStatusArray.add(activeSimJobStatus); - } - } - for (SimulationJobStatus simJobStatus : simJobStatusArray){ - SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, simJobStatus.getJobIndex()); - try { - simStateMachine.onStopRequest(user, simJobStatus, simulationDatabase, session); - }catch (UpdateSynchronizationException e){ - simStateMachine.onStopRequest(user, simJobStatus, simulationDatabase, session); - } - } - } - - - public void onWorkerEvent(WorkerEvent workerEvent, SimulationDatabase simulationDatabase, VCMessageSession session) { - try { - KeyValue simKey = 
workerEvent.getVCSimulationDataIdentifier().getSimulationKey(); - int jobIndex = workerEvent.getJobIndex(); - SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, jobIndex); - simStateMachine.onWorkerEvent(workerEvent, simulationDatabase, session); - } catch (Exception ex) { - lg.error(ex.getMessage(),ex); - } - } - - - public void onSystemAbort(SimulationJobStatus jobStatus, String failureMessage, SimulationDatabase simulationDatabase, VCMessageSession session) { - try { - KeyValue simKey = jobStatus.getVCSimulationIdentifier().getSimulationKey(); - int jobIndex = jobStatus.getJobIndex(); - SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, jobIndex); - simStateMachine.onSystemAbort(jobStatus, failureMessage, simulationDatabase, session); - } catch (Exception ex) { - lg.error(ex.getMessage(),ex); - } - } - -} +package cbit.vcell.message.server.dispatcher; + +import cbit.rmi.event.WorkerEvent; +import cbit.vcell.message.VCMessageSession; +import cbit.vcell.message.VCMessagingException; +import cbit.vcell.message.messages.StatusMessage; +import cbit.vcell.server.SimulationJobStatus; +import cbit.vcell.server.SimulationJobStatus.SchedulerStatus; +import cbit.vcell.server.UpdateSynchronizationException; +import cbit.vcell.solver.Simulation; +import cbit.vcell.solver.SimulationInfo; +import cbit.vcell.solver.VCSimulationIdentifier; +import cbit.vcell.solver.server.SimulationMessage; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.vcell.util.DataAccessException; +import org.vcell.util.document.KeyValue; +import org.vcell.util.document.User; +import org.vcell.util.document.VCellServerID; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; + +public class SimulationDispatcherEngine { + public static final Logger lg = LogManager.getLogger(SimulationDispatcherEngine.class); + + private HashMap> 
simStateMachineHash = new HashMap>(); + + /** + * reset simulation state time stamps in case of transient error in getting running status + */ + void resetTimeStamps( ) { + long now = System.currentTimeMillis(); + for (List lst : simStateMachineHash.values()) { + for (SimulationStateMachine ssm: lst) { + ssm.setSolverProcessTimestamp(now); + } + } + } + + public SimulationDispatcherEngine() { + } + + public SimulationStateMachine getSimulationStateMachine(KeyValue simulationKey, int jobIndex) { + List stateMachineList = simStateMachineHash.get(simulationKey); + if (stateMachineList==null){ + stateMachineList = new ArrayList(); + simStateMachineHash.put(simulationKey,stateMachineList); + } + for (SimulationStateMachine stateMachine : stateMachineList){ + if (stateMachine.getJobIndex() == jobIndex){ + return stateMachine; + } + } + SimulationStateMachine newStateMachine = new SimulationStateMachine(simulationKey, jobIndex); + stateMachineList.add(newStateMachine); + return newStateMachine; + } + + public void onDispatch(Simulation simulation, SimulationJobStatus simJobStatus, SimulationDatabase simulationDatabase, VCMessageSession dispatcherQueueSession) throws VCMessagingException, DataAccessException, SQLException { + KeyValue simulationKey = simJobStatus.getVCSimulationIdentifier().getSimulationKey(); + SimulationStateMachine simStateMachine = getSimulationStateMachine(simulationKey, simJobStatus.getJobIndex()); + + simStateMachine.onDispatch(simulation, simJobStatus, simulationDatabase, dispatcherQueueSession); + } + + public ArrayList onStartRequest(VCSimulationIdentifier vcSimID, User user, int simulationScanCount, SimulationDatabase simulationDatabase, VCMessageSession session, VCMessageSession dispatcherQueueSession) throws VCMessagingException, DataAccessException, SQLException { + KeyValue simKey = vcSimID.getSimulationKey(); + + User.SpecialUser myUser = simulationDatabase.getUser(user.getName()); + boolean isAdmin = 
Arrays.asList(myUser.getMySpecials()).contains(User.SPECIAL_CLAIM.admins); + + SimulationInfo simulationInfo = null; + SimulationJobStatus simJobStatus = null; + ArrayList status = new ArrayList<>(); + try { + simulationInfo = simulationDatabase.getSimulationInfo(user, simKey); + } catch (DataAccessException ex) { + if (lg.isWarnEnabled()) lg.warn("Bad simulation " + vcSimID); + simJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, + SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Failed to dispatch simulation: "+ ex.getMessage()), null, null); + StatusMessage message = new StatusMessage(simJobStatus, user.getName(), null, null); + message.sendToClient(session); + status.add(message); + return status; + } + if (simulationInfo == null) { + if (lg.isWarnEnabled()) lg.warn("Can't start, simulation [" + vcSimID + "] doesn't exist in database"); + simJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, + SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Can't start, simulation [" + vcSimID + "] doesn't exist"), null, null); + StatusMessage message = new StatusMessage(simJobStatus, user.getName(), null, null); + message.sendToClient(session); + status.add(message); + return status; + } + + if (!isAdmin && simulationScanCount > Integer.parseInt(cbit.vcell.resource.PropertyLoader.getRequiredProperty(cbit.vcell.resource.PropertyLoader.maxJobsPerScan))) { + if (lg.isWarnEnabled()) lg.warn("Too many simulations (" + simulationScanCount + ") for parameter scan." 
+ vcSimID); + simJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, -1, null, + SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("Too many simulations (" + simulationScanCount + ") for parameter scan."), null, null); + StatusMessage message = new StatusMessage(simJobStatus, user.getName(), null, null); + message.sendToClient(session); + status.add(message); + + return status; + } + + for (int simulationJobIndex = 0; simulationJobIndex < simulationScanCount; simulationJobIndex++){ + SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, simulationJobIndex); + try { + status.add(simStateMachine.onStartRequest(user, vcSimID, simulationDatabase, session)); + }catch (UpdateSynchronizationException e){ + status.add(simStateMachine.onStartRequest(user, vcSimID, simulationDatabase, session)); + } + } + return status; + } + + + public ArrayList onStopRequest(VCSimulationIdentifier vcSimID, User user, SimulationDatabase simulationDatabase, VCMessageSession session) throws DataAccessException, VCMessagingException, SQLException { + KeyValue simKey = vcSimID.getSimulationKey(); + + SimulationJobStatus[] allActiveSimJobStatusArray = simulationDatabase.getActiveJobs(VCellServerID.getSystemServerID()); + ArrayList simJobStatusArray = new ArrayList(); + for (SimulationJobStatus activeSimJobStatus : allActiveSimJobStatusArray){ + if (activeSimJobStatus.getVCSimulationIdentifier().getSimulationKey().equals(vcSimID.getSimulationKey())){ + simJobStatusArray.add(activeSimJobStatus); + } + } + ArrayList stoppedSimulations = new ArrayList<>(); + for (SimulationJobStatus simJobStatus : simJobStatusArray){ + SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, simJobStatus.getJobIndex()); + try { + stoppedSimulations.add(simStateMachine.onStopRequest(user, simJobStatus, simulationDatabase, session)); + }catch (UpdateSynchronizationException e){ + stoppedSimulations.add(simStateMachine.onStopRequest(user, 
simJobStatus, simulationDatabase, session)); + } + } + return stoppedSimulations; + } + + + public void onWorkerEvent(WorkerEvent workerEvent, SimulationDatabase simulationDatabase, VCMessageSession session) { + try { + KeyValue simKey = workerEvent.getVCSimulationDataIdentifier().getSimulationKey(); + int jobIndex = workerEvent.getJobIndex(); + SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, jobIndex); + simStateMachine.onWorkerEvent(workerEvent, simulationDatabase, session); + } catch (Exception ex) { + lg.error(ex.getMessage(),ex); + } + } + + + public void onSystemAbort(SimulationJobStatus jobStatus, String failureMessage, SimulationDatabase simulationDatabase, VCMessageSession session) { + try { + KeyValue simKey = jobStatus.getVCSimulationIdentifier().getSimulationKey(); + int jobIndex = jobStatus.getJobIndex(); + SimulationStateMachine simStateMachine = getSimulationStateMachine(simKey, jobIndex); + simStateMachine.onSystemAbort(jobStatus, failureMessage, simulationDatabase, session); + } catch (Exception ex) { + lg.error(ex.getMessage(),ex); + } + } +} diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherMain.java b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherMain.java new file mode 100644 index 0000000000..7de4d645b7 --- /dev/null +++ b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherMain.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 1999-2011 University of Connecticut Health Center + * + * Licensed under the MIT License (the "License"). + * You may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.opensource.org/licenses/mit-license.php + */ + +package cbit.vcell.message.server.dispatcher; + +import cbit.vcell.resource.OperatingSystemInfo; +import cbit.vcell.resource.PropertyLoader; +import com.google.inject.Guice; +import com.google.inject.Injector; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.vcell.dependency.server.VCellServerModule; + +/** + * Insert the type's description here. + * Creation date: (10/18/2001 4:31:11 PM) + * @author: Jim Schaff + */ +public class SimulationDispatcherMain { + public static Logger lg = LogManager.getLogger(SimulationDispatcher.class); + /** + * Starts the application. + * @param args an array of command-line arguments + */ + public static void main(String[] args) { + + if (args.length != 0) { + System.out.println("No arguments expected: " + SimulationDispatcherMain.class.getName()); + System.exit(1); + } + + try { + OperatingSystemInfo.getInstance(); + PropertyLoader.loadProperties(REQUIRED_SERVICE_PROPERTIES); + + Injector injector = Guice.createInjector(new VCellServerModule()); + + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(); + injector.injectMembers(simulationDispatcher); + } catch (Throwable e) { + lg.error("uncaught exception initializing SimulationDispatcher: "+e.getLocalizedMessage(), e); + System.exit(1); + } + } + + + private static final String REQUIRED_SERVICE_PROPERTIES[] = { + PropertyLoader.vcellServerIDProperty, + PropertyLoader.installationRoot, + PropertyLoader.dbConnectURL, + PropertyLoader.dbDriverName, + PropertyLoader.dbUserid, + PropertyLoader.dbPasswordFile, + PropertyLoader.userTimezone, + PropertyLoader.mongodbHostInternal, + PropertyLoader.mongodbPortInternal, + PropertyLoader.mongodbDatabase, + PropertyLoader.jmsIntHostInternal, + PropertyLoader.jmsIntPortInternal, + PropertyLoader.jmsSimHostInternal, + 
PropertyLoader.jmsSimPortInternal, + PropertyLoader.jmsUser, + PropertyLoader.jmsPasswordFile, + PropertyLoader.htcUser, + PropertyLoader.jmsBlobMessageUseMongo, + PropertyLoader.maxJobsPerScan, + PropertyLoader.maxOdeJobsPerUser, + PropertyLoader.maxPdeJobsPerUser, + PropertyLoader.slurm_partition, + PropertyLoader.htcPowerUserMemoryMaxMB, + PropertyLoader.htcMaxMemoryMB + }; + +} diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationStateMachine.java b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationStateMachine.java index 3df01647e5..1f318017d6 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationStateMachine.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/dispatcher/SimulationStateMachine.java @@ -1,659 +1,597 @@ package cbit.vcell.message.server.dispatcher; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Date; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.vcell.util.DataAccessException; -import org.vcell.util.document.KeyValue; -import org.vcell.util.document.User; -import org.vcell.util.document.VCellServerID; - import cbit.rmi.event.WorkerEvent; import cbit.vcell.field.FieldDataIdentifierSpec; -import cbit.vcell.message.VCMessage; -import cbit.vcell.message.VCMessageSession; -import cbit.vcell.message.VCMessagingConstants; -import cbit.vcell.message.VCMessagingException; -import cbit.vcell.message.VCellTopic; +import cbit.vcell.message.*; import cbit.vcell.message.messages.MessageConstants; import cbit.vcell.message.messages.SimulationTaskMessage; import cbit.vcell.message.messages.StatusMessage; import cbit.vcell.message.messages.WorkerEventMessage; -import cbit.vcell.message.server.htc.HtcProxy; -import cbit.vcell.message.server.htc.HtcProxy.MemLimitResults; import cbit.vcell.messaging.server.SimulationTask; import cbit.vcell.mongodb.VCMongoMessage; -import 
cbit.vcell.server.HtcJobID; -import cbit.vcell.server.RunningStateInfo; -import cbit.vcell.server.SimulationExecutionStatus; -import cbit.vcell.server.SimulationJobStatus; -import cbit.vcell.server.SimulationJobStatus.SchedulerStatus; -import cbit.vcell.server.SimulationQueueEntryStatus; -import cbit.vcell.server.SimulationStatus; -import cbit.vcell.server.UpdateSynchronizationException; -import cbit.vcell.solver.Simulation; -import cbit.vcell.solver.SimulationJob; -import cbit.vcell.solver.SolverDescription; -import cbit.vcell.solver.VCSimulationDataIdentifier; -import cbit.vcell.solver.VCSimulationIdentifier; +import cbit.vcell.resource.PropertyLoader; +import cbit.vcell.server.*; +import cbit.vcell.solver.*; import cbit.vcell.solver.server.SimulationMessage; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.vcell.util.DataAccessException; +import cbit.vcell.server.SimulationJobStatus.SchedulerStatus; +import cbit.vcell.server.SimulationJobStatus.SimulationQueueID; +import org.vcell.util.document.KeyValue; +import org.vcell.util.document.User; +import org.vcell.util.document.VCellServerID; + +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Date; public class SimulationStateMachine { - public static final Logger lg = LogManager.getLogger(SimulationStateMachine.class); - - // bitmapped counter so that allows 3 retries for each request (but preserves ordinal nature) - // bits 0-3: retry count - // bits 4-31: submit - // max retries must be less than 15. 
- public static final int TASKID_USERCOUNTER_MASK = SimulationStatus.TASKID_USERCOUNTER_MASK; - public static final int TASKID_RETRYCOUNTER_MASK = SimulationStatus.TASKID_RETRYCOUNTER_MASK; - public static final int TASKID_USERINCREMENT = SimulationStatus.TASKID_USERINCREMENT; - - public static final int PRIORITY_LOW = 0; - public static final int PRIORITY_DEFAULT = 5; - public static final int PRIORITY_HIGH = 9; - - private final KeyValue simKey; - private final int jobIndex; - - /** - * in memory storage of last time information about this job was received or status was unknown due - * to transient failure or system restart - */ - private long solverProcessTimestamp; - - public SimulationStateMachine(KeyValue simKey, int jobIndex){ - this.simKey = simKey; - this.jobIndex = jobIndex; - updateSolverProcessTimestamp(); - } - - /* - public SimulationStateMachine(SimulationJobStatus[] simJobStatus) { - this(simJobStatus[0].getVCSimulationIdentifier().getSimulationKey(),simJobStatus[0].getJobIndex()); - } - */ - - /** - * set in memory last update time to now - */ - private void updateSolverProcessTimestamp( ) { - solverProcessTimestamp = System.currentTimeMillis(); - } - - /** - * set to specified time (for mass setting) - * @param solverProcessTimestamp - */ - void setSolverProcessTimestamp(long solverProcessTimestamp) { - this.solverProcessTimestamp = solverProcessTimestamp; - } - - public KeyValue getSimKey() { - return simKey; - } - - public int getJobIndex() { - return jobIndex; - } - -// public List getStateMachineTransitions() { -// return stateMachineTransitions; -// } - -// public String show(){ -// StringBuffer buffer = new StringBuffer(); -// buffer.append("SimulationStateMachine for SimID='"+simKey+"', jobIndex="+jobIndex+"\n"); -// for (StateMachineTransition stateMachineTransition : stateMachineTransitions){ -// buffer.append(stateMachineTransition+"\n"); -// } -// return buffer.toString(); -// } - -// private void 
addStateMachineTransition(StateMachineTransition stateMachineTransition){ -// stateMachineTransitions.add(stateMachineTransition); -// } - - /** - * return last time a status update was received in memory - * @return time since information last changed about this task - */ - long getSolverProcessTimestamp() { - return solverProcessTimestamp; - } - - public synchronized void onWorkerEvent(WorkerEvent workerEvent, SimulationDatabase simulationDatabase, VCMessageSession session) throws DataAccessException, VCMessagingException, SQLException { - updateSolverProcessTimestamp(); - WorkerEventMessage workerEventMessage = new WorkerEventMessage(workerEvent); - VCMongoMessage.sendWorkerEvent(workerEventMessage); - - String userName = workerEvent.getUserName(); // as the filter of the client - int workerEventTaskID = workerEvent.getTaskID(); - - if (lg.isTraceEnabled()) lg.trace("onWorkerEventMessage[" + workerEvent.getEventTypeID() + "," + workerEvent.getSimulationMessage() + "][simid=" + workerEvent.getVCSimulationDataIdentifier() + ",job=" + jobIndex + ",task=" + workerEventTaskID + "]"); - - VCSimulationDataIdentifier vcSimDataID = workerEvent.getVCSimulationDataIdentifier(); - if (vcSimDataID == null) { - VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent - no SimID in message): "+workerEvent.show()); - return; - } - KeyValue simKey = vcSimDataID.getSimulationKey(); - SimulationJobStatus oldSimulationJobStatus = simulationDatabase.getLatestSimulationJobStatus(simKey, jobIndex); - - if (oldSimulationJobStatus == null){ - VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent, no current SimulationJobStatus: "+workerEvent.show()); - return; - } - if (oldSimulationJobStatus == null || oldSimulationJobStatus.getSchedulerStatus().isDone() || oldSimulationJobStatus.getTaskID() > workerEventTaskID){ - VCMongoMessage.sendInfo("onWorkerEvent() ignoring outdated WorkerEvent, (currState="+oldSimulationJobStatus.getSchedulerStatus().getDescription()+"): 
"+workerEvent.show()); - return; - } - int taskID = oldSimulationJobStatus.getTaskID(); - SchedulerStatus oldSchedulerStatus = oldSimulationJobStatus.getSchedulerStatus(); - - // - // status information (initialized as if new record) - // - Date startDate = null; - Date lastUpdateDate = null; - Date endDate = null; - boolean hasData = false; - HtcJobID htcJobID = null; - String computeHost = null; - VCellServerID vcServerID = VCellServerID.getSystemServerID(); - Date submitDate = null; - Date queueDate = null; - int queuePriority = PRIORITY_DEFAULT; - SimulationJobStatus.SimulationQueueID simQueueID = SimulationJobStatus.SimulationQueueID.QUEUE_ID_WAITING; - - - // - // update using previously stored status (if available). - // - SimulationExecutionStatus oldSimExeStatus = oldSimulationJobStatus.getSimulationExecutionStatus(); - if (oldSimExeStatus!=null && oldSimExeStatus.getStartDate()!=null){ - startDate = oldSimExeStatus.getStartDate(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.getLatestUpdateDate()!=null){ - lastUpdateDate = oldSimExeStatus.getLatestUpdateDate(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.getEndDate()!=null){ - endDate = oldSimExeStatus.getEndDate(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.hasData()){ - hasData = true; - } - if (oldSimExeStatus!=null && oldSimExeStatus.getComputeHost()!=null){ - computeHost = oldSimExeStatus.getComputeHost(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.getHtcJobID()!=null){ - htcJobID = oldSimExeStatus.getHtcJobID(); - } - vcServerID = oldSimulationJobStatus.getServerID(); - submitDate = oldSimulationJobStatus.getSubmitDate(); - SimulationQueueEntryStatus oldQueueStatus = oldSimulationJobStatus.getSimulationQueueEntryStatus(); - if (oldQueueStatus!=null && oldQueueStatus.getQueueDate()!=null){ - queueDate = oldQueueStatus.getQueueDate(); - } - if (oldQueueStatus!=null){ - queuePriority = oldQueueStatus.getQueuePriority(); - } - if (oldQueueStatus!=null && 
oldQueueStatus.getQueueID()!=null){ - simQueueID = oldQueueStatus.getQueueID(); - } - - // - // update using new information from event - // - if (workerEvent.getHtcJobID()!=null){ - htcJobID = workerEvent.getHtcJobID(); - } - if (workerEvent.getHostName()!=null){ - computeHost = workerEvent.getHostName(); - } - SimulationMessage workerEventSimulationMessage = workerEvent.getSimulationMessage(); - if (workerEventSimulationMessage.getHtcJobId()!=null){ - htcJobID = workerEventSimulationMessage.getHtcJobId(); - } - - - SimulationJobStatus newJobStatus = null; - - if (workerEvent.isAcceptedEvent()) { - // - // job message accepted by HtcSimulationWorker and sent to Scheduler (PBS/SGE/SLURM) (with a htcJobID) ... previous state should be "WAITING" - // - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued()) { - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - lastUpdateDate = new Date(); - startDate = lastUpdateDate; - endDate = null; - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.DISPATCHED, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - } - - } else if (workerEvent.isStartingEvent()) { - // only update database when the job event changes from started to running. The later progress event will not be recorded. 
- if ( oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()) { - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - lastUpdateDate = new Date(); - if (startDate == null){ - startDate = lastUpdateDate; - } - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.RUNNING, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - } - - } else if (workerEvent.isNewDataEvent()) { - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()){ - - if (!oldSchedulerStatus.isRunning() || simQueueID != SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL || hasData==false){ - - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - if (startDate == null){ - startDate = lastUpdateDate; - } - hasData = true; - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.RUNNING, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - } - } - - } else if (workerEvent.isProgressEvent() || workerEvent.isWorkerAliveEvent()) { - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()){ - - - if (!oldSchedulerStatus.isRunning() 
|| simQueueID != SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL){ - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - if (startDate == null){ - startDate = lastUpdateDate; - } - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.RUNNING, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - - }else if (oldSchedulerStatus.isRunning()){ - if (oldSimExeStatus != null) { -// Date latestUpdate = oldSimExeStatus.getLatestUpdateDate(); -// if (System.currentTimeMillis() - latestUpdate.getTime() >= MessageConstants.INTERVAL_PING_SERVER_MS * 3 / 5) { - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.RUNNING, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - } -// } - } - } - - } else if (workerEvent.isCompletedEvent()) { - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()){ - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - endDate = new Date(); - hasData = true; - - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, 
computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.COMPLETED, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - - } - - } else if (workerEvent.isFailedEvent()) { - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()){ - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - endDate = new Date(); - - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.FAILED, - taskID, workerEventSimulationMessage, newQueueStatus, newExeStatus); - - } - } else if (workerEvent.isWorkerExitErrorEvent()) { - if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued() || oldSchedulerStatus.isDispatched() || oldSchedulerStatus.isRunning()){ - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - // new exe status - endDate = new Date(); - - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - SimulationMessage simulationMessage = SimulationMessage.workerFailure("solver stopped unexpectedly, "+workerEventSimulationMessage.getDisplayMessage()); - - newJobStatus = new SimulationJobStatus(vcServerID, vcSimDataID.getVcSimID(), jobIndex, submitDate, SchedulerStatus.FAILED, - taskID, simulationMessage, newQueueStatus, newExeStatus); - - } - } - if (newJobStatus!=null){ - if 
(!newJobStatus.compareEqual(oldSimulationJobStatus) || workerEvent.isProgressEvent() || workerEvent.isNewDataEvent()) { - Double progress = workerEvent.getProgress(); - Double timepoint = workerEvent.getTimePoint(); - RunningStateInfo runningStateInfo = null; - if (progress != null && timepoint != null){ - runningStateInfo = new RunningStateInfo(progress,timepoint); - } - simulationDatabase.updateSimulationJobStatus(newJobStatus,runningStateInfo); - StatusMessage msgForClient = new StatusMessage(newJobStatus, userName, progress, timepoint); - msgForClient.sendToClient(session); - if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient); - } else { - simulationDatabase.updateSimulationJobStatus(newJobStatus); - StatusMessage msgForClient = new StatusMessage(newJobStatus, userName, null, null); - msgForClient.sendToClient(session); - if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient); - } - }else if (workerEvent.isProgressEvent() || workerEvent.isNewDataEvent()){ - Double progress = workerEvent.getProgress(); - Double timepoint = workerEvent.getTimePoint(); - RunningStateInfo runningStateInfo = null; - if (progress!=null && timepoint!=null){ - runningStateInfo = new RunningStateInfo(progress,timepoint); - } - simulationDatabase.updateSimulationJobStatus(oldSimulationJobStatus,runningStateInfo); - StatusMessage msgForClient = new StatusMessage(oldSimulationJobStatus, userName, progress, timepoint); - msgForClient.sendToClient(session); - if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient); - }else{ - VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent (currState="+oldSchedulerStatus.getDescription()+"): "+workerEvent.show()); - } + public static final Logger lg = LogManager.getLogger(SimulationStateMachine.class); + + // bitmapped counter so that allows 3 retries for each request (but preserves ordinal nature) + // bits 0-3: retry count + // bits 4-31: submit + // max retries must be less 
// than 15.
public static final int TASKID_USERCOUNTER_MASK = SimulationStatus.TASKID_USERCOUNTER_MASK;
public static final int TASKID_RETRYCOUNTER_MASK = SimulationStatus.TASKID_RETRYCOUNTER_MASK;
public static final int TASKID_USERINCREMENT = SimulationStatus.TASKID_USERINCREMENT;

public static final int PRIORITY_LOW = 0;
public static final int PRIORITY_DEFAULT = 5;
public static final int PRIORITY_HIGH = 9;

private final KeyValue simKey;
private final int jobIndex;

/**
 * in memory storage of last time information about this job was received or status was unknown due
 * to transient failure or system restart
 */
private long solverProcessTimestamp;

/**
 * Mutable snapshot of the status fields carried over from the previously stored job status.
 * Fields that the old status does not provide stay at their defaults (null / false), except
 * the queue priority (PRIORITY_DEFAULT) and queue id (QUEUE_ID_WAITING).
 *
 * Declared static because it never touches the enclosing SimulationStateMachine instance.
 */
private static class CurrentState {

    public Date startDate;
    public Date lastUpdateDate;
    public Date endDate;
    public boolean hasData;
    public HtcJobID htcJobID;
    public String computeHost;
    public VCellServerID vcServerID;
    public Date submitDate;
    public Date queueDate;
    public int queuePriority;
    public SimulationJobStatus.SimulationQueueID simQueueID;

    /**
     * @param oldSimExeStatus        previous execution status, may be null
     * @param oldQueueStatus         previous queue entry status, may be null
     * @param oldSimulationJobStatus previous job status, must not be null (it is dereferenced)
     */
    public CurrentState(SimulationExecutionStatus oldSimExeStatus,
                        SimulationQueueEntryStatus oldQueueStatus,
                        SimulationJobStatus oldSimulationJobStatus){
        if (oldSimExeStatus != null) {
            startDate = oldSimExeStatus.getStartDate();
            lastUpdateDate = oldSimExeStatus.getLatestUpdateDate();
            endDate = oldSimExeStatus.getEndDate();
            hasData = oldSimExeStatus.hasData();
            htcJobID = oldSimExeStatus.getHtcJobID();
            computeHost = oldSimExeStatus.getComputeHost();
        }
        vcServerID = oldSimulationJobStatus.getServerID();
        submitDate = oldSimulationJobStatus.getSubmitDate();

        queuePriority = PRIORITY_DEFAULT;
        simQueueID = SimulationJobStatus.SimulationQueueID.QUEUE_ID_WAITING;
        if (oldQueueStatus != null) {
            queueDate = oldQueueStatus.getQueueDate();
            queuePriority = oldQueueStatus.getQueuePriority();
            if (oldQueueStatus.getQueueID() != null) {
                simQueueID = oldQueueStatus.getQueueID();
            }
        }
    }
}

/**
 * Creates the state machine for one simulation job and stamps it with the current time.
 *
 * @param simKey   key of the simulation
 * @param jobIndex index of the job within the simulation
 */
public SimulationStateMachine(KeyValue simKey, int jobIndex){
    this.simKey = simKey;
    this.jobIndex = jobIndex;
    updateSolverProcessTimestamp();
}

/** Records "now" as the last time this job was heard from. */
private void updateSolverProcessTimestamp( ) {
    solverProcessTimestamp = System.currentTimeMillis();
}

/**
 * set to specified time (for mass setting)
 * @param solverProcessTimestamp
 */
void setSolverProcessTimestamp(long solverProcessTimestamp) {
    this.solverProcessTimestamp = solverProcessTimestamp;
}

public KeyValue getSimKey() {
    return simKey;
}

public int getJobIndex() {
    return jobIndex;
}


long getSolverProcessTimestamp() {
    return solverProcessTimestamp;
}

/**
 * Decides whether a worker event should be processed at all.
 *
 * An event is rejected (and an info message is sent through VCMongoMessage) when it carries
 * no simulation data identifier, when the database holds no job status for the event's
 * simulation key and this job index, or when the stored status is already done or belongs
 * to a newer task than the event.
 *
 * @return true if the event may be applied, false if it must be ignored
 */
protected boolean isWorkerEventOkay(WorkerEvent workerEvent, SimulationDatabase simulationDatabase) throws SQLException, DataAccessException {
    VCSimulationDataIdentifier vcSimDataID = workerEvent.getVCSimulationDataIdentifier();
    int workerEventTaskID = workerEvent.getTaskID();
    if (vcSimDataID == null) {
        VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent - no SimID in message): "+workerEvent.show());
        return false;
    }
    // NOTE(review): the lookup uses the key carried by the event, not this.simKey - confirm they always agree.
    KeyValue eventSimKey = vcSimDataID.getSimulationKey();
    SimulationJobStatus oldSimulationJobStatus = simulationDatabase.getLatestSimulationJobStatus(eventSimKey, jobIndex);

    if (oldSimulationJobStatus == null){
        VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent, no current SimulationJobStatus: "+workerEvent.show());
        return false;
    }
    if (oldSimulationJobStatus.getSchedulerStatus().isDone() || oldSimulationJobStatus.getTaskID() > workerEventTaskID){
        VCMongoMessage.sendInfo("onWorkerEvent() ignoring outdated WorkerEvent, (currState="+oldSimulationJobStatus.getSchedulerStatus().getDescription()+"): "+workerEvent.show());
        return false;
    }
    return true;
}

/** True while the scheduler status is waiting, queued, dispatched or running. */
private static boolean isAwaitingCompletion(SchedulerStatus schedulerStatus) {
    return schedulerStatus.isWaiting() || schedulerStatus.isQueued()
            || schedulerStatus.isDispatched() || schedulerStatus.isRunning();
}

/**
 * Builds a job status from the (already updated) snapshot. The queue entry always gets
 * QUEUE_ID_NULL, matching every transition produced by a worker event.
 */
private SimulationJobStatus assembleJobStatus(CurrentState state, VCSimulationDataIdentifier vcSimDataID, int taskID,
                                              SchedulerStatus schedulerStatus, SimulationMessage simulationMessage) {
    SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(state.queueDate, state.queuePriority, SimulationQueueID.QUEUE_ID_NULL);
    SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(state.startDate, state.computeHost, state.lastUpdateDate, state.endDate, state.hasData, state.htcJobID);
    return new SimulationJobStatus(state.vcServerID, vcSimDataID.getVcSimID(), jobIndex, state.submitDate, schedulerStatus,
            taskID, simulationMessage, newQueueStatus, newExeStatus);
}

/**
 * Computes the job status that results from applying a worker event to the stored status.
 *
 * @param workerEvent            the event received from the worker
 * @param oldSimulationJobStatus the status currently stored for this job (non-null)
 * @return the new status, or null when the event causes no transition from the old state
 */
private SimulationJobStatus produceStateFromWorkerEvent(
        WorkerEvent workerEvent,
        SimulationJobStatus oldSimulationJobStatus){

    SimulationExecutionStatus oldSimExeStatus = oldSimulationJobStatus.getSimulationExecutionStatus();
    SimulationQueueEntryStatus oldQueueStatus = oldSimulationJobStatus.getSimulationQueueEntryStatus();
    SchedulerStatus oldSchedulerStatus = oldSimulationJobStatus.getSchedulerStatus();
    VCSimulationDataIdentifier vcSimDataID = workerEvent.getVCSimulationDataIdentifier();
    int taskID = oldSimulationJobStatus.getTaskID();

    // start from the previously stored values ...
    CurrentState state = new CurrentState(oldSimExeStatus, oldQueueStatus, oldSimulationJobStatus);

    // ... then overlay whatever the event knows better
    if (workerEvent.getHtcJobID() != null){
        state.htcJobID = workerEvent.getHtcJobID();
    }
    if (workerEvent.getHostName() != null){
        state.computeHost = workerEvent.getHostName();
    }
    SimulationMessage workerEventSimulationMessage = workerEvent.getSimulationMessage();
    // the job id embedded in the message is applied last, so it overrides the one on the event
    if (workerEventSimulationMessage.getHtcJobId() != null){
        state.htcJobID = workerEventSimulationMessage.getHtcJobId();
    }

    if (workerEvent.isAcceptedEvent()) {
        //
        // job message accepted by HtcSimulationWorker and sent to Scheduler (PBS/SGE/SLURM) (with a htcJobID) ... previous state should be "WAITING"
        //
        if (oldSchedulerStatus.isWaiting() || oldSchedulerStatus.isQueued()) {
            state.lastUpdateDate = new Date();
            state.startDate = state.lastUpdateDate;
            state.endDate = null;
            return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.DISPATCHED, workerEventSimulationMessage);
        }
        return null;
    }

    // every remaining event type only applies while the job has not reached a final state
    if (!isAwaitingCompletion(oldSchedulerStatus)) {
        return null;
    }

    if (workerEvent.isStartingEvent()) {
        // only update database when the job event changes from started to running. The later progress event will not be recorded.
        state.lastUpdateDate = new Date();
        if (state.startDate == null){
            state.startDate = state.lastUpdateDate;
        }
        return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.RUNNING, workerEventSimulationMessage);
    }

    if (workerEvent.isNewDataEvent()) {
        if (!oldSchedulerStatus.isRunning() || state.simQueueID != SimulationQueueID.QUEUE_ID_NULL || !state.hasData){
            if (state.startDate == null){
                state.startDate = state.lastUpdateDate;
            }
            state.hasData = true;
            return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.RUNNING, workerEventSimulationMessage);
        }
        return null;
    }

    if (workerEvent.isProgressEvent() || workerEvent.isWorkerAliveEvent()) {
        if (!oldSchedulerStatus.isRunning() || state.simQueueID != SimulationQueueID.QUEUE_ID_NULL){
            if (state.startDate == null){
                state.startDate = state.lastUpdateDate;
            }
            return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.RUNNING, workerEventSimulationMessage);
        }
        // here the job is already running with a cleared queue id: re-emit only if an execution status existed
        if (oldSimExeStatus != null) {
            return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.RUNNING, workerEventSimulationMessage);
        }
        return null;
    }

    if (workerEvent.isCompletedEvent()) {
        state.endDate = new Date();
        state.hasData = true;
        return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.COMPLETED, workerEventSimulationMessage);
    }

    if (workerEvent.isFailedEvent()) {
        state.endDate = new Date();
        return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.FAILED, workerEventSimulationMessage);
    }

    if (workerEvent.isWorkerExitErrorEvent()) {
        state.endDate = new Date();
        SimulationMessage simulationMessage = SimulationMessage.workerFailure("solver stopped unexpectedly, "+workerEventSimulationMessage.getDisplayMessage());
        return assembleJobStatus(state, vcSimDataID, taskID, SchedulerStatus.FAILED, simulationMessage);
    }

    return null;
}

/**
 * Applies a worker event: validates it, derives the new job status, stores it and notifies the client.
 *
 * Progress and new-data events are stored together with a RunningStateInfo (when both progress
 * and time point are present) and are forwarded even when they cause no state transition.
 * Events that cause no transition and carry no progress are only reported through VCMongoMessage.
 */
public synchronized void onWorkerEvent(WorkerEvent workerEvent, SimulationDatabase simulationDatabase, VCMessageSession session) throws DataAccessException, VCMessagingException, SQLException {
    updateSolverProcessTimestamp();
    WorkerEventMessage workerEventMessage = new WorkerEventMessage(workerEvent);
    VCMongoMessage.sendWorkerEvent(workerEventMessage);

    String userName = workerEvent.getUserName(); // as the filter of the client
    int workerEventTaskID = workerEvent.getTaskID();

    if (lg.isTraceEnabled()) lg.trace("onWorkerEventMessage[" + workerEvent.getEventTypeID() + "," + workerEvent.getSimulationMessage() + "][simid=" + workerEvent.getVCSimulationDataIdentifier() + ",job=" + jobIndex + ",task=" + workerEventTaskID + "]");

    if (!isWorkerEventOkay(workerEvent, simulationDatabase)){
        return;
    }

    VCSimulationDataIdentifier vcSimDataID = workerEvent.getVCSimulationDataIdentifier();
    KeyValue eventSimKey = vcSimDataID.getSimulationKey();
    SimulationJobStatus oldSimulationJobStatus = simulationDatabase.getLatestSimulationJobStatus(eventSimKey, jobIndex);
    if (oldSimulationJobStatus == null){
        // the status is read a second time after validation; do not assume it is still there
        VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent, no current SimulationJobStatus: "+workerEvent.show());
        return;
    }

    SchedulerStatus oldSchedulerStatus = oldSimulationJobStatus.getSchedulerStatus();
    SimulationJobStatus newJobStatus = produceStateFromWorkerEvent(workerEvent, oldSimulationJobStatus);

    final SimulationJobStatus statusToPublish;
    final boolean includeRunningInfo;
    if (newJobStatus != null){
        statusToPublish = newJobStatus;
        includeRunningInfo = !newJobStatus.compareEqual(oldSimulationJobStatus) || workerEvent.isProgressEvent() || workerEvent.isNewDataEvent();
    }else if (workerEvent.isProgressEvent() || workerEvent.isNewDataEvent()){
        // no transition, but progress still has to reach the database and the client
        statusToPublish = oldSimulationJobStatus;
        includeRunningInfo = true;
    }else{
        VCMongoMessage.sendInfo("onWorkerEvent() ignoring WorkerEvent (currState="+oldSchedulerStatus.getDescription()+"): "+workerEvent.show());
        return;
    }

    Double progress = null;
    Double timepoint = null;
    if (includeRunningInfo){
        progress = workerEvent.getProgress();
        timepoint = workerEvent.getTimePoint();
        RunningStateInfo runningStateInfo = (progress != null && timepoint != null) ? new RunningStateInfo(progress,timepoint) : null;
        simulationDatabase.updateSimulationJobStatus(statusToPublish,runningStateInfo);
    }else{
        simulationDatabase.updateSimulationJobStatus(statusToPublish);
    }

    StatusMessage msgForClient = new StatusMessage(statusToPublish, userName, progress, timepoint);
    msgForClient.sendToClient(session);
    if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient);
//  addStateMachineTransition(new StateMachineTransition(new WorkerStateMachineEvent(taskID, workerEvent), oldSimulationJobStatus, newJobStatus));
}
message.sendToClient(session); - } - - public static SimulationJobStatus saveSimulationStartRequest(VCSimulationIdentifier vcSimID, int jobIndex, SimulationDatabase simulationDatabase) throws DataAccessException, SQLException { - // - // get latest simulation job task (if any). - // - SimulationJobStatus oldSimulationJobStatus = simulationDatabase.getLatestSimulationJobStatus(vcSimID.getSimulationKey(), jobIndex); - int oldTaskID = -1; - if (oldSimulationJobStatus != null){ - oldTaskID = oldSimulationJobStatus.getTaskID(); - } - // if already started by another thread - if (oldSimulationJobStatus != null && !oldSimulationJobStatus.getSchedulerStatus().isDone()) { - VCMongoMessage.sendInfo("onStartRequest("+ vcSimID.getID()+") ignoring start simulation request - (currentSimJobStatus:"+oldSimulationJobStatus.getSchedulerStatus().getDescription()+"): simID="+ vcSimID); - throw new RuntimeException("Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + oldTaskID + "] is running already ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); - } - - int newTaskID; - - if (oldTaskID > -1){ - // calculate new task - newTaskID = (oldTaskID & SimulationStatus.TASKID_USERCOUNTER_MASK) + SimulationStatus.TASKID_USERINCREMENT; - }else{ - // first task, start with 0 - newTaskID = 0; - } - - Date currentDate = new Date(); - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationJobStatus.SimulationQueueID.QUEUE_ID_WAITING); - - // new exe status - Date lastUpdateDate = new Date(); - String computeHost = null; - Date startDate = null; - Date endDate = null; - HtcJobID htcJobID = null; - boolean hasData = false; - - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - VCellServerID vcServerID = VCellServerID.getSystemServerID(); - Date submitDate = currentDate; - - 
SimulationJobStatus newJobStatus = new SimulationJobStatus(vcServerID, vcSimID, jobIndex, submitDate, SchedulerStatus.WAITING, - newTaskID, SimulationMessage.MESSAGE_JOB_WAITING, newQueueStatus, newExeStatus); - - simulationDatabase.insertSimulationJobStatus(newJobStatus); - return newJobStatus; - } - - - public synchronized void onDispatch(Simulation simulation, SimulationJobStatus oldSimulationJobStatus, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, DataAccessException, SQLException { - updateSolverProcessTimestamp(); - VCSimulationIdentifier vcSimID = oldSimulationJobStatus.getVCSimulationIdentifier(); - int taskID = oldSimulationJobStatus.getTaskID(); - - if (!oldSimulationJobStatus.getSchedulerStatus().isWaiting()) { - VCMongoMessage.sendInfo("onDispatch("+vcSimID.getID()+") Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + taskID + "] is already dispatched ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); - throw new RuntimeException("Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + taskID + "] is already dispatched ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); - } - - FieldDataIdentifierSpec[] fieldDataIdentifierSpecs = simulationDatabase.getFieldDataIdentifierSpecs(simulation); - //Check if user wants long running sims activated in SlurmProxy.generateScript(...) 
- //only happens if user is allowed to be power user (entry in vc_specialusers table) and - //has checked the 'timeoutDisabledCheckBox' in SolverTaskDescriptionAdvancedPanel on the client-side GUI - boolean isPowerUser = simulation.getSolverTaskDescription().isTimeoutDisabled();//Set from GUI - if(isPowerUser) {//Check if user allowed to be power user for 'special1' long running sims (see User.SPECIALS and vc_specialusers table) - User.SpecialUser myUser = simulationDatabase.getUser(simulation.getVersion().getOwner().getName()); - //'powerUsers' (previously called 'special1') assigned to users by request to allow long running sims - isPowerUser = isPowerUser && Arrays.asList(myUser.getMySpecials()).contains(User.SPECIAL_CLAIM.powerUsers); - } - SimulationTask simulationTask = new SimulationTask(new SimulationJob(simulation, jobIndex, fieldDataIdentifierSpecs), taskID,null,isPowerUser); - - double requiredMemMB = simulationTask.getEstimatedMemorySizeMB(); - //SimulationStateMachine ultimately instantiated from {vcellroot}/docker/build/Dockerfile-sched-dev by way of cbit.vcell.message.server.dispatcher.SimulationDispatcher - String vcellUserid = simulationTask.getUser().getName(); - KeyValue simID = simulationTask.getSimulationInfo().getSimulationVersion().getVersionKey(); - SolverDescription solverDescription = simulationTask.getSimulation().getSolverTaskDescription().getSolverDescription(); - - MemLimitResults allowableMemMB = HtcProxy.getMemoryLimit(vcellUserid,simID,solverDescription, requiredMemMB, isPowerUser); - - final SimulationJobStatus newSimJobStatus; - if (requiredMemMB > allowableMemMB.getMemLimit()) { - // - // fail the simulation - // - Date currentDate = new Date(); - // new queue status - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - SimulationExecutionStatus newSimExeStatus = new SimulationExecutionStatus(null, null, new Date(), null, 
false, null); - newSimJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(),vcSimID,jobIndex, - oldSimulationJobStatus.getSubmitDate(),SchedulerStatus.FAILED,taskID, - SimulationMessage.jobFailed("simulation required "+requiredMemMB+"MB of memory, only "+allowableMemMB.getMemLimit()+"MB allowed from "+allowableMemMB.getMemLimitSource()), - newQueueStatus,newSimExeStatus); - - simulationDatabase.updateSimulationJobStatus(newSimJobStatus); - - StatusMessage message = new StatusMessage(newSimJobStatus, simulation.getVersion().getOwner().getName(), null, null); - message.sendToClient(session); - - }else{ - // - // dispatch the simulation, new queue status - // - Date currentDate = new Date(); - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationJobStatus.SimulationQueueID.QUEUE_ID_SIMULATIONJOB); - SimulationExecutionStatus newSimExeStatus = new SimulationExecutionStatus(null, null, new Date(), null, false, null); - newSimJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(),vcSimID,jobIndex, - oldSimulationJobStatus.getSubmitDate(),SchedulerStatus.DISPATCHED,taskID, - SimulationMessage.MESSAGE_JOB_DISPATCHED, - newQueueStatus,newSimExeStatus); - - SimulationTaskMessage simTaskMessage = new SimulationTaskMessage(simulationTask); - simTaskMessage.sendSimulationTask(session); - - simulationDatabase.updateSimulationJobStatus(newSimJobStatus); - - StatusMessage message = new StatusMessage(newSimJobStatus, simulation.getVersion().getOwner().getName(), null, null); - message.sendToClient(session); - - } + } + + public synchronized StatusMessage onStartRequest(User user, VCSimulationIdentifier vcSimID, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, DataAccessException, SQLException { + + StatusMessage statusMessage; + if (!user.equals(vcSimID.getOwner())) { + lg.error(user + " is not authorized to start simulation (key=" + simKey + 
")"); + SimulationJobStatus simulationJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(), vcSimID, 0, null, + SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("You are not authorized to start this simulation!"), null, null); + statusMessage = new StatusMessage(simulationJobStatus, user.getName(), null, null); + VCMongoMessage.sendInfo("onStartRequest("+vcSimID.getID()+") ignoring start simulation request - wrong user): simID="+vcSimID); + statusMessage.sendToClient(session); + return statusMessage; + } + SimulationJobStatus newJobStatus = saveSimulationStartRequest(vcSimID, jobIndex, simulationDatabase); + statusMessage = new StatusMessage(newJobStatus, user.getName(), null, null); + statusMessage.sendToClient(session); + return statusMessage; + } + + public static SimulationJobStatus saveSimulationStartRequest(VCSimulationIdentifier vcSimID, int jobIndex, SimulationDatabase simulationDatabase) throws DataAccessException, SQLException { + // + // get latest simulation job task (if any). 
+ // + SimulationJobStatus oldSimulationJobStatus = simulationDatabase.getLatestSimulationJobStatus(vcSimID.getSimulationKey(), jobIndex); + int oldTaskID = -1; + if (oldSimulationJobStatus != null){ + oldTaskID = oldSimulationJobStatus.getTaskID(); + } + // if already started by another thread + if (oldSimulationJobStatus != null && !oldSimulationJobStatus.getSchedulerStatus().isDone()) { + VCMongoMessage.sendInfo("onStartRequest("+ vcSimID.getID()+") ignoring start simulation request - (currentSimJobStatus:"+oldSimulationJobStatus.getSchedulerStatus().getDescription()+"): simID="+ vcSimID); + throw new RuntimeException("Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + oldTaskID + "] is running already ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); + } + + int newTaskID; + + if (oldTaskID > -1){ + // calculate new task + newTaskID = (oldTaskID & SimulationStatus.TASKID_USERCOUNTER_MASK) + SimulationStatus.TASKID_USERINCREMENT; + }else{ + // first task, start with 0 + newTaskID = 0; + } + + Date currentDate = new Date(); + // new queue status + SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationQueueID.QUEUE_ID_WAITING); + + // new exe status + Date lastUpdateDate = new Date(); + boolean hasData = false; + String computeHost = null; + Date startDate = null; + Date endDate = null; + HtcJobID htcJobID = null; + + SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); + + VCellServerID vcServerID = VCellServerID.getSystemServerID(); + Date submitDate = currentDate; + + SimulationJobStatus newJobStatus = new SimulationJobStatus(vcServerID, vcSimID, jobIndex, submitDate, SchedulerStatus.WAITING, + newTaskID, SimulationMessage.MESSAGE_JOB_WAITING, newQueueStatus, newExeStatus); + + simulationDatabase.insertSimulationJobStatus(newJobStatus); + return newJobStatus; + } + + + 
public synchronized void onDispatch(Simulation simulation, SimulationJobStatus oldSimulationJobStatus, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, DataAccessException, SQLException { + updateSolverProcessTimestamp(); + VCSimulationIdentifier vcSimID = oldSimulationJobStatus.getVCSimulationIdentifier(); + int taskID = oldSimulationJobStatus.getTaskID(); + + if (!oldSimulationJobStatus.getSchedulerStatus().isWaiting()) { + VCMongoMessage.sendInfo("onDispatch("+vcSimID.getID()+") Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + taskID + "] is already dispatched ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); + throw new RuntimeException("Can't start, simulation[" + vcSimID + "] job [" + jobIndex + "] task [" + taskID + "] is already dispatched ("+oldSimulationJobStatus.getSchedulerStatus().getDescription()+")"); + } + + FieldDataIdentifierSpec[] fieldDataIdentifierSpecs = simulationDatabase.getFieldDataIdentifierSpecs(simulation); + //Check if user wants long running sims activated in SlurmProxy.generateScript(...) 
+ //only happens if user is allowed to be power user (entry in vc_specialusers table) and + //has checked the 'timeoutDisabledCheckBox' in SolverTaskDescriptionAdvancedPanel on the client-side GUI + boolean isPowerUser = simulation.getSolverTaskDescription().isTimeoutDisabled();//Set from GUI + if(isPowerUser) {//Check if user allowed to be power user for 'special1' long running sims (see User.SPECIALS and vc_specialusers table) + User.SpecialUser myUser = simulationDatabase.getUser(simulation.getVersion().getOwner().getName()); + //'powerUsers' (previously called 'special1') assigned to users by request to allow long running sims + isPowerUser = isPowerUser && Arrays.asList(myUser.getMySpecials()).contains(User.SPECIAL_CLAIM.powerUsers); + } + SimulationTask simulationTask = new SimulationTask(new SimulationJob(simulation, jobIndex, fieldDataIdentifierSpecs), taskID,null,isPowerUser); + + double estimatedMemMB = simulationTask.getEstimatedMemorySizeMB(); + double htcMinMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcMinMemoryMB)); + double htcMaxMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcMaxMemoryMB)); + if (isPowerUser){ + htcMinMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryFloorMB)); + htcMaxMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryMaxMB)); + } + double requestedMemoryMB = Math.max(estimatedMemMB, htcMinMemoryMB); + + final SimulationJobStatus newSimJobStatus; + if (requestedMemoryMB > htcMaxMemoryMB) { + // + // fail the simulation + // + Date currentDate = new Date(); + // new queue status + SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationQueueID.QUEUE_ID_NULL); + SimulationExecutionStatus newSimExeStatus = new SimulationExecutionStatus(null, null, new Date(), null, false, null); + newSimJobStatus = new 
SimulationJobStatus(VCellServerID.getSystemServerID(),vcSimID,jobIndex, + oldSimulationJobStatus.getSubmitDate(), SchedulerStatus.FAILED,taskID, + SimulationMessage.jobFailed("simulation required "+estimatedMemMB+"MB of memory, only "+htcMaxMemoryMB+"MB allowed"), + newQueueStatus,newSimExeStatus); + + simulationDatabase.updateSimulationJobStatus(newSimJobStatus); + + StatusMessage message = new StatusMessage(newSimJobStatus, simulation.getVersion().getOwner().getName(), null, null); + message.sendToClient(session); + + }else{ + // + // dispatch the simulation, new queue status + // + Date currentDate = new Date(); + SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentDate, PRIORITY_DEFAULT, SimulationQueueID.QUEUE_ID_SIMULATIONJOB); + SimulationExecutionStatus newSimExeStatus = new SimulationExecutionStatus(null, null, new Date(), null, false, null); + newSimJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(),vcSimID,jobIndex, + oldSimulationJobStatus.getSubmitDate(), SchedulerStatus.DISPATCHED,taskID, + SimulationMessage.MESSAGE_JOB_DISPATCHED, + newQueueStatus,newSimExeStatus); + + SimulationTaskMessage simTaskMessage = new SimulationTaskMessage(simulationTask); + simTaskMessage.sendSimulationTask(session); + + simulationDatabase.updateSimulationJobStatus(newSimJobStatus); + + StatusMessage message = new StatusMessage(newSimJobStatus, simulation.getVersion().getOwner().getName(), null, null); + message.sendToClient(session); + + } // addStateMachineTransition(new StateMachineTransition(new DispatchStateMachineEvent(taskID), oldSimulationJobStatus, newSimJobStatus)); - } - - public synchronized void onStopRequest(User user, SimulationJobStatus simJobStatus, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, DataAccessException, SQLException { - updateSolverProcessTimestamp(); - - if (!user.equals(simJobStatus.getVCSimulationIdentifier().getOwner())) { - lg.error(user + " is 
not authorized to stop simulation (key=" + simKey + ")"); - StatusMessage message = new StatusMessage(new SimulationJobStatus(VCellServerID.getSystemServerID(), simJobStatus.getVCSimulationIdentifier(), 0, null, - SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("You are not authorized to stop this simulation!"), null, null), user.getName(), null, null); - message.sendToClient(session); - VCMongoMessage.sendInfo("onStopRequest("+simJobStatus.getVCSimulationIdentifier()+") ignoring stop simulation request - wrong user)"); - return; - } - - // stop latest task if active - SchedulerStatus schedulerStatus = simJobStatus.getSchedulerStatus(); - int taskID = simJobStatus.getTaskID(); - - if (schedulerStatus.isActive()){ - SimulationQueueEntryStatus simQueueEntryStatus = simJobStatus.getSimulationQueueEntryStatus(); - SimulationExecutionStatus simExeStatus = simJobStatus.getSimulationExecutionStatus(); - SimulationJobStatus newJobStatus = new SimulationJobStatus(simJobStatus.getServerID(),simJobStatus.getVCSimulationIdentifier(),jobIndex,simJobStatus.getSubmitDate(), - SchedulerStatus.STOPPED,taskID,SimulationMessage.solverStopped("simulation stopped by user"),simQueueEntryStatus,simExeStatus); - - // - // send stopSimulation to serviceControl topic - // - if (lg.isTraceEnabled()) lg.trace("send " + MessageConstants.MESSAGE_TYPE_STOPSIMULATION_VALUE + " to " + VCellTopic.ServiceControlTopic.getName() + " topic"); - VCMessage msg = session.createMessage(); - msg.setStringProperty(VCMessagingConstants.MESSAGE_TYPE_PROPERTY, MessageConstants.MESSAGE_TYPE_STOPSIMULATION_VALUE); - msg.setLongProperty(MessageConstants.SIMKEY_PROPERTY, Long.parseLong(simKey + "")); - msg.setIntProperty(MessageConstants.JOBINDEX_PROPERTY, jobIndex); - msg.setIntProperty(MessageConstants.TASKID_PROPERTY, taskID); - msg.setStringProperty(VCMessagingConstants.USERNAME_PROPERTY, user.getName()); - if (simExeStatus.getHtcJobID()!=null){ - 
msg.setStringProperty(MessageConstants.HTCJOBID_PROPERTY, simExeStatus.getHtcJobID().toDatabase()); - } - session.sendTopicMessage(VCellTopic.ServiceControlTopic, msg); - - simulationDatabase.updateSimulationJobStatus(newJobStatus); -// addStateMachineTransition(new StateMachineTransition(new StopStateMachineEvent(taskID), simJobStatus, newJobStatus)); - - // update client - StatusMessage message = new StatusMessage(newJobStatus, user.getName(), null, null); - message.sendToClient(session); - } - } - - public synchronized void onSystemAbort(SimulationJobStatus oldJobStatus, String failureMessage, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, UpdateSynchronizationException, DataAccessException, SQLException { - updateSolverProcessTimestamp(); - - int taskID = oldJobStatus.getTaskID(); - - // - // status information (initialized as if new record) - // - Date startDate = null; - boolean hasData = false; - HtcJobID htcJobID = null; - String computeHost = null; - VCellServerID vcServerID = VCellServerID.getSystemServerID(); - Date submitDate = null; - Date queueDate = null; - int queuePriority = PRIORITY_DEFAULT; - - - // - // update using previously stored status (if available). 
- // - SimulationExecutionStatus oldSimExeStatus = oldJobStatus.getSimulationExecutionStatus(); - if (oldSimExeStatus!=null && oldSimExeStatus.getStartDate()!=null){ - startDate = oldSimExeStatus.getStartDate(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.hasData()){ - hasData = true; - } - if (oldSimExeStatus!=null && oldSimExeStatus.getComputeHost()!=null){ - computeHost = oldSimExeStatus.getComputeHost(); - } - if (oldSimExeStatus!=null && oldSimExeStatus.getHtcJobID()!=null){ - htcJobID = oldSimExeStatus.getHtcJobID(); - } - vcServerID = oldJobStatus.getServerID(); - submitDate = oldJobStatus.getSubmitDate(); - SimulationQueueEntryStatus oldQueueStatus = oldJobStatus.getSimulationQueueEntryStatus(); - if (oldQueueStatus!=null && oldQueueStatus.getQueueDate()!=null){ - queueDate = oldQueueStatus.getQueueDate(); - } - if (oldQueueStatus!=null){ - queuePriority = oldQueueStatus.getQueuePriority(); - } - - SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(queueDate, queuePriority, SimulationJobStatus.SimulationQueueID.QUEUE_ID_NULL); - - Date endDate = new Date(); - Date lastUpdateDate = new Date(); - - SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(startDate, computeHost, lastUpdateDate, endDate, hasData, htcJobID); - - SimulationJobStatus newJobStatus = new SimulationJobStatus(vcServerID, oldJobStatus.getVCSimulationIdentifier(), jobIndex, submitDate, SchedulerStatus.FAILED, - taskID, SimulationMessage.jobFailed(failureMessage), newQueueStatus, newExeStatus); - - simulationDatabase.updateSimulationJobStatus(newJobStatus); + } + + public synchronized StatusMessage onStopRequest(User user, SimulationJobStatus simJobStatus, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, DataAccessException, SQLException { + updateSolverProcessTimestamp(); + + StatusMessage statusMessage; + if (!user.equals(simJobStatus.getVCSimulationIdentifier().getOwner())) { + lg.error(user + " 
is not authorized to stop simulation (key=" + simKey + ")"); + SimulationJobStatus simulationJobStatus = new SimulationJobStatus(VCellServerID.getSystemServerID(), simJobStatus.getVCSimulationIdentifier(), 0, null, + SchedulerStatus.FAILED, 0, SimulationMessage.workerFailure("You are not authorized to stop this simulation!"), null, null); + + VCMongoMessage.sendInfo("onStopRequest("+simJobStatus.getVCSimulationIdentifier()+") ignoring stop simulation request - wrong user)"); + statusMessage = new StatusMessage(simulationJobStatus, user.getName(), null, null); + statusMessage.sendToClient(session); + return statusMessage; + } + + // stop latest task if active + SchedulerStatus schedulerStatus = simJobStatus.getSchedulerStatus(); + int taskID = simJobStatus.getTaskID(); + + if (schedulerStatus.isActive()){ + SimulationQueueEntryStatus simQueueEntryStatus = simJobStatus.getSimulationQueueEntryStatus(); + SimulationExecutionStatus simExeStatus = simJobStatus.getSimulationExecutionStatus(); + SimulationJobStatus newJobStatus = new SimulationJobStatus(simJobStatus.getServerID(),simJobStatus.getVCSimulationIdentifier(),jobIndex,simJobStatus.getSubmitDate(), + SchedulerStatus.STOPPED,taskID,SimulationMessage.solverStopped("simulation stopped by user"),simQueueEntryStatus,simExeStatus); + + + if (lg.isTraceEnabled()) lg.trace("send " + MessageConstants.MESSAGE_TYPE_STOPSIMULATION_VALUE + " to " + VCellTopic.ServiceControlTopic.getName() + " topic"); + + // + // send stopSimulation to serviceControl topic + // + VCMessage msg = session.createMessage(); + msg.setStringProperty(VCMessagingConstants.MESSAGE_TYPE_PROPERTY, MessageConstants.MESSAGE_TYPE_STOPSIMULATION_VALUE); + msg.setLongProperty(MessageConstants.SIMKEY_PROPERTY, Long.parseLong(simKey + "")); + msg.setIntProperty(MessageConstants.JOBINDEX_PROPERTY, jobIndex); + msg.setIntProperty(MessageConstants.TASKID_PROPERTY, taskID); + msg.setStringProperty(VCMessagingConstants.USERNAME_PROPERTY, user.getName()); + if 
(simExeStatus.getHtcJobID()!=null){ + msg.setStringProperty(MessageConstants.HTCJOBID_PROPERTY, simExeStatus.getHtcJobID().toDatabase()); + } + session.sendTopicMessage(VCellTopic.ServiceControlTopic, msg); + + simulationDatabase.updateSimulationJobStatus(newJobStatus); + statusMessage = new StatusMessage(newJobStatus, user.getName(), null, null); + statusMessage.sendToClient(session); + + return statusMessage; + } + return null; + } + + public synchronized void onSystemAbort(SimulationJobStatus oldJobStatus, String failureMessage, SimulationDatabase simulationDatabase, VCMessageSession session) throws VCMessagingException, UpdateSynchronizationException, DataAccessException, SQLException { + updateSolverProcessTimestamp(); + + int taskID = oldJobStatus.getTaskID(); + + // + // update using previously stored status (if available). + // + CurrentState currentState = new CurrentState(oldJobStatus.getSimulationExecutionStatus(), oldJobStatus.getSimulationQueueEntryStatus(), oldJobStatus); + + SimulationQueueEntryStatus newQueueStatus = new SimulationQueueEntryStatus(currentState.queueDate, currentState.queuePriority, SimulationQueueID.QUEUE_ID_NULL); + + Date endDate = new Date(); + Date lastUpdateDate = new Date(); + + SimulationExecutionStatus newExeStatus = new SimulationExecutionStatus(currentState.startDate, currentState.computeHost, lastUpdateDate, endDate, currentState.hasData, currentState.htcJobID); + + SimulationJobStatus newJobStatus = new SimulationJobStatus(currentState.vcServerID, oldJobStatus.getVCSimulationIdentifier(), jobIndex, currentState.submitDate, SchedulerStatus.FAILED, + taskID, SimulationMessage.jobFailed(failureMessage), newQueueStatus, newExeStatus); + + simulationDatabase.updateSimulationJobStatus(newJobStatus); // addStateMachineTransition(new StateMachineTransition(new AbortStateMachineEvent(taskID, failureMessage), oldJobStatus, newJobStatus)); - String userName = VCMessagingConstants.USERNAME_PROPERTY_VALUE_ALL; - StatusMessage 
msgForClient = new StatusMessage(newJobStatus, userName, null, null); - msgForClient.sendToClient(session); - if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient); - } - -// public int getLatestKnownTaskID() { -// int taskID = -1; -// for (StateMachineTransition transition : stateMachineTransitions){ -// if (transition.event.taskID!=null && transition.event.taskID>taskID){ -// taskID = transition.event.taskID; -// } -// if (transition.newSimJobStatus!=null && transition.newSimJobStatus.getTaskID()>taskID){ -// taskID = transition.newSimJobStatus.getTaskID(); -// } -// } -// return taskID; -// } -// + String userName = VCMessagingConstants.USERNAME_PROPERTY_VALUE_ALL; + StatusMessage msgForClient = new StatusMessage(newJobStatus, userName, null, null); + msgForClient.sendToClient(session); + if (lg.isTraceEnabled()) lg.trace("Send status to client: " + msgForClient); + } + } diff --git a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java index d1131ad19a..9b0245a154 100644 --- a/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java +++ b/vcell-server/src/main/java/cbit/vcell/message/server/htc/HtcProxy.java @@ -219,7 +219,7 @@ public static SimTaskInfo getSimTaskInfoFromSimJobName(String simJobName) throws } public static String createHtcSimJobName(SimTaskInfo simTaskInfo) { - return HTC_SIMULATION_JOB_NAME_PREFIX+simTaskInfo.simId.toString()+"_"+simTaskInfo.jobIndex+"_"+simTaskInfo.taskId; + return simulationJobNamePrefix()+simTaskInfo.simId.toString()+"_"+simTaskInfo.jobIndex+"_"+simTaskInfo.taskId; } public static String toUnixStyleText(String javaString) throws IOException { @@ -242,8 +242,6 @@ public static String toUnixStyleText(String javaString) throws IOException { public abstract String getSubmissionFileExtension(); public static class MemLimitResults { - private static final long FALLBACK_MEM_LIMIT_MB=4096; // MAX memory 
allowed if not set in limitFile, currently 4g - private static final long POWER_USER_MEMORY_FLOOR=51200; // MIN memory allowed if declared to be a power user, currently 50g private long memLimit; private String memLimitSource; public MemLimitResults(long memLimit, String memLimitSource) { @@ -257,223 +255,28 @@ public long getMemLimit() { public String getMemLimitSource() { return memLimitSource; } - private static MemLimitResults getFallbackMemLimitMB(SolverDescription solverDescription,double estimatedMemSizeMB, boolean isPowerUser) { - Long result = null; - String source = null; - try { - List solverMemLimits = Files.readAllLines(Paths.get(new File("/"+System.getProperty(PropertyLoader.htcLogDirInternal)+"/slurmMinMem.txt").getAbsolutePath())); - for (Iterator iterator = solverMemLimits.iterator(); iterator.hasNext();) { - String solverAndLimit = iterator.next().trim(); - if(solverAndLimit.length()==0 || solverAndLimit.startsWith("//")) { - continue; - } - StringTokenizer st = new StringTokenizer(solverAndLimit,":"); - String limitSolver = st.nextToken(); - if(limitSolver.equalsIgnoreCase("all") && result == null) {//use all if there is not solver matching name in slurmMinMem.txt - result = Long.parseLong(st.nextToken()); - source = "used slurmMinMem.txt all"; - }else if(solverDescription != null && limitSolver.equals(solverDescription.name())) {//use matching solver mem limit from file - result = Long.parseLong(st.nextToken()); - source = "used slurmMinMem.txt "+solverDescription.name(); - break; - } - } - if(result == null) {//empty slurmMinMem.txt - result = FALLBACK_MEM_LIMIT_MB; - source = "Empty used FALLBACK_MEM_LIMIT_MB"; - } - } catch (Exception e) { - LG.debug(e); - result = FALLBACK_MEM_LIMIT_MB; - source = "Exception "+e.getClass().getSimpleName()+" used FALLBACK_MEM_LIMIT_MB"; - } - if(estimatedMemSizeMB > result) {//Use estimated if bigger - result = (long)estimatedMemSizeMB; - source = "used Estimated"; + private static MemLimitResults 
getJobRequestedMemoryLimit(SolverDescription solverDescription, double estimatedMemSizeMB, boolean isPowerUser) { + long batchJobMemoryLimit = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcMinMemoryMB)); // MAX memory allowed if not set in limitFile, currently 4g + String detailedMessage = "default memory limit"; + + if(estimatedMemSizeMB > batchJobMemoryLimit) {//Use estimated if bigger + batchJobMemoryLimit = (long)estimatedMemSizeMB; + detailedMessage = "used Estimated"; } - if (isPowerUser && result < POWER_USER_MEMORY_FLOOR){ - result = (long)POWER_USER_MEMORY_FLOOR; - source = "poweruser's memory override"; + long powerUserMemory = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryFloorMB)); // MIN memory allowed if declared to be a power user, currently 50g + if (isPowerUser && batchJobMemoryLimit < powerUserMemory){ + batchJobMemoryLimit = powerUserMemory; + detailedMessage = "poweruser's memory override"; } - return new MemLimitResults(result, source); + return new MemLimitResults(batchJobMemoryLimit, detailedMessage); } } public static final boolean bDebugMemLimit = false; public static MemLimitResults getMemoryLimit(String vcellUserid, KeyValue simID, SolverDescription solverDescription ,double estimatedMemSizeMB, boolean isPowerUser) { - return MemLimitResults.getFallbackMemLimitMB(solverDescription, estimatedMemSizeMB*1.5, isPowerUser); -// boolean bUseEstimate = estimatedMemSizeMB >= MemLimitResults.getFallbackMemLimitMB(solverDescription); -// return new MemLimitResults((bUseEstimate?(long)estimatedMemSizeMB:MemLimitResults.getFallbackMemLimitMB(solverDescription)), (bUseEstimate?"used Estimated":"used FALLBACK_MEM_LIMIT")); -// //One of 5 limits are returned (ordered from highest to lowest priority): -// // MemoryMax:PerSimulation Has PropertyLoader.simPerUserMemoryLimitFile, specific user AND simID MATCHED in file (userid MemLimitMb simID) -// // MemoryMax:PerUser Has 
PropertyLoader.simPerUserMemoryLimitFile, specific user (but not simID) MATCHED in file (userid MemLimitMb '*') -// // MemoryMax:PerSolver Has PropertyLoader.simPerUserMemoryLimitFile, specific solverDescription (but not simID or user) MATCHED in file (solverName MemLimitMb '*') -// // MemoryMax:SimulationTask.getEstimatedMemorySizeMB() Has PropertyLoader.simPerUserMemoryLimitFile, no user or sim MATCHED in file ('defaultSimMemoryLimitMb' MemLimitMb '*') -// // estimated > MemoryMax:AllUsersMemLimit -// // MemoryMax:AllUsersMemLimit(defaultSimMemoryLimitMb) Has PropertyLoader.simPerUserMemoryLimitFile, no user or sim MATCHED in file ('defaultSimMemoryLimitMb' MemLimitMb '*') -// // estimated < MemoryMax:AllUsersMemLimit -// // MemoryMax:HtcProxy.MemLimitResults.FALLBACK_MEM_LIMIT No PropertyLoader.simPerUserMemoryLimitFile -// // estimated < FALLBACK -// -// Long defaultSimMemoryLimitMbFromFile = null; -// File memLimitFile = null; -// try { -// //${vcellroot}/docker/swarm/serverconfig-uch.sh->VCELL_SIMDATADIR_EXTERNAL=/share/apps/vcell3/users -// //${vcellroot}/docker/swarm/serverconfig-uch.sh-> VCELL_SIMDATADIR_HOST=/opt/vcelldata/users -// //${vcellroot}/docker/swarm/docker-compose.yml-> Volume map "${VCELL_SIMDATADIR_HOST}:/simdata" -// Long perUserMemMax = null; -// Long perSimMemMax = null; -// Long perSolverMax = null; -// String memLimitFileDirVal = System.getProperty(PropertyLoader.primarySimDataDirInternalProperty); -// String memLimitFileVal = System.getProperty(PropertyLoader.simPerUserMemoryLimitFile); -// if(memLimitFileDirVal != null && memLimitFileVal != null) { -// memLimitFile = new File(memLimitFileDirVal,memLimitFileVal); -// } -// if(memLimitFile != null && memLimitFile.exists()) { -// List perUserLimits = Files.readAllLines(Paths.get(memLimitFile.getAbsolutePath())); -// for (Iterator iterator = perUserLimits.iterator(); iterator.hasNext();) { -// String userAndLimit = iterator.next().trim(); -// if(userAndLimit.length()==0 || 
userAndLimit.startsWith("//")) { -// if(bDebugMemLimit){LG.trace("-----skipped '"+userAndLimit+"'");} -// continue; -// } -//// LG.trace("-----"+userAndLimit); -// -// StringTokenizer st = new StringTokenizer(userAndLimit); -// String limitUserid = st.nextToken(); -// if(limitUserid.equals(vcellUserid) || (solverDescription != null && limitUserid.equals(solverDescription.name()))) {//check user -// long memLimit = 0; -// try { -// memLimit = Long.parseLong(st.nextToken()); -// } catch (Exception e) { -// if(bDebugMemLimit){LG.debug("-----ERROR '"+userAndLimit+"' token memlimit not parsed");} -// //bad line in limit file, continue processing other lines -// //lg.debug(e); -// continue; -// } -// if(solverDescription != null && limitUserid.equals(solverDescription.name())) { -// perSolverMax = memLimit; -// if(bDebugMemLimit){LG.debug("-----"+"MATCH Solver "+userAndLimit);} -// continue; -// } -// //get simid -// String simSpecifier = null; -// try { -// simSpecifier = st.nextToken(); -// //check token is '*' or long -// if(!simSpecifier.equals("*") && Long.valueOf(simSpecifier).longValue() < 0 ) { -// throw new Exception(" token 'simSpecifier' expected to be '*' or simID"); -// } -// } catch (Exception e) { -// if(bDebugMemLimit){LG.debug("-----ERROR '"+userAndLimit+"' "+e.getClass().getName()+" "+e.getMessage());} -// //bad line in limit file, continue processing other lines -// //lg.debug(e); -// continue; -// } -// // * means all sims for that user, don't set if sim specific limit is already set -// if(simSpecifier.equals("*") && perSimMemMax == null) { -// perUserMemMax = memLimit;// use this unless overriden by specific simid -// if(bDebugMemLimit){LG.debug("-----"+"MATCH USER "+userAndLimit);} -// } -// //Set sim specific limit, set even if * limit has been set -// if(simID != null && simID.toString().equals(simSpecifier)) { -// perSimMemMax = memLimit;// use sim limit -// if(bDebugMemLimit){LG.debug("-----"+"MATCH SIM "+userAndLimit);} -// } -// }else 
if(limitUserid.equals("defaultSimMemoryLimitMb")) {//Master sim mem limit -// try { -// defaultSimMemoryLimitMbFromFile = Long.parseLong(st.nextToken()); -// if(bDebugMemLimit){LG.debug("-----"+"MATCH DEFAULT "+userAndLimit);} -// } catch (Exception e) { -// if(bDebugMemLimit){LG.debug("-----ERROR '"+userAndLimit+"' "+e.getClass().getName()+" "+e.getMessage());} -// //bad line in limit file, continue processing other lines -// //LG.debug(e); -// continue; -// } -// }else { -// if(bDebugMemLimit){LG.debug("-----"+"NO MATCH "+userAndLimit);} -// } -// } -// if(perUserMemMax != null || perSimMemMax != null) { -// long finalMax = (perSimMemMax!=null?perSimMemMax:perUserMemMax); -// if(bDebugMemLimit){LG.debug("Set memory limit for user '"+vcellUserid+"' to "+finalMax + (perSimMemMax!=null?" for simID="+simID:""));} -// return new MemLimitResults(finalMax, -// (perSimMemMax!=null? -// "MemoryMax(FILE PerSimulation):"+simID+",User='"+vcellUserid+"' from "+memLimitFile.getAbsolutePath(): -// "MemoryMax(FILE PerUser):'"+vcellUserid+"' from "+memLimitFile.getAbsolutePath())); -// }else if(perSolverMax != null) { -// if(perSolverMax == 0) {//Use estimated size always if solver had 0 for memory limit -// return new MemLimitResults( -// Math.max((long)Math.ceil(estimatedMemSizeMB*1.5), -// (defaultSimMemoryLimitMbFromFile!=null?defaultSimMemoryLimitMbFromFile:MemLimitResults.FALLBACK_MEM_LIMIT_MB)), -// "MemoryMax(FILE PerSolver ESTIMATED):'"+solverDescription.name()+"' from "+memLimitFile.getAbsolutePath()); -// }else { -// return new MemLimitResults(perSolverMax, "MemoryMax(FILE PerSolver):'"+solverDescription.name()+"' from "+memLimitFile.getAbsolutePath()); -// } -// } -// }else { -// if(bDebugMemLimit){LG.debug("-----MemLimitFile "+(memLimitFile==null?"not defined":memLimitFile.getAbsolutePath()+" not exist"));} -// } -// } catch (Exception e) { -// //ignore, try defaults -// LG.error(e); -// } -//// long estimatedMemSizeMBL = (long)Math.ceil(estimatedMemSizeMB*1.5); -// 
boolean bHasMemLimitFile = defaultSimMemoryLimitMbFromFile!=null; -// long maxAllowedMem = (bHasMemLimitFile?defaultSimMemoryLimitMbFromFile:MemLimitResults.FALLBACK_MEM_LIMIT_MB); -//// boolean bUseEstimated = (estimatedMemSizeMBL <= maxAllowedMem); -//// return new MemLimitResults(maxAllowedMem, -//// (bUseEstimated? -//// "MemoryMax(ESTIMATED):SimulationTask.getEstimatedMemorySizeMB()="+estimatedMemSizeMBL: -//// (bHasMemLimitFile? -//// "MemoryMax(FILE AllUsers):AllUsersMemLimit(defaultSimMemoryLimitMb) from "+memLimitFile.getAbsolutePath(): -//// "MemoryMax(HARDCODE):HtcProxy.MemLimitResults.FALLBACK_MEM_LIMIT_MB"))); -// return new MemLimitResults(maxAllowedMem, -// (bHasMemLimitFile? -// "MemoryMax(FILE AllUsers):AllUsersMemLimit(defaultSimMemoryLimitMb) from "+memLimitFile.getAbsolutePath(): -// "MemoryMax(HARDCODE):HtcProxy.MemLimitResults.FALLBACK_MEM_LIMIT_MB")); + return MemLimitResults.getJobRequestedMemoryLimit(solverDescription, estimatedMemSizeMB*1.5, isPowerUser); } -// public static boolean isStochMultiTrial(SimulationTask simTask) { -// return simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getSolverDescription() == SolverDescription.StochGibson && -// simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt() != null && -// !simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().isHistogram() && -// simTask.getSimulationJob().getSimulation().getSolverTaskDescription().getStochOpt().getNumOfTrials() > 1; -// -// } } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/DispatcherTestUtils.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/DispatcherTestUtils.java new file mode 100644 index 0000000000..379e34d114 --- /dev/null +++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/DispatcherTestUtils.java @@ -0,0 +1,179 @@ +package 
cbit.vcell.message.server.dispatcher; + +import cbit.vcell.geometry.Geometry; +import cbit.vcell.mapping.MathSymbolMapping; +import cbit.vcell.math.*; +import cbit.vcell.mathmodel.MathModel; +import cbit.vcell.parser.ExpressionBindingException; +import cbit.vcell.resource.PropertyLoader; +import cbit.vcell.server.HtcJobID; +import cbit.vcell.server.SimulationExecutionStatus; +import cbit.vcell.server.SimulationJobStatus; +import cbit.vcell.solver.MeshSpecification; +import cbit.vcell.solver.Simulation; +import cbit.vcell.solver.VCSimulationIdentifier; +import cbit.vcell.solver.server.SimulationMessage; +import org.joda.time.DateTime; +import org.vcell.util.DataAccessException; +import org.vcell.util.ISize; +import org.vcell.util.document.*; + +import java.beans.PropertyVetoException; +import java.math.BigDecimal; +import java.sql.SQLException; +import java.time.Instant; +import java.util.Date; + +public class DispatcherTestUtils { + private static String previousServerID = ""; + private static String previousHtcMax = ""; + private static String previousHtcMin = ""; + private static String previousHtcPowerFloor = ""; + private static String previousHtcPowerMax = ""; + private static String previousMongoBlob = ""; + private static String previousJMSIntHostProperty = ""; + private static String previousJMSIntPortProperty = ""; + private static String previousSimJMSIntHostProperty = ""; + private static String previousSimJMSIntPortProperty = ""; + private static String previousHTCHost = ""; + private static String previousHTCUser = ""; + private static String previousHTCUserKeyFile = ""; + private static String previousMaxJobsPerScan = ""; + private static String previousOdeJobsPerUser = ""; + private static String previousPdeJobsPerUser = ""; + + public static final VCellServerID testVCellServerID = VCellServerID.getServerID("test"); + public static final MockVCMessageSession testMessageSession = new MockVCMessageSession(); + public static final int jobIndex = 0; + 
public static final int taskID = 0; + public static final KeyValue simKey = new KeyValue("0"); + public static User alice = new User("Alice", new KeyValue("0")); + public static User bob = new User("Bob", new KeyValue("1")); + public static final VCSimulationIdentifier simID = new VCSimulationIdentifier(simKey, alice); + public static final HtcJobID htcJobID = new HtcJobID("2", HtcJobID.BatchSystemType.SLURM); + + public static void setRequiredProperties(){ + previousServerID = PropertyLoader.getProperty(PropertyLoader.vcellServerIDProperty, ""); + PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, testVCellServerID.toString()); + + previousHtcMax = PropertyLoader.getProperty(PropertyLoader.htcMaxMemoryMB, ""); + PropertyLoader.setProperty(PropertyLoader.htcMaxMemoryMB, "4096"); + + previousHtcMin = PropertyLoader.getProperty(PropertyLoader.htcMinMemoryMB, ""); + PropertyLoader.setProperty(PropertyLoader.htcMinMemoryMB, "1024"); + + previousHtcPowerFloor = PropertyLoader.getProperty(PropertyLoader.htcPowerUserMemoryFloorMB, ""); + PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryFloorMB, "51200"); + + previousHtcPowerMax = PropertyLoader.getProperty(PropertyLoader.htcPowerUserMemoryMaxMB, ""); + PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryMaxMB, "64000"); + + previousMongoBlob = PropertyLoader.getProperty(PropertyLoader.jmsBlobMessageUseMongo, ""); + PropertyLoader.setProperty(PropertyLoader.jmsBlobMessageUseMongo, ""); + + previousJMSIntHostProperty = PropertyLoader.getProperty(PropertyLoader.jmsIntHostInternal, ""); + PropertyLoader.setProperty(PropertyLoader.jmsIntHostInternal, "host"); + + previousJMSIntPortProperty = PropertyLoader.getProperty(PropertyLoader.jmsIntPortInternal, ""); + PropertyLoader.setProperty(PropertyLoader.jmsIntPortInternal, "80"); + + previousSimJMSIntHostProperty = PropertyLoader.getProperty(PropertyLoader.jmsSimHostInternal, ""); + PropertyLoader.setProperty(PropertyLoader.jmsSimHostInternal, 
"host"); + + previousSimJMSIntPortProperty = PropertyLoader.getProperty(PropertyLoader.jmsSimPortInternal, ""); + PropertyLoader.setProperty(PropertyLoader.jmsSimPortInternal, "80"); + + previousHTCHost = PropertyLoader.getProperty(PropertyLoader.htcHosts, ""); + PropertyLoader.setProperty(PropertyLoader.htcHosts, "host"); + + previousHTCUser = PropertyLoader.getProperty(PropertyLoader.htcUser, ""); + PropertyLoader.setProperty(PropertyLoader.htcUser, "user"); + + previousHTCUserKeyFile = PropertyLoader.getProperty(PropertyLoader.htcUserKeyFile, ""); + PropertyLoader.setProperty(PropertyLoader.htcUserKeyFile, "keyFile"); + + previousMaxJobsPerScan = PropertyLoader.getProperty(PropertyLoader.maxJobsPerScan, ""); + PropertyLoader.setProperty(PropertyLoader.maxJobsPerScan, "100"); + + previousPdeJobsPerUser = PropertyLoader.getProperty(PropertyLoader.maxPdeJobsPerUser, ""); + PropertyLoader.setProperty(PropertyLoader.maxPdeJobsPerUser, "100"); + + previousOdeJobsPerUser = PropertyLoader.getProperty(PropertyLoader.maxOdeJobsPerUser, ""); + PropertyLoader.setProperty(PropertyLoader.maxOdeJobsPerUser, "100"); + + PropertyLoader.setProperty(PropertyLoader.mongodbDatabase, "fakehost"); + } + + public static void restoreRequiredProperties(){ + PropertyLoader.setProperty(PropertyLoader.vcellServerIDProperty, previousServerID); + PropertyLoader.setProperty(PropertyLoader.htcMaxMemoryMB, previousHtcMax); + PropertyLoader.setProperty(PropertyLoader.htcMinMemoryMB, previousHtcMin); + PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryFloorMB, previousHtcPowerFloor); + PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryMaxMB, previousHtcPowerMax); + PropertyLoader.setProperty(PropertyLoader.jmsBlobMessageUseMongo, previousMongoBlob); + PropertyLoader.setProperty(PropertyLoader.jmsIntPortInternal, previousJMSIntPortProperty); + PropertyLoader.setProperty(PropertyLoader.jmsIntHostInternal, previousJMSIntHostProperty); + 
PropertyLoader.setProperty(PropertyLoader.jmsSimPortInternal, previousSimJMSIntPortProperty); + PropertyLoader.setProperty(PropertyLoader.jmsSimHostInternal, previousSimJMSIntHostProperty); + PropertyLoader.setProperty(PropertyLoader.htcHosts, previousHTCHost); + PropertyLoader.setProperty(PropertyLoader.htcUser, previousHTCUser); + PropertyLoader.setProperty(PropertyLoader.htcUserKeyFile, previousHTCUserKeyFile); + PropertyLoader.setProperty(PropertyLoader.maxJobsPerScan, previousMaxJobsPerScan); + PropertyLoader.setProperty(PropertyLoader.maxOdeJobsPerUser, previousOdeJobsPerUser); + PropertyLoader.setProperty(PropertyLoader.maxPdeJobsPerUser, previousPdeJobsPerUser); + } + + public static Simulation createMockSimulation(int iSizeX, int iSizeY, int iSizeZ, User user) throws PropertyVetoException, MathException, ExpressionBindingException { + VolVariable volVariable = new VolVariable("t", new Variable.Domain(new CompartmentSubDomain("t", 1))); + VolVariable volVariable2 = new VolVariable("b", new Variable.Domain(new CompartmentSubDomain("b", 2))); + MathSymbolMapping mathSymbolMapping = new MathSymbolMapping(); + Geometry geometry = new Geometry("T", 3); + MathModel mathModel = new MathModel(new Version("Test", user)); + MathDescription mathDescription = new MathDescription("Test", mathSymbolMapping); + mathDescription.setGeometry(new Geometry("T", 3)); + SimulationVersion simulationVersion = new SimulationVersion(new KeyValue("5"), "Test", user, + new GroupAccessNone(), null, new BigDecimal("2"), Date.from(Instant.now()), VersionFlag.fromInt(1), + "", new KeyValue("3")); + Simulation simulation = new Simulation(simulationVersion, + mathDescription, mathModel); + MeshSpecification meshSpecification = new MeshSpecification(geometry); + meshSpecification.setSamplingSize(new ISize(iSizeX, iSizeY, iSizeZ)); + simulation.setMeshSpecification(meshSpecification); + mathDescription.setAllVariables(new Variable[]{volVariable, volVariable2}); + return simulation; + } + + 
public static Simulation createMockSimulation(int iSizeX, int iSizeY, int iSizeZ) throws PropertyVetoException, MathException, ExpressionBindingException { + return createMockSimulation(iSizeX, iSizeY, iSizeZ, alice); + } + + public static void insertOrUpdateStatus(KeyValue simKey, int jobIndex, int taskID, User user, SimulationJobStatus.SchedulerStatus status, SimulationDatabase simulationDB) throws SQLException, DataAccessException { + SimulationJobStatus jobStatus = simulationDB.getLatestSimulationJobStatus(simKey, jobIndex); + VCSimulationIdentifier simID = new VCSimulationIdentifier(simKey, user); + SimulationJobStatus simulationJobStatus = new SimulationJobStatus(testVCellServerID, simID, jobIndex, Date.from(Instant.now()), status, taskID, + SimulationMessage.workerAccepted("accepted"), null, + new SimulationExecutionStatus(Date.from(Instant.now()), "", + Date.from(Instant.now()), Date.from(Instant.now()), false, htcJobID)); + if (jobStatus == null){ + simulationDB.insertSimulationJobStatus(simulationJobStatus); + } else { + simulationDB.updateSimulationJobStatus(simulationJobStatus); + } + } + + public static void insertOrUpdateStatus(KeyValue simKey, int jobIndex, int taskID, User user, SimulationDatabase simulationDB) throws SQLException, DataAccessException { + insertOrUpdateStatus(simKey, jobIndex, taskID, user, SimulationJobStatus.SchedulerStatus.RUNNING, simulationDB); + } + + public static void insertOrUpdateStatus(SimulationDatabase simulationDatabase, SimulationJobStatus.SchedulerStatus status) throws SQLException, DataAccessException { + insertOrUpdateStatus(simKey, jobIndex, taskID, alice, status, simulationDatabase); + } + + /** + Defaults to a running status. 
+ */ + public static void insertOrUpdateStatus(SimulationDatabase simulationDatabase) throws SQLException, DataAccessException { + insertOrUpdateStatus(simKey, jobIndex, taskID, alice, simulationDatabase); + } + +} diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockHtcProxy.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockHtcProxy.java new file mode 100644 index 0000000000..10a419ab69 --- /dev/null +++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockHtcProxy.java @@ -0,0 +1,94 @@ +package cbit.vcell.message.server.dispatcher; + +import cbit.vcell.message.server.cmd.CommandService; +import cbit.vcell.message.server.htc.HtcException; +import cbit.vcell.message.server.htc.HtcJobNotFoundException; +import cbit.vcell.message.server.htc.HtcJobStatus; +import cbit.vcell.message.server.htc.HtcProxy; +import cbit.vcell.message.server.htc.slurm.SlurmJobStatus; +import cbit.vcell.messaging.server.SimulationTask; +import cbit.vcell.server.HtcJobID; +import cbit.vcell.server.SimulationJobStatus; +import cbit.vcell.simdata.PortableCommand; +import cbit.vcell.solvers.ExecutableCommand; +import org.vcell.util.DataAccessException; +import org.vcell.util.exe.ExecutableException; + +import java.io.File; +import java.io.IOException; +import java.sql.SQLException; +import java.util.*; + +public class MockHtcProxy extends HtcProxy { + private final MockSimulationDB mockSimulationDB; + public MockHtcProxy(CommandService commandService, String htcUser, MockSimulationDB mockSimulationDB) { + super(commandService, htcUser); + this.mockSimulationDB = mockSimulationDB; + } + public final ArrayList jobsKilledSafely = new ArrayList<>(); + public final ArrayList jobsKilledUnsafely = new ArrayList<>(); + + @Override + public void killJobSafe(HtcJobInfo htcJobInfo) throws ExecutableException, HtcJobNotFoundException, HtcException { + jobsKilledSafely.add(htcJobInfo); + } + + @Override + public void killJobUnsafe(HtcJobID 
htcJobId) throws ExecutableException, HtcJobNotFoundException, HtcException { + jobsKilledUnsafely.add(htcJobId); + } + + @Override + public void killJobs(String htcJobSubstring) throws ExecutableException, HtcJobNotFoundException, HtcException { + + } + + @Override + public Map getJobStatus(List requestedHtcJobInfos) throws ExecutableException, IOException { + return Map.of(); + } + + @Override + public HtcJobID submitJob(String jobName, File sub_file_internal, File sub_file_external, ExecutableCommand.Container commandSet, int ncpus, double memSize, Collection postProcessingCommands, SimulationTask simTask, File primaryUserDirExternal) throws ExecutableException { + return null; + } + + @Override + public HtcJobID submitOptimizationJob(String jobName, File sub_file_internal, File sub_file_external, File optProblemInputFile, File optProblemOutputFile, File optReportFile) throws ExecutableException { + return null; + } + + @Override + public HtcProxy cloneThreadsafe() { + return null; + } + + @Override + public Map getRunningJobs() throws ExecutableException, IOException { + HashMap map = new HashMap<>(); + SimulationJobStatus[] statuses; + try { + statuses = mockSimulationDB.getActiveJobs(DispatcherTestUtils.testVCellServerID); + } catch (DataAccessException | SQLException e) { + throw new RuntimeException(e); + } + for (SimulationJobStatus status : statuses){ + if (status.getSchedulerStatus().isRunning()){ + HtcJobInfo jobInfo = new HtcJobInfo(DispatcherTestUtils.htcJobID, HtcProxy.createHtcSimJobName(new SimTaskInfo(status.getVCSimulationIdentifier().getSimulationKey(), status.getJobIndex(), status.getTaskID()))); + HtcJobStatus jobStatus = new HtcJobStatus(SlurmJobStatus.RUNNING); + map.put(jobInfo, jobStatus); + } + } + return map; + } + + @Override + public PartitionStatistics getPartitionStatistics() throws HtcException, ExecutableException, IOException { + return new PartitionStatistics(1, 20, 100); + } + + @Override + public String 
getSubmissionFileExtension() { + return ""; + } +} diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockMessagingService.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockMessagingService.java new file mode 100644 index 0000000000..b2a634c996 --- /dev/null +++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockMessagingService.java @@ -0,0 +1,52 @@ +package cbit.vcell.message.server.dispatcher; + +import cbit.vcell.message.*; + +import java.util.ArrayList; +import java.util.List; + +public class MockMessagingService implements VCMessagingService { + + public ArrayList messagingConsumers = new ArrayList<>(); + public final MockVCMessageSession mockVCMessageSession = new MockVCMessageSession(); + + @Override + public VCMessageSession createProducerSession() { + return mockVCMessageSession; + } + + @Override + public void addMessageConsumer(VCMessagingConsumer vcMessagingConsumer) { + messagingConsumers.add(vcMessagingConsumer); + } + + @Override + public void removeMessageConsumer(VCMessagingConsumer vcMessagingConsumer) { + + } + + @Override + public List getMessageConsumers() { + return List.of(); + } + + @Override + public void close() throws VCMessagingException { + + } + + @Override + public VCMessageSelector createSelector(String clientMessageFilter) { + return null; + } + + @Override + public VCMessagingDelegate getDelegate() { + return null; + } + + @Override + public void setConfiguration(VCMessagingDelegate delegate, String jmshost, int jmsport) { + + } +} diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockSimulationDB.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockSimulationDB.java new file mode 100644 index 0000000000..30fe536545 --- /dev/null +++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockSimulationDB.java @@ -0,0 +1,220 @@ +package cbit.vcell.message.server.dispatcher; + +import 
cbit.vcell.field.FieldDataIdentifierSpec; +import cbit.vcell.messaging.db.SimulationRequirements; +import cbit.vcell.server.*; +import cbit.vcell.solver.Simulation; +import cbit.vcell.solver.SimulationInfo; +import org.vcell.util.DataAccessException; +import org.vcell.util.ObjectNotFoundException; +import org.vcell.util.document.*; + +import java.math.BigDecimal; +import java.sql.SQLException; +import java.time.Instant; +import java.util.*; + +public class MockSimulationDB implements SimulationDatabase{ + + private HashMap> dbTable = new HashMap<>(); + + public static User.SpecialUser specialAdmin = new User.SpecialUser("Tom", new KeyValue("999"), new User.SPECIAL_CLAIM[User.SPECIAL_CLAIM.admins.ordinal()]); + public static User.SpecialUser powerUser = new User.SpecialUser("Tim", new KeyValue("2"), new User.SPECIAL_CLAIM[]{User.SpecialUser.SPECIAL_CLAIM.powerUsers}); + + private final HashMap users = new HashMap<>(){ + {put(specialAdmin.getName(), specialAdmin); put(DispatcherTestUtils.alice.getName(), DispatcherTestUtils.alice); + put(powerUser.getName(), powerUser);} + }; + + private final HashMap simulations = new HashMap<>(); + + private final Set unreferencedSimKeys = new HashSet<>(); + + // Return a latest simulation that differs in one of these ways + public enum BadLatestSimulation{ + HIGHER_TASK_ID, + RETURN_NULL, + IS_DONE, + DO_NOTHING + } + + public BadLatestSimulation badLatestSimulation = BadLatestSimulation.DO_NOTHING; + + + @Override + public SimulationJobStatus getLatestSimulationJobStatus(KeyValue simKey, int jobIndex) throws DataAccessException, SQLException { + ArrayList simList = dbTable.get(simKey.toString()); + if (simList == null){ + return null; + } + SimulationJobStatus latestSim = null; + for (SimulationJobStatus jobStatus : simList){ + boolean equalJobIndex = jobStatus.getJobIndex() == jobIndex; + boolean isLatestSimNull = latestSim == null; + if ((equalJobIndex && isLatestSimNull) || (!isLatestSimNull && equalJobIndex && 
latestSim.getSubmitDate().after(jobStatus.getSubmitDate()))){ + latestSim = jobStatus; + } + } + switch (badLatestSimulation){ + case RETURN_NULL -> { + return null; + } case HIGHER_TASK_ID -> { + SimulationJobStatus simulationJobStatus = new SimulationJobStatus(latestSim.getServerID(), latestSim.getVCSimulationIdentifier(), latestSim.getJobIndex(), + latestSim.getSubmitDate(), latestSim.getSchedulerStatus(), latestSim.getTaskID() + 1, latestSim.getSimulationMessage(), latestSim.getSimulationQueueEntryStatus(), latestSim.getSimulationExecutionStatus()); + return simulationJobStatus; + } case IS_DONE -> { + return new SimulationJobStatus(latestSim.getServerID(), latestSim.getVCSimulationIdentifier(), latestSim.getJobIndex(), latestSim.getSubmitDate(), SimulationJobStatus.SchedulerStatus.COMPLETED, + latestSim.getTaskID(), latestSim.getSimulationMessage(), latestSim.getSimulationQueueEntryStatus(), null); + }default -> { + return latestSim; + } + } + + } + + @Override + public void insertSimulationJobStatus(SimulationJobStatus simulationJobStatus) throws DataAccessException, SQLException { + String simKey = simulationJobStatus.getVCSimulationIdentifier().getSimulationKey().toString(); + if (dbTable.containsKey(simKey)){ + dbTable.get(simKey).add(simulationJobStatus); + } else { + dbTable.put(simKey, new ArrayList<>(){{add(simulationJobStatus);}}); + } + } + + @Override + public SimulationJobStatus[] getActiveJobs(VCellServerID vcellServerID) throws DataAccessException, SQLException { + ArrayList allActiveJobs = new ArrayList<>(); + for (ArrayList jobStatuses : dbTable.values()){ + for (SimulationJobStatus jobStatus: jobStatuses){ + if (jobStatus.getSchedulerStatus().isActive()){ + allActiveJobs.add(jobStatus); + } + } + } + return allActiveJobs.toArray(new SimulationJobStatus[]{}); + } + + @Override + public SimulationJobStatus[] queryJobs(SimpleJobStatusQuerySpec simStatusQuerySpec) throws ObjectNotFoundException, DataAccessException { + throw new 
ObjectNotFoundException(""); + } + + @Override + public Map getSimulationRequirements(Collection simKeys) throws SQLException { + HashMap map = new HashMap<>(); + for (KeyValue simKey : simKeys){ + map.put(simKey, new SimulationRequirements(simKey, 3)); + } + return map; + } + + @Override + public void updateSimulationJobStatus(SimulationJobStatus newSimulationJobStatus) throws DataAccessException, UpdateSynchronizationException, SQLException { + updateSimulationJobStatus(newSimulationJobStatus, null); + } + + @Override + public void updateSimulationJobStatus(SimulationJobStatus newSimulationJobStatus, StateInfo runningStateInfo) throws DataAccessException, UpdateSynchronizationException, SQLException { + String simKey = newSimulationJobStatus.getVCSimulationIdentifier().getSimulationKey().toString(); + ArrayList jobStatuses = dbTable.get(simKey); + for (int i = 0; i < jobStatuses.size(); i++){ + SimulationJobStatus jobStatus = jobStatuses.get(i); + boolean sameJob = jobStatus.getJobIndex() == newSimulationJobStatus.getJobIndex(); + if (sameJob){ + jobStatuses.set(i,newSimulationJobStatus); + break; + } + } + } + + @Override + public KeyValue[] getSimulationKeysFromBiomodel(KeyValue biomodelKey) throws SQLException, DataAccessException { + throw new SQLException(); + } + + @Override + public Simulation getSimulation(User user, KeyValue simKey) throws DataAccessException { + return simulations.get(simKey.toString() + user.getName()); + } + + @Override + public FieldDataIdentifierSpec[] getFieldDataIdentifierSpecs(Simulation sim) throws DataAccessException { + return new FieldDataIdentifierSpec[0]; + } + + @Override + public Set getUnreferencedSimulations() throws SQLException { + return unreferencedSimKeys; + } + + @Override + public User.SpecialUser getUser(String username) throws DataAccessException, SQLException { + User user = users.get(username); + if (user instanceof User.SpecialUser){ + return (User.SpecialUser) user; + } + User.SpecialUser specialUser = new 
User.SpecialUser(user.getName(), user.getID(), new User.SPECIAL_CLAIM[]{}); + return specialUser; + } + + @Override + public TreeMap> getSpecialUsers() throws DataAccessException, SQLException { + TreeMap> map = new TreeMap<>(); + TreeMap subMap = new TreeMap<>(new User.UserNameComparator()); + subMap.put(specialAdmin, "f"); + map.put(User.SPECIAL_CLAIM.admins, subMap); + return map; + } + + @Override + public SimulationInfo getSimulationInfo(User user, KeyValue simKey) throws ObjectNotFoundException, DataAccessException { + return mockSimulationInfo(user, simKey); + } + + @Override + public SimulationStatus[] getSimulationStatus(KeyValue[] simKeys) throws ObjectNotFoundException, DataAccessException { + return new SimulationStatus[0]; + } + + @Override + public SimulationStatus getSimulationStatus(KeyValue simulationKey) throws ObjectNotFoundException, DataAccessException { + SimulationJobStatus status = dbTable.get(simulationKey.toString()).get(0); + SimulationStatus simulationStatus = new SimulationStatus(new SimulationJobStatus[]{status}); + return simulationStatus; + } + + @Override + public SimpleJobStatus[] getSimpleJobStatus(User user, SimpleJobStatusQuerySpec simStatusQuerySpec) throws ObjectNotFoundException, DataAccessException { + throw new ObjectNotFoundException(""); + } + + + private SimulationInfo mockSimulationInfo(User user, KeyValue simKey){ + KeyValue versionKey = new KeyValue("22"); + KeyValue versionBranchPoint = new KeyValue("23"); + VersionFlag versionFlag = VersionFlag.fromInt(0); + KeyValue parentSimulationRef = new KeyValue("24"); + SimulationVersion simulationVersion = new SimulationVersion(versionKey, "Mock Sim Info", user, null, + versionBranchPoint, new BigDecimal(22), Date.from(Instant.now()), versionFlag, "Version annot", + parentSimulationRef); + SimulationInfo simulationInfo = new SimulationInfo(simKey, simulationVersion, VCellSoftwareVersion.fromString("50")); + return simulationInfo; + } + + public void resetDataBase(){ + 
dbTable = new HashMap<>(); + badLatestSimulation = BadLatestSimulation.DO_NOTHING; + unreferencedSimKeys.clear(); + simulations.clear(); + } + + public void insertSimulation(User user, Simulation sim){ + simulations.put(sim.getKey().toString() + user.getName(), sim); + } + + public void insertUnreferencedSimKey(KeyValue k){ + unreferencedSimKeys.add(k); + } + +} diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockVCMessageSession.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockVCMessageSession.java new file mode 100644 index 0000000000..8a35e48096 --- /dev/null +++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/MockVCMessageSession.java @@ -0,0 +1,106 @@ +package cbit.vcell.message.server.dispatcher; + +import cbit.vcell.message.*; +import cbit.vcell.message.jms.VCMessageJms; +import org.apache.activemq.command.ActiveMQMessage; +import org.apache.activemq.command.ActiveMQObjectMessage; +import org.apache.activemq.command.ActiveMQTextMessage; +import org.vcell.util.document.UserLoginInfo; + +import javax.jms.JMSException; +import javax.jms.ObjectMessage; +import javax.jms.TextMessage; +import java.io.Serializable; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Queue; + +public class MockVCMessageSession implements VCMessageSession { + + + public MockVCMessageSession(){ } + + private final HashMap> topics = new HashMap<>(){{ + put(VCellTopic.ClientStatusTopic, new LinkedList<>()); + put(VCellTopic.ServiceControlTopic, new LinkedList<>()); + }}; + private final HashMap> queues = new HashMap<>(){{ + put(VCellQueue.WorkerEventQueue, new LinkedList<>()); + put(VCellQueue.DbRequestQueue, new LinkedList<>()); + put(VCellQueue.DataRequestQueue, new LinkedList<>()); + put(VCellQueue.SimReqQueue, new LinkedList<>()); + put(VCellQueue.SimJobQueue, new LinkedList<>()); + }}; + + @Override + public Object sendRpcMessage(VCellQueue queue, VCRpcRequest vcRpcRequest, boolean 
returnRequired, long timeoutMS, String[] specialProperties, Object[] specialValues, UserLoginInfo userLoginInfo) throws VCMessagingException, VCMessagingInvocationTargetException { + return null; + } + + @Override + public void sendQueueMessage(VCellQueue queue, VCMessage message, Boolean persistent, Long clientTimeoutMS) throws VCMessagingException { + queues.get(queue).add(message); + } + + @Override + public void sendTopicMessage(VCellTopic topic, VCMessage message) throws VCMessagingException { + topics.get(topic).add(message); + } + + @Override + public void rollback() { + + } + + @Override + public void commit() { + + } + + @Override + public VCMessage createTextMessage(String text) { + TextMessage textMessage = new ActiveMQTextMessage(); + try { + textMessage.setText(text); + } catch (JMSException e) { + throw new RuntimeException(e); + } + return new VCMessageJms(textMessage, null); + } + + @Override + public VCMessage createMessage() { + return new VCMessageJms(new ActiveMQMessage(), null, null); + } + + @Override + public VCMessage createObjectMessage(Serializable object) { + ObjectMessage objectMessage = new ActiveMQObjectMessage(); + try { + objectMessage.setObjectProperty(VCMessageJms.BLOB_MESSAGE_FILE_NAME, ""); + } catch (JMSException e) { + throw new RuntimeException(e); + } + return new VCMessageJms(objectMessage, object, null); + + } + + @Override + public VCMessagingDelegate getDelegate() { + return null; + } + + @Override + public void close() { + + } + + public VCMessage getTopicMessage(VCellTopic vCellTopic){ + return topics.get(vCellTopic).remove(); + } + + public VCMessage getQueueMessage(VCellQueue vCellQueue){ + return queues.get(vCellQueue).remove(); + } + +} diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherTest.java new file mode 100644 index 0000000000..d9800d8fad --- /dev/null +++ 
b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationDispatcherTest.java @@ -0,0 +1,226 @@ +package cbit.vcell.message.server.dispatcher; + +import cbit.vcell.math.MathException; +import cbit.vcell.message.VCMessagingConstants; +import cbit.vcell.message.VCellTopic; +import cbit.vcell.message.messages.MessageConstants; +import cbit.vcell.parser.ExpressionBindingException; +import cbit.vcell.server.SimulationJobStatus; +import cbit.vcell.server.SimulationStatus; +import cbit.vcell.solver.Simulation; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.Logger; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.WriterAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.Configurator; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.spi.ExtendedLogger; +import org.junit.jupiter.api.*; +import org.vcell.util.DataAccessException; +import org.vcell.util.document.User; + +import java.beans.PropertyVetoException; +import java.io.IOException; +import java.io.StringWriter; +import java.sql.SQLException; + +@Tag("Fast") +public class SimulationDispatcherTest { + public static ExtendedLogger lg = LoggerContext.getContext().getLogger(SimulationDispatcher.class); + private final static User testUser = DispatcherTestUtils.alice; + private final MockSimulationDB mockSimulationDB = new MockSimulationDB(); + private final MockMessagingService mockMessagingServiceInternal = new MockMessagingService(); + private final MockMessagingService mockMessagingServiceSim = new MockMessagingService(); + private final MockHtcProxy mockHtcProxy = new MockHtcProxy(null, "htcUser", mockSimulationDB); + private static StringWriter logOutPut; + private static WriterAppender appender; + + @BeforeAll + public static void setSystemProperties(){ + 
DispatcherTestUtils.setRequiredProperties(); + + logOutPut = new StringWriter(); + appender = WriterAppender.newBuilder().setTarget(logOutPut).setName("Simulation Dispatcher Test").build(); + LoggerContext context = LoggerContext.getContext(false); + Configuration configuration = context.getConfiguration(); + configuration.addLoggerAppender((Logger) lg, appender); + } + + @AfterAll + public static void restoreSystemProperties() throws IOException { + DispatcherTestUtils.restoreRequiredProperties(); + appender.stop(); + logOutPut.close(); + } + + //################# Test Simulation Service Impl ####################### + // All the get functions withing SimulationDispatcher seem to be exercising the DB and not simulation control, so not tested + + @Test + public void onStartRequestTest() throws DataAccessException, SQLException { + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, false); + SimulationStatus simStatus = simulationDispatcher.simServiceImpl.startSimulation(testUser, DispatcherTestUtils.simID, 1); + SimulationJobStatus jobStatus = mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, 0); + Assertions.assertTrue(jobStatus.getSchedulerStatus().isWaiting()); + } + + @Test + public void onStopRequestTest() throws DataAccessException, SQLException { + DispatcherTestUtils.insertOrUpdateStatus(mockSimulationDB); + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, false); + SimulationStatus simStatus = simulationDispatcher.simServiceImpl.stopSimulation(testUser, DispatcherTestUtils.simID); + SimulationJobStatus jobStatus = mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, 0); + Assertions.assertTrue(jobStatus.getSchedulerStatus().isStopped()); + + String s = 
mockMessagingServiceInternal.mockVCMessageSession.getTopicMessage(VCellTopic.ServiceControlTopic).getStringProperty(VCMessagingConstants.MESSAGE_TYPE_PROPERTY); + Assertions.assertEquals(MessageConstants.MESSAGE_TYPE_STOPSIMULATION_VALUE, s); + } + + + //###################### Test Dispatcher Thread ########################### + + @Test + public void dispatcherThreadFailsJobsWithNoSimulationReference() throws SQLException, DataAccessException, InterruptedException { + DispatcherTestUtils.insertOrUpdateStatus(mockSimulationDB, SimulationJobStatus.SchedulerStatus.WAITING); + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, true); + SimulationDispatcher.DispatchThread thread = simulationDispatcher.dispatchThread; + synchronized (thread.dispatcherNotifyObject){ + thread.dispatcherNotifyObject.notify(); + } + + // Check that the simulation is in waiting, for the dispatcher hasn't consumed it's request yet + SimulationJobStatus jobStatus = mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, 0); + Assertions.assertTrue(jobStatus.getSchedulerStatus().isWaiting(), "Still waiting."); + + synchronized (thread.finishListener){ + thread.finishListener.wait(); + } + + // Makes sure that requests that have no simulation reference within the DB are failed + jobStatus = mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, 0); + Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed(), "Simulation gets aborted since theres no simulation in DB."); + + } + + @Test + public void dispatcherThreadDispatchesWaitingJobsWithSimulationsIn() throws SQLException, DataAccessException, InterruptedException, PropertyVetoException, MathException, ExpressionBindingException { + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + 
mockMessagingServiceSim, mockHtcProxy, true); + SimulationDispatcher.DispatchThread thread = simulationDispatcher.dispatchThread; + // Create and insert simulation. Then ensure that this simulation has it's job status changed to dispatched + Simulation mockSimulation = DispatcherTestUtils.createMockSimulation(20, 20, 20); + mockSimulationDB.insertSimulation(DispatcherTestUtils.alice, mockSimulation); + DispatcherTestUtils.insertOrUpdateStatus(mockSimulation.getKey(), DispatcherTestUtils.jobIndex, DispatcherTestUtils.taskID, DispatcherTestUtils.alice, + SimulationJobStatus.SchedulerStatus.WAITING, mockSimulationDB); + synchronized (thread.dispatcherNotifyObject){ + thread.dispatcherNotifyObject.notify(); + } + synchronized (thread.finishListener){ + thread.finishListener.wait(); + } + + SimulationJobStatus jobStatus = mockSimulationDB.getLatestSimulationJobStatus(mockSimulation.getKey(), 0); + Assertions.assertTrue(jobStatus.getSchedulerStatus().isDispatched(), "Dispatches"); + } + + + + //###################### Test Simulation Monitor ########################## + // Rig the mock simulation DB to return a simulation job status that features some misbehavior of what's expected, + // which prompts for removal by the zombie killer + @Test + public void zombieKillerTest() throws SQLException, DataAccessException, InterruptedException, IOException { + SimulationDispatcher.INITIAL_ZOMBIE_DELAY = 10; + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, false); + DispatcherTestUtils.insertOrUpdateStatus(mockSimulationDB); + mockHtcProxy.jobsKilledSafely.clear(); + + mockSimulationDB.badLatestSimulation = MockSimulationDB.BadLatestSimulation.HIGHER_TASK_ID; + SimulationDispatcher.SimulationMonitor.ZombieKiller zombieKiller = simulationDispatcher.simMonitor.initialZombieKiller; + zombieKiller.run(); + 
Assertions.assertTrue(logOutPut.toString().contains(SimulationDispatcher.SimulationMonitor.ZombieKiller.newJobFound)); + Assertions.assertEquals(1, mockHtcProxy.jobsKilledSafely.size()); + + mockSimulationDB.badLatestSimulation = MockSimulationDB.BadLatestSimulation.RETURN_NULL; + zombieKiller.run(); + Assertions.assertTrue(logOutPut.toString().contains(SimulationDispatcher.SimulationMonitor.ZombieKiller.noJob)); + Assertions.assertEquals(2, mockHtcProxy.jobsKilledSafely.size()); + + mockSimulationDB.badLatestSimulation = MockSimulationDB.BadLatestSimulation.IS_DONE; + zombieKiller.run(); + Assertions.assertTrue(logOutPut.toString().contains(SimulationDispatcher.SimulationMonitor.ZombieKiller.jobIsAlreadyDone)); + Assertions.assertEquals(3, mockHtcProxy.jobsKilledSafely.size()); + } + + @Test + public void queueFlusherKillIdleJobs() throws SQLException, DataAccessException, InterruptedException { + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, false); + DispatcherTestUtils.insertOrUpdateStatus(mockSimulationDB); + + SimulationDispatcher.SimulationMonitor simMonitor = simulationDispatcher.simMonitor; + SimulationDispatcher.SimulationMonitor.QueueFlusher queueFlusher = simMonitor.initialQueueFlusher; + SimulationStateMachine sm = simulationDispatcher.simDispatcherEngine.getSimulationStateMachine(DispatcherTestUtils.simKey, DispatcherTestUtils.jobIndex); + sm.setSolverProcessTimestamp(0); + Thread queueThread = new Thread(queueFlusher); + queueThread.start(); + int retries = 0; + while (queueThread.getState() != Thread.State.TIMED_WAITING){ + if (retries == 10){ + break; + } + Thread.sleep(500); + retries += 1; + } + synchronized (simMonitor.monitorNotifyObject){ + simMonitor.monitorNotifyObject.notify(); + } + synchronized (queueFlusher.finishListener){ + queueFlusher.finishListener.wait(); + } + + SimulationJobStatus status = 
mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, DispatcherTestUtils.jobIndex); + Assertions.assertTrue(status.getSchedulerStatus().isFailed()); + Assertions.assertTrue(mockHtcProxy.jobsKilledUnsafely.contains(status.getSimulationExecutionStatus().getHtcJobID())); + Assertions.assertTrue(logOutPut.toString().contains(SimulationDispatcher.SimulationMonitor.QueueFlusher.timeOutFailure)); + + } + + @Test + public void queueFlusherKillsNoLongerReferencedSims() throws SQLException, DataAccessException, InterruptedException { + SimulationDispatcher simulationDispatcher = SimulationDispatcher.simulationDispatcherCreator(mockSimulationDB, mockMessagingServiceInternal, + mockMessagingServiceSim, mockHtcProxy, false); + SimulationDispatcher.SimulationMonitor simMonitor = simulationDispatcher.simMonitor; + SimulationDispatcher.SimulationMonitor.QueueFlusher queueFlusher = simMonitor.initialQueueFlusher; + + mockSimulationDB.insertUnreferencedSimKey(DispatcherTestUtils.simKey); + DispatcherTestUtils.insertOrUpdateStatus(mockSimulationDB); + Thread queueThread = new Thread(queueFlusher); + queueThread.start(); + int retries = 0; + while (queueThread.getState() != Thread.State.TIMED_WAITING){ + if (retries == 10){ + break; + } + Thread.sleep(500); + retries += 1; + } + synchronized (simMonitor.monitorNotifyObject){ + simMonitor.monitorNotifyObject.notify(); + } + synchronized (queueFlusher.finishListener){ + queueFlusher.finishListener.wait(); + } + SimulationJobStatus status = mockSimulationDB.getLatestSimulationJobStatus(DispatcherTestUtils.simKey, DispatcherTestUtils.jobIndex); + Assertions.assertTrue(status.getSchedulerStatus().isFailed()); + Assertions.assertTrue(mockHtcProxy.jobsKilledUnsafely.contains(status.getSimulationExecutionStatus().getHtcJobID())); + Assertions.assertTrue(logOutPut.toString().contains(SimulationDispatcher.SimulationMonitor.QueueFlusher.unreferencedFailure)); + } + +} diff --git 
a/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationStateMachineTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationStateMachineTest.java
new file mode 100644
index 0000000000..754624a7ae
--- /dev/null
+++ b/vcell-server/src/test/java/cbit/vcell/message/server/dispatcher/SimulationStateMachineTest.java
@@ -0,0 +1,303 @@
+package cbit.vcell.message.server.dispatcher;
+
+import cbit.rmi.event.WorkerEvent;
+import cbit.vcell.math.MathException;
+import cbit.vcell.message.VCMessagingException;
+import cbit.vcell.message.VCellTopic;
+import cbit.vcell.message.messages.StatusMessage;
+import cbit.vcell.parser.ExpressionBindingException;
+import cbit.vcell.server.SimulationJobStatus;
+import cbit.vcell.solver.Simulation;
+import cbit.vcell.solver.VCSimulationIdentifier;
+import cbit.vcell.solver.server.SimulationMessage;
+import org.junit.jupiter.api.*;
+import org.vcell.util.DataAccessException;
+import org.vcell.util.document.KeyValue;
+import org.vcell.util.document.User;
+
+import java.beans.PropertyVetoException;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Unit tests for {@link SimulationStateMachine} that run against {@link MockSimulationDB} and
+ * {@link MockVCMessageSession}. A test typically seeds the mock database with a job status,
+ * drives the state machine with a request or worker event, and then checks both the stored
+ * status and the status published on {@link VCellTopic#ClientStatusTopic}.
+ */
+@Tag("Fast")
+public class SimulationStateMachineTest {
+    private static final User testUser = DispatcherTestUtils.alice;
+    private static final MockVCMessageSession testMessageSession = new MockVCMessageSession();
+    private static final int jobIndex = DispatcherTestUtils.jobIndex;
+    private static final int taskID = DispatcherTestUtils.taskID;
+    private static final KeyValue simKey = DispatcherTestUtils.simKey;
+    private static final VCSimulationIdentifier simID = DispatcherTestUtils.simID;
+
+    private MockSimulationDB simulationDB;
+    private SimulationStateMachine stateMachine;
+
+    /** Calls {@code DispatcherTestUtils.setRequiredProperties()} once before any test runs. */
+    @BeforeAll
+    public static void setSystemProperties(){
+        DispatcherTestUtils.setRequiredProperties();
+    }
+
+    /** Calls {@code DispatcherTestUtils.restoreRequiredProperties()} once after all tests have run. */
+    @AfterAll
+    public static void restoreSystemProperties(){
+        DispatcherTestUtils.restoreRequiredProperties();
+    }
+
+    /** Gives each test a new mock database and a new state machine for {@code simKey}/{@code jobIndex}. */
+    @BeforeEach
+    public void setUp(){
+        simulationDB = new MockSimulationDB();
+        stateMachine = new SimulationStateMachine(simKey, jobIndex);
+    }
+
+    /**
+     * One test case: the fields used to build a {@link WorkerEvent}, the scheduler status a test may
+     * store in the database first, and {@code changesResult}, the message attached to the assertion.
+     */
+    private record ChangedStateValues(
+            VCSimulationIdentifier simID,
+            SimulationJobStatus.SchedulerStatus schedulerStatus,
+            int workerEventJob,
+            int taskID,
+            String changesResult
+    ){ }
+
+    /** Builds a worker event of type {@code w.workerEventJob} for {@code simKey}/{@code jobIndex} with a "worker accepted" message. */
+    private WorkerEvent createWorkerEvent(ChangedStateValues w){
+        SimulationMessage acceptedSimulationMessage = SimulationMessage.workerAccepted("accepted");
+        return new WorkerEvent(w.workerEventJob, simKey,
+                w.simID, jobIndex, "",
+                w.taskID, null, null,
+                acceptedSimulationMessage);
+    }
+
+    /** Returns what the mock database reports as the latest status for {@code simKey}/{@code jobIndex}. */
+    private SimulationJobStatus getLatestJobSubmission() throws SQLException, DataAccessException {
+        return simulationDB.getLatestSimulationJobStatus(simKey, jobIndex);
+    }
+
+    /**
+     * Fetches a message from the client status topic of the shared mock session and returns its payload.
+     * The tests below expect {@link NoSuchElementException} from this call when nothing was published.
+     */
+    private SimulationJobStatus getClientTopicMessage(){
+        return (SimulationJobStatus) testMessageSession.getTopicMessage(VCellTopic.ClientStatusTopic).getObjectContent();
+    }
+
+    /** Worker events must be rejected without a prior status, after completion, or with a lower task ID. */
+    @Test
+    public void workerEventRejectionsTest() throws SQLException, DataAccessException {
+        int taskID = 16;
+
+        List<ChangedStateValues> changedValues = List.of(
+                new ChangedStateValues(simID, SimulationJobStatus.SchedulerStatus.RUNNING, WorkerEvent.JOB_WORKER_ALIVE, taskID, "No old status."), // no old status failure
+                new ChangedStateValues(simID, SimulationJobStatus.SchedulerStatus.COMPLETED, WorkerEvent.JOB_WORKER_ALIVE, taskID, "Work is already done."), // work is done failure
+                new ChangedStateValues(simID, SimulationJobStatus.SchedulerStatus.RUNNING, WorkerEvent.JOB_WORKER_ALIVE, 0, "Task ID is lower") // old status has higher number taskID failure
+        );
+
+        for (int i = 0; i < changedValues.size(); i++){
+            ChangedStateValues workerEventChangedValues = changedValues.get(i);
+            // Case 0 runs against an empty database; every later case needs its own status stored first.
+            if (i > 0) {
+                DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, workerEventChangedValues.schedulerStatus, simulationDB);
+            }
+            WorkerEvent workerEvent = createWorkerEvent(workerEventChangedValues);
+            Assertions.assertFalse(stateMachine.isWorkerEventOkay(workerEvent, simulationDB), workerEventChangedValues.changesResult);
+        }
+
+        ChangedStateValues passingWorkerValues= new ChangedStateValues(simID, null, WorkerEvent.JOB_WORKER_ALIVE , taskID, "");
+        WorkerEvent passingWorkerEvent = createWorkerEvent(passingWorkerValues);
+
+        for (SimulationJobStatus.SchedulerStatus passingStatus: SimulationJobStatus.SchedulerStatus.values()){
+            if (!passingStatus.isDone()){
+                DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, passingStatus, simulationDB);
+                Assertions.assertTrue(stateMachine.isWorkerEventOkay(passingWorkerEvent, simulationDB));
+            }
+        }
+
+    }
+
+    /** Scenarios that must end in a failed status or an exception instead of a normal transition. */
+    @Test
+    public void stateShouldTransitionToFailure() throws SQLException, DataAccessException, VCMessagingException, PropertyVetoException, MathException, ExpressionBindingException {
+        List<ChangedStateValues> changedValues = List.of(
+                new ChangedStateValues(simID, null, WorkerEvent.JOB_FAILURE, taskID, "The current worker has failed."),
+                new ChangedStateValues(simID, null, WorkerEvent.JOB_WORKER_EXIT_ERROR, taskID, "The current worker exited with an error.")
+        );
+
+        for (ChangedStateValues changedValue : changedValues){
+            DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+            stateMachine.onWorkerEvent(createWorkerEvent(changedValue), simulationDB, testMessageSession);
+            SimulationJobStatus result = getLatestJobSubmission();
+            Assertions.assertTrue(result.getSchedulerStatus().isFailed(), changedValue.changesResult);
+            Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), changedValue.changesResult);
+        }
+
+        simulationDB = new MockSimulationDB();
+        StatusMessage statusMessage = stateMachine.onStartRequest(DispatcherTestUtils.bob, simID, simulationDB, testMessageSession);
+        Assertions.assertTrue(statusMessage.getSimulationJobStatus().getSchedulerStatus().isFailed(), "Different from initial user that owns the simulation");
+
+        SimulationJobStatus jobStatus = getLatestJobSubmission();
+        Assertions.assertNull(jobStatus, "If it fails on start request, there should be nothing in the DB.");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Only the client receives start request failure status.");
+
+        DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+        Assertions.assertThrows(RuntimeException.class,
+                () -> {stateMachine.onStartRequest(testUser, simID, simulationDB, testMessageSession);},
+                "Can't start simulation job unless previous is done.");
+        Assertions.assertThrows(NoSuchElementException.class,() -> getClientTopicMessage().getSchedulerStatus().isFailed(), "No message sent to client.");
+
+
+        DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+        jobStatus = getLatestJobSubmission();
+        stateMachine.onSystemAbort(jobStatus, "Test Abort", simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed());
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "On abort client gets failed status.");
+
+        // Dispatch failures: dispatching a job that is not waiting, and memory-limit rejections.
+        Simulation memoryIntensiveSimulation = DispatcherTestUtils.createMockSimulation(900, 900, 900);
+        Simulation powerMemoryIntensiveSimulation = DispatcherTestUtils.createMockSimulation(9000, 9000, 5000, MockSimulationDB.powerUser);
+        powerMemoryIntensiveSimulation.getSolverTaskDescription().setTimeoutDisabled(true);
+
+        DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+        Assertions.assertThrows(RuntimeException.class,
+                () -> {stateMachine.onDispatch(memoryIntensiveSimulation, getLatestJobSubmission(), simulationDB, testMessageSession);},
+                "Can't dispatch simulation that is already running.");
+        Assertions.assertThrows(NoSuchElementException.class, () -> getClientTopicMessage().getSchedulerStatus().isFailed(), "No message sent to client when dispatching a simulation that is already running.");
+
+        DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+        stateMachine.onDispatch(memoryIntensiveSimulation, getLatestJobSubmission(), simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed(), "Memory size too large");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Failed because of memory size.");
+
+        DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, MockSimulationDB.powerUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+        stateMachine.onDispatch(powerMemoryIntensiveSimulation, getLatestJobSubmission(), simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed(), "Memory size too large");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Failed because of memory size.");
+
+        DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+        statusMessage = stateMachine.onStopRequest(DispatcherTestUtils.bob, getLatestJobSubmission(), simulationDB, testMessageSession);
+        Assertions.assertTrue(statusMessage.getSimulationJobStatus().getSchedulerStatus().isFailed(), "Stopping as another user.");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Can't stop as another user.");
+    }
+
+    /** A start request against an empty database must store a waiting status and publish it to the client. */
+    @Test
+    public void stateShouldTransitionToWaiting() throws SQLException, VCMessagingException, DataAccessException {
+        stateMachine.onStartRequest(testUser, simID, simulationDB, testMessageSession);
+        SimulationJobStatus jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isWaiting(), "Just started new task.");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isWaiting());
+    }
+
+    /** A waiting job becomes dispatched on a job-accepted event or an onDispatch call; the default seeded status must stay running. */
+    @Test
+    public void stateShouldTransitionToDispatched() throws SQLException, DataAccessException, VCMessagingException, PropertyVetoException, MathException, ExpressionBindingException {
+        DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+        WorkerEvent acceptedWorker = createWorkerEvent(new ChangedStateValues(simID, null, WorkerEvent.JOB_ACCEPTED, taskID, "Worker just got accepted"));
+        stateMachine.onWorkerEvent(acceptedWorker, simulationDB, testMessageSession);
+        SimulationJobStatus jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isDispatched(), "Job recently got accepted, only works if previous state was waiting.");
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isDispatched());
+
+        DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+        stateMachine.onWorkerEvent(acceptedWorker, simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isRunning(), "The state has not changed from running, because something that is running can not be dispatched.");
+
+
+        DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+        Simulation simulation = DispatcherTestUtils.createMockSimulation(50, 50, 50);
+        stateMachine.onDispatch(simulation, getLatestJobSubmission(), simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isDispatched());
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isDispatched());
+
+        DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, MockSimulationDB.powerUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+        simulation = DispatcherTestUtils.createMockSimulation(900, 900, 900, MockSimulationDB.powerUser);
+        simulation.getSolverTaskDescription().setTimeoutDisabled(true);
+        stateMachine.onDispatch(simulation, getLatestJobSubmission(), simulationDB, testMessageSession);
+        jobStatus = getLatestJobSubmission();
+        Assertions.assertTrue(jobStatus.getSchedulerStatus().isDispatched());
+        Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isDispatched());
+    }
+
+    /** With a waiting job stored, only progress, new-data, starting and worker-alive events may produce a running status. */
+    @Test
+    public void stateShouldTransitionToRunning() throws SQLException, DataAccessException, VCMessagingException {
+        for (int workerStatus: WorkerEvent.ALL_JOB_EVENTS){
+            WorkerEvent workerEvent = createWorkerEvent(new ChangedStateValues(simID, null, workerStatus, taskID, ""));
+            DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, testUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
+            stateMachine.onWorkerEvent(workerEvent, simulationDB, testMessageSession);
+            SimulationJobStatus jobStatus = getLatestJobSubmission();
+            if (workerEvent.isProgressEvent() || workerEvent.isNewDataEvent() || workerEvent.isStartingEvent() || workerEvent.isWorkerAliveEvent()){
+                Assertions.assertTrue(jobStatus.getSchedulerStatus().isRunning());
+                Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isRunning());
+            } else {
+                Assertions.assertFalse(jobStatus.getSchedulerStatus().isRunning());
+                try {
+                    Assertions.assertFalse(getClientTopicMessage().getSchedulerStatus().isRunning());
+                } catch (NoSuchElementException ignored){}
+            }
+        }
+    }
+
+    /** Starting from the default seeded status, only a completed worker event may produce a completed status. */
+    @Test
+    public void stateShouldTransitionToCompleted() throws SQLException, VCMessagingException, DataAccessException {
+        for (int workerStatus : WorkerEvent.ALL_JOB_EVENTS){
+            WorkerEvent workerEvent = createWorkerEvent(new ChangedStateValues(simID, SimulationJobStatus.SchedulerStatus.RUNNING, workerStatus, taskID, ""));
+            DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
+            stateMachine.onWorkerEvent(workerEvent, simulationDB, testMessageSession);
+            SimulationJobStatus jobStatus = getLatestJobSubmission();
+            if (workerEvent.isCompletedEvent()){
+                Assertions.assertTrue(jobStatus.getSchedulerStatus().isCompleted());
+                Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isCompleted());
+            } else {
+                Assertions.assertFalse(jobStatus.getSchedulerStatus().isCompleted());
+                try {
+                    Assertions.assertFalse(getClientTopicMessage().getSchedulerStatus().isCompleted());
+                } catch (NoSuchElementException ignored){}
+            }
+        }
+    }
+
+    /** Stopping an active job must yield a stopped status; for every other status the stop request must return null. */
+    @Test
+    public void stateShouldTransitionToStopped() throws SQLException, DataAccessException, VCMessagingException {
+
+        for (SimulationJobStatus.SchedulerStatus status : SimulationJobStatus.SchedulerStatus.values()){
+            DispatcherTestUtils.insertOrUpdateStatus(simKey,jobIndex, taskID,testUser, status, simulationDB);
+            if (status.isActive()){
+                stateMachine.onStopRequest(testUser, getLatestJobSubmission(), simulationDB, testMessageSession);
+                Assertions.assertTrue(getLatestJobSubmission().getSchedulerStatus().isStopped(), "");
+                Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isStopped());
+            } else {
+                StatusMessage statusMessage = stateMachine.onStopRequest(testUser, getLatestJobSubmission(), simulationDB, testMessageSession);
+                Assertions.assertNull(statusMessage);
+                try {
+                    // NOTE(review): this checks isCompleted(), not isStopped() - confirm that is intended.
+                    Assertions.assertFalse(getClientTopicMessage().getSchedulerStatus().isCompleted());
+                } catch (NoSuchElementException ignored){}
+            }
+        }
+    }
+
+    /** Placeholder: per the printed message, the queued state is not used in the state machine. */
+    @Test
+    public void stateShouldTransitionToQueued(){
+        System.out.print("Not used in state machine");
+    }
+
+
+}
diff --git a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
index 7b7955546f..dda5334a7a 100644
--- a/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
+++ b/vcell-server/src/test/java/cbit/vcell/message/server/htc/slurm/SlurmProxyTest.java
@@ -79,7 +79,11 @@ public void setup()
 		setProperty(PropertyLoader.htc_vcellsolvers_docker_name, "ghcr.io/virtualcell/vcell-solvers:v0.8.1.2");
 		setProperty(PropertyLoader.htc_vcellbatch_solver_list,
"RungeKuttaFehlberg,HybridMilstein,StochGibson,Langevin,AdamsMoulton,Smoldyn,MovingBoundary,SundialsPDE,CombinedSundials,NFSim"); setProperty(PropertyLoader.htc_vcellbatch_docker_name, "ghcr.io/virtualcell/vcell-batch:7.6.0.43"); - } + + setProperty(PropertyLoader.htcPowerUserMemoryFloorMB, "51200"); + setProperty(PropertyLoader.htcMinMemoryMB, "4096"); + setProperty(PropertyLoader.htcMaxMemoryMB, "81920"); + } @AfterEach public void teardown() { diff --git a/vcell-server/src/test/resources/log4j2-test.xml b/vcell-server/src/test/resources/log4j2-test.xml new file mode 100644 index 0000000000..7d78860d47 --- /dev/null +++ b/vcell-server/src/test/resources/log4j2-test.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub index 009d8ed87a..e9cb49f785 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/adams_moulton/V_REL_274633859_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub index 3e4a33399d..1b2591f59d 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/cvode/V_REL_274630682_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# 
VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub index 4ac98ad675..c253addba5 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/finite_volume/V_REL_274514696_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub index 742352b14c..858d70d464 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson/V_REL_274635122_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub index c44c6bbb6d..cbdb1552d7 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/gibson_milstein/V_REL_274641698_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception 
NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub index f61745b830..3058343356 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/langevin/V_REL_274672135_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub index 7ac68f8a5e..4945567251 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/moving_boundary/V_REL_274641196_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub index a16c0ffde0..c7f19aa926 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/nfsim/V_REL_274642453_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit 
source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub index ada20fa438..095da3b7b5 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/runge_kutta_fehlberg/V_REL_274631114_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub index 8fb09c12e8..087bc864e5 100644 --- a/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub +++ b/vcell-server/src/test/resources/slurm_fixtures/smoldyn/V_REL_274630052_0_0.slurm.sub @@ -8,7 +8,7 @@ #SBATCH --mem=4096M #SBATCH --no-kill #SBATCH --no-requeue -# VCell SlurmProxy memory limit source='Exception NoSuchFileException used FALLBACK_MEM_LIMIT_MB' +# VCell SlurmProxy memory limit source='default memory limit' TMPDIR=/scratch/vcell diff --git a/webapp-ng/Dockerfile-webapp-remote b/webapp-ng/Dockerfile-webapp-remote new file mode 100644 index 0000000000..ded448c15f --- /dev/null +++ b/webapp-ng/Dockerfile-webapp-remote @@ -0,0 +1,24 @@ +# Build stage +FROM node:20.11-alpine3.19 AS build + +RUN apk update && apk add git + +RUN mkdir -p /app + +WORKDIR /app + +COPY package.json . +COPY package-lock.json . + +RUN npm install --legacy-peer-deps + +COPY . . 
+ +RUN npm run build_remote + +# ----------------- + +FROM nginx:1.17.1-alpine +COPY --from=build /app/dist/login-demo /usr/share/nginx/html +COPY ./nginx-custom.conf /etc/nginx/conf.d/default.conf +EXPOSE 80 diff --git a/webapp-ng/angular.json b/webapp-ng/angular.json index 3392559567..024c5f9592 100644 --- a/webapp-ng/angular.json +++ b/webapp-ng/angular.json @@ -139,6 +139,32 @@ "maximumWarning": "6kb" } ] + }, + "configuration_remote": { + "fileReplacements": [ + { + "replace": "src/environments/environment.ts", + "with": "src/environments/environment.remote.ts" + } + ], + "optimization": true, + "outputHashing": "all", + "sourceMap": false, + "namedChunks": false, + "extractLicenses": true, + "vendorChunk": false, + "buildOptimizer": true, + "budgets": [ + { + "type": "initial", + "maximumWarning": "2mb", + "maximumError": "5mb" + }, + { + "type": "anyComponentStyle", + "maximumWarning": "6kb" + } + ] } }, @@ -161,6 +187,9 @@ }, "configuration_island": { "buildTarget": "login-demo:build:configuration_island" + }, + "configuration_remote": { + "buildTarget": "login-demo:build:configuration_remote" } } }, diff --git a/webapp-ng/package.json b/webapp-ng/package.json index debb8a544e..46cc4e5224 100644 --- a/webapp-ng/package.json +++ b/webapp-ng/package.json @@ -9,6 +9,7 @@ "build_dev": "ng build -c configuration_dev", "build_stage": "ng build -c configuration_stage", "build_island": "ng build -c configuration_island", + "build_remote": "ng build -c configuration_remote", "test": "ng test", "test:ci": "ng test --no-watch --no-progress --browsers=ChromeHeadlessCI", "lint": "ng lint", diff --git a/webapp-ng/src/environments/environment.remote.ts b/webapp-ng/src/environments/environment.remote.ts new file mode 100644 index 0000000000..30153365c1 --- /dev/null +++ b/webapp-ng/src/environments/environment.remote.ts @@ -0,0 +1,44 @@ +import config from '../../auth_config.json'; + +const { domain, clientId, authorizationParams: { audience }, apiUri, errorPath } = config 
as { + domain: string; + clientId: string; + authorizationParams: { + audience?: string; + }, + apiUri: string; + errorPath: string; +}; + +export const environment = { + production: true, + auth: { + domain, + clientId, + authorizationParams: { + audience: `${audience}`, + redirect_uri: window.location.origin, + }, + errorPath, + }, + apiUri: `${apiUri}`, + httpInterceptor: { + allowedList: [ + { + // uri: `${config.apiUri}/api/*`, + // uri: `${apiUri}/api/*`, + // uri: '/api/*', + uri: 'https://minikube-remote/api/*', + + // allowAnonymous: true, + tokenOptions: { + authorizationParams: { + audience: `${audience}`, + scope: 'openid profile email' + } + } + }, + ], + + }, +};